Merge branch 'master' of file:///home/lavaux/Dropbox/gitRoot/CosmoToolbox

This commit is contained in:
Guilhem Lavaux 2013-03-07 09:39:14 -06:00
commit b1ad29a99b
81 changed files with 19752 additions and 12 deletions

View file

@ -4,23 +4,58 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
# Declare the project; this also defines PROJECT_SOURCE_DIR / PROJECT_BINARY_DIR.
project(CosmoToolbox)

# User-facing build switches. All are cached booleans and can be overridden
# on the command line with -D<NAME>=ON|OFF.
option(BUILD_SHARED_LIBS "Build shared libraries." OFF)
option(BUILD_STATIC_LIBS "Build static libraries." ON)
option(ENABLE_OPENMP "Enable OpenMP support." OFF)

# Helper modules used below: git revision lookup, external (non-CMake)
# sub-builds, and OpenMP compiler-flag detection.
include(GetGitRevisionDescription)
include(ExternalProject)
include(FindOpenMP)

# Query the git checkout and store the results in HEAD and GIT_VER.
# NOTE(review): presumably HEAD receives the ref spec and GIT_VER the commit
# hash, as in the common GetGitRevisionDescription module -- confirm against
# the copy shipped with this project.
get_git_head_revision(HEAD GIT_VER)
# Locate the headers of the NetCDF (C and C++) and GSL dependencies.
# Each result is cached; when nothing is found the variable holds
# "<VAR>-NOTFOUND" and no error is raised here.
find_path(NETCDF_INCLUDE_PATH NAMES netcdf.h)
find_path(NETCDFCPP_INCLUDE_PATH NAMES netcdfcpp.h netcdf)
find_path(GSL_INCLUDE_PATH NAMES gsl/gsl_blas.h)

# Decide which NetCDF C++ binding is installed: an entry called "netcdf"
# next to the C++ headers selects the NetCDF-4 C++ API, anything else falls
# back to the legacy (version 3) API. The decision is handed to the C++
# sources through a small generated header in the build tree.
# Caveat: the fallback branch is also taken when no C++ binding was found at
# all, because the NOTFOUND path never exists.
if(EXISTS "${NETCDFCPP_INCLUDE_PATH}/netcdf")
  set(FOUND_NETCDF4 1)
  # Trailing newline keeps the generated header well-formed for strict compilers.
  file(WRITE "${CMAKE_BINARY_DIR}/src/ctool_netcdf_ver.hpp" "#define NETCDFCPP4 1\n")
else()
  set(FOUND_NETCDF3 1)
  file(WRITE "${CMAKE_BINARY_DIR}/src/ctool_netcdf_ver.hpp" "#undef NETCDFCPP4\n")
endif()

# Locate the libraries to link against.
find_library(NETCDF_LIBRARY netcdf)
# One lookup covers both spellings of the C++ binding; the legacy name
# (netcdf_c++) is listed first, so it wins whenever both are installed.
find_library(NETCDFCPP_LIBRARY NAMES netcdf_c++ netcdf_c++4)
find_library(GSL_LIBRARY gsl)
find_library(GSLCBLAS_LIBRARY gslcblas)
# Build the bundled libsharp (spherical harmonic transforms) with its own
# configure/make build system.
#
# PROJECT_SOURCE_DIR is used instead of CMAKE_SOURCE_DIR so the path stays
# correct when this project is pulled in through add_subdirectory().
# (A preliminary assignment of HDF5_FIND_COMPONENTS used to sit here; it was
# dead because the variable is reassigned right before FindHDF5 is included.)
set(SHARP_SOURCE "${PROJECT_SOURCE_DIR}/external/sharp")

# libsharp is built in-source and drops its products into the "auto"
# sub-directory of its source tree; everything below is derived from it.
set(DEP_BUILD "${SHARP_SOURCE}/auto")

# No install step is needed: the libraries and headers are consumed directly
# from DEP_BUILD, hence the placeholder INSTALL_COMMAND.
ExternalProject_Add(sharp
  SOURCE_DIR ${SHARP_SOURCE}
  BUILD_IN_SOURCE 1
  CONFIGURE_COMMAND ${SHARP_SOURCE}/configure --prefix=${DEP_BUILD}
  BUILD_COMMAND make
  INSTALL_COMMAND echo "No install"
)

# Static archives produced by the libsharp build. They only exist after the
# "sharp" external project has been built, so targets linking against
# SHARP_LIBRARIES must depend on it.
set(SHARP_LIBRARY "${DEP_BUILD}/lib/libsharp.a")
set(FFTPACK_LIBRARY "${DEP_BUILD}/lib/libfftpack.a")
set(CUTILS_LIBRARY "${DEP_BUILD}/lib/libc_utils.a")

# Link order matters for static archives: libsharp first, then the libraries
# it is listed ahead of.
set(SHARP_LIBRARIES ${SHARP_LIBRARY} ${FFTPACK_LIBRARY} ${CUTILS_LIBRARY})
set(SHARP_INCLUDE_PATH "${DEP_BUILD}/include")
# --- HDF5 -------------------------------------------------------------------
# FindHDF5 is included directly rather than through find_package(), so the
# component list it expects (high-level + C++ bindings) is set by hand.
set(HDF5_FIND_COMPONENTS HL CXX)

# Optional user hint: forward -DHDF5_ROOTDIR=<prefix> to the find module
# through the HDF5_ROOT environment variable (configure-time only).
if(HDF5_ROOTDIR)
  set(ENV{HDF5_ROOT} ${HDF5_ROOTDIR})
endif()
include(FindHDF5)

# --- pkg-config based dependencies -------------------------------------------
# Neither lookup is REQUIRED; callers must test FFTW3_FOUND / EIGEN3_FOUND.
include(FindPkgConfig)
pkg_check_modules(FFTW3 fftw3>=3.3)
pkg_check_modules(EIGEN3 eigen3)

# --- NetCDF -------------------------------------------------------------------
# Mark NetCDF as mandatory for the standard-args handling that follows.
include(FindPackageHandleStandardArgs)
set(NETCDF_FIND_REQUIRED TRUE)

339
external/sharp/COPYING vendored Normal file
View file

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

55
external/sharp/Makefile vendored Normal file
View file

@ -0,0 +1,55 @@
# Top-level makefile of the bundled libsharp distribution (GNU make).
#
# The build configuration is selected through SHARP_TARGET, which names the
# file config/config.$(SHARP_TARGET) to include. It defaults to "auto", the
# configuration generated by ./configure (see README.compilation).
SHARP_TARGET?=auto

# Only reachable when SHARP_TARGET was explicitly set to an empty value:
# abort with a helpful message instead of including a non-existent file.
ifndef SHARP_TARGET
  SHARP_TARGET:=$(error SHARP_TARGET undefined. Please see README.compilation for help)UNDEFINED
endif

# Default goal: build every binary and copy the public headers.
default: compile_all

# These goals name actions, not files; declaring them phony keeps them
# working even if a file or directory with the same name appears.
.PHONY: default compile_all hdrclean hdrcopy test perftest

SRCROOT:=$(shell pwd)

# Target-specific settings and shared rules. The variables used further down
# (LIBDIR, BINDIR, INCDIR, ARCREATE, CL, CLFLAGS) and the "<dir>_mkdir"
# prerequisites are not defined in this file.
# NOTE(review): presumably they come from these two includes -- confirm.
include $(SRCROOT)/config/config.$(SHARP_TARGET)
include $(SRCROOT)/config/rules.common

# Accumulators filled in by the per-package planck.make fragments below.
all_hdr:=
all_lib:=
all_cbin:=
FULL_INCLUDE:=

include c_utils/planck.make
include libfftpack/planck.make
include libsharp/planck.make
include docsrc/planck.make

# Archive each library from its prerequisites; the output directory is an
# order-only prerequisite so its timestamp never forces a rebuild.
$(all_lib): %: | $(LIBDIR)_mkdir
	@echo "# creating library $*"
	$(ARCREATE) $@ $^

# Link each C binary from its prerequisites.
$(all_cbin): %: | $(BINDIR)_mkdir
	@echo "# linking C binary $*"
	$(CL) -o $@ $^ $(CLFLAGS)

compile_all: $(all_cbin) hdrcopy

# Empty the header installation directory, if it exists.
hdrclean:
	@if [ -d $(INCDIR) ]; then rm -rf $(INCDIR)/* ; fi

# Copy all collected public headers into the installation directory.
hdrcopy: | $(INCDIR)_mkdir
	@if [ "$(all_hdr)" ]; then cp -p $(all_hdr) $(INCDIR); fi

# Convenience aliases: "make <binary>" builds $(BINDIR)/<binary>.
$(notdir $(all_cbin)) : % : $(BINDIR)/%

# Accuracy and functional tests; stops at the first failing command.
test: compile_all
	$(BINDIR)/sharp_acctest && \
	$(BINDIR)/sharp_test healpix 2048 1024 1 0 1 && \
	$(BINDIR)/sharp_test ecp 2047 4096 0 2 1 && \
	$(BINDIR)/sharp_test gauss 2047 4096 0 0 2

# Performance runs over a series of increasing problem sizes.
perftest: compile_all
	$(BINDIR)/sharp_test healpix 2048 1024 0 0 1 && \
	$(BINDIR)/sharp_test gauss 63 128 0 0 1 && \
	$(BINDIR)/sharp_test gauss 127 256 0 0 1 && \
	$(BINDIR)/sharp_test gauss 255 512 0 0 1 && \
	$(BINDIR)/sharp_test gauss 511 1024 0 0 1 && \
	$(BINDIR)/sharp_test gauss 1023 2048 0 0 1 && \
	$(BINDIR)/sharp_test gauss 2047 4096 0 0 1 && \
	$(BINDIR)/sharp_test gauss 4095 8192 0 0 1 && \
	$(BINDIR)/sharp_test gauss 8191 16384 0 0 1

13
external/sharp/README.compilation vendored Normal file
View file

@ -0,0 +1,13 @@
GNU make and GNU gcc (version 4.x) are required for compilation.
Simply run "./configure"; if this fails, please refer to the output of
"./configure --help" for additional hints and, if necessary, provide
additional flags to the configure script.
Once the script finishes successfully, run "make"
(or "gmake"). This should install the compilation products in the
subdirectory "auto/".
Documentation can be created by the command "(g)make doc".
However, this requires the doxygen application to be installed
on your system.
The documentation will be created in the subdirectory doc/.

3974
external/sharp/autom4te.cache/output.0 vendored Normal file

File diff suppressed because it is too large Load diff

79
external/sharp/autom4te.cache/requests vendored Normal file
View file

@ -0,0 +1,79 @@
# This file was generated by Autom4te Sun Nov 6 20:57:04 UTC 2011.
# It contains the lists of macros which have been traced.
# It can be safely removed.
@request = (
bless( [
'0',
1,
[
'/usr/share/autoconf'
],
[
'/usr/share/autoconf/autoconf/autoconf.m4f',
'configure.ac'
],
{
'AM_PROG_F77_C_O' => 1,
'_LT_AC_TAGCONFIG' => 1,
'm4_pattern_forbid' => 1,
'AC_INIT' => 1,
'AC_CANONICAL_TARGET' => 1,
'_AM_COND_IF' => 1,
'AC_CONFIG_LIBOBJ_DIR' => 1,
'AC_SUBST' => 1,
'AC_CANONICAL_HOST' => 1,
'AC_FC_SRCEXT' => 1,
'AC_DEFUN' => 1,
'AC_PROG_LIBTOOL' => 1,
'AM_INIT_AUTOMAKE' => 1,
'AC_CONFIG_SUBDIRS' => 1,
'AM_PATH_GUILE' => 1,
'AM_AUTOMAKE_VERSION' => 1,
'LT_CONFIG_LTDL_DIR' => 1,
'AC_CONFIG_LINKS' => 1,
'AC_REQUIRE_AUX_FILE' => 1,
'LT_SUPPORTED_TAG' => 1,
'm4_sinclude' => 1,
'AM_MAINTAINER_MODE' => 1,
'AC_DEFUN_ONCE' => 1,
'AM_NLS' => 1,
'AM_GNU_GETTEXT_INTL_SUBDIR' => 1,
'_m4_warn' => 1,
'AM_MAKEFILE_INCLUDE' => 1,
'AM_PROG_CXX_C_O' => 1,
'_AM_MAKEFILE_INCLUDE' => 1,
'_AM_COND_ENDIF' => 1,
'AM_ENABLE_MULTILIB' => 1,
'AM_SILENT_RULES' => 1,
'AM_PROG_MOC' => 1,
'AC_CONFIG_FILES' => 1,
'LT_INIT' => 1,
'include' => 1,
'AM_GNU_GETTEXT' => 1,
'AM_PROG_AR' => 1,
'AC_LIBSOURCE' => 1,
'AC_CANONICAL_BUILD' => 1,
'AM_PROG_FC_C_O' => 1,
'AC_FC_FREEFORM' => 1,
'AH_OUTPUT' => 1,
'AC_CONFIG_AUX_DIR' => 1,
'_AM_SUBST_NOTMAKE' => 1,
'm4_pattern_allow' => 1,
'_AM_AUTOCONF_VERSION' => 1,
'AM_PROG_CC_C_O' => 1,
'sinclude' => 1,
'AM_CONDITIONAL' => 1,
'AC_CANONICAL_SYSTEM' => 1,
'AM_XGETTEXT_OPTION' => 1,
'AC_CONFIG_HEADERS' => 1,
'AC_DEFINE_TRACE_LITERAL' => 1,
'AM_POT_TOOLS' => 1,
'm4_include' => 1,
'_AM_COND_ELSE' => 1,
'AU_DEFUN' => 1,
'AC_SUBST_TRACE' => 1
}
], 'Autom4te::Request' )
);

229
external/sharp/autom4te.cache/traces.0 vendored Normal file
View file

@ -0,0 +1,229 @@
m4trace:configure.ac:1: -1- AC_INIT([config/config.auto.in])
m4trace:configure.ac:1: -1- m4_pattern_forbid([^_?A[CHUM]_])
m4trace:configure.ac:1: -1- m4_pattern_forbid([_AC_])
m4trace:configure.ac:1: -1- m4_pattern_forbid([^LIBOBJS$], [do not use LIBOBJS directly, use AC_LIBOBJ (see section `AC_LIBOBJ vs LIBOBJS'])
m4trace:configure.ac:1: -1- m4_pattern_allow([^AS_FLAGS$])
m4trace:configure.ac:1: -1- m4_pattern_forbid([^_?m4_])
m4trace:configure.ac:1: -1- m4_pattern_forbid([^dnl$])
m4trace:configure.ac:1: -1- m4_pattern_forbid([^_?AS_])
m4trace:configure.ac:1: -1- AC_SUBST([SHELL])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([SHELL])
m4trace:configure.ac:1: -1- m4_pattern_allow([^SHELL$])
m4trace:configure.ac:1: -1- AC_SUBST([PATH_SEPARATOR])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PATH_SEPARATOR])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PATH_SEPARATOR$])
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_NAME], [m4_ifdef([AC_PACKAGE_NAME], ['AC_PACKAGE_NAME'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_NAME])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_NAME$])
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_TARNAME], [m4_ifdef([AC_PACKAGE_TARNAME], ['AC_PACKAGE_TARNAME'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_TARNAME])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_TARNAME$])
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_VERSION], [m4_ifdef([AC_PACKAGE_VERSION], ['AC_PACKAGE_VERSION'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_VERSION])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_VERSION$])
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_STRING], [m4_ifdef([AC_PACKAGE_STRING], ['AC_PACKAGE_STRING'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_STRING])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_STRING$])
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_BUGREPORT], [m4_ifdef([AC_PACKAGE_BUGREPORT], ['AC_PACKAGE_BUGREPORT'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_BUGREPORT])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_BUGREPORT$])
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_URL], [m4_ifdef([AC_PACKAGE_URL], ['AC_PACKAGE_URL'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_URL])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_URL$])
m4trace:configure.ac:1: -1- AC_SUBST([exec_prefix], [NONE])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([exec_prefix])
m4trace:configure.ac:1: -1- m4_pattern_allow([^exec_prefix$])
m4trace:configure.ac:1: -1- AC_SUBST([prefix], [NONE])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([prefix])
m4trace:configure.ac:1: -1- m4_pattern_allow([^prefix$])
m4trace:configure.ac:1: -1- AC_SUBST([program_transform_name], [s,x,x,])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([program_transform_name])
m4trace:configure.ac:1: -1- m4_pattern_allow([^program_transform_name$])
m4trace:configure.ac:1: -1- AC_SUBST([bindir], ['${exec_prefix}/bin'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([bindir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^bindir$])
m4trace:configure.ac:1: -1- AC_SUBST([sbindir], ['${exec_prefix}/sbin'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([sbindir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^sbindir$])
m4trace:configure.ac:1: -1- AC_SUBST([libexecdir], ['${exec_prefix}/libexec'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([libexecdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^libexecdir$])
m4trace:configure.ac:1: -1- AC_SUBST([datarootdir], ['${prefix}/share'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([datarootdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^datarootdir$])
m4trace:configure.ac:1: -1- AC_SUBST([datadir], ['${datarootdir}'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([datadir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^datadir$])
m4trace:configure.ac:1: -1- AC_SUBST([sysconfdir], ['${prefix}/etc'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([sysconfdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^sysconfdir$])
m4trace:configure.ac:1: -1- AC_SUBST([sharedstatedir], ['${prefix}/com'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([sharedstatedir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^sharedstatedir$])
m4trace:configure.ac:1: -1- AC_SUBST([localstatedir], ['${prefix}/var'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([localstatedir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^localstatedir$])
m4trace:configure.ac:1: -1- AC_SUBST([includedir], ['${prefix}/include'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([includedir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^includedir$])
m4trace:configure.ac:1: -1- AC_SUBST([oldincludedir], ['/usr/include'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([oldincludedir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^oldincludedir$])
m4trace:configure.ac:1: -1- AC_SUBST([docdir], [m4_ifset([AC_PACKAGE_TARNAME],
['${datarootdir}/doc/${PACKAGE_TARNAME}'],
['${datarootdir}/doc/${PACKAGE}'])])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([docdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^docdir$])
m4trace:configure.ac:1: -1- AC_SUBST([infodir], ['${datarootdir}/info'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([infodir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^infodir$])
m4trace:configure.ac:1: -1- AC_SUBST([htmldir], ['${docdir}'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([htmldir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^htmldir$])
m4trace:configure.ac:1: -1- AC_SUBST([dvidir], ['${docdir}'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([dvidir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^dvidir$])
m4trace:configure.ac:1: -1- AC_SUBST([pdfdir], ['${docdir}'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([pdfdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^pdfdir$])
m4trace:configure.ac:1: -1- AC_SUBST([psdir], ['${docdir}'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([psdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^psdir$])
m4trace:configure.ac:1: -1- AC_SUBST([libdir], ['${exec_prefix}/lib'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([libdir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^libdir$])
m4trace:configure.ac:1: -1- AC_SUBST([localedir], ['${datarootdir}/locale'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([localedir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^localedir$])
m4trace:configure.ac:1: -1- AC_SUBST([mandir], ['${datarootdir}/man'])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([mandir])
m4trace:configure.ac:1: -1- m4_pattern_allow([^mandir$])
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_NAME])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_NAME$])
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_NAME], [/* Define to the full name of this package. */
@%:@undef PACKAGE_NAME])
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_TARNAME])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_TARNAME$])
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_TARNAME], [/* Define to the one symbol short name of this package. */
@%:@undef PACKAGE_TARNAME])
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_VERSION])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_VERSION$])
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_VERSION], [/* Define to the version of this package. */
@%:@undef PACKAGE_VERSION])
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_STRING])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_STRING$])
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_STRING], [/* Define to the full name and version of this package. */
@%:@undef PACKAGE_STRING])
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_BUGREPORT])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_BUGREPORT$])
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_BUGREPORT], [/* Define to the address where bug reports for this package should be sent. */
@%:@undef PACKAGE_BUGREPORT])
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_URL])
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_URL$])
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_URL], [/* Define to the home page for this package. */
@%:@undef PACKAGE_URL])
m4trace:configure.ac:1: -1- AC_SUBST([DEFS])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([DEFS])
m4trace:configure.ac:1: -1- m4_pattern_allow([^DEFS$])
m4trace:configure.ac:1: -1- AC_SUBST([ECHO_C])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([ECHO_C])
m4trace:configure.ac:1: -1- m4_pattern_allow([^ECHO_C$])
m4trace:configure.ac:1: -1- AC_SUBST([ECHO_N])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([ECHO_N])
m4trace:configure.ac:1: -1- m4_pattern_allow([^ECHO_N$])
m4trace:configure.ac:1: -1- AC_SUBST([ECHO_T])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([ECHO_T])
m4trace:configure.ac:1: -1- m4_pattern_allow([^ECHO_T$])
m4trace:configure.ac:1: -1- AC_SUBST([LIBS])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([LIBS])
m4trace:configure.ac:1: -1- m4_pattern_allow([^LIBS$])
m4trace:configure.ac:1: -1- AC_SUBST([build_alias])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([build_alias])
m4trace:configure.ac:1: -1- m4_pattern_allow([^build_alias$])
m4trace:configure.ac:1: -1- AC_SUBST([host_alias])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([host_alias])
m4trace:configure.ac:1: -1- m4_pattern_allow([^host_alias$])
m4trace:configure.ac:1: -1- AC_SUBST([target_alias])
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([target_alias])
m4trace:configure.ac:1: -1- m4_pattern_allow([^target_alias$])
m4trace:configure.ac:3: -1- AC_SUBST([uname_found])
m4trace:configure.ac:3: -1- AC_SUBST_TRACE([uname_found])
m4trace:configure.ac:3: -1- m4_pattern_allow([^uname_found$])
m4trace:configure.ac:12: -1- _m4_warn([obsolete], [The macro `AC_TRY_COMPILE' is obsolete.
You should run autoupdate.], [../../lib/autoconf/general.m4:2615: AC_TRY_COMPILE is expanded from...
configure.ac:12: the top level])
m4trace:configure.ac:12: -1- AC_SUBST([CC])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
m4trace:configure.ac:12: -1- AC_SUBST([CFLAGS])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CFLAGS])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CFLAGS$])
m4trace:configure.ac:12: -1- AC_SUBST([LDFLAGS])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([LDFLAGS])
m4trace:configure.ac:12: -1- m4_pattern_allow([^LDFLAGS$])
m4trace:configure.ac:12: -1- AC_SUBST([LIBS])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([LIBS])
m4trace:configure.ac:12: -1- m4_pattern_allow([^LIBS$])
m4trace:configure.ac:12: -1- AC_SUBST([CPPFLAGS])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CPPFLAGS])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CPPFLAGS$])
m4trace:configure.ac:12: -1- AC_SUBST([CC])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
m4trace:configure.ac:12: -1- AC_SUBST([CC])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
m4trace:configure.ac:12: -1- AC_SUBST([CC])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
m4trace:configure.ac:12: -1- AC_SUBST([CC])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
m4trace:configure.ac:12: -1- AC_SUBST([ac_ct_CC])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([ac_ct_CC])
m4trace:configure.ac:12: -1- m4_pattern_allow([^ac_ct_CC$])
m4trace:configure.ac:12: -1- AC_SUBST([EXEEXT], [$ac_cv_exeext])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([EXEEXT])
m4trace:configure.ac:12: -1- m4_pattern_allow([^EXEEXT$])
m4trace:configure.ac:12: -1- AC_SUBST([OBJEXT], [$ac_cv_objext])
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([OBJEXT])
m4trace:configure.ac:12: -1- m4_pattern_allow([^OBJEXT$])
m4trace:configure.ac:20: -1- AC_SUBST([OPENMP_CFLAGS])
m4trace:configure.ac:20: -1- AC_SUBST_TRACE([OPENMP_CFLAGS])
m4trace:configure.ac:20: -1- m4_pattern_allow([^OPENMP_CFLAGS$])
m4trace:configure.ac:55: -1- AC_SUBST([GCCVERSION])
m4trace:configure.ac:55: -1- AC_SUBST_TRACE([GCCVERSION])
m4trace:configure.ac:55: -1- m4_pattern_allow([^GCCVERSION$])
m4trace:configure.ac:106: -1- AC_SUBST([SILENT_RULE])
m4trace:configure.ac:106: -1- AC_SUBST_TRACE([SILENT_RULE])
m4trace:configure.ac:106: -1- m4_pattern_allow([^SILENT_RULE$])
m4trace:configure.ac:107: -1- AC_SUBST([CC])
m4trace:configure.ac:107: -1- AC_SUBST_TRACE([CC])
m4trace:configure.ac:107: -1- m4_pattern_allow([^CC$])
m4trace:configure.ac:108: -1- AC_SUBST([CCFLAGS_NO_C])
m4trace:configure.ac:108: -1- AC_SUBST_TRACE([CCFLAGS_NO_C])
m4trace:configure.ac:108: -1- m4_pattern_allow([^CCFLAGS_NO_C$])
m4trace:configure.ac:109: -1- AC_SUBST([LDCCFLAGS])
m4trace:configure.ac:109: -1- AC_SUBST_TRACE([LDCCFLAGS])
m4trace:configure.ac:109: -1- m4_pattern_allow([^LDCCFLAGS$])
m4trace:configure.ac:110: -1- AC_SUBST([ARCREATE])
m4trace:configure.ac:110: -1- AC_SUBST_TRACE([ARCREATE])
m4trace:configure.ac:110: -1- m4_pattern_allow([^ARCREATE$])
m4trace:configure.ac:112: -1- AC_CONFIG_FILES([config/config.auto])
m4trace:configure.ac:112: -1- _m4_warn([obsolete], [AC_OUTPUT should be used without arguments.
You should run autoupdate.], [])
m4trace:configure.ac:112: -1- AC_SUBST([LIB@&t@OBJS], [$ac_libobjs])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([LIB@&t@OBJS])
m4trace:configure.ac:112: -1- m4_pattern_allow([^LIB@&t@OBJS$])
m4trace:configure.ac:112: -1- AC_SUBST([LTLIBOBJS], [$ac_ltlibobjs])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([LTLIBOBJS])
m4trace:configure.ac:112: -1- m4_pattern_allow([^LTLIBOBJS$])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([top_builddir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([top_build_prefix])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([srcdir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_srcdir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([top_srcdir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_top_srcdir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([builddir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_builddir])
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_top_builddir])

81
external/sharp/c_utils/c_utils.c vendored Normal file
View file

@ -0,0 +1,81 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Convenience functions
*
* Copyright (C) 2008, 2009, 2010, 2011, 2012 Max-Planck-Society
* Author: Martin Reinecke
*/
#include <stdio.h>
#include "c_utils.h"
/* Write one diagnostic record to stderr in the form
   "<file>, <line> (<func>):" followed by the message on its own line. */
static void emit_diagnostic_ (const char *file, int line, const char *func,
  const char *msg)
  {
  fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg);
  }

/* Report a fatal problem on stderr and terminate the program with exit
   status 1. Never returns. */
void util_fail_ (const char *file, int line, const char *func, const char *msg)
  {
  emit_diagnostic_(file,line,func,msg);
  exit(1);
  }

/* Report a non-fatal problem on stderr and return to the caller. */
void util_warn_ (const char *file, int line, const char *func, const char *msg)
  {
  emit_diagnostic_(file,line,func,msg);
  }
/* Pad an allocation request so that its total size does not land close to a
   multiple of a high power of two (the "critical stride"). Requests below
   half a stride are returned unchanged. For larger requests, if the size plus
   an assumed allocator overhead falls within two cache lines above a stride
   multiple, two cache lines are added. This makes it less likely that two
   arrays start a multiple of the critical stride apart, which in turn lowers
   the risk of cache line contention. */
static size_t manipsize(size_t sz)
  {
  const size_t stride=4096, line=64, slack=32;
  size_t offset;

  if (sz >= stride/2)
    {
    offset = (sz+slack)%stride;
    if (offset <= 2*line)
      sz += 2*line;
    }
  return sz;
  }
#ifdef __SSE__
#include <xmmintrin.h>
#endif

/* Allocate at least sz bytes; the request is first passed through
   manipsize(). A request of 0 bytes yields NULL. When __SSE__ is defined the
   memory comes from _mm_malloc() with 16-byte alignment, otherwise from
   malloc(). An allocation failure is fatal (UTIL_ASSERT). */
void *util_malloc_ (size_t sz)
  {
  void *res;
  if (sz==0) return NULL;
#ifdef __SSE__
  res = _mm_malloc(manipsize(sz),16);
  UTIL_ASSERT(res,"_mm_malloc() failed");
#else
  res = malloc(manipsize(sz));
  UTIL_ASSERT(res,"malloc() failed");
#endif
  return res;
  }

/* Release memory obtained from util_malloc_(); NULL is silently ignored.
   Uses the deallocator matching the allocator chosen above. */
void util_free_ (void *ptr)
  {
  if (ptr==NULL) return;
#ifdef __SSE__
  _mm_free(ptr);
#else
  free(ptr);
#endif
  }

147
external/sharp/c_utils/c_utils.h vendored Normal file
View file

@ -0,0 +1,147 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file c_utils.h
* Convenience functions
*
* Copyright (C) 2008, 2009, 2010, 2011 Max-Planck-Society
* \author Martin Reinecke
* \note This file should only be included from .c files, NOT from .h files.
*/
#ifndef PLANCK_C_UTILS_H
#define PLANCK_C_UTILS_H
#include <math.h>
#include <stdlib.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*! Prints \a file, \a line, \a func and \a msg to stderr, then exits the
    program with status 1. Normally reached through UTIL_ASSERT/UTIL_FAIL. */
void util_fail_ (const char *file, int line, const char *func, const char *msg);
/*! Prints \a file, \a line, \a func and \a msg to stderr and returns.
    Normally reached through UTIL_WARN. */
void util_warn_ (const char *file, int line, const char *func, const char *msg);
/*! Allocates at least \a sz bytes and returns the pointer; returns NULL if
    \a sz is 0. An allocation failure stops the program with an error.
    The result must be released with util_free_(). */
void *util_malloc_ (size_t sz);
/*! Releases memory obtained from util_malloc_(). A NULL \a ptr is ignored. */
void util_free_ (void *ptr);
/* Name of the enclosing function for diagnostics: __func__ when the compiler
   identifies itself as GNU-compatible, the literal "unknown" otherwise. */
#if defined (__GNUC__)
#define UTIL_FUNC_NAME__ __func__
#else
#define UTIL_FUNC_NAME__ "unknown"
#endif
/*! \def UTIL_ASSERT(cond,msg)
    If \a cond is false, print an error message containing function name,
    source file name and line number of the call, as well as \a msg;
    then exit the program with an error status.
    Expands to a single statement (do/while(0)), so it can be used as the
    body of an unbraced if/else without capturing a following \c else. */
#define UTIL_ASSERT(cond,msg) \
  do { \
    if(!(cond)) util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg); \
    } while(0)
/*! \def UTIL_WARN(cond,msg)
    If \a cond is false, print a warning containing function name,
    source file name and line number of the call, as well as \a msg.
    Expands to a single statement (do/while(0)), so it can be used as the
    body of an unbraced if/else without capturing a following \c else. */
#define UTIL_WARN(cond,msg) \
  do { \
    if(!(cond)) util_warn_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg); \
    } while(0)
/*! \def UTIL_FAIL(msg)
    Print an error message containing function name,
    source file name and line number of the call, as well as \a msg;
    then exit the program with an error status. */
#define UTIL_FAIL(msg) \
  util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
/*! \def ALLOC(ptr,type,num)
Allocate space for \a num objects of type \a type via util_malloc_().
The program is stopped with an error if the allocation fails. The
resulting pointer is stored in \a ptr; it is NULL when the requested
size is 0. */
#define ALLOC(ptr,type,num) \
do { (ptr)=(type *)util_malloc_((num)*sizeof(type)); } while (0)
/*! \def RALLOC(type,num)
Allocate space for \a num objects of type \a type via util_malloc_().
The program is stopped with an error if the allocation fails. The
resulting pointer is cast to \a (type*) and is the value of the
expression; it is NULL when the requested size is 0. */
#define RALLOC(type,num) \
((type *)util_malloc_((num)*sizeof(type)))
/*! \def DEALLOC(ptr)
Deallocate \a ptr and set it to NULL. It must have been allocated using
\a ALLOC or \a RALLOC. \a ptr is evaluated more than once. */
#define DEALLOC(ptr) \
do { util_free_(ptr); (ptr)=NULL; } while(0)
/*! \def RESIZE(ptr,type,num)
Free \a ptr and allocate fresh space for \a num objects of type \a type.
The previous contents are NOT preserved (this is free + allocate, not
a realloc). */
#define RESIZE(ptr,type,num) \
do { util_free_(ptr); ALLOC(ptr,type,num); } while(0)
/*! \def GROW(ptr,type,sz_old,sz_new)
If \a sz_new exceeds \a sz_old, RESIZE \a ptr to hold 2*\a sz_new objects
and store that new capacity in \a sz_old; otherwise do nothing.
The previous contents are not preserved. \a sz_old must be an lvalue and
\a sz_new is evaluated more than once. */
#define GROW(ptr,type,sz_old,sz_new) \
do { \
if ((sz_new)>(sz_old)) \
{ RESIZE(ptr,type,2*(sz_new));sz_old=2*(sz_new); } \
} while(0)
/*! \def SET_ARRAY(ptr,i1,i2,val)
Set the entries \a ptr[i1] ... \a ptr[i2-1] to \a val.
The range is half-open; nothing is written when \a i2 <= \a i1.
\a i2 and \a val are re-evaluated on every iteration. */
#define SET_ARRAY(ptr,i1,i2,val) \
do { \
ptrdiff_t cnt_; \
for (cnt_=(i1);cnt_<(i2);++cnt_) (ptr)[cnt_]=(val); \
} while(0)
/*! \def COPY_ARRAY(src,dest,i1,i2)
Copy the entries \a src[i1] ... \a src[i2-1] to
\a dest[i1] ... \a dest[i2-1].
The copy proceeds element by element in ascending index order; the range
is half-open, so nothing is copied when \a i2 <= \a i1. */
#define COPY_ARRAY(src,dest,i1,i2) \
do { \
ptrdiff_t cnt_; \
for (cnt_=(i1);cnt_<(i2);++cnt_) (dest)[cnt_]=(src)[cnt_]; \
} while(0)
/*! \def ALLOC2D(ptr,type,num1,num2)
    Allocate a \a num1 x \a num2 array of \a type that can be indexed as
    \a ptr[i][j]. One block holds the \a num1 row pointers and a second,
    contiguous block holds all \a num1*num2 elements; row i points at
    element i*num2 of that block.
    If \a num1 is 0 the row-pointer allocation yields NULL; in that case
    \a ptr is left NULL and no row is touched (previously this dereferenced
    the NULL pointer). Release with DEALLOC2D. */
#define ALLOC2D(ptr,type,num1,num2) \
  do { \
    size_t cnt_, num1_=(num1), num2_=(num2); \
    ALLOC(ptr,type *,num1_); \
    if ((ptr)!=NULL) \
      { \
      ALLOC((ptr)[0],type,num1_*num2_); \
      for (cnt_=1; cnt_<num1_; ++cnt_) \
        (ptr)[cnt_]=(ptr)[cnt_-1]+num2_; \
      } \
    } while(0)
/*! \def DEALLOC2D(ptr)
    Release an array obtained from ALLOC2D: first the element block (through
    row pointer 0), then the row-pointer block. A NULL \a ptr is accepted.
    \a ptr is set to NULL afterwards. */
#define DEALLOC2D(ptr) \
  do { if(ptr) DEALLOC((ptr)[0]); DEALLOC(ptr); } while(0)
/*! \def FAPPROX(a,b,eps)
    True if \a a and \a b differ by less than \a eps times |\a b|
    (relative tolerance). */
#define FAPPROX(a,b,eps) \
  (((eps)*fabs(b))>fabs((a)-(b)))
/*! \def ABSAPPROX(a,b,eps)
    True if \a a and \a b differ by less than \a eps (absolute tolerance). */
#define ABSAPPROX(a,b,eps) \
  ((eps)>fabs((a)-(b)))
/*! \def IMAX(a,b)
    The larger of \a a and \a b; the arguments are evaluated more than once. */
#define IMAX(a,b) \
  (((b)<(a)) ? (a) : (b))
/*! \def IMIN(a,b)
    The smaller of \a a and \a b; the arguments are evaluated more than once. */
#define IMIN(a,b) \
  (((b)>(a)) ? (a) : (b))
/*! \def SWAP(a,b,type)
    Exchange the values of the lvalues \a a and \a b, both of type \a type. */
#define SWAP(a,b,type) \
  do { type swap_=(b); (b)=(a); (a)=swap_; } while(0)
/*! \def CHECK_STACK_ALIGN(align)
    Emit a warning (via UTIL_WARN) if the address of a local \c double is not
    a multiple of \a align. The test masks the address with \a align-1, so it
    is only meaningful when \a align is a power of two.
    \a align is parenthesized in the expansion so that an expression argument
    such as \c 2*8 produces the intended mask. */
#define CHECK_STACK_ALIGN(align) \
  do { \
    double foo; \
    UTIL_WARN((((size_t)(&foo))&((align)-1))==0, \
      "WARNING: stack not sufficiently aligned!"); \
    } while(0)
#ifdef __cplusplus
}
#endif
#endif

67
external/sharp/c_utils/memusage.c vendored Normal file
View file

@ -0,0 +1,67 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Functionality for measuring memory consumption
*
* Copyright (C) 2012 Max-Planck-Society
* Author: Martin Reinecke
*/
#include <stdio.h>
#include <string.h>
#include "memusage.h"
/* Return the resident set size of the calling process in bytes, or -1.0 if
   it cannot be determined.
   The value is the second field of /proc/self/statm multiplied by 4096.
   NOTE(review): the factor assumes 4096-byte pages; confirm for targets
   with a different page size. */
double residentSetSize(void)
  {
  FILE *statm = fopen("/proc/self/statm","r");
  double res;
  int nconv;

  if (!statm) return -1.0;
  /* skip the first field, read the second */
  nconv = fscanf(statm,"%*f %lf",&res);
  fclose(statm);
  /* without this check a malformed or empty file left res uninitialized */
  if (nconv!=1) return -1.0;
  return (res*4096);
  }
/* Return the value of the "VmHWM:" entry of /proc/self/status in bytes, or
   -1.0 if the file cannot be opened, the entry is missing, its value cannot
   be parsed, or its unit is not "kB". */
double VmHWM(void)
  {
  char word[1024];
  char unit[3];
  FILE *f = fopen("/proc/self/status", "r");
  double res;
  int ok;

  if (!f) return -1.0;
  /* scan whitespace-separated tokens until the key is found or input ends */
  while (fscanf(f,"%1023s",word)==1)
    {
    if (strncmp(word,"VmHWM:",6)!=0) continue;
    /* both conversions must succeed; previously their results were ignored
       and res could be used uninitialized */
    ok = (fscanf(f,"%lf",&res)==1)
      && (fscanf(f,"%2s",unit)==1)
      && (strncmp(unit,"kB",2)==0);
    fclose(f);
    return ok ? res*1024 : -1.0;
    }
  fclose(f);
  return -1.0;
  }

51
external/sharp/c_utils/memusage.h vendored Normal file
View file

@ -0,0 +1,51 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file memusage.h
* Functionality for measuring memory consumption
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_MEMUSAGE_H
#define PLANCK_MEMUSAGE_H
#ifdef __cplusplus
extern "C" {
#endif
/*! Returns the current resident set size in bytes.
\note Currently only supported on Linux (the value is read from
/proc/self/statm). Returns -1 if unsupported. */
double residentSetSize(void);
/*! Returns the high water mark of the resident set size in bytes.
\note Currently only supported on Linux (the value is read from the
VmHWM entry of /proc/self/status). Returns -1 if unsupported. */
double VmHWM(void);
#ifdef __cplusplus
}
#endif
#endif

18
external/sharp/c_utils/planck.make vendored Normal file
View file

@ -0,0 +1,18 @@
# Build description for the c_utils package (static library libc_utils.a).
# Relies on SRCROOT, BLDROOT and LIBDIR being set before this fragment is
# read, presumably by the including makefile -- TODO confirm.
PKG:=c_utils
# SD: source directory of the package, OD: its object (build) directory
SD:=$(SRCROOT)/$(PKG)
OD:=$(BLDROOT)/$(PKG)
# make the package headers visible to every compilation
FULL_INCLUDE+= -I$(SD)
HDR_$(PKG):=$(SD)/*.h
LIB_$(PKG):=$(LIBDIR)/libc_utils.a
# object files of the package, then the same list prefixed with $(OD)/
OBJ:=c_utils.o walltime_c.o memusage.o
OBJ:=$(OBJ:%=$(OD)/%)
# every object depends on all package headers; the object directory is an
# order-only prerequisite, so it is created first without forcing rebuilds
$(OBJ): $(HDR_$(PKG)) | $(OD)_mkdir
# the library depends on its objects; no recipe is given in this fragment
$(LIB_$(PKG)): $(OBJ)
# register headers and library in the global lists
all_hdr+=$(HDR_$(PKG))
all_lib+=$(LIB_$(PKG))

54
external/sharp/c_utils/walltime_c.c vendored Normal file
View file

@ -0,0 +1,54 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Functionality for reading wall clock time
*
* Copyright (C) 2010, 2011 Max-Planck-Society
* Author: Martin Reinecke
*/
#if defined (_OPENMP)
#include <omp.h>
#elif defined (USE_MPI)
#include "mpi.h"
#else
#include <sys/time.h>
#include <stdlib.h>
#endif
#include "walltime_c.h"
/* Return the current wall clock time in seconds. The timer is selected at
   compile time: omp_get_wtime() if _OPENMP is defined, else MPI_Wtime() if
   USE_MPI is defined, else gettimeofday() (seconds plus microseconds). */
double wallTime(void)
  {
  double now;
#if defined (_OPENMP)
  now = omp_get_wtime();
#elif defined (USE_MPI)
  now = MPI_Wtime();
#else
  struct timeval tv;
  gettimeofday(&tv, NULL);
  now = tv.tv_sec + 1e-6*tv.tv_usec;
#endif
  return now;
  }

54
external/sharp/c_utils/walltime_c.h vendored Normal file
View file

@ -0,0 +1,54 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file walltime_c.h
* Functionality for reading wall clock time
*
* Copyright (C) 2010 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_WALLTIME_C_H
#define PLANCK_WALLTIME_C_H
#ifdef __cplusplus
extern "C" {
#endif
/*! Returns an approximation of the current wall time (in seconds).
The timer is chosen at compile time; the first available of the
following will be used:
<ul>
<li> \a omp_get_wtime(), if OpenMP is available (\c _OPENMP is defined)
<li> \a MPI_Wtime(), if MPI is available (\c USE_MPI is defined)
<li> \a gettimeofday() otherwise
</ul>
\note Only useful for measuring time differences.
\note This function has an execution time between 10 and 100 nanoseconds. */
double wallTime(void);
#ifdef __cplusplus
}
#endif
#endif

9
external/sharp/config/config.auto.in vendored Normal file
View file

@ -0,0 +1,9 @@
# Template for config/config.auto. The configure script replaces every
# placeholder delimited by at-signs with the value computed in configure.ac.
# The first placeholder becomes either ".SILENT:" or an empty line.
@SILENT_RULE@
# the same program is used as C compiler (CC) and as link driver (CL)
CC=@CC@
CL=@CC@
# compiler flags without "-c"; CCFLAGS adds "-c" for compile-only steps
CCFLAGS_NO_C=@CCFLAGS_NO_C@
CCFLAGS=$(CCFLAGS_NO_C) -c
# link flags: library search paths, configured linker flags, math library
CLFLAGS=-L. -L$(LIBDIR) @LDCCFLAGS@ -lm
# command prefix used to create a static archive
ARCREATE=@ARCREATE@

31
external/sharp/config/rules.common vendored Normal file
View file

@ -0,0 +1,31 @@
# Directory layout, derived from SRCROOT and the configuration name
# SHARP_TARGET (both presumably set by the including makefile -- TODO confirm).
BLDROOT = $(SRCROOT)/build.$(SHARP_TARGET)
PREFIX = $(SRCROOT)/$(SHARP_TARGET)
BINDIR = $(PREFIX)/bin
INCDIR = $(PREFIX)/include
LIBDIR = $(PREFIX)/lib
DOCDIR = $(SRCROOT)/doc

# do not use any suffix rules
.SUFFIXES:
# do not use any default rules
.DEFAULT:

# These targets name actions, not files. Declaring them phony keeps them
# working even if a file or directory with the same name appears in the tree.
.PHONY: echo_config clean distclean

# Print the active configuration name. Used below as an order-only
# prerequisite, so it never forces an object to be rebuilt.
echo_config:
	@echo using configuration \'$(SHARP_TARGET)\'

# Compile a C source into the mirrored location below BLDROOT.
# The compiler is run from the object's directory.
$(BLDROOT)/%.o : $(SRCROOT)/%.c | echo_config
	@echo "# compiling $*.c"
	cd $(@D) && $(CC) $(FULL_INCLUDE) -I$(BLDROOT) $(CCFLAGS) $<

# Same for C++ sources.
# NOTE(review): CXX and CXXCFLAGS are not set by config.auto.in -- confirm
# where they are expected to come from.
$(BLDROOT)/%.o : $(SRCROOT)/%.cc | echo_config
	@echo "# compiling $*.cc"
	cd $(@D) && $(CXX) $(FULL_INCLUDE) -I$(BLDROOT) $(CXXCFLAGS) $<

# "<dir>_mkdir" creates <dir> if it does not exist yet.
%_mkdir:
	@if [ ! -d $* ]; then mkdir -p $* ; fi

# Remove build products, the install prefix, generated documentation and the
# files left behind by configure.
clean:
	rm -rf $(BLDROOT) $(PREFIX) $(DOCDIR) autom4te.cache/ config.log config.status

# Additionally remove the generated configuration.
distclean: clean
	rm -f config/config.auto

3974
external/sharp/configure vendored Executable file

File diff suppressed because it is too large Load diff

112
external/sharp/configure.ac vendored Normal file
View file

@ -0,0 +1,112 @@
AC_INIT(config/config.auto.in)

dnl Record the operating system as NAME-RELEASE in the shell variable
dnl "system"; later sections only match it against the Darwin-* pattern.
AC_CHECK_PROG([uname_found],[uname],[1],[0])
if test $uname_found -eq 0 ; then
  echo "No uname found; setting system type to unknown."
  system="unknown"
else
  system=`uname -s`-`uname -r`
fi

AC_LANG([C])
dnl Probe for the Intel compiler: the test program only compiles when
dnl __INTEL_COMPILER is predefined. This uses the current compile-test macro
dnl in place of the obsolete one that autoconf warns about.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[@%:@ifndef __INTEL_COMPILER
choke me
@%:@endif]])], [ICC=yes], [ICC=no])

dnl Classify the compiler as gcc, icc or unknown. An Intel compiler overrides
dnl a positive GNU C result (presumably because icc also passes the generic
dnl GNU C check -- TODO confirm).
dnl GCC is empty for non-GNU compilers, so the comparisons are quoted; the
dnl unquoted form made "test" fail with a syntax error in that case.
if test "x$ICC" = xyes; then GCC=no; fi
CCTYPE=unknown
if test "x$GCC" = xyes; then CCTYPE=gcc; fi
if test "x$ICC" = xyes; then CCTYPE=icc; fi
dnl Determine the OpenMP compiler flag; it is appended to CCFLAGS further
dnl down through OPENMP_CFLAGS.
AC_OPENMP
dnl By default the generated makefile fragment starts with ".SILENT:";
dnl --enable-noisy-make replaces that line with an empty one.
SILENT_RULE=".SILENT:"
AC_ARG_ENABLE(noisy-make,
[ --enable-noisy-make enable detailed make output],
[if test "$enableval" = yes; then
SILENT_RULE=""
fi])
dnl --enable-mpi: adds -DUSE_MPI to the compiler flags further down.
ENABLE_MPI=no
AC_ARG_ENABLE(mpi,
[ --enable-mpi enable generation of MPI-parallel code],
[if test "$enableval" = yes; then
ENABLE_MPI=yes
fi])
dnl --enable-debug: adds -g to the compiler flags further down.
ENABLE_DEBUG=no
AC_ARG_ENABLE(debug,
[ --enable-debug enable generation of debugging symbols],
[if test "$enableval" = yes; then
ENABLE_DEBUG=yes
fi])
dnl --enable-pic: adds -fPIC to the compiler flags further down.
ENABLE_PIC=no
AC_ARG_ENABLE(pic,
[ --enable-pic enable generation of position independent code],
[if test "$enableval" = yes; then
ENABLE_PIC=yes
fi])
dnl Select optimisation and warning flags for the detected compiler family.
case $CCTYPE in
  gcc)
    CCFLAGS="-O3 -fno-tree-vectorize -ffast-math -fomit-frame-pointer -std=c99 -pedantic -Wextra -Wall -Wno-unknown-pragmas -Wshadow -Wmissing-prototypes -Wfatal-errors"
    GCCVERSION="`$CC -dumpversion 2>&1`"
    echo "Using gcc version $GCCVERSION"
    AC_SUBST(GCCVERSION)
    dnl The section-splitting flags are left out on Darwin.
    case $system in
      Darwin-*)
        ;;
      *)
        CCFLAGS="$CCFLAGS -ffunction-sections -fdata-sections"
        ;;
    esac
    dnl Add -march=native for gcc 4.3 and every later release. The pattern is
    dnl anchored at the start of the version string and accepts 4.3 to 4.9,
    dnl single-digit majors from 5 up, and two-digit majors. The previous
    dnl unanchored pattern missed gcc 5 and later and could match the tail
    dnl of an older version such as 3.4.6.
    changequote(,)
    gcc43=`echo $GCCVERSION | grep -c -E '^(4\.[3-9]|[5-9]|[1-9][0-9])'`
    changequote([,])
    if test "$gcc43" -gt 0; then
      CCFLAGS="$CCFLAGS -march=native"
    fi
    ;;
  icc)
    CCFLAGS="-O3 -xHOST -std=c99 -ip -Wbrief -Wall -vec-report0 -openmp-report0 -wd383,981,1419,1572"
    ;;
  *)
    CCFLAGS="-O2"
    # Don't do anything now
    ;;
esac
dnl Command prefix used to create static archives.
case $system in
  Darwin-*)
    ARCREATE="libtool -static -o"
    ;;
  *)
    ARCREATE="ar cr"
    ;;
esac

dnl Append the flags selected by the OpenMP probe and by the --enable-*
dnl options.
CCFLAGS="$CCFLAGS $OPENMP_CFLAGS"
if test $ENABLE_DEBUG = yes; then
  CCFLAGS="$CCFLAGS -g"
fi
if test $ENABLE_PIC = yes; then
  CCFLAGS="$CCFLAGS -fPIC"
fi
if test $ENABLE_MPI = yes; then
  CCFLAGS="$CCFLAGS -DUSE_MPI"
fi

dnl Compile flags (the template adds the compile-only switch itself) and
dnl link flags, which repeat the compile flags after the user linker flags.
CCFLAGS_NO_C="$CCFLAGS $CPPFLAGS"
LDCCFLAGS="$LDFLAGS $CCFLAGS"

AC_SUBST(SILENT_RULE)
AC_SUBST(CC)
AC_SUBST(CCFLAGS_NO_C)
AC_SUBST(LDCCFLAGS)
AC_SUBST(ARCREATE)

dnl Generate config/config.auto from its template. The output file is now
dnl registered separately because passing it as an argument to the final
dnl output macro is obsolete.
AC_CONFIG_FILES([config/config.auto])
AC_OUTPUT

290
external/sharp/docsrc/c_utils.dox vendored Normal file
View file

@ -0,0 +1,290 @@
# Doxyfile 1.8.1
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "LevelS C support library"
PROJECT_NUMBER = 0.1
PROJECT_BRIEF =
PROJECT_LOGO =
OUTPUT_DIRECTORY = .
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = NO
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 8
ALIASES =
TCL_SUBST =
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
EXTENSION_MAPPING =
MARKDOWN_SUPPORT = YES
BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
SIP_SUPPORT = NO
IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS = NO
TYPEDEF_HIDES_STRUCT = NO
SYMBOL_CACHE_SIZE = 0
LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_PACKAGE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
HIDE_UNDOC_MEMBERS = YES
HIDE_UNDOC_CLASSES = YES
HIDE_FRIEND_COMPOUNDS = YES
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = NO
SORT_BRIEF_DOCS = NO
SORT_MEMBERS_CTORS_1ST = NO
SORT_GROUP_NAMES = NO
SORT_BY_SCOPE_NAME = NO
STRICT_PROTO_MATCHING = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_FILES = YES
SHOW_NAMESPACES = YES
FILE_VERSION_FILTER =
LAYOUT_FILE =
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = ../c_utils
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.h \
*.c \
*.dox
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_SYMBOLS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
FILTER_SOURCE_PATTERNS =
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = NO
REFERENCED_BY_RELATION = NO
REFERENCES_RELATION = NO
REFERENCES_LINK_SOURCE = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = htmldoc
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER = footer.html
HTML_STYLESHEET =
HTML_EXTRA_FILES =
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
HTML_TIMESTAMP = YES
HTML_DYNAMIC_SECTIONS = NO
HTML_INDEX_NUM_ENTRIES = 100
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "Doxygen generated docs"
DOCSET_BUNDLE_ID = org.doxygen.Project
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
DOCSET_PUBLISHER_NAME = Publisher
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
GENERATE_QHP = NO
QCH_FILE =
QHP_NAMESPACE = org.doxygen.Project
QHP_VIRTUAL_FOLDER = doc
QHP_CUST_FILTER_NAME =
QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
QHG_LOCATION =
GENERATE_ECLIPSEHELP = NO
ECLIPSE_DOC_ID = org.doxygen.Project
DISABLE_INDEX = NO
GENERATE_TREEVIEW = NO
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
USE_MATHJAX = NO
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
MATHJAX_EXTENSIONS =
SEARCHENGINE = NO
SERVER_BASED_SEARCH = NO
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = YES
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
LATEX_FOOTER =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plain
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE = c_utils.tag
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
MSCGEN_PATH =
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
DOT_NUM_THREADS = 0
DOT_FONTNAME = FreeSans
DOT_FONTSIZE = 10
DOT_FONTPATH =
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
UML_LIMIT_NUM_FIELDS = 10
TEMPLATE_RELATIONS = YES
INCLUDE_GRAPH = NO
INCLUDED_BY_GRAPH = NO
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = NO
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
DOT_GRAPH_MAX_NODES = 50
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES

5
external/sharp/docsrc/footer.html vendored Normal file
View file

@ -0,0 +1,5 @@
<hr><address style="align: right;"><small>
Generated on $datetime for $projectname
</small></address>
</body>
</html>

15
external/sharp/docsrc/index_code.html vendored Normal file
View file

@ -0,0 +1,15 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
<title>Libsharp source code documentation</title>
</head><body>
<H1>Libsharp source code documentation</H1>
<H2>C interfaces</H2>
<ul>
<li><a href="c_utils/index.html">C support library</a>
<li><a href="libfftpack/index.html">FFT interface</a>
<li><a href="libsharp/index.html">Library for spherical harmonic transforms</a>
</ul>
</body>
</html>

290
external/sharp/docsrc/libfftpack.dox vendored Normal file
View file

@ -0,0 +1,290 @@
# Doxyfile 1.8.1
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "LevelS FFT library"
PROJECT_NUMBER = 0.1
PROJECT_BRIEF =
PROJECT_LOGO =
OUTPUT_DIRECTORY = .
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = NO
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 8
ALIASES =
TCL_SUBST =
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
EXTENSION_MAPPING =
MARKDOWN_SUPPORT = YES
BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
SIP_SUPPORT = NO
IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS = NO
TYPEDEF_HIDES_STRUCT = NO
SYMBOL_CACHE_SIZE = 0
LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_PACKAGE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
HIDE_UNDOC_MEMBERS = YES
HIDE_UNDOC_CLASSES = YES
HIDE_FRIEND_COMPOUNDS = YES
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = NO
SORT_BRIEF_DOCS = NO
SORT_MEMBERS_CTORS_1ST = NO
SORT_GROUP_NAMES = NO
SORT_BY_SCOPE_NAME = NO
STRICT_PROTO_MATCHING = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_FILES = YES
SHOW_NAMESPACES = YES
FILE_VERSION_FILTER =
LAYOUT_FILE =
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = ../libfftpack
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.h \
*.c \
*.dox
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_SYMBOLS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
FILTER_SOURCE_PATTERNS =
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = NO
REFERENCED_BY_RELATION = NO
REFERENCES_RELATION = NO
REFERENCES_LINK_SOURCE = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = htmldoc
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER = footer.html
HTML_STYLESHEET =
HTML_EXTRA_FILES =
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
HTML_TIMESTAMP = YES
HTML_DYNAMIC_SECTIONS = NO
HTML_INDEX_NUM_ENTRIES = 100
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "Doxygen generated docs"
DOCSET_BUNDLE_ID = org.doxygen.Project
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
DOCSET_PUBLISHER_NAME = Publisher
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
GENERATE_QHP = NO
QCH_FILE =
QHP_NAMESPACE = org.doxygen.Project
QHP_VIRTUAL_FOLDER = doc
QHP_CUST_FILTER_NAME =
QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
QHG_LOCATION =
GENERATE_ECLIPSEHELP = NO
ECLIPSE_DOC_ID = org.doxygen.Project
DISABLE_INDEX = NO
GENERATE_TREEVIEW = NO
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
USE_MATHJAX = NO
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
MATHJAX_EXTENSIONS =
SEARCHENGINE = NO
SERVER_BASED_SEARCH = NO
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = YES
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
LATEX_FOOTER =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plain
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES = c_utils.tag=../c_utils
GENERATE_TAGFILE = libfftpack.tag
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
MSCGEN_PATH =
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
DOT_NUM_THREADS = 0
DOT_FONTNAME = FreeSans
DOT_FONTSIZE = 10
DOT_FONTPATH =
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
UML_LIMIT_NUM_FIELDS = 10
TEMPLATE_RELATIONS = YES
INCLUDE_GRAPH = NO
INCLUDED_BY_GRAPH = NO
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = NO
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
DOT_GRAPH_MAX_NODES = 50
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES

291
external/sharp/docsrc/libsharp.dox vendored Normal file
View file

@ -0,0 +1,291 @@
# Doxyfile 1.8.1
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "LevelS SHT library"
PROJECT_NUMBER = 0.1
PROJECT_BRIEF =
PROJECT_LOGO =
OUTPUT_DIRECTORY = .
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = NO
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 8
ALIASES =
TCL_SUBST =
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
EXTENSION_MAPPING =
MARKDOWN_SUPPORT = YES
BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
SIP_SUPPORT = NO
IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS = NO
TYPEDEF_HIDES_STRUCT = NO
SYMBOL_CACHE_SIZE = 0
LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_PACKAGE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
HIDE_UNDOC_MEMBERS = YES
HIDE_UNDOC_CLASSES = YES
HIDE_FRIEND_COMPOUNDS = YES
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = NO
SORT_BRIEF_DOCS = NO
SORT_MEMBERS_CTORS_1ST = NO
SORT_GROUP_NAMES = NO
SORT_BY_SCOPE_NAME = NO
STRICT_PROTO_MATCHING = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_FILES = YES
SHOW_NAMESPACES = YES
FILE_VERSION_FILTER =
LAYOUT_FILE =
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = ../libsharp
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.h \
*.c \
*.dox
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_SYMBOLS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
FILTER_SOURCE_PATTERNS =
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = NO
REFERENCED_BY_RELATION = NO
REFERENCES_RELATION = NO
REFERENCES_LINK_SOURCE = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = htmldoc
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER = footer.html
HTML_STYLESHEET =
HTML_EXTRA_FILES =
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
HTML_TIMESTAMP = YES
HTML_DYNAMIC_SECTIONS = NO
HTML_INDEX_NUM_ENTRIES = 100
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "Doxygen generated docs"
DOCSET_BUNDLE_ID = org.doxygen.Project
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
DOCSET_PUBLISHER_NAME = Publisher
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
GENERATE_QHP = NO
QCH_FILE =
QHP_NAMESPACE = org.doxygen.Project
QHP_VIRTUAL_FOLDER = doc
QHP_CUST_FILTER_NAME =
QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
QHG_LOCATION =
GENERATE_ECLIPSEHELP = NO
ECLIPSE_DOC_ID = org.doxygen.Project
DISABLE_INDEX = NO
GENERATE_TREEVIEW = NO
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
USE_MATHJAX = NO
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
MATHJAX_EXTENSIONS =
SEARCHENGINE = NO
SERVER_BASED_SEARCH = NO
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = YES
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
LATEX_FOOTER =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plain
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES = libfftpack.tag=../libfftpack \
c_utils.tag=../c_utils
GENERATE_TAGFILE = libsharp.tag
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
MSCGEN_PATH =
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
DOT_NUM_THREADS = 0
DOT_FONTNAME = FreeSans
DOT_FONTSIZE = 10
DOT_FONTPATH =
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
UML_LIMIT_NUM_FIELDS = 10
TEMPLATE_RELATIONS = YES
INCLUDE_GRAPH = NO
INCLUDED_BY_GRAPH = NO
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = NO
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
DOT_GRAPH_MAX_NODES = 50
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES

20
external/sharp/docsrc/planck.make vendored Normal file
View file

@ -0,0 +1,20 @@
# Make fragment for the "docsrc" package: builds the HTML source-code
# documentation with doxygen and installs it below $(DOCDIR).
# NOTE(review): $(DOCDIR), $(SRCROOT) and the $(DOCDIR)_mkdir target are not
# defined here; presumably they come from the including top-level makefile.
PKG:=docsrc
# Install the hand-written landing page as $(DOCDIR)/index.html.
docsrc_idx: $(DOCDIR)_mkdir
cp $(SRCROOT)/docsrc/index_code.html $(DOCDIR)/index.html
# Run doxygen once per component and move each generated "htmldoc" tree to
# $(DOCDIR)/<component>. The order c_utils -> libfftpack -> libsharp matters:
# the later .dox files list the .tag files produced by the earlier runs in
# their TAGFILES setting. The intermediate *.tag files are removed afterwards.
docsrc_code_doc: $(DOCDIR)_mkdir docsrc_idx
cd $(SRCROOT)/docsrc; \
for i in c_utils libfftpack libsharp; do \
doxygen $${i}.dox; \
rm -rf $(DOCDIR)/$${i}; mv htmldoc $(DOCDIR)/$${i}; \
done; \
rm *.tag;
# Remove leftovers of an interrupted documentation build (tag files and the
# temporary htmldoc output directory) from the docsrc source directory.
docsrc_clean:
cd $(SRCROOT)/docsrc; \
rm -f *.tag
cd $(SRCROOT)/docsrc; \
rm -rf htmldoc
# Hook the code documentation into the global "doc" target.
doc: docsrc_code_doc

34
external/sharp/libfftpack/README vendored Normal file
View file

@ -0,0 +1,34 @@
ls_fft description:
This package is intended to calculate one-dimensional real or complex FFTs
with high accuracy and good efficiency even for lengths containing large
prime factors.
The code is written in C, but a Fortran wrapper exists as well.
Before any FFT is executed, a plan must be generated for it. Plan creation
is designed to be fast, so that there is no significant overhead if the
plan is only used once or a few times.
The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
double precision incarnation by Hugh C. Pumphrey
(http://www.netlib.org/fftpack/dp.tgz).
I replaced the iterative sine and cosine calculations in radfg() and radbg()
by an exact calculation, which slightly improves the transform accuracy for
real FFTs with lengths containing large prime factors.
Since FFTPACK becomes quite slow for FFT lengths with large prime factors
(in the worst case of prime lengths it reaches O(n*n) complexity), I
implemented Bluestein's algorithm, which computes an FFT of length n by
several FFTs of length n2>=2*n-1 and a convolution. Since n2 can be chosen
to be highly composite, this algorithm is more efficient if n has large
prime factors. The longer FFTs themselves are then computed using the FFTPACK
routines.
Bluestein's algorithm was implemented according to the description at
http://en.wikipedia.org/wiki/Bluestein's_FFT_algorithm.
Thread-safety:
All routines can be called concurrently; all information needed by ls_fft
is stored in the plan variable. However, using the same plan variable on
multiple threads simultaneously is not supported and will lead to data
corruption.

173
external/sharp/libfftpack/bluestein.c vendored Normal file
View file

@ -0,0 +1,173 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Copyright (C) 2005, 2006, 2007, 2008 Max-Planck-Society
* \author Martin Reinecke
*/
#include <math.h>
#include <stdlib.h>
#include "fftpack.h"
#include "bluestein.h"
/* returns the sum of all prime factors of n */
size_t prime_factor_sum (size_t n)
{
size_t result=0,x,limit,tmp;
while (((tmp=(n>>1))<<1)==n)
{ result+=2; n=tmp; }
limit=(size_t)sqrt(n+0.01);
for (x=3; x<=limit; x+=2)
while ((tmp=(n/x))*x==n)
{
result+=x;
n=tmp;
limit=(size_t)sqrt(n+0.01);
}
if (n>1) result+=n;
return result;
}
/* Finds the smallest number >= n that has no prime factors other than
   2, 3 and 5. Values n <= 6 are returned unchanged. */
static size_t good_size(size_t n)
{
  size_t best, pow2, pow23, cand;

  /* 2*n is the starting upper bound for the search */
  best = 2 * n;
  if (n <= 6)
    return n;

  /* Enumerate 2^a * 3^b * 5^c below the current best; every candidate that
     reaches n becomes the new (smaller) bound. */
  pow2 = 1;
  while (pow2 < best)
  {
    pow23 = pow2;
    while (pow23 < best)
    {
      cand = pow23;
      while (cand < best)
      {
        if (cand >= n)
          best = cand;
        cand *= 5;
      }
      pow23 *= 3;
    }
    pow2 *= 2;
  }
  return best;
}
/* Prepares the storage needed by bluestein() for transforms of length n.
   On return *tstorage points to a freshly allocated array of *worksize
   doubles laid out as
     [0,2)              : n2 (padded FFT length), stored as a size_t
     [2,2+2n)           : b_k, the chirp sequence (interleaved re/im)
     [2+2n,2+2n+2*n2)   : zero-padded, normalised and Fourier transformed b_k
     [2+2n+2*n2, ...)   : work area initialised by cffti(), followed by
                          scratch space used by bluestein()
*/
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
{
  static const double pi=3.14159265358979323846;
  const size_t n2=good_size(n*2-1);
  const size_t total=2+2*n+8*n2+16;
  const double pibyn=pi/n;
  const double xn2=1./n2;
  double *store, *bk, *bkf, *work;
  size_t idx, coeff=0;

  *worksize=total;
  store=RALLOC(double,total);
  *tstorage=store;
  ((size_t *)store)[0]=n2;
  bk  =store+2;
  bkf =store+2+2*n;
  work=store+2+2*(n+n2);

  /* b_k = exp(i*pi*k*k/n); k*k is accumulated modulo 2n in coeff so that
     the angle stays small */
  bk[0]=1;
  bk[1]=0;
  for (idx=1; idx<n; ++idx)
  {
    double angle;
    coeff+=2*idx-1;
    if (coeff>=2*n)
      coeff-=2*n;
    angle=pibyn*coeff;
    bk[2*idx]  =cos(angle);
    bk[2*idx+1]=sin(angle);
  }

  /* scaled copy of b_k, mirrored to the end of the length-n2 buffer;
     the factor 1/n2 is the normalisation */
  bkf[0]=bk[0]*xn2;
  bkf[1]=bk[1]*xn2;
  for (idx=2; idx<2*n; idx+=2)
  {
    const double re=bk[idx]  *xn2;
    const double im=bk[idx+1] *xn2;
    bkf[2*n2-idx]  =re;
    bkf[idx]       =re;
    bkf[2*n2-idx+1]=im;
    bkf[idx+1]     =im;
  }
  /* zero padding between the two copies */
  idx=2*n;
  while (idx<=(2*n2-2*n+1))
  {
    bkf[idx]=0.;
    ++idx;
  }

  /* set up the length-n2 complex FFT and transform the padded b_k */
  cffti (n2,work);
  cfftf (n2,bkf,work);
}
/* Transforms the n complex values in data (interleaved re/im) in place using
   Bluestein's algorithm. tstorage must be the array set up by
   bluestein_i() for the same n. For isign>0 the chirp b_k is applied as
   stored, for any other value its complex conjugate is used. */
void bluestein (size_t n, double *data, double *tstorage, int isign)
{
  const size_t n2=*((size_t *)tstorage);
  double *bk  =tstorage+2;
  double *bkf =tstorage+2+2*n;
  double *work=tstorage+2+2*(n+n2);
  double *akf =tstorage+2+2*n+6*n2+16;
  const int positive=(isign>0);
  size_t p;

  /* a_k = data_k * b_k (or conj(b_k)), zero-padded to length n2 */
  if (positive)
  {
    for (p=0; p<2*n; p+=2)
    {
      akf[p]   = data[p]*bk[p]   - data[p+1]*bk[p+1];
      akf[p+1] = data[p]*bk[p+1] + data[p+1]*bk[p];
    }
  }
  else
  {
    for (p=0; p<2*n; p+=2)
    {
      akf[p]   = data[p]*bk[p]   + data[p+1]*bk[p+1];
      akf[p+1] =-data[p]*bk[p+1] + data[p+1]*bk[p];
    }
  }
  for (p=2*n; p<2*n2; ++p)
    akf[p]=0;

  cfftf (n2,akf,work);

  /* convolution: pointwise product with the transformed chirp */
  if (positive)
  {
    for (p=0; p<2*n2; p+=2)
    {
      const double re = akf[p]*bkf[p] + akf[p+1]*bkf[p+1];
      const double im = -akf[p]*bkf[p+1] + akf[p+1]*bkf[p];
      akf[p]   = re;
      akf[p+1] = im;
    }
  }
  else
  {
    for (p=0; p<2*n2; p+=2)
    {
      const double re = akf[p]*bkf[p] - akf[p+1]*bkf[p+1];
      const double im = akf[p]*bkf[p+1] + akf[p+1]*bkf[p];
      akf[p]   = re;
      akf[p+1] = im;
    }
  }

  /* back to the original domain */
  cfftb (n2,akf,work);

  /* final multiplication with the chirp, written back into data */
  if (positive)
  {
    for (p=0; p<2*n; p+=2)
    {
      data[p]   = bk[p]  *akf[p] - bk[p+1]*akf[p+1];
      data[p+1] = bk[p+1]*akf[p] + bk[p]  *akf[p+1];
    }
  }
  else
  {
    for (p=0; p<2*n; p+=2)
    {
      data[p]   = bk[p]  *akf[p] + bk[p+1]*akf[p+1];
      data[p+1] =-bk[p+1]*akf[p] + bk[p]  *akf[p+1];
    }
  }
}

48
external/sharp/libfftpack/bluestein.h vendored Normal file
View file

@ -0,0 +1,48 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Copyright (C) 2005 Max-Planck-Society
* \author Martin Reinecke
*/
/* Interface of the Bluestein FFT helper routines. */
#ifndef PLANCK_BLUESTEIN_H
#define PLANCK_BLUESTEIN_H
#include "c_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Returns the sum of all prime factors of n. */
size_t prime_factor_sum (size_t n);
/* Allocates and initialises the storage needed for Bluestein transforms of
   length n. *tstorage receives the newly allocated array, *worksize its
   length in doubles.
   NOTE(review): the array is obtained via RALLOC; releasing it is left to
   the caller (presumably with DEALLOC - confirm against the users). */
void bluestein_i (size_t n, double **tstorage, size_t *worksize);
/* Transforms the n complex numbers in data (interleaved real/imaginary
   parts) in place. tstorage must have been set up by bluestein_i() for the
   same n; it is used as scratch space during the call. isign>0 and isign<=0
   select the two conjugate variants of the transform. */
void bluestein (size_t n, double *data, double *tstorage, int isign);
#ifdef __cplusplus
}
#endif
#endif

833
external/sharp/libfftpack/fftpack.c vendored Normal file
View file

@ -0,0 +1,833 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
fftpack.c : A set of FFT routines in C.
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
(Version 4, 1985).
C port by Martin Reinecke (2010)
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "fftpack.h"
/* Index helpers for the radix passes. They rely on local names of the
   enclosing function: wa/ch/cc (arrays) and ido/l1/cdim (dimensions).
   WA(x,i): element i of twiddle row x.
   CH/CC: views of ch/cc as 3-dimensional arrays with fastest index a. */
#define WA(x,i) wa[(i)+(x)*ido]
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
/* Statement macros; the arguments are pasted without parentheses.
   PM: a = c+d, b = c-d (scalars). */
#define PM(a,b,c,d) { a=c+d; b=c-d; }
/* PMC: same as PM for cmplx values. */
#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
/* ADDC: a = b+c for cmplx values. */
#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
/* SCALEC: multiplies the cmplx value a by the scalar b. */
#define SCALEC(a,b) { a.r*=b; a.i*=b; }
/* CONJFLIPC: replaces (a.r,a.i) by (-a.i,a.r), i.e. multiplies a by the
   imaginary unit. */
#define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
/* (a+ib) = conj(c+id) * (e+if) */
#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
/* Complex number as a pair of doubles (real part r, imaginary part i). */
typedef struct {
double r,i;
} cmplx;
/* fftpack_inc.c is included twice as a template: X(name) expands to
   passb<name> while BACKWARD is defined and to passf<name> afterwards.
   NOTE(review): presumably this generates the backward and forward complex
   passes; fftpack_inc.c is not visible here - confirm. */
#define CONCAT(a,b) a ## b
#define X(arg) CONCAT(passb,arg)
#define BACKWARD
#include "fftpack_inc.c"
#undef BACKWARD
#undef X
#define X(arg) CONCAT(passf,arg)
#include "fftpack_inc.c"
#undef X
/* From here on CC uses l1 and CH uses cdim as the middle dimension
   (the roles are swapped compared to the definitions above). */
#undef CC
#undef CH
#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=2;
size_t i, k, ic;
double ti2, tr2;
for (k=0; k<l1; k++)
PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
CH( 0,1,k) = -CC(ido-1,k,1);
CH(ido-1,0,k) = CC(ido-1,k,0);
}
if (ido<=2) return;
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0))
}
}
/* Radix-3 pass (cdim=3) of the real-data transform.
   Input is read from cc through CC(i,k,j), output is written to ch through
   CH(i,j,k); wa holds two rows of twiddle factors accessed through WA. */
static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
  const double *wa)
{
  const size_t cdim=3;
  /* cos(2*pi/3) and sin(2*pi/3) */
  static const double taur=-0.5, taui=0.86602540378443864676;
  size_t idx, blk;

  /* first element of each block: no twiddle factors involved */
  for (blk=0; blk<l1; ++blk)
  {
    const double sum12=CC(0,blk,1)+CC(0,blk,2);
    CH(0,0,blk)     = CC(0,blk,0)+sum12;
    CH(0,2,blk)     = taui*(CC(0,blk,2)-CC(0,blk,1));
    CH(ido-1,1,blk) = CC(0,blk,0)+taur*sum12;
  }

  if (ido==1) return;

  for (blk=0; blk<l1; ++blk)
  {
    for (idx=2; idx<ido; idx+=2)
    {
      const size_t mir=ido-idx;
      double dr2, di2, dr3, di3, cr2, ci2, tr2, ti2, tr3, ti3;

      /* rotate inputs 1 and 2 by their twiddle factors */
      MULPM (dr2,di2,WA(0,idx-2),WA(0,idx-1),CC(idx-1,blk,1),CC(idx,blk,1))
      MULPM (dr3,di3,WA(1,idx-2),WA(1,idx-1),CC(idx-1,blk,2),CC(idx,blk,2))
      cr2=dr2+dr3;
      ci2=di2+di3;
      CH(idx-1,0,blk) = CC(idx-1,blk,0)+cr2;
      CH(idx,0,blk)   = CC(idx,blk,0)+ci2;
      tr2 = CC(idx-1,blk,0)+taur*cr2;
      ti2 = CC(idx,blk,0)+taur*ci2;
      tr3 = taui*(di2-di3);
      ti3 = taui*(dr3-dr2);
      /* sums go to position idx, differences to the mirrored position */
      PM(CH(idx-1,2,blk),CH(mir-1,1,blk),tr2,tr3)
      PM(CH(idx,2,blk),CH(mir,1,blk),ti3,ti2)
    }
  }
}
/* Radix-4 pass (cdim=4) of the real-data transform.
   Input is read from cc through CC(i,k,j), output is written to ch through
   CH(i,j,k); wa holds three rows of twiddle factors accessed through WA. */
static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=4;
/* 1/sqrt(2) */
static const double hsqt2=0.70710678118654752440;
size_t i, k, ic;
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
/* first element of each block: no twiddle factors involved */
for (k=0; k<l1; k++)
{
PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
}
/* even ido: the last element of each block only needs the constant hsqt2 */
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2))
}
/* nothing left to do for ido of 1 or 2 */
if (ido<=2) return;
/* general case: rotate inputs 1..3 by their twiddle factors, combine them
   and store the results at position i and the mirrored position ic */
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
PM(tr1,tr4,cr4,cr2)
PM(ti1,ti4,ci2,ci4)
PM(tr2,tr3,CC(i-1,k,0),cr3)
PM(ti2,ti3,CC(i ,k,0),ci3)
PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2)
PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3)
}
}
/* Radix-5 pass (cdim=5) of the real-data transform.
   Input is read from cc through CC(i,k,j), output is written to ch through
   CH(i,j,k); wa holds four rows of twiddle factors accessed through WA. */
static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=5;
/* tr11/ti11 = cos/sin(2*pi/5), tr12/ti12 = cos/sin(4*pi/5) */
static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
size_t i, k, ic;
double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
/* first element of each block: no twiddle factors involved */
for (k=0; k<l1; k++)
{
PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
CH(0,0,k)=CC(0,k,0)+cr2+cr3;
CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
CH(0,2,k)=ti11*ci5+ti12*ci4;
CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
CH(0,4,k)=ti12*ci5-ti11*ci4;
}
if (ido==1) return;
/* general case: rotate inputs 1..4 by their twiddle factors, combine them
   and store the results at position i and the mirrored position ic */
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
PM(cr2,ci5,dr5,dr2)
PM(ci2,cr5,di2,di5)
PM(cr3,ci4,dr4,dr3)
PM(ci3,cr4,di3,di4)
CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
CH(i ,0,k)=CC(i ,k,0)+ci2+ci3;
tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
ti2=CC(i ,k,0)+tr11*ci2+tr12*ci3;
tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
ti3=CC(i ,k,0)+tr12*ci2+tr11*ci3;
/* MULPM is used here on plain scalars:
   tr5=cr5*ti11+cr4*ti12, tr4=cr5*ti12-cr4*ti11 (same for ti5/ti4) */
MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2)
PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3)
}
}
/* Index macros for radfg: CH and C1 use l1 as the middle dimension, CC uses
   cdim; C2 and CH2 view cc and ch as 2-dimensional arrays with leading
   dimension idl1. */
#undef CH
#undef CC
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define C2(a,b) cc[(a)+idl1*(b)]
#define CH2(a,b) ch[(a)+idl1*(b)]
/* General-radix pass of the real-data transform for radix ip.
   Both cc and ch are read and written; the final result is stored in cc.
   wa holds the twiddle factors. idl1 is the leading dimension of the C2/CH2
   views (presumably ido*l1 - confirm against the caller). */
static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
double *cc, double *ch, const double *wa)
{
const size_t cdim=ip;
static const double twopi=6.28318530717958647692;
size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
double ai1, ai2, ar1, ar2, arg;
double *csarr;
size_t aidx;
ipph=(ip+1)/ 2;
if(ido!=1)
{
/* apply the twiddle factors: ch = rotated copy of cc */
memcpy(ch,cc,idl1*sizeof(double));
for(j=1; j<ip; j++)
for(k=0; k<l1; k++)
{
CH(0,k,j)=C1(0,k,j);
idij=(j-1)*ido+1;
for(i=2; i<ido; i+=2,idij+=2)
MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
}
/* combine component j with its partner jc=ip-j into sums and differences,
   stored back into cc */
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
PM(C1(i-1,k,j),C1(i ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
PM(C1(i ,k,j),C1(i-1,k,jc),CH(i ,k,j ),CH(i ,k,jc))
}
}
else
memcpy(cc,ch,idl1*sizeof(double));
/* same sum/difference step for the first element of every block */
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
/* table of cos/sin of all multiples of 2*pi/ip: csarr[2*m]=cos(m*arg),
   csarr[2*m+1]=sin(m*arg); the upper half is filled by symmetry. Every entry
   is evaluated directly with cos()/sin() instead of by a recurrence. */
csarr=RALLOC(double,2*ip);
arg=twopi / ip;
csarr[0]=1.;
csarr[1]=0.;
csarr[2]=csarr[2*ip-2]=cos(arg);
csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
for (i=2; i<=ip/2; ++i)
{
csarr[2*i]=csarr[2*i+1-1]=csarr[2*i];
csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
csarr[2*i+1]=sin(i*arg);
csarr[2*ip-2*i+1]=-csarr[2*i+1];
}
/* accumulate the transform for output components l and lc=ip-l */
for(l=1,lc=ip-1; l<ipph; l++,lc--)
{
ar1=csarr[2*l];
ai1=csarr[2*l+1];
for(ik=0; ik<idl1; ik++)
{
CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
CH2(ik,lc)=ai1*C2(ik,ip-1);
}
/* aidx walks through the table in steps of l, wrapping around at ip */
aidx=2*l;
for(j=2,jc=ip-2; j<ipph; j++,jc--)
{
aidx+=2*l;
if (aidx>=2*ip) aidx-=2*ip;
ar2=csarr[aidx];
ai2=csarr[aidx+1];
for(ik=0; ik<idl1; ik++)
{
CH2(ik,l )+=ar2*C2(ik,j );
CH2(ik,lc)+=ai2*C2(ik,jc);
}
}
}
DEALLOC(csarr);
/* component 0 is the plain sum over all inputs */
for(j=1; j<ipph; j++)
for(ik=0; ik<idl1; ik++)
CH2(ik,0)+=C2(ik,j);
/* copy the results from ch into the output layout of cc */
for(k=0; k<l1; k++)
memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
for(j=1; j<ipph; j++)
{
jc=ip-j;
j2=2*j;
for(k=0; k<l1; k++)
{
CC(ido-1,j2-1,k) = CH(0,k,j );
CC(0 ,j2 ,k) = CH(0,k,jc);
}
}
if(ido==1) return;
for(j=1; j<ipph; j++)
{
jc=ip-j;
j2=2*j;
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
ic=ido-i;
PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
PM (CC(i ,j2,k),CC(ic ,j2-1,k),CH(i ,k,jc),CH(i ,k,j ))
}
}
}
/* Index macros for the backward real passes below:
 *   CH(a,b,c) : ch viewed with extents (ido, l1, *)
 *   CC(a,b,c) : cc viewed with extents (ido, cdim, *)
 * ido, l1 and cdim must be in scope where the macros are used. */
#undef CC
#undef CH
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
/* Factor-2 pass of the backward real transform; rfftb1() dispatches here when
 * the factor being processed is 2.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * PM, MULPM and WA are macros defined earlier in this file (not shown here). */
static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=2;
size_t i, k, ic;
double ti2, tr2;
/* index 0 of every k */
for (k=0; k<l1; k++)
PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
/* even ido: the last index (ido-1) is handled separately */
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
CH(ido-1,k,0) = 2*CC(ido-1,0,k);
CH(ido-1,k,1) = -2*CC(0 ,1,k);
}
if (ido<=2) return;
/* remaining entries in pairs (i-1,i); ic is the mirrored index */
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k))
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
}
}
/* Factor-3 pass of the backward real transform; rfftb1() dispatches here when
 * the factor being processed is 3.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * PM, MULPM and WA are macros defined earlier in this file (not shown here). */
static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=3;
/* taur = cos(2*pi/3), taui = sin(2*pi/3) */
static const double taur=-0.5, taui=0.86602540378443864676;
size_t i, k, ic;
double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
/* index 0 of every k: no twiddle factors are read here */
for (k=0; k<l1; k++)
{
tr2=2*CC(ido-1,1,k);
cr2=CC(0,0,k)+taur*tr2;
CH(0,k,0)=CC(0,0,k)+tr2;
ci3=2*taui*CC(0,2,k);
PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
}
if (ido==1) return;
/* remaining entries in pairs (i-1,i); ic is the mirrored index */
for (k=0; k<l1; k++)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
tr2=CC(i-1,2,k)+CC(ic-1,1,k);
ti2=CC(i ,2,k)-CC(ic ,1,k);
cr2=CC(i-1,0,k)+taur*tr2;
ci2=CC(i ,0,k)+taur*ti2;
CH(i-1,k,0)=CC(i-1,0,k)+tr2;
CH(i ,k,0)=CC(i ,0,k)+ti2;
cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
ci3=taui*(CC(i ,2,k)+CC(ic ,1,k));
PM(dr3,dr2,cr2,ci3)
PM(di2,di3,ci2,cr3)
/* outputs 1 and 2 are combined with twiddle sets WA(0,...) and WA(1,...) */
MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
}
}
/* Factor-4 pass of the backward real transform; rfftb1() dispatches here when
 * the factor being processed is 4.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * PM, MULPM and WA are macros defined earlier in this file (not shown here). */
static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=4;
static const double sqrt2=1.41421356237309504880;
size_t i, k, ic;
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
/* index 0 of every k: no twiddle factors are read here */
for (k=0; k<l1; k++)
{
PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
tr3=2*CC(ido-1,1,k);
tr4=2*CC(0,2,k);
PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
}
/* even ido: the last index (ido-1) is handled separately */
if ((ido&1)==0)
for (k=0; k<l1; k++)
{
PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k))
PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
CH(ido-1,k,0)=tr2+tr2;
CH(ido-1,k,1)=sqrt2*(tr1-ti1);
CH(ido-1,k,2)=ti2+ti2;
CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
}
if (ido<=2) return;
/* remaining entries in pairs (i-1,i); ic is the mirrored index */
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k))
PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k))
PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
PM (CH(i-1,k,0),cr3,tr2,tr3)
PM (CH(i ,k,0),ci3,ti2,ti3)
PM (cr4,cr2,tr1,tr4)
PM (ci2,ci4,ti1,ti4)
/* outputs 1..3 are combined with twiddle sets WA(0..2,...) */
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
}
}
/* Factor-5 pass of the backward real transform; rfftb1() dispatches here when
 * the factor being processed is 5.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * PM, MULPM and WA are macros defined earlier in this file (not shown here). */
static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
const double *wa)
{
const size_t cdim=5;
/* tr11/ti11 = cos/sin(2*pi/5), tr12/ti12 = cos/sin(4*pi/5) */
static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
size_t i, k, ic;
double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
/* index 0 of every k: no twiddle factors are read here */
for (k=0; k<l1; k++)
{
ti5=2*CC(0,2,k);
ti4=2*CC(0,4,k);
tr2=2*CC(ido-1,1,k);
tr3=2*CC(ido-1,3,k);
CH(0,k,0)=CC(0,0,k)+tr2+tr3;
cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
}
if (ido==1) return;
/* remaining entries in pairs (i-1,i); ic is the mirrored index */
for (k=0; k<l1;++k)
for (i=2; i<ido; i+=2)
{
ic=ido-i;
PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k))
PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k))
CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
CH(i ,k,0)=CC(i ,0,k)+ti2+ti3;
cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3;
cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3;
MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
PM(dr4,dr3,cr3,ci4)
PM(di3,di4,ci3,cr4)
PM(dr5,dr2,cr2,ci5)
PM(di2,di5,ci2,cr5)
/* outputs 1..4 are combined with twiddle sets WA(0..3,...) */
MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
}
}
/* General-factor pass of the backward real transform; rfftb1() uses it for
 * every factor ip that has no dedicated radbN routine.
 *   ido, l1 : extents used by the index macros; idl1 is passed as ido*l1
 *   ip      : the factor handled by this pass
 *   cc, ch  : data arrays; BOTH are read and written by this routine
 *   wa      : twiddle factors
 * When ido==1 the routine returns with the result in ch; otherwise the final
 * loops store the result in cc (through the C1 macro).
 * PM and MULPM are macros defined earlier in this file (not shown here). */
static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
double *cc, double *ch, const double *wa)
{
const size_t cdim=ip;
static const double twopi=6.28318530717958647692;
size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
double ai1, ai2, ar1, ar2, arg;
double *csarr;
size_t aidx;
ipph=(ip+1)/ 2;
/* unpack cc (CC layout) into ch (CH layout) */
for(k=0; k<l1; k++)
memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
for(j=1; j<ipph; j++)
{
jc=ip-j;
j2=2*j;
for(k=0; k<l1; k++)
{
CH(0,k,j )=2*CC(ido-1,j2-1,k);
CH(0,k,jc)=2*CC(0 ,j2 ,k);
}
}
if(ido!=1)
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
ic=ido-i;
PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
PM (CH(i ,k,jc),CH(i ,k,j ),CC(i ,2*j,k),CC(ic ,2*j-1,k))
}
/* csarr[2*i] = cos(i*2*pi/ip), csarr[2*i+1] = sin(i*2*pi/ip) for i=0..ip-1;
   the upper half is filled from the lower half by symmetry */
csarr=RALLOC(double,2*ip);
arg=twopi/ip;
csarr[0]=1.;
csarr[1]=0.;
csarr[2]=csarr[2*ip-2]=cos(arg);
csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
for (i=2; i<=ip/2; ++i)
{
csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
csarr[2*i+1]=sin(i*arg);
csarr[2*ip-2*i+1]=-csarr[2*i+1];
}
for(l=1; l<ipph; l++)
{
lc=ip-l;
ar1=csarr[2*l];
ai1=csarr[2*l+1];
for(ik=0; ik<idl1; ik++)
{
C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
C2(ik,lc)=ai1*CH2(ik,ip-1);
}
/* aidx steps through the table in increments of 2*l, wrapping at 2*ip,
   i.e. it selects the entry for angle index (j*l) mod ip */
aidx=2*l;
for(j=2; j<ipph; j++)
{
jc=ip-j;
aidx+=2*l;
if (aidx>=2*ip) aidx-=2*ip;
ar2=csarr[aidx];
ai2=csarr[aidx+1];
for(ik=0; ik<idl1; ik++)
{
C2(ik,l )+=ar2*CH2(ik,j );
C2(ik,lc)+=ai2*CH2(ik,jc);
}
}
}
DEALLOC(csarr);
for(j=1; j<ipph; j++)
for(ik=0; ik<idl1; ik++)
CH2(ik,0)+=CH2(ik,j);
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
if(ido==1)
return;
for(j=1,jc=ip-1; j<ipph; j++,jc--)
for(k=0; k<l1; k++)
for(i=2; i<ido; i+=2)
{
PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i ,k,jc))
PM (CH(i ,k,j ),CH(i ,k,jc),C1(i ,k,j),C1(i-1,k,jc))
}
/* copy the j==0 plane and apply the twiddle factors to planes 1..ip-1,
   storing the final result in cc */
memcpy(cc,ch,idl1*sizeof(double));
for(j=1; j<ip; j++)
for(k=0; k<l1; k++)
{
C1(0,k,j)=CH(0,k,j);
idij=(j-1)*ido+1;
for(i=2; i<ido; i+=2,idij+=2)
MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
}
}
/* The index and arithmetic helper macros used by the pass routines above are
   removed here so that they do not leak into the driver code that follows. */
#undef CC
#undef CH
#undef PM
#undef MULPM
/*----------------------------------------------------------------------
cfft1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
----------------------------------------------------------------------*/
/* Driver for the complex transforms: runs one pass per factor stored in ifac[]
 * and leaves the result in c.
 *   n     : transform length
 *   c     : data array (input and final output)
 *   ch    : scratch array used alternately with c as pass destination
 *   wa    : twiddle factors; each pass reads them starting at its own offset
 *   ifac  : ifac[1] is the number of factors, ifac[2..] the factors themselves
 *   isign : >0 selects the passb* routines, otherwise the passf* routines */
static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
  const size_t ifac[], int isign)
  {
  const size_t nfct=ifac[1];
  const int backward=(isign>0);
  size_t stage, done=1, woff=0;
  cmplx *src=c, *dst=ch;

  for (stage=0; stage<nfct; ++stage)
    {
    const size_t radix=ifac[stage+2];
    const size_t upto=radix*done;
    const size_t ido=n/upto;
    const cmplx *tw=wa+woff;

    switch (radix)
      {
      case 2:
        if (backward) passb2(ido, done, src, dst, tw);
        else          passf2(ido, done, src, dst, tw);
        break;
      case 3:
        if (backward) passb3(ido, done, src, dst, tw);
        else          passf3(ido, done, src, dst, tw);
        break;
      case 4:
        if (backward) passb4(ido, done, src, dst, tw);
        else          passf4(ido, done, src, dst, tw);
        break;
      case 5:
        if (backward) passb5(ido, done, src, dst, tw);
        else          passf5(ido, done, src, dst, tw);
        break;
      case 6:
        if (backward) passb6(ido, done, src, dst, tw);
        else          passf6(ido, done, src, dst, tw);
        break;
      default:
        if (backward) passbg(ido, radix, done, src, dst, tw);
        else          passfg(ido, radix, done, src, dst, tw);
        break;
      }

    /* the pass wrote into dst, which becomes the source of the next pass */
    SWAP(src,dst,cmplx *);
    done=upto;
    woff+=(radix-1)*ido;
    }

  /* after an odd number of passes the result sits in the scratch array */
  if (src!=c)
    memcpy (c,src,n*sizeof(cmplx));
  }
void cfftf(size_t n, double c[], double wsave[])
{
if (n!=1)
cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
(size_t*)(wsave+4*n),-1);
}
void cfftb(size_t n, double c[], double wsave[])
{
if (n!=1)
cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
(size_t*)(wsave+4*n),+1);
}
/* Splits n into factors and stores the result in ifac:
 *   ifac[0] = n, ifac[1] = number of factors, ifac[2..] = the factors.
 * Trial divisors are taken from pf[0..npf-1] in order; once that list is
 * exhausted the search continues with (last divisor)+2, +4, ...
 * Whenever a factor 2 is found after other factors, it is moved to the front
 * of the list (ifac[2]).
 * For n<2 there is nothing to factorize: the factor count is set to 0 and the
 * function returns immediately (the search loop would otherwise not terminate
 * for n==0 and would write past the end of ifac). */
static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
  {
  size_t nl=n, nf=0, ntry=0, j=0, i;

  if (n<2)
    {
    ifac[0]=n;
    ifac[1]=0;
    return;
    }

  while (nl!=1)
    {
    /* pick the next trial divisor */
    ++j;
    ntry = (j<=npf) ? pf[j-1] : ntry+2;

    /* extract this divisor as often as it divides the remainder */
    while ((nl!=1) && (nl%ntry==0))
      {
      ++nf;
      ifac[nf+1]=ntry;
      nl/=ntry;
      if ((ntry==2) && (nf!=1))
        {
        /* shift the factors found so far up by one and put the 2 first */
        for (i=nf+1; i>2; --i)
          ifac[i]=ifac[i-1];
        ifac[2]=2;
        }
      }
    }

  ifac[0]=n;
  ifac[1]=nf;
  }
/* Initialises the tables for complex transforms of length n:
 * ifac receives the factorization of n (trial divisors 4,6,3,2,5, then
 * larger odd numbers) and wa receives (cos,sin) pairs for every factor.
 * For each factor ip and each j in 1..ip-1 a run of ido+1 pairs is written;
 * the last pair of a run occupies the slot where the next run starts. For
 * factors larger than 6 the first pair of each run (otherwise (1,0)) is
 * replaced by a copy of the run's last pair. */
static void cffti1(size_t n, double wa[], size_t ifac[])
  {
  static const size_t ntryh[5]={4,6,3,2,5};
  static const double twopi=6.28318530717958647692;
  const double argh=twopi/n;
  size_t pos=0, l1=1, stage;

  factorize (n,ntryh,5,ifac);
  for (stage=1; stage<=ifac[1]; ++stage)
    {
    const size_t ip=ifac[stage+1];
    const size_t ido=n/(l1*ip);
    size_t j;

    for (j=1; j<ip; ++j)
      {
      const size_t first=pos;
      const double argld=j*l1*argh;
      size_t fi;

      wa[first  ]=1;
      wa[first+1]=0;
      for (fi=1; fi<=ido; ++fi)
        {
        const double arg=fi*argld;
        wa[first+2*fi  ]=cos(arg);
        wa[first+2*fi+1]=sin(arg);
        }
      /* the next run starts on the last pair written above */
      pos=first+2*ido;
      if (ip>6)
        {
        wa[first  ]=wa[pos  ];
        wa[first+1]=wa[pos+1];
        }
      }
    l1*=ip;
    }
  }
/* Initialises the work array wsave for complex transforms of length n:
 * twiddle factors go to wsave+2*n, the factor table to wsave+4*n.
 * Does nothing for n==1. */
void cffti(size_t n, double wsave[])
  {
  if (n==1) return;
  cffti1(n, wsave+2*n, (size_t*)(wsave+4*n));
  }
/*----------------------------------------------------------------------
rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
----------------------------------------------------------------------*/
/* Driver for the forward real transform: walks the factor list in ifac[] from
 * the last factor to the first, running one pass per factor, and leaves the
 * result in c.
 *   n    : transform length
 *   c    : data array (input and final output)
 *   ch   : scratch array
 *   wa   : twiddle factors; each pass reads them starting at its own offset
 *   ifac : ifac[1] is the number of factors, ifac[2..] the factors */
static void rfftf1(size_t n, double c[], double ch[], const double wa[],
  const size_t ifac[])
  {
  const size_t nfct=ifac[1];
  size_t idx, l1=n, woff=n-1;
  double *in=ch, *out=c;

  for (idx=nfct+1; idx>=2; --idx)
    {
    const size_t ip=ifac[idx];
    const size_t ido=n/l1;
    const double *tw;

    l1/=ip;
    woff-=(ip-1)*ido;
    tw=wa+woff;
    /* the previous destination becomes the source of this pass */
    SWAP (in,out,double *);

    switch (ip)
      {
      case 2:
        radf2(ido, l1, in, out, tw);
        break;
      case 3:
        radf3(ido, l1, in, out, tw);
        break;
      case 4:
        radf4(ido, l1, in, out, tw);
        break;
      case 5:
        radf5(ido, l1, in, out, tw);
        break;
      default:
        /* the generic pass gets its buffers in swapped order when ido==1;
           the extra swap afterwards keeps the bookkeeping consistent */
        if (ido==1)
          SWAP (in,out,double *);
        radfg(ido, ip, l1, ido*l1, in, out, tw);
        SWAP (in,out,double *);
        break;
      }
    }

  if (in==c)
    memcpy (c,ch,n*sizeof(double));
  }
/* Driver for the backward real transform: walks the factor list in ifac[] from
 * the first factor to the last, running one pass per factor, and leaves the
 * result in c.
 *   n    : transform length
 *   c    : data array (input and final output)
 *   ch   : scratch array
 *   wa   : twiddle factors; each pass reads them starting at its own offset
 *   ifac : ifac[1] is the number of factors, ifac[2..] the factors */
static void rfftb1(size_t n, double c[], double ch[], const double wa[],
  const size_t ifac[])
  {
  const size_t last=ifac[1]+1;
  size_t idx, l1=1, woff=0;
  double *in=c, *out=ch;

  for (idx=2; idx<=last; ++idx)
    {
    const size_t ip=ifac[idx];
    const size_t ido=n/(ip*l1);
    const double *tw=wa+woff;

    switch (ip)
      {
      case 2:
        radb2(ido, l1, in, out, tw);
        break;
      case 3:
        radb3(ido, l1, in, out, tw);
        break;
      case 4:
        radb4(ido, l1, in, out, tw);
        break;
      case 5:
        radb5(ido, l1, in, out, tw);
        break;
      default:
        radbg(ido, ip, l1, ido*l1, in, out, tw);
        /* for ido!=1 this swap cancels the unconditional one below */
        if (ido!=1)
          SWAP (in,out,double *);
        break;
      }

    SWAP (in,out,double *);
    l1*=ip;
    woff+=(ip-1)*ido;
    }

  if (in!=c)
    memcpy (c,ch,n*sizeof(double));
  }
/* Forward real transform of length n on r, using the work array wsave
 * (scratch at wsave, twiddle factors at wsave+n, factor table at wsave+2*n).
 * Does nothing for n==1. */
void rfftf(size_t n, double r[], double wsave[])
  {
  if (n==1) return;
  rfftf1(n, r, wsave, wsave+n, (size_t*)(wsave+2*n));
  }
/* Backward real transform of length n on r, using the work array wsave
 * (scratch at wsave, twiddle factors at wsave+n, factor table at wsave+2*n).
 * Does nothing for n==1. */
void rfftb(size_t n, double r[], double wsave[])
  {
  if (n==1) return;
  rfftb1(n, r, wsave, wsave+n, (size_t*)(wsave+2*n));
  }
/* Initialises the tables for real transforms of length n:
 * ifac receives the factorization of n (trial divisors 4,2,3,5, then larger
 * odd numbers) and wa receives (cos,sin) pairs of twiddle factors.
 *   n    : transform length
 *   wa   : output, twiddle-factor table
 *   ifac : output, factor table as produced by factorize() */
static void rffti1(size_t n, double wa[], size_t ifac[])
{
static const size_t ntryh[4]={4,2,3,5};
static const double twopi=6.28318530717958647692;
size_t i, j, k, fi;
double argh=twopi/n;
size_t is=0, l1=1;
factorize (n,ntryh,4,ifac);
/* note the strict '<': no table entries are generated for the last factor */
for (k=1; k<ifac[1]; k++)
{
size_t ip=ifac[k+1],
ido=n/(l1*ip);
for (j=1; j<ip; ++j)
{
double argld=j*l1*argh;
/* NOTE(review): ido+is-3 is evaluated in size_t and would wrap around if
   ido+is<3; presumably the factor order produced by factorize() guarantees
   ido>=3 for every factor except the last - confirm before changing either. */
for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
{
double arg=fi*argld;
wa[i ]=cos(arg);
wa[i+1]=sin(arg);
}
/* each j owns a slice of ido entries in wa */
is+=ido;
}
l1*=ip;
}
}
/* Initialises the work array wsave for real transforms of length n:
 * twiddle factors go to wsave+n, the factor table to wsave+2*n.
 * Does nothing for n==1. */
void rffti(size_t n, double wsave[])
  {
  if (n==1) return;
  rffti1(n, wsave+n, (size_t*)(wsave+2*n));
  }

64
external/sharp/libfftpack/fftpack.h vendored Normal file
View file

@ -0,0 +1,64 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
fftpack.h : function declarations for fftpack.c
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
(Version 4, 1985).
Pekka Janhunen 23.2.1995
(reformatted by joerg arndt)
reformatted and slightly enhanced by Martin Reinecke (2004)
*/
#ifndef PLANCK_FFTPACK_H
#define PLANCK_FFTPACK_H
#include "c_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
/*! forward complex transform of length \a N, performed in place on
    \a complex_data (stored as r0, i0, r1, i1, ...).
    \a wrk must be the work array previously set up by cffti(N, wrk).
    The call does nothing for N==1. */
void cfftf(size_t N, double complex_data[], double wrk[]);
/*! backward complex transform of length \a N, performed in place on
    \a complex_data (stored as r0, i0, r1, i1, ...).
    \a wrk must be the work array previously set up by cffti(N, wrk).
    The call does nothing for N==1. */
void cfftb(size_t N, double complex_data[], double wrk[]);
/*! initializer for complex transforms: fills \a wrk with the twiddle factors
    and the factorization of \a N. The plan code in ls_fft.c allocates
    4*N+15 doubles for \a wrk. */
void cffti(size_t N, double wrk[]);
/*! forward real transform of length \a N, performed in place on \a data.
    \a wrk must be the work array previously set up by rffti(N, wrk).
    The call does nothing for N==1. */
void rfftf(size_t N, double data[], double wrk[]);
/*! backward real transform of length \a N, performed in place on \a data.
    \a wrk must be the work array previously set up by rffti(N, wrk).
    The call does nothing for N==1. */
void rfftb(size_t N, double data[], double wrk[]);
/*! initializer for real transforms: fills \a wrk with the twiddle factors
    and the factorization of \a N. The plan code in ls_fft.c allocates
    2*N+15 doubles for \a wrk. */
void rffti(size_t N, double wrk[]);
#ifdef __cplusplus
}
#endif
#endif

306
external/sharp/libfftpack/fftpack_inc.c vendored Normal file
View file

@ -0,0 +1,306 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
fftpack.c : A set of FFT routines in C.
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
(Version 4, 1985).
C port by Martin Reinecke (2010)
*/
/* Direction-dependent helpers. The routines below are compiled once with
   BACKWARD defined and once without; these three macros carry the difference:
     PSIGN      : sign applied to the sine constants (+ backward, - forward)
     PMSIGNC    : backward: a=c+d, b=c-d;  forward: a=c-d, b=c+d
     MULPMSIGNC : backward: a=b*c;         forward: a=conj(b)*c
   All operands are structs with members .r and .i. */
#ifdef BACKWARD
#define PSIGN +
#define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
/* a = b*c */
#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
#else
#define PSIGN -
#define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
/* a = conj(b)*c */
#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
#endif
/* Factor-2 pass of the complex transform.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * X, PMC, CC, CH and WA are macros supplied by the including file (not shown
 * here); X(2) presumably expands to passb2/passf2 depending on BACKWARD -
 * confirm against the includer. */
static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=2;
size_t k,i;
cmplx t;
/* ido==1: no twiddle factors are read */
if (ido==1)
for (k=0;k<l1;++k)
PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
else
for (k=0;k<l1;++k)
for (i=0;i<ido;++i)
{
PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
/* second output is combined with twiddle factor WA(0,i) */
MULPMSIGNC (CH(i,k,1),WA(0,i),t)
}
}
/* Factor-3 pass of the complex transform.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * X, PMC, ADDC, SCALEC, CONJFLIPC, CC, CH and WA are macros supplied by the
 * including file (not shown here). */
static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=3;
/* taur = cos(2*pi/3); taui = +/- sin(2*pi/3), sign given by PSIGN */
static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
size_t i, k;
cmplx c2, c3, d2, d3, t2;
/* ido==1: same computation as below but without twiddle factors */
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC (t2,c3,CC(0,1,k),CC(0,2,k))
ADDC (CH(0,k,0),t2,CC(0,0,k))
SCALEC(t2,taur)
ADDC(c2,CC(0,0,k),t2)
SCALEC(c3,taui)
CONJFLIPC(c3)
PMC(CH(0,k,1),CH(0,k,2),c2,c3)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC (t2,c3,CC(i,1,k),CC(i,2,k))
ADDC (CH(i,k,0),t2,CC(i,0,k))
SCALEC(t2,taur)
ADDC(c2,CC(i,0,k),t2)
SCALEC(c3,taui)
CONJFLIPC(c3)
PMC(d2,d3,c2,c3)
/* outputs 1 and 2 are combined with twiddle factors WA(0,i) and WA(1,i) */
MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
}
}
/* Factor-4 pass of the complex transform.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * X, PMC, CONJFLIPC, CC, CH and WA are macros supplied by the including file
 * (not shown here). */
static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=4;
size_t i, k;
cmplx c2, c3, c4, t1, t2, t3, t4;
/* ido==1: same computation as below but without twiddle factors */
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC(t2,t1,CC(0,0,k),CC(0,2,k))
PMC(t3,t4,CC(0,1,k),CC(0,3,k))
CONJFLIPC(t4)
PMC(CH(0,k,0),CH(0,k,2),t2,t3)
PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC(t2,t1,CC(i,0,k),CC(i,2,k))
PMC(t3,t4,CC(i,1,k),CC(i,3,k))
CONJFLIPC(t4)
PMC(CH(i,k,0),c3,t2,t3)
PMSIGNC (c2,c4,t1,t4)
/* outputs 1..3 are combined with twiddle factors WA(0..2,i) */
MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
}
}
/* Factor-5 pass of the complex transform.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * X, PMC, CONJFLIPC, CC, CH and WA are macros supplied by the including file
 * (not shown here). */
static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=5;
/* tr11/tr12 = cos(2*pi/5), cos(4*pi/5);
   ti11/ti12 = +/- sin(2*pi/5), sin(4*pi/5), sign given by PSIGN */
static const double tr11= 0.3090169943749474241,
ti11= PSIGN 0.95105651629515357212,
tr12=-0.8090169943749474241,
ti12= PSIGN 0.58778525229247312917;
size_t i, k;
cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
/* ido==1: same computation as below but without twiddle factors */
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC (t2,t5,CC(0,1,k),CC(0,4,k))
PMC (t3,t4,CC(0,2,k),CC(0,3,k))
CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
c5.r=ti11*t5.r+ti12*t4.r;
c5.i=ti11*t5.i+ti12*t4.i;
c4.r=ti12*t5.r-ti11*t4.r;
c4.i=ti12*t5.i-ti11*t4.i;
CONJFLIPC(c5)
PMC(CH(0,k,1),CH(0,k,4),c2,c5)
CONJFLIPC(c4)
PMC(CH(0,k,2),CH(0,k,3),c3,c4)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC (t2,t5,CC(i,1,k),CC(i,4,k))
PMC (t3,t4,CC(i,2,k),CC(i,3,k))
CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
c5.r=ti11*t5.r+ti12*t4.r;
c5.i=ti11*t5.i+ti12*t4.i;
c4.r=ti12*t5.r-ti11*t4.r;
c4.i=ti12*t5.i-ti11*t4.i;
CONJFLIPC(c5)
PMC(d2,d5,c2,c5)
CONJFLIPC(c4)
PMC(d3,d4,c3,c4)
/* outputs 1..4 are combined with twiddle factors WA(0..3,i) */
MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
}
}
/* Factor-6 pass of the complex transform. Inputs 0,2,4 are combined into
 * a0,a1,a2 and inputs 3,5,1 into b0,b1,b2 before the two groups are merged.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors, accessed through the WA macro
 * X, PMC, ADDC, SCALEC, CONJFLIPC, CC, CH and WA are macros supplied by the
 * including file (not shown here). */
static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=6;
/* +/- sin(2*pi/3), sign given by PSIGN */
static const double taui= PSIGN 0.86602540378443864676;
cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
size_t i, k;
/* ido==1: same computation as below but without twiddle factors */
if (ido==1)
for (k=0; k<l1; ++k)
{
PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
ta2.r = CC(0,0,k).r - .5*ta1.r;
ta2.i = CC(0,0,k).i - .5*ta1.i;
SCALEC(ta3,taui)
ADDC(a0,CC(0,0,k),ta1)
CONJFLIPC(ta3)
PMC(a1,a2,ta2,ta3)
PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
tb2.r = CC(0,3,k).r - .5*tb1.r;
tb2.i = CC(0,3,k).i - .5*tb1.i;
SCALEC(tb3,taui)
ADDC(b0,CC(0,3,k),tb1)
CONJFLIPC(tb3)
PMC(b1,b2,tb2,tb3)
PMC(CH(0,k,0),CH(0,k,3),a0,b0)
PMC(CH(0,k,4),CH(0,k,1),a1,b1)
PMC(CH(0,k,2),CH(0,k,5),a2,b2)
}
else
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
ta2.r = CC(i,0,k).r - .5*ta1.r;
ta2.i = CC(i,0,k).i - .5*ta1.i;
SCALEC(ta3,taui)
ADDC(a0,CC(i,0,k),ta1)
CONJFLIPC(ta3)
PMC(a1,a2,ta2,ta3)
PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
tb2.r = CC(i,3,k).r - .5*tb1.r;
tb2.i = CC(i,3,k).i - .5*tb1.i;
SCALEC(tb3,taui)
ADDC(b0,CC(i,3,k),tb1)
CONJFLIPC(tb3)
PMC(b1,b2,tb2,tb3)
PMC(CH(i,k,0),d3,a0,b0)
PMC(d4,d1,a1,b1)
PMC(d2,d5,a2,b2)
/* outputs 1..5 are combined with twiddle factors WA(0..4,i) */
MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
}
}
/* General-factor pass of the complex transform, used for factors without a
 * dedicated routine.
 *   ido, l1 : extents used as loop bounds and by the CC/CH index macros
 *   ip      : the factor handled by this pass
 *   cc      : input array (read only)
 *   ch      : output array
 *   wa      : twiddle factors
 * A temporary array of 2*ip cmplx values is allocated for the duration of the
 * call. X, PMC, ADDC, CONJFLIPC, CC and CH are macros supplied by the
 * including file (not shown here). */
static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
const cmplx *wa)
{
const size_t cdim=ip;
/* tarr is split in two halves: ccl (per-point work values) and wal
   (one twiddle factor per j, gathered from wa below) */
cmplx *tarr=RALLOC(cmplx,2*ip);
cmplx *ccl=tarr, *wal=tarr+ip;
size_t i,j,k,l,jc,lc;
size_t ipph = (ip+1)/2;
/* wal[0] is never assigned; only wal[1..ip-1] are filled */
for (i=1; i<ip; ++i)
wal[i]=wa[ido*(i-1)];
for (k=0; k<l1; ++k)
for (i=0; i<ido; ++i)
{
/* s accumulates the sum that becomes output 0 */
cmplx s=CC(i,0,k);
ccl[0] = CC(i,0,k);
for(j=1,jc=ip-1; j<ipph; ++j,--jc)
{
PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
ADDC (s,s,ccl[j])
}
CH(i,k,0) = s;
/* NOTE(review): for odd ip the j==ipph iteration targets the same two
   outputs (ipph and ipph-1) as the j==ipph-1 iteration - confirm that the
   inclusive bound is intentional. */
for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
{
cmplx abr=ccl[0], abi={0.,0.};
/* iang advances by j for every l and is reduced by ip once it exceeds ip */
size_t iang=0;
for (l=1,lc=ip-1; l<ipph; ++l,--lc)
{
iang+=j;
if (iang>ip) iang-=ip;
abr.r += ccl[l ].r*wal[iang].r;
abr.i += ccl[l ].i*wal[iang].r;
abi.r += ccl[lc].r*wal[iang].i;
abi.i += ccl[lc].i*wal[iang].i;
}
/* the forward variant negates abi before it is used */
#ifndef BACKWARD
{ abi.i=-abi.i; abi.r=-abi.r; }
#endif
CONJFLIPC(abi)
PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
}
}
DEALLOC(tarr);
if (ido==1) return;
/* combine outputs 1..ip-1 with the twiddle factors for every index i>=1 */
for (j=1; j<ip; ++j)
for (k=0; k<l1; ++k)
{
size_t idij=(j-1)*ido+1;
for(i=1; i<ido; ++i, ++idij)
{
cmplx t=CH(i,k,j);
MULPMSIGNC (CH(i,k,j),wa[idij],t)
}
}
}
/* Remove the direction-dependent helpers so that this file can be included
   again with the opposite BACKWARD setting. */
#undef PSIGN
#undef PMSIGNC
#undef MULPMSIGNC

View file

@ -0,0 +1,5 @@
/*! \mainpage Libfftpack documentation
<ul>
<li>\ref fftgroup "Programming interface"
</ul>
*/

291
external/sharp/libfftpack/ls_fft.c vendored Normal file
View file

@ -0,0 +1,291 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Copyright (C) 2005 Max-Planck-Society
* \author Martin Reinecke
*/
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "bluestein.h"
#include "fftpack.h"
#include "ls_fft.h"
/* Creates a plan for complex FFTs with `length` elements.
 * Two cost estimates are compared: one for the FFTPACK routines (length times
 * the value returned by prime_factor_sum) and one for Bluestein's algorithm.
 * If the Bluestein estimate is lower, the plan is set up via bluestein_i();
 * otherwise a work array of 4*length+15 doubles is allocated and initialised
 * with cffti(). The caller owns the returned plan. */
complex_plan make_complex_plan (size_t length)
  {
  complex_plan result = RALLOC(complex_plan_i,1);
  const size_t factor_sum = prime_factor_sum(length);
  const double cost_fftpack = (double)(length*factor_sum);
  double cost_bluestein = 2*3*length*log(3.*length);

  cost_bluestein*=3.; /* fudge factor that appears to give good overall performance */

  result->length=length;
  result->bluestein = (cost_bluestein<cost_fftpack);
  if (!result->bluestein)
    {
    result->worksize=4*length+15;
    result->work=RALLOC(double,4*length+15);
    cffti(length, result->work);
    }
  else
    bluestein_i (length,&(result->work),&(result->worksize));
  return result;
  }
/* Returns a deep copy of `plan` (the work array is duplicated as well),
 * or NULL if `plan` is NULL. The caller owns the returned plan. */
complex_plan copy_complex_plan (complex_plan plan)
  {
  complex_plan clone;

  if (plan==NULL)
    return NULL;

  clone = RALLOC(complex_plan_i,1);
  *clone = *plan;
  /* the struct copy above duplicated the pointer only; give the clone its
     own work array */
  clone->work=RALLOC(double,clone->worksize);
  memcpy(clone->work,plan->work,sizeof(double)*clone->worksize);
  return clone;
  }
/* Releases a complex-FFT plan together with its work array.
 * Passing NULL is allowed and does nothing; copy_complex_plan() can return
 * NULL, so callers may legitimately hold a NULL plan. */
void kill_complex_plan (complex_plan plan)
  {
  if (!plan) return;
  DEALLOC(plan->work);
  DEALLOC(plan);
  }
/* Forward complex FFT of `data` in place, using whichever algorithm was
 * selected when `plan` was created. */
void complex_plan_forward (complex_plan plan, double *data)
  {
  if (!plan->bluestein)
    {
    cfftf (plan->length, data, plan->work);
    return;
    }
  bluestein (plan->length, data, plan->work, -1);
  }
/* Backward complex FFT of `data` in place, using whichever algorithm was
 * selected when `plan` was created. */
void complex_plan_backward (complex_plan plan, double *data)
  {
  if (!plan->bluestein)
    {
    cfftb (plan->length, data, plan->work);
    return;
    }
  bluestein (plan->length, data, plan->work, 1);
  }
/* Creates a plan for real FFTs with `length` elements.
 * Two cost estimates are compared: one for the FFTPACK routines (half of
 * length times the value returned by prime_factor_sum) and one for
 * Bluestein's algorithm. If the Bluestein estimate is lower, the plan is set
 * up via bluestein_i(); otherwise a work array of 2*length+15 doubles is
 * allocated and initialised with rffti(). The caller owns the returned plan. */
real_plan make_real_plan (size_t length)
  {
  real_plan result = RALLOC(real_plan_i,1);
  const size_t factor_sum = prime_factor_sum(length);
  const double cost_fftpack = .5*length*factor_sum;
  double cost_bluestein = 2*3*length*log(3.*length);

  cost_bluestein*=3; /* fudge factor that appears to give good overall performance */

  result->length=length;
  result->bluestein = (cost_bluestein<cost_fftpack);
  if (!result->bluestein)
    {
    result->worksize=2*length+15;
    result->work=RALLOC(double,2*length+15);
    rffti(length, result->work);
    }
  else
    bluestein_i (length,&(result->work),&(result->worksize));
  return result;
  }
/* Returns a deep copy of `plan` (the work array is duplicated as well),
 * or NULL if `plan` is NULL. The caller owns the returned plan. */
real_plan copy_real_plan (real_plan plan)
  {
  real_plan clone;

  if (plan==NULL)
    return NULL;

  clone = RALLOC(real_plan_i,1);
  *clone = *plan;
  /* the struct copy above duplicated the pointer only; give the clone its
     own work array */
  clone->work=RALLOC(double,clone->worksize);
  memcpy(clone->work,plan->work,sizeof(double)*clone->worksize);
  return clone;
  }
/* Releases a real-FFT plan together with its work array.
 * Passing NULL is allowed and does nothing; copy_real_plan() can return NULL,
 * so callers may legitimately hold a NULL plan. */
void kill_real_plan (real_plan plan)
  {
  if (!plan) return;
  DEALLOC(plan->work);
  DEALLOC(plan);
  }
/* Forward real FFT of `data` in place, producing the FFTPACK storage scheme.
 * Plans that use FFTPACK call rfftf() directly. Bluestein plans expand the
 * input into a temporary complex array (imaginary parts zero), transform it,
 * and copy the first `length` values of the result (skipping the imaginary
 * part of element 0) back into `data`. */
void real_plan_forward_fftpack (real_plan plan, double *data)
  {
  size_t n, idx;
  double *cplx;

  if (!plan->bluestein)
    {
    rfftf (plan->length, data, plan->work);
    return;
    }

  n=plan->length;
  cplx = RALLOC(double,2*n);
  for (idx=0; idx<n; ++idx)
    {
    cplx[2*idx] = data[idx];
    cplx[2*idx+1] = 0.;
    }
  bluestein(n,cplx,plan->work,-1);
  data[0] = cplx[0];
  memcpy (data+1, cplx+2, (n-1)*sizeof(double));
  DEALLOC(cplx);
  }
/* Reorders `data` (n values) in place: the value at index 0 stays, the values
 * at odd indices 1,3,5,... move to the front positions 1,2,3,... and the
 * values at even indices 2,4,6,... move to the back positions n-1,n-2,...
 * For even n the last input value ends up in the middle (index n/2).
 * A temporary array of n doubles is used. */
static void fftpack2halfcomplex (double *data, size_t n)
  {
  const size_t half=(n+1)/2;
  double *reordered = RALLOC(double,n);
  size_t idx;

  reordered[0]=data[0];
  for (idx=1; idx<half; ++idx)
    {
    reordered[idx]  =data[2*idx-1];
    reordered[n-idx]=data[2*idx];
    }
  if ((n&1)==0)
    reordered[n/2]=data[n-1];

  memcpy (data,reordered,n*sizeof(double));
  DEALLOC(reordered);
  }
/* Reorders `data` (n values) in place: the value at index 0 stays, the values
 * at front positions 1,2,3,... move to odd indices 1,3,5,... and the values
 * at back positions n-1,n-2,... move to even indices 2,4,6,...
 * For even n the middle value (index n/2) ends up last (index n-1).
 * A temporary array of n doubles is used. */
static void halfcomplex2fftpack (double *data, size_t n)
  {
  const size_t half=(n+1)/2;
  double *reordered = RALLOC(double,n);
  size_t idx;

  reordered[0]=data[0];
  for (idx=1; idx<half; ++idx)
    {
    reordered[2*idx-1]=data[idx];
    reordered[2*idx]  =data[n-idx];
    }
  if ((n&1)==0)
    reordered[n-1]=data[n/2];

  memcpy (data,reordered,n*sizeof(double));
  DEALLOC(reordered);
  }
/* Forward real FFT of `data` in place with halfcomplex output: runs the
 * FFTPACK-layout transform and then reorders the result. */
void real_plan_forward_fftw (real_plan plan, double *data)
  {
  const size_t n=plan->length;

  real_plan_forward_fftpack (plan, data);
  fftpack2halfcomplex (data,n);
  }
/* Backward real FFT of `data` in place, taking input in the FFTPACK storage
 * scheme. Plans that use FFTPACK call rfftb() directly. Bluestein plans build
 * a full complex array of 2*length doubles in a temporary buffer, transform
 * it and copy the real parts back into `data`. */
void real_plan_backward_fftpack (real_plan plan, double *data)
{
if (plan->bluestein)
{
size_t m;
size_t n=plan->length;
double *tmp = RALLOC(double,2*n);
/* element 0: real part from data[0], imaginary part zero */
tmp[0]=data[0];
tmp[1]=0.;
memcpy (tmp+2,data+1, (n-1)*sizeof(double));
/* even n: the copy above supplies only the real part of element n/2 */
if ((n&1)==0) tmp[n+1]=0.;
/* fill the upper half with the complex conjugates of the lower half */
for (m=2; m<n; m+=2)
{
tmp[2*n-m]=tmp[m];
tmp[2*n-m+1]=-tmp[m+1];
}
bluestein (n, tmp, plan->work, 1);
/* keep the real parts only */
for (m=0; m<n; ++m)
data[m] = tmp[2*m];
DEALLOC(tmp);
}
else
rfftb (plan->length, data, plan->work);
}
/* Backward real FFT of `data` in place with halfcomplex input: reorders the
 * input into the FFTPACK layout and then runs the FFTPACK-layout transform. */
void real_plan_backward_fftw (real_plan plan, double *data)
  {
  const size_t n=plan->length;

  halfcomplex2fftpack (data,n);
  real_plan_backward_fftpack (plan, data);
  }
/* Forward real FFT using full-complex storage: `data` holds 2*length doubles;
 * on entry only the values at even indices are used, on exit the array holds
 * the complete complex spectrum as (re,im) pairs. */
void real_plan_forward_c (real_plan plan, double *data)
{
size_t m;
size_t n=plan->length;
if (plan->bluestein)
{
/* clear the imaginary parts, then run a complex transform */
for (m=1; m<2*n; m+=2)
data[m]=0;
bluestein (plan->length, data, plan->work, -1);
data[1]=0;
/* make each element and its mirror partner exact complex conjugates by
   averaging the two */
for (m=2; m<n; m+=2)
{
double avg;
avg = 0.5*(data[2*n-m]+data[m]);
data[2*n-m] = data[m] = avg;
avg = 0.5*(data[2*n-m+1]-data[m+1]);
data[2*n-m+1] = avg;
data[m+1] = -avg;
}
if ((n&1)==0) data[n+1] = 0.;
}
else
{
/* compact the real parts into data[1..n] so rfftf can work on data+1 */
/* using "m+m" instead of "2*m" to avoid a nasty bug in Intel's compiler */
for (m=0; m<n; ++m) data[m+1] = data[m+m];
rfftf (n, data+1, plan->work);
/* element 0: move its real part to data[0], imaginary part is zero */
data[0] = data[1];
data[1] = 0;
/* fill the upper half with the complex conjugates of the lower half */
for (m=2; m<n; m+=2)
{
data[2*n-m] = data[m];
data[2*n-m+1] = -data[m+1];
}
if ((n&1)==0) data[n+1] = 0.;
}
}
/* Backward real FFT using full-complex storage: `data` holds 2*length doubles
 * as (re,im) pairs on entry; on exit the even indices hold the real result
 * and the odd indices are zero. */
void real_plan_backward_c (real_plan plan, double *data)
{
size_t n=plan->length;
if (plan->bluestein)
{
size_t m;
data[1]=0;
/* make each element and its mirror partner exact complex conjugates by
   averaging the two before transforming */
for (m=2; m<n; m+=2)
{
double avg;
avg = 0.5*(data[2*n-m]+data[m]);
data[2*n-m] = data[m] = avg;
avg = 0.5*(data[2*n-m+1]-data[m+1]);
data[2*n-m+1] = avg;
data[m+1] = -avg;
}
if ((n&1)==0) data[n+1] = 0.;
bluestein (plan->length, data, plan->work, 1);
/* discard the imaginary parts of the result */
for (m=1; m<2*n; m+=2)
data[m]=0;
}
else
{
/* signed index: the loop below counts down to 0 and tests m>=0 */
ptrdiff_t m;
/* overwrite data[1] with data[0] so that rfftb can work on data+1 */
data[1] = data[0];
rfftb (n, data+1, plan->work);
/* spread the n results out to the even indices, back to front so that no
   value is overwritten before it has been read */
for (m=n-1; m>=0; --m)
{
data[2*m] = data[m+1];
data[2*m+1] = 0.;
}
}
}

162
external/sharp/libfftpack/ls_fft.h vendored Normal file
View file

@ -0,0 +1,162 @@
/*
* This file is part of libfftpack.
*
* libfftpack is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libfftpack is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libfftpack; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file ls_fft.h
* Interface for the LevelS FFT package.
*
* Copyright (C) 2004 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_LS_FFT_H
#define PLANCK_LS_FFT_H
#include "c_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
/*!\defgroup fftgroup FFT interface
This package is intended to calculate one-dimensional real or complex FFTs
with high accuracy and good efficiency even for lengths containing large
prime factors.
The code is written in C, but a Fortran wrapper exists as well.
Before any FFT is executed, a plan must be generated for it. Plan creation
is designed to be fast, so that there is no significant overhead if the
plan is only used once or a few times.
The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
double precision incarnation by Hugh C. Pumphrey
(http://www.netlib.org/fftpack/dp.tgz).
I replaced the iterative sine and cosine calculations in radfg() and radbg()
by an exact calculation, which slightly improves the transform accuracy for
real FFTs with lengths containing large prime factors.
Since FFTPACK becomes quite slow for FFT lengths with large prime factors
(in the worst case of prime lengths it reaches \f$\mathcal{O}(n^2)\f$
complexity), I implemented Bluestein's algorithm, which computes a FFT of length
\f$n\f$ by several FFTs of length \f$n_2\ge 2n-1\f$ and a convolution. Since
\f$n_2\f$ can be chosen to be highly composite, this algorithm is more efficient
if \f$n\f$ has large prime factors. The longer FFTs themselves are then computed
using the FFTPACK routines.
Bluestein's algorithm was implemented according to the description on Wikipedia
(<a href="http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm">
http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm</a>).
\b Thread-safety:
All routines can be called concurrently; all information needed by
<tt>ls_fft</tt> is stored in the plan variable. However, using the same plan
variable on multiple threads simultaneously is not supported and will lead to
data corruption.
*/
/*! \{ */
/* Plan data for complex FFTs. */
typedef struct
{
double *work; /* work array, allocated when the plan is created */
size_t length, worksize; /* transform length; number of doubles in work */
int bluestein; /* nonzero if the plan uses Bluestein's algorithm */
} complex_plan_i;
/*! The opaque handle type for complex-FFT plans. */
typedef complex_plan_i * complex_plan;
/*! Returns a plan for a complex FFT with \a length elements. */
complex_plan make_complex_plan (size_t length);
/*! Constructs a copy of \a plan. */
complex_plan copy_complex_plan (complex_plan plan);
/*! Destroys a plan for a complex FFT. */
void kill_complex_plan (complex_plan plan);
/*! Computes a complex forward FFT on \a data, using \a plan.
\a Data has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>. */
void complex_plan_forward (complex_plan plan, double *data);
/*! Computes a complex backward FFT on \a data, using \a plan.
\a Data has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>. */
void complex_plan_backward (complex_plan plan, double *data);
/* Plan data for real FFTs. */
typedef struct
{
double *work; /* work array, allocated when the plan is created */
size_t length, worksize; /* transform length; number of doubles in work */
int bluestein; /* nonzero if the plan uses Bluestein's algorithm */
} real_plan_i;
/*! The opaque handle type for real-FFT plans. */
typedef real_plan_i * real_plan;
/*! Returns a plan for a real FFT with \a length elements. */
real_plan make_real_plan (size_t length);
/*! Constructs a copy of \a plan. */
real_plan copy_real_plan (real_plan plan);
/*! Destroys a plan for a real FFT. */
void kill_real_plan (real_plan plan);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming the FFTPACK storage scheme:
- on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
- on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt>
(a total of \a length values). */
void real_plan_forward_fftpack (real_plan plan, double *data);
/*! Computes a real backward FFT on \a data, using \a plan
and assuming the FFTPACK storage scheme:
- on entry, \a data has the form <tt>r0, r1, i1, r2, i2, ...</tt>
(a total of \a length values);
- on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
void real_plan_backward_fftpack (real_plan plan, double *data);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming the FFTW halfcomplex storage scheme:
- on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
- on exit, it has the form <tt>r0, r1, r2, ..., i2, i1</tt>. */
void real_plan_forward_fftw (real_plan plan, double *data);
/*! Computes a real backward FFT on \a data, using \a plan
and assuming the FFTW halfcomplex storage scheme:
- on entry, \a data has the form <tt>r0, r1, r2, ..., i2, i1</tt>.
- on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
void real_plan_backward_fftw (real_plan plan, double *data);
/*! Computes a real forward FFT on \a data, using \a plan
and assuming a full-complex storage scheme:
- on entry, \a data has the form <tt>r0, [ignored], r1, [ignored], ...,
r[length-1], [ignored]</tt>;
- on exit, it has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>.
*/
void real_plan_forward_c (real_plan plan, double *data);
/*! Computes a real backward FFT on \a data, using \a plan
and assuming a full-complex storage scheme:
- on entry, \a data has the form <tt>r0, i0, r1, i1, ...,
r[length-1], i[length-1]</tt>;
- on exit, it has the form <tt>r0, 0, r1, 0, ..., r[length-1], 0</tt>. */
void real_plan_backward_c (real_plan plan, double *data);
/*! \} */
#ifdef __cplusplus
}
#endif
#endif

21
external/sharp/libfftpack/planck.make vendored Normal file
View file

@ -0,0 +1,21 @@
# Build fragment for the libfftpack package.
# SRCROOT, BLDROOT, LIBDIR, HDR_c_utils and the all_hdr/all_lib accumulators
# are not defined here; presumably the including makefile provides them.
PKG:=libfftpack
# Source directory and object directory of this package.
SD:=$(SRCROOT)/$(PKG)
OD:=$(BLDROOT)/$(PKG)
# Make the package headers reachable through the global include flags.
FULL_INCLUDE+= -I$(SD)
# Header pattern of the package and the static library built from it.
HDR_$(PKG):=$(SD)/*.h
LIB_$(PKG):=$(LIBDIR)/libfftpack.a
# Object file names; the second assignment prefixes each with the object dir.
OBJ:=fftpack.o bluestein.o ls_fft.o
OBJ:=$(OBJ:%=$(OD)/%)
# Prerequisites shared by all objects: own headers plus the c_utils headers.
ODEP:=$(HDR_$(PKG)) $(HDR_c_utils)
# fftpack.o additionally depends on fftpack_inc.c.
$(OD)/fftpack.o: $(SD)/fftpack_inc.c
# $(OD)_mkdir is an order-only prerequisite (listed after the '|').
$(OBJ): $(ODEP) | $(OD)_mkdir
$(LIB_$(PKG)): $(OBJ)
# Register this package's headers and library in the global lists.
all_hdr+=$(HDR_$(PKG))
all_lib+=$(LIB_$(PKG))

94
external/sharp/libsharp/libsharp.dox vendored Normal file
View file

@ -0,0 +1,94 @@
/*! \mainpage libsharp documentation
<ul>
<li>\ref introduction "Introduction"
<li><a href="modules.html">Programming interface</a>
</ul>
*/
/*! \page introduction Introduction to libsharp
"SHARP" is an acronym for <i>Performant Spherical Harmonic Transforms</i>.
All user-visible data types and functions in this library start with
the prefix "sharp_", or with "sharps_" and "sharpd_" for single- and
double precision variants, respectively.
<i>libsharp</i>'s main functionality is the conversion between <i>maps</i>
on the sphere and <i>spherical harmonic coefficients</i> (or <i>a_lm</i>).
A map is defined as a set of <i>rings</i>, which in turn consist of
individual pixels that
<ul>
<li>all have the same colatitude and</li>
<li>are uniformly spaced in azimuthal direction.</li>
</ul>
Consequently, a ring is completely defined by
<ul>
<li>its colatitude (in radians)</li>
<li>the number of pixels it contains</li>
<li>the azimuth (in radians) of the first pixel in the ring</li>
<li>the weight by which every pixel must be multiplied during a map
analysis (typically the solid angle of a pixel in the ring)</li>
<li>the offset of the first ring pixel in the <i>map array</i></li>
<li>the stride between consecutive pixels in the ring.</li>
</ul>
The map array is a one-dimensional array of type <i>float</i> or
<i>double</i>, which contains the values of all map pixels. It is assumed
that the pixels of every ring are stored inside this array in order of
increasing azimuth and with the specified stride. Note however that the rings
themselves can be stored in any order inside the array.
The a_lm array is a one-dimensional array of type <i>complex float</i> or
<i>complex double</i>, which contains all spherical harmonic coefficients
for a full or partial set of m quantum numbers with 0<=m<=mmax and m<=l<=lmax.
There is only one constraint on the internal structure of the array, which is:
<code>Index[a_l+1,m] = Index[a_l,m] + stride</code>
That means that coefficients with identical <i>m</i> but different <i>l</i>
can be interpreted as a one-dimensional array in <i>l</i> with a unique
stride.
Several functions are provided for efficient index computation in this array;
they are documented \ref almgroup "here".
Information about a pixelisation of the sphere is stored in objects of
type sharp_geom_info. It is possible to create such an object for any
supported pixelisation by using the function sharp_make_geom_info();
however, several easier-to-use functions are \ref geominfogroup "supplied"
for generating often-used pixelisations like ECP grids, Gaussian grids,
and Healpix grids.
Currently, SHARP supports the following kinds of transforms:
<ul>
<li>scalar a_lm to map</li>
<li>scalar map to a_lm</li>
<!-- <li>polarised a_lm to map</li>
<li>polarised map to a_lm</li> !-->
<li>spin a_lm to map</li>
<li>spin map to a_lm</li>
<li>scalar a_lm to maps of first derivatives</li>
</ul>
SHARP supports shared-memory parallelisation via OpenMP; this feature will
be automatically enabled if the compiler supports it.
SHARP will also make use of SSE2 and AVX instructions when compiled for a
platform known to support them.
Support for MPI-parallel transforms is also available; in this mode,
every MPI task must provide a unique subset of the map and a_lm coefficients.
The spherical harmonic transforms can be executed on double-precision and
single-precision maps and a_lm, but for accuracy reasons the computations
will always be performed in double precision. As a consequence,
single-precision transforms will most likely not be faster than their
double-precision counterparts, but they will require significantly less
memory.
Two example and benchmark programs are distributed with SHARP:
<ul>
<li>sharp_test.c checks the accuracy of the (iterative) map analysis
algorithm</li>
<li>sharp_bench.c determines the quickest transform strategy for a given
SHT</li>
</ul>
*/

29
external/sharp/libsharp/planck.make vendored Normal file
View file

@ -0,0 +1,29 @@
# Build fragment for the libsharp package.
# SRCROOT, BLDROOT, LIBDIR, BINDIR, the HDR_*/LIB_* variables of libfftpack and
# c_utils, and the all_* accumulators are not defined here; presumably the
# including makefile and the other package fragments provide them.
PKG:=libsharp
# Source directory and object directory of this package.
SD:=$(SRCROOT)/$(PKG)
OD:=$(BLDROOT)/$(PKG)
# Make the package headers reachable through the global include flags.
FULL_INCLUDE+= -I$(SD)
# Header pattern of the package and the static library built from it.
HDR_$(PKG):=$(SD)/*.h
LIB_$(PKG):=$(LIBDIR)/libsharp.a
# Executables of this package.
BIN:=sharp_test sharp_acctest sharp_test_mpi sharp_bench sharp_bench2
# LIBOBJ: objects that go into the library; ALLOBJ: those plus one object per executable.
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o
ALLOBJ:=$(LIBOBJ) sharp_test.o sharp_acctest.o sharp_test_mpi.o sharp_bench.o sharp_bench2.o
# Prefix both lists with the object directory.
LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
# Prerequisites shared by all objects: own headers plus those of libfftpack and c_utils.
ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
# Extra prerequisites: .c files listed here in addition to the common headers.
$(OD)/sharp_core.o: $(SD)/sharp_inchelper1.inc.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c $(SD)/sharp_core_inc3.c
$(OD)/sharp.o: $(SD)/sharp_mpi.c
# Libraries every executable depends on.
BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
$(LIB_$(PKG)): $(LIBOBJ)
# $(OD)_mkdir is an order-only prerequisite (listed after the '|').
$(ALLOBJ): $(ODEP) | $(OD)_mkdir
# Prefix executables with the binary dir; the static pattern rule below makes
# each one depend on its own object file and on the libraries in BDEP.
BIN:=$(BIN:%=$(BINDIR)/%)
$(BIN): $(BINDIR)/% : $(OD)/%.o $(BDEP)
# Register headers, library and executables in the global lists.
all_hdr+=$(HDR_$(PKG))
all_lib+=$(LIB_$(PKG))
all_cbin+=$(BIN)

669
external/sharp/libsharp/sharp.c vendored Normal file
View file

@ -0,0 +1,669 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp.c
* Spherical transform library
*
* Copyright (C) 2006-2012 Max-Planck-Society
* \author Martin Reinecke
*/
#include <math.h>
#include "ls_fft.h"
#include "sharp_ylmgen_c.h"
#include "sharp_internal.h"
#include "c_utils.h"
#include "sharp_core.h"
#include "sharp_vecutil.h"
#include "walltime_c.h"
#include "sharp_almhelpers.h"
#include "sharp_geomhelpers.h"
typedef complex double dcmplx;
typedef complex float fcmplx;
/* Splits ndata work items into chunks.
   On return *chunksize is a multiple of nmult, at least 500 and large enough
   that no more than 10 chunks are needed (before rounding), and *nchunks is
   the number of chunks of that size required to cover ndata items. */
static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
  {
  static const int chunksize_min=500, nchunks_max=10;
  /* smallest size that keeps the chunk count at or below nchunks_max */
  int csize = IMAX(chunksize_min,(ndata+nchunks_max-1)/nchunks_max);
  /* round up to the next multiple of nmult */
  csize = ((csize+nmult-1)/nmult)*nmult;
  *chunksize = csize;
  *nchunks = (ndata+csize-1)/csize;
  }
/* Sort key together with the index the key belongs to. */
typedef struct
  {
  double s;
  int i;
  } idxhelper;

/* qsort() comparator for idxhelper entries: larger s sorts first. */
static int idx_compare (const void *xa, const void *xb)
  {
  const idxhelper *lhs=xa, *rhs=xb;
  if (lhs->s > rhs->s) return -1;
  if (lhs->s < rhs->s) return 1;
  return 0;
  }
/* Scratch state used while transforming individual rings between pixel
   space and phase (Fourier) space. */
typedef struct
{
double phi0_; /* phi0 for which shiftarr was last computed */
dcmplx *shiftarr, *work; /* shiftarr[m]=exp(i*m*phi0_); work is the FFT buffer */
int s_shift, s_work; /* number of entries in shiftarr; size bookkeeping for work (passed to GROW) */
real_plan plan; /* real-FFT plan, re-created whenever the ring length changes */
int norot; /* nonzero if |phi0| is below 1e-14, i.e. no phase shift is applied */
} ringhelper;
/* Puts \a self into the empty state: no buffers, no plan, all counters zero. */
static void ringhelper_init (ringhelper *self)
  {
  self->phi0_ = 0;
  self->shiftarr = NULL;
  self->work = NULL;
  self->s_shift = 0;
  self->s_work = 0;
  self->plan = NULL;
  self->norot = 0;
  }
/* Releases the plan and both buffers held by \a self, then resets it to the
   empty state so that it can be reused. */
static void ringhelper_destroy (ringhelper *self)
  {
  if (self->plan!=NULL)
    kill_real_plan(self->plan);
  DEALLOC(self->work);
  DEALLOC(self->shiftarr);
  ringhelper_init(self);
  }
/* Prepares \a self for rings with \a nph pixels, azimuth offset \a phi0 and
   m values up to \a mmax. The table of phase factors, the FFT plan and the
   work buffer are only rebuilt when the cached ones do not fit. */
static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
{
/* a (numerically) vanishing phi0 needs no phase factors at all */
self->norot = (fabs(phi0)<1e-14);
if (!(self->norot))
/* recompute the table only if its length or the cached phi0 changed */
if ((mmax!=self->s_shift-1) || (!FAPPROX(phi0,self->phi0_,1e-12)))
{
RESIZE (self->shiftarr,dcmplx,mmax+1);
self->s_shift = mmax+1;
self->phi0_ = phi0;
/* shiftarr[m] = exp(i*m*phi0) */
for (int m=0; m<=mmax; ++m)
self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
}
/* create a plan on first use ... */
if (!self->plan) self->plan=make_real_plan(nph);
/* ... and replace it if it was made for a different ring length */
if (nph!=(int)self->plan->length)
{
kill_real_plan(self->plan);
self->plan=make_real_plan(nph);
}
/* presumably enlarges work to hold at least nph entries, tracking the size
   in s_work -- see the GROW macro in c_utils.h */
GROW(self->work,dcmplx,self->s_work,nph);
}
/* qsort() comparator for sharp_ringinfo entries: ascending order of sth. */
static int ringinfo_compare (const void *xa, const void *xb)
  {
  const sharp_ringinfo *lhs=xa, *rhs=xb;
  if (lhs->sth < rhs->sth) return -1;
  if (lhs->sth > rhs->sth) return 1;
  return 0;
  }
/* qsort() comparator for sharp_ringpair entries.
   Pairs are ordered by the properties of their first ring (r1):
   ascending nph, then ascending phi0, then descending cth.
   Pairs that agree in all three keys compare equal (0). The comparator must
   report ties as 0: answering "greater" for both (a,b) and (b,a) is an
   inconsistent ordering, which qsort() is not required to cope with. */
static int ringpair_compare (const void *xa, const void *xb)
  {
  const sharp_ringpair *a=xa, *b=xb;
  if (a->r1.nph!=b->r1.nph)
    return (a->r1.nph<b->r1.nph) ? -1 : 1;
  if (a->r1.phi0 < b->r1.phi0) return -1;
  if (a->r1.phi0 > b->r1.phi0) return 1;
  if (a->r1.cth > b->r1.cth) return -1;
  if (a->r1.cth < b->r1.cth) return 1;
  return 0;
  }
/* Allocates a sharp_alm_info describing nm m-values and stores it in
   *alm_info. The arrays mval and mstart (nm entries each) are copied, so the
   caller keeps ownership of its own arrays. */
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
  const ptrdiff_t *mstart, sharp_alm_info **alm_info)
  {
  sharp_alm_info *res = RALLOC(sharp_alm_info,1);
  res->mval = RALLOC(int,nm);
  res->mvstart = RALLOC(ptrdiff_t,nm);
  int k=0;
  while (k<nm)
    {
    res->mval[k] = mval[k];
    res->mvstart[k] = mstart[k];
    ++k;
    }
  res->lmax = lmax;
  res->nm = nm;
  res->stride = stride;
  *alm_info = res;
  }
/* Convenience wrapper around sharp_make_general_alm_info() for the case
   that all m from 0 to mmax are present, in ascending order. */
void sharp_make_alm_info (int lmax, int mmax, int stride,
  const ptrdiff_t *mstart, sharp_alm_info **alm_info)
  {
  const int nm = mmax+1;
  int *mlist=RALLOC(int,nm);
  for (int m=0; m<nm; ++m)
    mlist[m]=m;
  sharp_make_general_alm_info (lmax, nm, stride, mlist, mstart, alm_info);
  /* the callee copied the list, so the temporary can go */
  DEALLOC(mlist);
  }
/* Returns the array index of the coefficient with quantum number l for the
   mi-th m value described by self: mvstart[mi] + stride*l. */
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi)
  {
  return self->mvstart[mi]+self->stride*l;
  }
/* Frees the two arrays held by info and then the structure itself. */
void sharp_destroy_alm_info (sharp_alm_info *info)
  {
  DEALLOC (info->mvstart);
  DEALLOC (info->mval);
  DEALLOC (info);
  }
/* Builds a sharp_geom_info from per-ring arrays (nrings entries each) and
   stores it in *geom_info. Rings whose cos(theta) values are opposite within
   a tolerance of 1e-12 are combined into one ring pair; every other ring
   forms a pair whose second member is marked absent (r2.nph=-1). */
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
const int *stride, const double *phi0, const double *theta,
const double *weight, sharp_geom_info **geom_info)
{
sharp_geom_info *info = RALLOC(sharp_geom_info,1);
sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
int pos=0;
/* nrings is an upper bound for the number of pairs */
info->pair=RALLOC(sharp_ringpair,nrings);
info->npairs=0;
*geom_info = info;
/* collect the per-ring data, including cos/sin of the colatitude */
for (int m=0; m<nrings; ++m)
{
infos[m].theta = theta[m];
infos[m].cth = cos(theta[m]);
infos[m].sth = sin(theta[m]);
infos[m].weight = weight[m];
infos[m].phi0 = phi0[m];
infos[m].ofs = ofs[m];
infos[m].stride = stride[m];
infos[m].nph = nph[m];
}
/* sort by ascending sin(theta); the pairing below only looks at neighbours
   in this order */
qsort(infos,nrings,sizeof(sharp_ringinfo),ringinfo_compare);
while (pos<nrings)
{
info->pair[info->npairs].r1=infos[pos];
/* neighbouring ring is the mirror image with respect to the equator? */
if ((pos<nrings-1) && FAPPROX(infos[pos].cth,-infos[pos+1].cth,1e-12))
{
if (infos[pos].cth>0) // make sure northern ring is in r1
info->pair[info->npairs].r2=infos[pos+1];
else
{
info->pair[info->npairs].r1=infos[pos+1];
info->pair[info->npairs].r2=infos[pos];
}
/* two rings consumed: extra increment in addition to the one below */
++pos;
}
else
/* unpaired ring: mark the second slot as absent */
info->pair[info->npairs].r2.nph=-1;
++pos;
++info->npairs;
}
DEALLOC(infos);
/* final ordering of the pairs, see ringpair_compare() */
qsort(info->pair,info->npairs,sizeof(sharp_ringpair),ringpair_compare);
}
/* Frees the ring-pair array of geom_info and then the structure itself. */
void sharp_destroy_geom_info (sharp_geom_info *geom_info)
{
DEALLOC (geom_info->pair);
DEALLOC (geom_info);
}
/* Validates the list of m values and returns the largest m.
   The assertions require every entry of mval to lie in [0,nm) and to occur
   only once, i.e. mval must be a permutation of 0..nm-1. */
static int sharp_get_mmax (int *mval, int nm)
{
/* mcheck[m] becomes 1 once m has been seen */
int *mcheck=RALLOC(int,nm);
SET_ARRAY(mcheck,0,nm,0);
for (int i=0; i<nm; ++i)
{
int m_cur=mval[i];
UTIL_ASSERT((m_cur>=0) && (m_cur<nm), "m out of range");
UTIL_ASSERT(mcheck[m_cur]==0, "duplicate m value");
mcheck[m_cur]=1;
}
DEALLOC(mcheck);
/* With nm distinct values in [0,nm) the maximum is necessarily nm-1.
   The original note here read "FIXME: this looks wrong"; presumably the
   concern is m lists that do not contain every value from 0 to mmax, which
   the assertions above reject -- TODO confirm. */
return nm-1;
}
/* Converts the phases of one ring into pixel values and ADDS them to the map.
   phase[m*pstride] is read for m=0..mmax; the result is accumulated into
   data[info->ofs + k*info->stride] for k=0..nph-1, where data is treated as
   double or float depending on fde. */
static void ringhelper_phase2ring (ringhelper *self,
const sharp_ringinfo *info, void *data, int mmax, const dcmplx *phase,
int pstride, sharp_fde fde)
{
int nph = info->nph;
int stride = info->stride;
ringhelper_update (self, nph, mmax, info->phi0);
/* start from the m=0 term, all other FFT bins cleared */
self->work[0]=phase[0];
SET_ARRAY(self->work,1,nph,0.);
/* disabled variant: computes the bin indices with modulo operations; the
   active branch below tracks them incrementally instead */
#if 0
if (self->norot)
for (int m=1; m<=mmax; ++m)
{
int idx1 = m%nph;
int idx2 = nph-1-((m-1)%nph);
self->work[idx1]+=phase[m*pstride];
self->work[idx2]+=conj(phase[m*pstride]);
}
else
for (int m=1; m<=mmax; ++m)
{
int idx1 = m%nph;
int idx2 = nph-1-((m-1)%nph);
dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
self->work[idx1]+=tmp;
self->work[idx2]+=conj(tmp);
}
#else
/* idx1 walks upwards from 1, idx2 downwards from nph-1, both wrapping around,
   so contributions with m>=nph are folded back into the nph available bins */
int idx1=1, idx2=nph-1;
for (int m=1; m<=mmax; ++m)
{
dcmplx tmp = phase[m*pstride];
/* apply exp(i*m*phi0) unless phi0 is (numerically) zero */
if(!self->norot) tmp*=self->shiftarr[m];
self->work[idx1]+=tmp;
self->work[idx2]+=conj(tmp);
if (++idx1>=nph) idx1=0;
if (--idx2<0) idx2=nph-1;
}
#endif
/* backward FFT in full-complex storage; the real parts are the pixel values */
real_plan_backward_c (self->plan, (double *)(self->work));
if (fde==DOUBLE)
for (int m=0; m<nph; ++m)
((double *)data)[m*stride+info->ofs] += creal(self->work[m]);
else
for (int m=0; m<nph; ++m)
((float *)data)[m*stride+info->ofs] += (float)creal(self->work[m]);
}
/* Converts the pixels of one ring into phases.
   The pixels data[info->ofs + k*info->stride] (double or float, depending on
   fde) are multiplied by info->weight and Fourier transformed; the result is
   stored in phase[m*pstride] for m=0..mmax. */
static void ringhelper_ring2phase (ringhelper *self,
const sharp_ringinfo *info, const void *data, int mmax, dcmplx *phase,
int pstride, sharp_fde fde)
{
int nph = info->nph;
/* Active branch: all m up to mmax are taken from the FFT output (indices
   wrap modulo nph below). The alternative limits m to nph-1 and leaves the
   remaining phases zero. */
#if 1
int maxidx = mmax; /* Enable this for traditional Healpix compatibility */
#else
int maxidx = IMIN(nph-1,mmax);
#endif
/* note the sign: the phase factors are computed for -phi0 here */
ringhelper_update (self, nph, mmax, -info->phi0);
/* load the weighted pixels into the FFT buffer */
if (fde==DOUBLE)
for (int m=0; m<nph; ++m)
self->work[m] = ((double *)data)[info->ofs+m*info->stride]*info->weight;
else
for (int m=0; m<nph; ++m)
self->work[m] = ((float *)data)[info->ofs+m*info->stride]*info->weight;
real_plan_forward_c (self->plan, (double *)self->work);
/* copy (and, unless phi0 is zero, phase-shift) the Fourier coefficients */
if (self->norot)
for (int m=0; m<=maxidx; ++m)
phase[m*pstride] = self->work[m%nph];
else
for (int m=0; m<=maxidx; ++m)
phase[m*pstride]=self->work[m%nph]*self->shiftarr[m];
/* only does something if maxidx<mmax, i.e. in the disabled branch above */
for (int m=maxidx+1;m<=mmax; ++m)
phase[m*pstride]=0.;
}
/* Runs ringhelper_ring2phase() on the first ring of the pair (into phase1)
   and, if the pair has a second ring, on that one as well (into phase2). */
static void ringhelper_pair2phase (ringhelper *self, int mmax,
  const sharp_ringpair *pair, const void *data, dcmplx *phase1, dcmplx *phase2,
  int pstride, sharp_fde fde)
  {
  const sharp_ringinfo *first=&(pair->r1), *second=&(pair->r2);
  ringhelper_ring2phase (self, first, data, mmax, phase1, pstride, fde);
  /* an absent second ring is marked by a non-positive pixel count */
  if (second->nph>0)
    {
    ringhelper_ring2phase (self, second, data, mmax, phase2, pstride, fde);
    }
  }
/* Runs ringhelper_phase2ring() on the first ring of the pair (from phase1)
   and, if the pair has a second ring, on that one as well (from phase2). */
static void ringhelper_phase2pair (ringhelper *self, int mmax,
  const dcmplx *phase1, const dcmplx *phase2, int pstride,
  const sharp_ringpair *pair, void *data, sharp_fde fde)
  {
  const sharp_ringinfo *first=&(pair->r1), *second=&(pair->r2);
  ringhelper_phase2ring (self, first, data, mmax, phase1, pstride, fde);
  /* an absent second ring is marked by a non-positive pixel count */
  if (second->nph>0)
    {
    ringhelper_phase2ring (self, second, data, mmax, phase2, pstride, fde);
    }
  }
/* Sets every pixel described by ginfo to value. map is interpreted as an
   array of double or float, depending on fde. Rings with a non-positive
   pixel count (absent second rings) are skipped automatically. */
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
  sharp_fde fde)
  {
  for (int j=0;j<ginfo->npairs;++j)
    {
    const sharp_ringinfo *ring[2] = { &ginfo->pair[j].r1, &ginfo->pair[j].r2 };
    for (int k=0; k<2; ++k)
      for (int i=0; i<ring[k]->nph; ++i)
        {
        if (fde==DOUBLE)
          ((double *)map)[ring[k]->ofs+i*ring[k]->stride]=value;
        else
          ((float *)map)[ring[k]->ofs+i*ring[k]->stride]=(float)value;
        }
    }
  }
/* Sets every coefficient described by ainfo (all l from m to lmax, for each
   stored m) to value. alm is interpreted as an array of complex double or
   complex float, depending on fde. */
static void fill_alm (const sharp_alm_info *ainfo, void *alm, dcmplx value,
  sharp_fde fde)
  {
  for (int mi=0;mi<ainfo->nm;++mi)
    {
    for (int l=ainfo->mval[mi];l<=ainfo->lmax;++l)
      {
      if (fde==DOUBLE)
        ((dcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = value;
      else
        ((fcmplx *)alm)[sharp_alm_index(ainfo,l,mi)] = (fcmplx)value;
      }
    }
  }
/* Zeroes the output arrays of the job, unless the job was set up to add to
   existing output. The output is the a_lm for SHARP_MAP2ALM and the maps for
   every other job type. */
static void init_output (sharp_job *job)
  {
  if (!job->add_output)
    {
    if (job->type == SHARP_MAP2ALM)
      {
      for (int i=0; i<job->ntrans*job->nalm; ++i)
        fill_alm (job->ainfo,job->alm[i],0.,job->fde);
      }
    else
      {
      for (int i=0; i<job->ntrans*job->nmaps; ++i)
        fill_map (job->ginfo,job->map[i],0.,job->fde);
      }
    }
  }
/* Allocates job->phase with room for 2*ntrans*nmaps complex values per
   (m, ring pair) combination, for nm m-values and ntheta ring pairs.
   NOTE(review): the element count is computed in int arithmetic -- confirm
   that it cannot overflow for the largest supported transforms. */
static void alloc_phase (sharp_job *job, int nm, int ntheta)
{ job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta); }
/* Releases the array allocated by alloc_phase(). */
static void dealloc_phase (sharp_job *job)
{ DEALLOC(job->phase); }
//FIXME: set phase to zero if not SHARP_MAP2ALM?
/* For SHARP_MAP2ALM jobs: transforms the ring pairs with indices
   llim..ulim-1 of all input maps into job->phase. Does nothing for other job
   types. Layout of job->phase as used here: pair (ith-llim) starts at offset
   pstride*(ith-llim)*(mmax+1); within it, map i uses element 2*i for the
   first ring and 2*i+1 for the second ring, consecutive m being pstride
   elements apart. */
static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
{
if (job->type != SHARP_MAP2ALM) return;
int pstride = 2*job->ntrans*job->nmaps;
#pragma omp parallel
{
/* every thread gets its own helper (FFT plan and buffers) */
ringhelper helper;
ringhelper_init(&helper);
#pragma omp for schedule(dynamic,1)
for (int ith=llim; ith<ulim; ++ith)
{
int dim2 = pstride*(ith-llim)*(mmax+1);
for (int i=0; i<job->ntrans*job->nmaps; ++i)
ringhelper_pair2phase(&helper,mmax,&job->ginfo->pair[ith], job->map[i],
&job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->fde);
}
ringhelper_destroy(&helper);
} /* end of parallel region */
}
/* Allocates job->almtmp with ntrans*nalm complex values for each l in
   0..lmax. */
static void alloc_almtmp (sharp_job *job, int lmax)
{ job->almtmp=RALLOC(dcmplx,job->ntrans*job->nalm*(lmax+1)); }
/* Releases the array allocated by alloc_almtmp(). */
static void dealloc_almtmp (sharp_job *job)
{ DEALLOC(job->almtmp); }
/* Prepares job->almtmp for the m value with index mi.
   For every job type except SHARP_MAP2ALM the a_lm of all transforms are
   copied into almtmp[ntrans*nalm*l+i], multiplied by norm_l[l]. For
   SHARP_MAP2ALM the buffer is cleared instead. */
static void alm2almtmp (sharp_job *job, int lmax, int mi)
{
if (job->type!=SHARP_MAP2ALM)
/* l starts at m: coefficients with l<m are not touched */
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
{
ptrdiff_t aidx = sharp_alm_index(job->ainfo,l,mi);
double fct = job->norm_l[l];
for (int i=0; i<job->ntrans*job->nalm; ++i)
if (job->fde==DOUBLE)
job->almtmp[job->ntrans*job->nalm*l+i]
= ((dcmplx *)job->alm[i])[aidx]*fct;
else
job->almtmp[job->ntrans*job->nalm*l+i]
= ((fcmplx *)job->alm[i])[aidx]*fct;
}
else
/* presumably zeroes the entries belonging to l=m..lmax (SET_ARRAY taking a
   half-open index range) -- see c_utils.h */
SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi],
job->ntrans*job->nalm*(lmax+1),0.);
}
/* For SHARP_MAP2ALM jobs: adds the contents of job->almtmp for the m value
   with index mi, multiplied by norm_l[l], to the output a_lm. Does nothing
   for other job types. */
static void almtmp2alm (sharp_job *job, int lmax, int mi)
{
if (job->type != SHARP_MAP2ALM) return;
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l)
{
ptrdiff_t aidx = sharp_alm_index(job->ainfo,l,mi);
for (int i=0;i<job->ntrans*job->nalm;++i)
/* accumulate (+=) in the precision of the output array */
if (job->fde==DOUBLE)
((dcmplx *)job->alm[i])[aidx] +=
job->almtmp[job->ntrans*job->nalm*l+i]*job->norm_l[l];
else
((fcmplx *)job->alm[i])[aidx] +=
(fcmplx)(job->almtmp[job->ntrans*job->nalm*l+i]*job->norm_l[l]);
}
}
/* For all job types except SHARP_MAP2ALM: converts the phases stored in
   job->phase for the ring pairs llim..ulim-1 into pixels and adds them to
   the output maps. Does nothing for SHARP_MAP2ALM. The indexing of
   job->phase is the same as in map2phase(). */
static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
{
if (job->type == SHARP_MAP2ALM) return;
int pstride = 2*job->ntrans*job->nmaps;
#pragma omp parallel
{
/* every thread gets its own helper (FFT plan and buffers) */
ringhelper helper;
ringhelper_init(&helper);
#pragma omp for schedule(dynamic,1)
for (int ith=llim; ith<ulim; ++ith)
{
int dim2 = pstride*(ith-llim)*(mmax+1);
for (int i=0; i<job->ntrans*job->nmaps; ++i)
ringhelper_phase2pair(&helper,mmax,&job->phase[dim2+2*i],
&job->phase[dim2+2*i+1],pstride,&job->ginfo->pair[ith],job->map[i],
job->fde);
}
ringhelper_destroy(&helper);
} /* end of parallel region */
}
/* Executes a fully set-up job: the ring pairs are processed in chunks; for
   each chunk the maps are converted to phases (MAP2ALM) or the phases to
   maps (other job types), with the m loop in between running in parallel.
   On return job->time holds the elapsed wall time and job->opcnt the summed
   operation counts of all threads. */
static void sharp_execute_job (sharp_job *job)
{
double timer=wallTime();
job->opcnt=0;
int lmax = job->ainfo->lmax,
mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
/* per-l normalisation factors; released at the end of this function */
job->norm_l = (job->type==SHARP_ALM2MAP_DERIV1) ?
sharp_Ylmgen_get_d1norm (lmax) :
sharp_Ylmgen_get_norm (lmax, job->spin);
/* clear output arrays if requested */
init_output (job);
int nchunks, chunksize;
/* chunk size is a multiple of nv*VLEN */
get_chunk_info(job->ginfo->npairs,job->nv*VLEN,&nchunks,&chunksize);
/* one phase buffer, sized for a full chunk, is reused by all chunks */
alloc_phase (job,mmax+1,chunksize);
/* chunk loop */
for (int chunk=0; chunk<nchunks; ++chunk)
{
/* this chunk covers the ring pairs llim..ulim-1 */
int llim=chunk*chunksize, ulim=IMIN(llim+chunksize,job->ginfo->npairs);
int *ispair = RALLOC(int,ulim-llim);
double *cth = RALLOC(double,ulim-llim), *sth = RALLOC(double,ulim-llim);
idxhelper *stmp = RALLOC(idxhelper,ulim-llim);
/* per-pair data: has a second ring?, cos/sin(theta) of the first ring */
for (int i=0; i<ulim-llim; ++i)
{
ispair[i] = job->ginfo->pair[i+llim].r2.nph>0;
cth[i] = job->ginfo->pair[i+llim].r1.cth;
sth[i] = job->ginfo->pair[i+llim].r1.sth;
stmp[i].s=sth[i];
stmp[i].i=i;
}
/* idx lists the pairs of this chunk in order of descending sin(theta) */
qsort (stmp,ulim-llim,sizeof(idxhelper),idx_compare);
int *idx = RALLOC(int,ulim-llim);
for (int i=0; i<ulim-llim; ++i)
idx[i]=stmp[i].i;
DEALLOC(stmp);
/* map->phase where necessary */
map2phase (job, mmax, llim, ulim);
#pragma omp parallel
{
/* thread-private copy of the job with its own almtmp and op counter */
sharp_job ljob = *job;
ljob.opcnt=0;
sharp_Ylmgen_C generator;
sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin);
alloc_almtmp(&ljob,lmax);
#pragma omp for schedule(dynamic,1)
for (int mi=0; mi<job->ainfo->nm; ++mi)
{
/* alm->alm_tmp where necessary */
alm2almtmp (&ljob, lmax, mi);
inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, idx);
/* alm_tmp->alm where necessary */
almtmp2alm (&ljob, lmax, mi);
}
sharp_Ylmgen_destroy(&generator);
dealloc_almtmp(&ljob);
/* merge the thread-private operation count into the shared job */
#pragma omp critical
job->opcnt+=ljob.opcnt;
} /* end of parallel region */
/* phase->map where necessary */
phase2map (job, mmax, llim, ulim);
DEALLOC(ispair);
DEALLOC(cth);
DEALLOC(sth);
DEALLOC(idx);
} /* end of chunk loop */
DEALLOC(job->norm_l);
dealloc_phase (job);
job->time=wallTime()-timer;
}
/* Fills in all fields of job from the given parameters.
   - SHARP_ALM2MAP_DERIV1 jobs always use spin 1, two maps and one a_lm set;
     otherwise spin 0 uses one map and one a_lm set, spin>0 two of each.
   - nv==0 asks sharp_nv_oracle() for the value to use.
   - dp!=0 selects double precision arrays, dp==0 single precision.
   Aborts via UTIL_ASSERT if ntrans<=0 or the spin is outside 0..30. */
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
  int spin, int add_output, void *alm, void *map,
  const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
  int dp, int nv)
  {
  UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
  if (type==SHARP_ALM2MAP_DERIV1) spin=1;
  UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin");

  /* kind of transform */
  job->type = type;
  job->spin = spin;
  job->ntrans = ntrans;
  if (type==SHARP_ALM2MAP_DERIV1)
    {
    job->nmaps = 2;
    job->nalm = 1;
    }
  else
    {
    job->nmaps = (spin>0) ? 2 : 1;
    job->nalm = (spin>0) ? 2 : 1;
    }

  /* data and its description */
  job->ginfo = geom_info;
  job->ainfo = alm_info;
  job->alm=alm;
  job->map=map;
  job->fde=dp ? DOUBLE : FLOAT;
  job->add_output = add_output;

  /* execution state */
  job->norm_l = NULL;
  job->time = 0.;
  job->opcnt = 0;
  if (nv==0)
    job->nv = sharp_nv_oracle (type, spin, ntrans);
  else
    job->nv = nv;
  }
/* Sets up and runs a single job. time and opcnt are optional outputs: if not
   NULL they receive the wall time and the operation count of the job. */
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
  void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
  int ntrans, int dp, int nv, double *time, unsigned long long *opcnt)
  {
  sharp_job job;
  sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
    alm_info, ntrans, dp, nv);
  sharp_execute_job (&job);

  if (time)
    {
    *time = job.time;
    }
  if (opcnt)
    {
    *opcnt = job.opcnt;
    }
  }
/* Returns the largest value of nv tried by the timing oracle in this file. */
int sharp_get_nv_max (void)
  {
  static const int nv_max = 6;
  return nv_max;
  }
/* Determines by measurement which nv (1..sharp_get_nv_max()) gives the
   shortest execution time for the given job type, spin and number of
   simultaneous transforms, and returns that nv. The measurement runs
   transforms on a small zero-filled test problem. */
static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
{
/* dimensions of the test problem */
int lmax=127;
int mmax=(lmax+1)/2;
int nrings=(lmax+1)/4;
int ppring=1;
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
sharp_geom_info *tinfo;
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
/* one component per transform for spin 0, two otherwise */
int ncomp = ntrans*((spin==0) ? 1 : 2);
double **map;
ALLOC2D(map,double,ncomp,npix);
SET_ARRAY(map[0],0,npix*ncomp,0.);
sharp_alm_info *alms;
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
dcmplx **alm;
ALLOC2D(alm,dcmplx,ncomp,nalms);
SET_ARRAY(alm[0],0,nalms*ncomp,0.);
/* best time seen so far and the nv that achieved it */
double time=1e30;
int nvbest=-1;
for (int nv=1; nv<=sharp_get_nv_max(); ++nv)
{
double time_acc=0.;
double jtime;
int ntries=0;
/* run once, and a second time if the first run took less than 0.02
   (units as returned by wallTime(), presumably seconds) */
do
{
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,&jtime,
NULL);
if (jtime<time) { time=jtime; nvbest=nv; }
time_acc+=jtime;
++ntries;
}
while ((time_acc<0.02)&&(ntries<2));
}
DEALLOC2D(map);
DEALLOC2D(alm);
sharp_destroy_alm_info(alms);
sharp_destroy_geom_info(tinfo);
return nvbest;
}
/* Returns the nv to use for the given job type, spin and number of
   simultaneous transforms. The value is measured by sharp_oracle() on first
   request and cached afterwards. The cache is indexed by
   [min(ntrans,6)-1][spin!=0][type]; an entry of 0 means "not measured yet".
   SHARP_ALM2MAP_DERIV1 is always treated as spin 1. */
int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
  {
  static const int maxtr = 6;
  static int nv_opt[6][2][3] = {{{0}}};

  if (type==SHARP_ALM2MAP_DERIV1) spin=1;
  UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
  UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin");
  ntrans=IMIN(ntrans,maxtr);

  int *cached = &nv_opt[ntrans-1][spin!=0][type];
  if (*cached==0)
    *cached=sharp_oracle(type,spin,ntrans);
  return *cached;
  }
#ifdef USE_MPI
#include "sharp_mpi.c"
#endif

43
external/sharp/libsharp/sharp.h vendored Normal file
View file

@ -0,0 +1,43 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp.h
* Interface for the spherical transform library.
*
* Copyright (C) 2006-2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_H
#define PLANCK_SHARP_H
#ifdef __cplusplus
#error This header file cannot be included from C++, only from C
#endif
#include <complex.h>
#include "sharp_lowlevel.h"
#endif

267
external/sharp/libsharp/sharp_acctest.c vendored Normal file
View file

@ -0,0 +1,267 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_acctest.c
Systematic accuracy test for libsharp.
Copyright (C) 2006-2012 Max-Planck-Society
\author Martin Reinecke
*/
#include <stdio.h>
#include <string.h>
#ifdef USE_MPI
#include "mpi.h"
#endif
#include "sharp.h"
#include "sharp_geomhelpers.h"
#include "sharp_almhelpers.h"
#include "c_utils.h"
#include "sharp_announce.h"
#include "sharp_core.h"
typedef complex double dcmplx;
/* Returns a pseudo-random number in [min,max), drawn from rand(). */
static double drand (double min, double max)
  {
  const double scaled = (max-min)*rand()/(RAND_MAX+1.0);
  return min + scaled;
  }
/* Fills alm with random coefficients for all (l,m) described by helper.
   Real and imaginary parts are drawn from [-1,1); the imaginary part is 0
   for m==0. Coefficients with both l<spin and m<spin are set to 0. */
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin)
  {
  for (int mi=0;mi<helper->nm; ++mi)
    {
    const int m=helper->mval[mi];
    for (int l=m;l<=helper->lmax; ++l)
      {
      if ((l>=spin)||(m>=spin))
        {
        /* real part is always drawn first, then (for m!=0) the imaginary part */
        double re = drand(-1,1);
        double im = 0;
        if (m!=0) im = drand(-1,1);
        alm[sharp_alm_index(helper,l,mi)] = re+_Complex_I*im;
        }
      else
        alm[sharp_alm_index(helper,l,mi)] = 0.;
      }
    }
  }
/* Compares the coefficient sets alm (reference) and alm2 component by
   component. For each of the ncomp components it asserts that the largest
   absolute difference of any real or imaginary part, and the ratio of the
   RMS difference to the RMS of the reference, are both below 1e-10. */
static void measure_errors (dcmplx **alm, dcmplx **alm2,
  ptrdiff_t nalms, int ncomp)
  {
  for (int i=0; i<ncomp; ++i)
    {
    double sqdiff=0, sqref=0, maxdiff=0;
    for (ptrdiff_t m=0; m<nalms; ++m)
      {
      const double re=creal(alm[i][m]), im=cimag(alm[i][m]);
      const double dre=re-creal(alm2[i][m]),
                   dim=im-cimag(alm2[i][m]);
      sqdiff+=dre*dre+dim*dim;
      sqref+=re*re+im*im;
      if (fabs(dre)>maxdiff) maxdiff=fabs(dre);
      if (fabs(dim)>maxdiff) maxdiff=fabs(dim);
      }
    sqdiff=sqrt(sqdiff/nalms);
    sqref=sqrt(sqref/nalms);
    UTIL_ASSERT((maxdiff<1e-10)&&(sqdiff/sqref<1e-10),"error");
    }
  }
/* Regression test for signs and normalisation: runs alm2map transforms
   (spin 0, 1, 2 and first derivatives) on a_lm that are all 1+i and compares
   three pixels of every output map (first, middle, last) against hard-coded
   reference values. Repeated for 1 to 9 simultaneous transforms. */
static void check_sign_scale(void)
{
int lmax=50;
int mmax=lmax;
sharp_geom_info *tinfo;
int nrings=lmax+1;
int ppring=2*lmax+2;
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
/* flip theta to emulate the "old" Gaussian grid geometry */
for (int i=0; i<tinfo->npairs; ++i)
{
const double pi=3.141592653589793238462643383279502884197;
tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
}
sharp_alm_info *alms;
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
for (int ntrans=1; ntrans<10; ++ntrans)
{
/* 2*ntrans arrays are enough for the two-component (spin>0) cases */
double **map;
ALLOC2D(map,double,2*ntrans,npix);
dcmplx **alm;
ALLOC2D(alm,dcmplx,2*ntrans,nalms);
for (int i=0; i<2*ntrans; ++i)
for (int j=0; j<nalms; ++j)
alm[i][j]=1.+_Complex_I;
/* spin 0: one map per transform */
sharp_execute(SHARP_ALM2MAP,0,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,NULL,
NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[it][npix/2], 4.042209792157496651e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
"error");
}
/* spin 1: two maps per transform, stored at 2*it and 2*it+1 */
sharp_execute(SHARP_ALM2MAP,1,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,NULL,
NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2], 3.137704477368562905e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-8.405730859837063917e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-2.398026536095463346e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-4.961140548331700728e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.412765834230440021e+01,1e-12),
"error");
}
/* spin 2 */
sharp_execute(SHARP_ALM2MAP,2,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,NULL,
NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.456676000884031197e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.516249174408820863e+02,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-3.173406200299964119e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-5.831327404513146462e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.863257892248353897e+01,1e-12),
"error");
}
/* first derivatives: two maps per transform; note the looser tolerance
   (1e-11) on the first value */
sharp_execute(SHARP_ALM2MAP_DERIV1,1,0,&alm[0],&map[0],tinfo,alms,ntrans,1,
0,NULL,NULL);
for (int it=0; it<ntrans; ++it)
{
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
"error");
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.103947835973212364e+02,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.092463246472086439e+03,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-1.411433220713928165e+02,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-1.146122859381925082e+03,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1], 7.821618677689795049e+02,1e-12),
"error");
}
DEALLOC2D(map);
DEALLOC2D(alm);
}
sharp_destroy_alm_info(alms);
sharp_destroy_geom_info(tinfo);
}
/* Round-trip accuracy check for one (spin, ntrans, nv) combination.
 *
 * Random a_lm are generated with a fixed seed, synthesised into maps with
 * SHARP_ALM2MAP, analysed back with SHARP_MAP2ALM, and the original and
 * recovered coefficients are handed to measure_errors().
 *
 * tinfo      : pixel geometry to run the transforms on
 * lmax, mmax : band limits of the coefficient set
 * npix       : number of pixels allocated per map component
 * spin       : spin of the transform (0 means one component per transform,
 *              anything else means two)
 * ntrans     : number of simultaneous transforms
 * nv         : value forwarded as the "nv" argument of sharp_execute()
 */
static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
  ptrdiff_t mmax, ptrdiff_t npix, int spin, int ntrans, int nv)
  {
  /* coefficients per component and number of components */
  const ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
  const int ncomp = ntrans*((spin==0) ? 1 : 2);

  sharp_alm_info *alms;
  sharp_make_triangular_alm_info(lmax,mmax,1,&alms);

  double **map;
  ALLOC2D(map,double,ncomp,npix);
  dcmplx **alm_in;
  ALLOC2D(alm_in,dcmplx,ncomp,nalms);
  dcmplx **alm_out;
  ALLOC2D(alm_out,dcmplx,ncomp,nalms);

  /* fixed seed, so every call works on the same random input */
  srand(4);
  for (int comp=0; comp<ncomp; ++comp)
    random_alm(alm_in[comp],alms,spin);

  /* synthesis followed by analysis */
  sharp_execute(SHARP_ALM2MAP,spin,0,&alm_in[0],&map[0],tinfo,alms,ntrans,1,
    nv,NULL,NULL);
  sharp_execute(SHARP_MAP2ALM,spin,0,&alm_out[0],&map[0],tinfo,alms,ntrans,1,
    nv,NULL,NULL);
  measure_errors(alm_in,alm_out,nalms,ncomp);

  DEALLOC2D(map);
  DEALLOC2D(alm_in);
  DEALLOC2D(alm_out);
  sharp_destroy_alm_info(alms);
  }
/* Test driver: first the sign/scale regression check, then round-trip
 * accuracy checks on a Gauss grid for every combination of nv (1..6),
 * ntrans (1..6) and the spins 0, 1, 2, 3 and 30. */
int main(void)
  {
#ifdef USE_MPI
  MPI_Init(NULL,NULL);
#endif
  sharp_module_startup("sharp_acctest",1,1,"",1);

  printf("Checking signs and scales.\n");
  check_sign_scale();
  printf("Passed.\n\n");

  printf("Testing map analysis accuracy.\n");
  const int lmax=127;
  const int nrings=lmax+1;
  const int ppring=2*lmax+2;
  const ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
  sharp_geom_info *tinfo;
  sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);

  /* spins are exercised in exactly this order for each (nv, ntrans) pair */
  static const int spins[] = { 0, 1, 2, 3, 30 };
  const int nspins = (int)(sizeof(spins)/sizeof(spins[0]));
  for (int nv=1; nv<=6; ++nv)
    for (int ntrans=1; ntrans<=6; ++ntrans)
      for (int is=0; is<nspins; ++is)
        check_accuracy(tinfo,lmax,lmax,npix,spins[is],ntrans,nv);

  sharp_destroy_geom_info(tinfo);
  printf("Passed.\n\n");
#ifdef USE_MPI
  MPI_Finalize();
#endif
  return 0;
  }

View file

@ -0,0 +1,68 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_almhelpers.c
* Spherical transform library
*
* Copyright (C) 2008-2011 Max-Planck-Society
* \author Martin Reinecke
*/
#include "sharp_almhelpers.h"
#include "c_utils.h"
/* Allocates a sharp_alm_info describing all m in [0; mmax] and stores it in
 * *alm_info. For every m the start index is
 *   mvstart[m] = stride * ((m*(2*lmax+1-m)) >> 1).
 * Both the structure and its mval/mvstart arrays are allocated with
 * RALLOC. */
void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info)
  {
  const int nm = mmax+1;
  const int tval = 2*lmax+1;

  sharp_alm_info *res = RALLOC(sharp_alm_info,1);
  res->lmax = lmax;
  res->nm = nm;
  res->stride = stride;
  res->mval = RALLOC(int,nm);
  res->mvstart = RALLOC(ptrdiff_t,nm);

  ptrdiff_t m = 0;
  while (m<=mmax)
    {
    res->mval[m] = m;
    /* evaluated in ptrdiff_t because m has that type */
    res->mvstart[m] = stride*((m*(tval-m))>>1);
    ++m;
    }

  *alm_info = res;
  }
/* Allocates a sharp_alm_info describing all m in [0; mmax] and stores it in
 * *alm_info. Every m gets a block of (lmax+1) entries:
 *   mvstart[m] = stride * m * (lmax+1).
 * Both the structure and its mval/mvstart arrays are allocated with
 * RALLOC. */
void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
  sharp_alm_info **alm_info)
  {
  const int nm = mmax+1;
  const int rowlen = lmax+1;

  sharp_alm_info *res = RALLOC(sharp_alm_info,1);
  res->lmax = lmax;
  res->nm = nm;
  res->stride = stride;
  res->mval = RALLOC(int,nm);
  res->mvstart = RALLOC(ptrdiff_t,nm);

  ptrdiff_t m = 0;
  while (m<=mmax)
    {
    res->mval[m] = m;
    /* evaluated left to right in ptrdiff_t, as m has that type */
    res->mvstart[m] = stride*m*rowlen;
    ++m;
    }

  *alm_info = res;
  }

View file

@ -0,0 +1,57 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_almhelpers.h
* SHARP helper function for the creation of a_lm data structures
*
* Copyright (C) 2008-2011 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_ALMHELPERS_H
#define PLANCK_SHARP_ALMHELPERS_H
#include "sharp_lowlevel.h"
#ifdef __cplusplus
extern "C" {
#endif
/*! Initialises an a_lm data structure according to the scheme used by
Healpix_cxx.
A new sharp_alm_info covering m=0..\a mmax is allocated and returned in
\a *alm_info; the start index of each m is
stride*((m*(2*lmax+1-m))>>1).
\param lmax maximum l stored in the structure
\param mmax maximum m; the structure holds mmax+1 m values
\param stride stride between consecutive a_lm entries
\param alm_info receives the pointer to the newly allocated structure
\ingroup almgroup */
void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
sharp_alm_info **alm_info);
/*! Initialises an a_lm data structure according to the scheme used by
Fortran Healpix.
A new sharp_alm_info covering m=0..\a mmax is allocated and returned in
\a *alm_info; the start index of each m is stride*m*(lmax+1).
\param lmax maximum l stored in the structure
\param mmax maximum m; the structure holds mmax+1 m values
\param stride stride between consecutive a_lm entries
\param alm_info receives the pointer to the newly allocated structure
\ingroup almgroup */
void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
sharp_alm_info **alm_info);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,98 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_announce.c
* Banner for module startup
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_MPI
#include <mpi.h>
#endif
#include "sharp_announce.h"
#include "sharp_vecutil.h"
/* Prints one line describing the OpenMP support compiled into this binary
 * and, if present, the maximum number of threads reported by the runtime. */
static void OpenMP_status(void)
  {
#ifdef _OPENMP
  const int nthreads = omp_get_max_threads();
  if (nthreads<=1)
    printf("OpenMP active, but running with 1 thread only.\n");
  else
    printf("OpenMP active: max. %d threads.\n",nthreads);
#else
  printf("OpenMP: not supported by this binary\n");
#endif
  }
/* Prints one line describing the MPI support compiled into this binary and,
 * if present, the size of MPI_COMM_WORLD. */
static void MPI_status(void)
  {
#ifdef USE_MPI
  int ntasks_world;
  MPI_Comm_size(MPI_COMM_WORLD,&ntasks_world);
  if (ntasks_world<=1)
    printf("MPI active, but running with 1 task only.\n");
  else
    printf("MPI active with %d tasks.\n",ntasks_world);
#else
  printf("MPI: not supported by this binary\n");
#endif
  }
/* Prints the compile-time vector length VLEN. */
static void vecmath_status(void)
  {
  printf("Supported vector length: %d\n",VLEN);
  }
/* Prints a boxed banner containing `name` to stdout, followed by the
 * vector-length, OpenMP and MPI status lines and a closing blank line. */
void sharp_announce (const char *name)
  {
  const size_t width = strlen(name);
  size_t left;

  /* upper border: as many dashes as the name is long */
  printf("\n+-");
  for (left=width; left>0; --left)
    printf("-");
  printf("-+\n");

  printf("| %s |\n", name);

  /* lower border */
  printf("+-");
  for (left=width; left>0; --left)
    printf("-");
  printf("-+\n\n");

  vecmath_status();
  OpenMP_status();
  MPI_status();
  printf("\n");
  }
/* Common start-up code for the command-line tools.
 *
 * If `verbose` is nonzero the banner for `name` is printed. If `argc`
 * differs from `argc_expected` the process terminates with exit status 1;
 * the usage line (built from `name` and `argv_expected`) is written to
 * stderr only when `verbose` is nonzero. */
void sharp_module_startup (const char *name, int argc, int argc_expected,
  const char *argv_expected, int verbose)
  {
  if (verbose)
    sharp_announce (name);

  if (argc!=argc_expected)
    {
    if (verbose)
      fprintf(stderr, "Usage: %s %s\n", name, argv_expected);
    exit(1);
    }
  }

View file

@ -0,0 +1,39 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_announce.h
* Banner for module startup
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef SHARP_ANNOUNCE_H
#define SHARP_ANNOUNCE_H
/* Prints a banner containing `name`, followed by status information on
   vector length, OpenMP and MPI support, to stdout. */
void sharp_announce (const char *name);
/* Start-up helper for executables: prints the banner if `verbose` is
   nonzero, then checks that `argc` equals `argc_expected`. On mismatch the
   process exits with status 1; the usage message (`name` followed by
   `argv_expected`) is printed to stderr only if `verbose` is nonzero. */
void sharp_module_startup (const char *name, int argc, int argc_expected,
const char *argv_expected, int verbose);
#endif

149
external/sharp/libsharp/sharp_bench.c vendored Normal file
View file

@ -0,0 +1,149 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_bench.c
Copyright (C) 2012 Max-Planck-Society
\author Martin Reinecke
*/
#include <stdio.h>
#include <string.h>
#ifdef USE_MPI
#include "mpi.h"
#endif
#include "sharp.h"
#include "sharp_geomhelpers.h"
#include "sharp_almhelpers.h"
#include "c_utils.h"
#include "sharp_announce.h"
#include "sharp_core.h"
typedef complex double dcmplx;
/* Benchmarks one transform configuration on a fixed problem
 * (lmax=2047, mmax=128, 512 Gauss rings with 1024 pixels each).
 *
 * The transform is executed four times on zero-initialised input; the
 * smallest wall time is returned in *time and the smallest operation count
 * in *opcnt (both as reported by sharp_execute()). */
static void bench_sht (int spin, int nv, sharp_jobtype type,
  int ntrans, double *time, unsigned long long *opcnt)
  {
  const int lmax=2047;
  const int mmax=128;
  const int nrings=512;
  const int ppring=1024;
  const ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
  const ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
  const int ncomp = ntrans*((spin==0) ? 1 : 2);

  sharp_geom_info *tinfo;
  sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);

  double **map;
  ALLOC2D(map,double,ncomp,npix);
  SET_ARRAY(map[0],0,npix*ncomp,0.);

  sharp_alm_info *alms;
  sharp_make_triangular_alm_info(lmax,mmax,1,&alms);

  dcmplx **alm;
  ALLOC2D(alm,dcmplx,ncomp,nalms);
  SET_ARRAY(alm[0],0,nalms*ncomp,0.);

  /* start from sentinels larger than any realistic measurement */
  *time=1e30;
  *opcnt=1000000000000000;
  for (int run=0; run<4; ++run)
    {
    double run_time;
    unsigned long long run_ops;
    sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,
      &run_time,&run_ops);
    if (run_ops<*opcnt) *opcnt=run_ops;
    if (run_time<*time) *time=run_time;
    }

  DEALLOC2D(map);
  DEALLOC2D(alm);
  sharp_destroy_alm_info(alms);
  sharp_destroy_geom_info(tinfo);
  }
/* Benchmark driver. For every ntrans in 1..6, spin in {0,2} and job type it
 * times all nv values from 1 to sharp_get_nv_max(), reports the fastest one
 * on stdout and writes the table of best nv values to "sharp_oracle.inc".
 * The combination (SHARP_ALM2MAP_DERIV1, spin 0) is not benchmarked; its
 * table entry is written as -1. */
int main(void)
  {
#ifdef USE_MPI
  MPI_Init(NULL,NULL);
#endif
  sharp_module_startup("sharp_bench",1,1,"",1);
  printf("Benchmarking SHTs.\n\n");

  FILE *fp=fopen("sharp_oracle.inc","w");
  UTIL_ASSERT(fp, "failed to open oracle file for writing");
  fprintf(fp,"static const int maxtr = 6;\n");
  fprintf(fp,"static const int nv_opt[6][2][3] = {\n");

  /* indexed directly by the sharp_jobtype value */
  const char *shtname[]={"map2alm","alm2map","a2mder1"};

  for (int ntr=1; ntr<=6; ++ntr)
    {
    fprintf(fp,"{");
    for (int spin=0; spin<=2; spin+=2)
      {
      fprintf(fp,"{");
      for (sharp_jobtype type=SHARP_MAP2ALM; type<=SHARP_ALM2MAP_DERIV1; ++type)
        {
        const int last_type = (type==SHARP_ALM2MAP_DERIV1);
        if (!(last_type && (spin==0)))
          {
          int best_nv=-1, oracle_nv=sharp_nv_oracle(type,spin,ntr);
          unsigned long long min_ops=1000000000000000, ops;
          double best_time=1e30;
          /* slot 0 stays unused; nv runs from 1 to nv_max inclusive */
          double *timings=RALLOC(double,sharp_get_nv_max()+1);
          for (int nv=1; nv<=sharp_get_nv_max(); ++nv)
            {
            bench_sht (spin,nv,type,ntr,&timings[nv],&ops);
            if (ops<min_ops) min_ops=ops;
            if (timings[nv]<best_time)
              {
              best_time=timings[nv];
              best_nv=nv;
              }
            }
          /* last column: slowdown of the oracle's choice versus the best */
          printf("nt: %d %s spin: %d nv: %d time: %6.3f perf: %6.3f"
            " dev[%d]: %6.2f%%\n",ntr,shtname[type],
            spin,best_nv,best_time,min_ops/best_time*1e-9,oracle_nv,
            (timings[oracle_nv]-best_time)/best_time*100.);
          DEALLOC(timings);
          fprintf(fp,"%d",best_nv);
          }
        else
          fprintf(fp,"-1");

        if (!last_type) fprintf(fp,",");
        }

      if (spin==0)
        fprintf(fp,"},");
      else
        fprintf(fp,"}");
      printf("\n");
      }

    if (ntr<6)
      fprintf(fp,"},\n");
    else
      fprintf(fp,"}\n");
    }

  fprintf(fp,"};\n");
  fclose(fp);
#ifdef USE_MPI
  MPI_Finalize();
#endif
  return 0;
  }

223
external/sharp/libsharp/sharp_bench2.c vendored Normal file
View file

@ -0,0 +1,223 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_bench2.c
Copyright (C) 2012 Max-Planck-Society
\author Martin Reinecke
*/
#include <stdio.h>
#if (defined(_OPENMP) && defined(USE_MPI))
#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include <mpi.h>
#include "sharp_mpi.h"
#include "sharp.h"
#include "sharp_vecutil.h"
#include "sharp_geomhelpers.h"
#include "sharp_almhelpers.h"
#include "c_utils.h"
#include "sharp_announce.h"
#include "sharp_core.h"
#include "memusage.h"
typedef complex double dcmplx;
int ntasks, mytask;
/* Returns the sum of `val` over all tasks in MPI_COMM_WORLD. */
static unsigned long long totalops (unsigned long long val)
  {
  unsigned long long sum;
  MPI_Allreduce (&val, &sum, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM,
    MPI_COMM_WORLD);
  return sum;
  }
/* Returns the maximum of `val` over all tasks in MPI_COMM_WORLD. */
static double maxTime (double val)
  {
  double slowest;
  MPI_Allreduce (&val, &slowest, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
  return slowest;
  }
/* Returns the sum of `val` over all tasks in MPI_COMM_WORLD. */
static double totalMem (double val)
  {
  double sum;
  MPI_Allreduce (&val, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  return sum;
  }
/* Restricts `ainfo` in place to the m values handled by this task: entries
 * mytask, mytask+ntasks, mytask+2*ntasks, ... are kept and compacted to the
 * front of the arrays. mvstart is recomputed so that the kept m values are
 * stored back to back, each occupying lmax-m+1 entries. */
static void reduce_alm_info(sharp_alm_info *ainfo)
  {
  int kept=0;
  ptrdiff_t offset=0;
  int src=mytask;
  while (src<ainfo->nm)
    {
    const int m=ainfo->mval[src];
    ainfo->mval[kept]=m;
    /* stored minus m; presumably so that index l=m lands on `offset` --
       confirm against the a_lm indexing convention */
    ainfo->mvstart[kept]=offset-m;
    offset+=ainfo->lmax-m+1;
    src+=ntasks;
    ++kept;
    }
  ainfo->nm=kept;
  }
/* Restricts `ginfo` in place to the ring pairs handled by this task: pairs
 * mytask, mytask+ntasks, ... are kept and compacted to the front. The pixel
 * offsets of both rings of every kept pair are renumbered so that the rings
 * are stored contiguously; the second ring only consumes space if its nph is
 * positive. */
static void reduce_geom_info(sharp_geom_info *ginfo)
  {
  int kept=0;
  ptrdiff_t offset=0;
  int src=mytask;
  while (src<ginfo->npairs)
    {
    ginfo->pair[kept]=ginfo->pair[src];
    ginfo->pair[kept].r1.ofs=offset;
    offset+=ginfo->pair[kept].r1.nph;
    ginfo->pair[kept].r2.ofs=offset;
    if (ginfo->pair[kept].r2.nph>0)
      offset+=ginfo->pair[kept].r2.nph;
    src+=ntasks;
    ++kept;
    }
  ginfo->npairs=kept;
  }
/* Returns the number of a_lm entries described by `ainfo`, i.e. the sum of
 * (lmax - m + 1) over all stored m values. */
static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
  {
  ptrdiff_t total=0;
  int im=0;
  while (im<ainfo->nm)
    {
    total += ainfo->lmax-ainfo->mval[im]+1;
    ++im;
    }
  return total;
  }
/* Returns the number of pixels described by `ginfo`: the nph of the first
 * ring of every pair, plus the nph of the second ring where it is
 * positive. */
static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
  {
  ptrdiff_t total=0;
  int ip=0;
  while (ip<ginfo->npairs)
    {
    total += ginfo->pair[ip].r1.nph;
    if (ginfo->pair[ip].r2.nph>0)
      total += ginfo->pair[ip].r2.nph;
    ++ip;
    }
  return total;
  }
/* MPI/OpenMP benchmark driver.
 *
 * Command line:
 *   <healpix|ecp|gauss> <lmax> <nside|nphi> <a2m/m2a> <spin> <ntrans>
 *
 * Every task keeps its share of the rings and of the m values (see
 * reduce_geom_info() and reduce_alm_info()), fills maps and a_lm with 1 and
 * runs the requested transform repeatedly. The master task then prints one
 * result line with the best time, the corresponding operation rate and the
 * summed VmHWM() value of all tasks. */
int main(int argc, char **argv)
  {
  MPI_Init(NULL,NULL);
  MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
  MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
  int master=(mytask==0);

  /* exits with status 1 if the argument count is wrong */
  sharp_module_startup("sharp_bench2",argc,7,
    "<healpix|ecp|gauss> <lmax> <nside|nphi> <a2m/m2a> <spin> <ntrans>",0);

  int lmax=atoi(argv[2]);
  sharp_jobtype jtype = (strcmp(argv[4],"a2m")==0) ?
    SHARP_ALM2MAP : SHARP_MAP2ALM;
  int spin=atoi(argv[5]);
  int ntrans=atoi(argv[6]);

  sharp_geom_info *tinfo;
  ptrdiff_t npix=0;
  int geom2=0; /* second geometry figure printed in the result line */
  if (strcmp(argv[1],"gauss")==0)
    {
    int nrings=geom2=lmax+1;
    int ppring=atoi(argv[3]);
    sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
    }
  else if (strcmp(argv[1],"ecp")==0)
    {
    int nrings=geom2=2*lmax+2;
    int ppring=atoi(argv[3]);
    sharp_make_ecp_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
    }
  else if (strcmp(argv[1],"healpix")==0)
    {
    int nside=atoi(argv[3]);
    if (nside<1) nside=1;
    geom2=4*nside-1;
    sharp_make_healpix_geom_info (nside, 1, &tinfo);
    }
  else
    UTIL_FAIL("unknown grid geometry");

  /* keep only this task's rings */
  reduce_geom_info(tinfo);
  npix=get_npix(tinfo);

  int mmax=lmax;
  int ncomp = ntrans*((spin==0) ? 1 : 2);
  double **map;
  ALLOC2D(map,double,ncomp,npix);

  /* keep only this task's m values */
  sharp_alm_info *alms;
  sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
  reduce_alm_info(alms);
  ptrdiff_t nalms=get_nalms(alms);
  dcmplx **alm;
  ALLOC2D(alm,dcmplx,ncomp,nalms);

  /* npix and nalms are ptrdiff_t, so the counters use that type as well
     instead of int, which could overflow for very large problems */
  for (int n=0; n<ncomp; ++n)
    {
    for (ptrdiff_t i=0; i<npix; ++i) map[n][i]=1;
    for (ptrdiff_t i=0; i<nalms; ++i) alm[n][i]=1;
    }

  /* run at least twice, and keep going until ntries*time reaches 5 */
  double time=1e20;
  unsigned long long opcnt=0;
  for (int ntries=0; (ntries<2)||(ntries*time<5); ++ntries)
    {
    double ltime;
    unsigned long long lopcnt;
    sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,0,&alm[0],&map[0],
      tinfo,alms,ntrans,1,0,&ltime,&lopcnt);
    ltime=maxTime(ltime);
    if (ltime<time) { time=ltime; opcnt=totalops(lopcnt); }
    }

  DEALLOC2D(map);
  DEALLOC2D(alm);
  sharp_destroy_alm_info(alms);
  sharp_destroy_geom_info(tinfo);

  double mHWM=totalMem(VmHWM());
  int nomp=omp_get_max_threads();

  /* getenv() returns NULL when HOST is not set in the environment; passing
     NULL to a %s conversion is undefined behaviour, so use a placeholder */
  const char *host=getenv("HOST");
  if (host==NULL) host="unknown";

  if (master)
    printf("%-12s %-7s %-3s %2d %d %2d %3d %5d %5d %1d %.2e %7.2f %9.2f\n",
      host,argv[1],argv[4],spin,VLEN,nomp,ntasks,lmax,geom2,ntrans,
      time,opcnt/(time*1e9),mHWM/(1<<20));

  MPI_Finalize();
  return 0;
  }
#else
#include "c_utils.h"
/* Fallback entry point for builds without both OpenMP and MPI: the
 * benchmark cannot run, so report the failure. */
int main(void)
  {
  UTIL_FAIL("Need OpenMP and MPI");
  return 1;
  }
#endif

View file

@ -0,0 +1,135 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/* \file sharp_complex_hacks.h
* support for converting vector types and complex numbers
*
* Copyright (C) 2012 Max-Planck-Society
* Author: Martin Reinecke
*/
#ifndef SHARP_COMPLEX_HACKS_H
#define SHARP_COMPLEX_HACKS_H
#ifdef __cplusplus
#error This header file cannot be included from C++, only from C
#endif
#include <math.h>
#include <complex.h>
#include "sharp_vecsupport.h"
#define UNSAFE_CODE
#if (VLEN==1)
/* VLEN==1 variant: combines `a` (real part) and `b` (imaginary part) into
   one complex number. */
static inline complex double vhsum_cmplx(Tv a, Tv b)
  {
  return a+_Complex_I*b;
  }
/* VLEN==1 variant: adds the complex number (a,b) to *c1 and the complex
   number (c,d) to *c2. */
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
  complex double * restrict c1, complex double * restrict c2)
  {
  *c1 += a+_Complex_I*b;
  *c2 += c+_Complex_I*d;
  }
#endif
#if (VLEN==2)
/* VLEN==2 variant: returns the complex number whose real part is the
   horizontal sum of the lanes of a and whose imaginary part is the
   horizontal sum of the lanes of b. */
static inline complex double vhsum_cmplx (Tv a, Tv b)
{
#if defined(__SSE3__)
/* hadd yields (a0+a1, b0+b1) in a single instruction */
Tv tmp = _mm_hadd_pd(a,b);
#else
/* SSE2 fallback: (a1,b0) + (a0,b1) gives the same (a0+a1, b0+b1) */
Tv tmp = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
#endif
/* reinterpret the two doubles as (real, imag) through a union */
union {Tv v; complex double c; } u;
u.v=tmp; return u.c;
}
/* VLEN==2 variant: adds (hsum(a), hsum(b)) to *c1 and (hsum(c), hsum(d)) to
   *c2, where hsum is the sum of both lanes of a vector.
   With UNSAFE_CODE defined the complex targets are accessed directly through
   __m128d pointers; NOTE(review): this presumably requires c1 and c2 to be
   suitably aligned for __m128d and relies on pointer type punning --
   confirm with the callers. Otherwise the sums go through a union. */
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c,
Tv d, complex double * restrict c1, complex double * restrict c2)
{
#ifdef UNSAFE_CODE
#if defined(__SSE3__)
vaddeq(*((__m128d *)c1),_mm_hadd_pd(a,b));
vaddeq(*((__m128d *)c2),_mm_hadd_pd(c,d));
#else
/* SSE2 fallback: (x1,y0) + (x0,y1) == (x0+x1, y0+y1) */
vaddeq(*((__m128d *)c1),vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0))));
vaddeq(*((__m128d *)c2),vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
_mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0))));
#endif
#else
/* portable path: compute the sums in vectors, read them back as complex */
union {Tv v; complex double c; } u1, u2;
#if defined(__SSE3__)
u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
#else
u1.v = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
u2.v = vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
_mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)));
#endif
*c1+=u1.c; *c2+=u2.c;
#endif
}
#endif
#if (VLEN==4)
/* VLEN==4 variant: returns the complex number whose real part is the sum of
   the four lanes of a and whose imaginary part is the sum of the four lanes
   of b. */
static inline complex double vhsum_cmplx (Tv a, Tv b)
{
/* tmp = (a0+a1, b0+b1, a2+a3, b2+b3) */
Tv tmp=_mm256_hadd_pd(a,b);
/* tmp2 = tmp with its two 128-bit halves swapped */
Tv tmp2=_mm256_permute2f128_pd(tmp,tmp,1);
/* both halves now hold (sum(a), sum(b)) */
tmp=_mm256_add_pd(tmp,tmp2);
#ifdef UNSAFE_CODE
/* store the lower half directly into the complex result via a pointer cast */
complex double ret;
*((__m128d *)&ret)=_mm256_extractf128_pd(tmp, 0);
return ret;
#else
union {Tv v; complex double c[2]; } u;
u.v=tmp; return u.c[0];
#endif
}
/* VLEN==4 variant: adds (hsum(a), hsum(b)) to *c1 and (hsum(c), hsum(d)) to
   *c2, where hsum is the sum of all four lanes of a vector.
   With UNSAFE_CODE defined the complex targets are read and written through
   __m128d pointers; NOTE(review): this presumably requires c1 and c2 to be
   suitably aligned for __m128d -- confirm with the callers. */
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
complex double * restrict c1, complex double * restrict c2)
{
/* tmp1 = (a0+a1, b0+b1, a2+a3, b2+b3), tmp2 likewise for c and d */
Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
/* 49 (0x31) selects the upper halves of tmp1 and tmp2,
   32 (0x20) selects their lower halves */
Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
/* tmp1 = (sum(a), sum(b), sum(c), sum(d)) */
tmp1=vadd(tmp3,tmp4);
#ifdef UNSAFE_CODE
*((__m128d *)c1)=_mm_add_pd(*((__m128d *)c1),_mm256_extractf128_pd(tmp1, 0));
*((__m128d *)c2)=_mm_add_pd(*((__m128d *)c2),_mm256_extractf128_pd(tmp1, 1));
#else
union {Tv v; complex double c[2]; } u;
u.v=tmp1;
*c1+=u.c[0]; *c2+=u.c[1];
#endif
}
#endif
#endif

239
external/sharp/libsharp/sharp_core.c vendored Normal file
View file

@ -0,0 +1,239 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_core.c
* Computational core
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#include <complex.h>
#include <math.h>
#include <string.h>
#include "sharp_vecsupport.h"
#include "sharp_complex_hacks.h"
#include "sharp_ylmgen_c.h"
#include "sharp.h"
#include "sharp_core.h"
#include "c_utils.h"
typedef complex double dcmplx;
/* Largest number of simultaneous transforms (job->ntrans) for which
   inner_loop() dispatches to a kernel specialised for both nv and ntrans;
   larger values use the kernels that take ntrans as a run-time argument. */
#define MAXJOB_SPECIAL 2
/* Token-pasting helpers: CONCAT2(a,b) expands to a_b and CONCAT3(a,b,c) to
   a_b_c. The extra X... level makes sure macro arguments are expanded
   before they are pasted. */
#define XCONCAT2(a,b) a##_##b
#define CONCAT2(a,b) XCONCAT2(a,b)
#define XCONCAT3(a,b,c) a##_##b##_##c
#define CONCAT3(a,b,c) XCONCAT3(a,b,c)
/* The helper file is included once for every nvec from 1 to 6; presumably
   each inclusion defines the inner_loop_<nvec>... kernels that inner_loop()
   below dispatches to (the included file is not shown here). */
#define nvec 1
#include "sharp_inchelper1.inc.c"
#undef nvec
#define nvec 2
#include "sharp_inchelper1.inc.c"
#undef nvec
#define nvec 3
#include "sharp_inchelper1.inc.c"
#undef nvec
#define nvec 4
#include "sharp_inchelper1.inc.c"
#undef nvec
#define nvec 5
#include "sharp_inchelper1.inc.c"
#undef nvec
#define nvec 6
#include "sharp_inchelper1.inc.c"
#undef nvec
/* Dispatches to the kernel matching job->nv and job->ntrans.
   If ntrans <= MAXJOB_SPECIAL, the kernel inner_loop_<nv>_<ntrans> is
   selected via a switch on ntrans*16+nv (case 0xTN means ntrans=T, nv=N).
   Otherwise (only compiled if MAXJOB_SPECIAL<6) the kernel inner_loop_<nv>
   is called with ntrans passed as an additional run-time argument.
   All remaining arguments are forwarded unchanged.
   If no case matches (e.g. nv outside 1..6), UTIL_FAIL is invoked. */
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *idx)
{
int njobs=job->ntrans;
if (njobs<=MAXJOB_SPECIAL)
{
/* high nibble: number of transforms, low nibble: nv */
switch (njobs*16+job->nv)
{
#if (MAXJOB_SPECIAL>=1)
case 0x11:
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x12:
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x13:
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x14:
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x15:
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x16:
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
#endif
#if (MAXJOB_SPECIAL>=2)
case 0x21:
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x22:
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x23:
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x24:
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x25:
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x26:
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
#endif
#if (MAXJOB_SPECIAL>=3)
case 0x31:
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x32:
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x33:
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x34:
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x35:
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x36:
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
#endif
#if (MAXJOB_SPECIAL>=4)
case 0x41:
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x42:
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x43:
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x44:
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x45:
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x46:
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
#endif
#if (MAXJOB_SPECIAL>=5)
case 0x51:
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x52:
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x53:
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x54:
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x55:
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x56:
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
#endif
#if (MAXJOB_SPECIAL>=6)
case 0x61:
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x62:
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x63:
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x64:
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x65:
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
case 0x66:
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
return;
#endif
}
}
#if (MAXJOB_SPECIAL<6)
else
{
/* generic kernels: specialised on nv only, ntrans passed at run time */
switch (job->nv)
{
case 1:
CONCAT2(inner_loop,1)
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
return;
case 2:
CONCAT2(inner_loop,2)
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
return;
case 3:
CONCAT2(inner_loop,3)
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
return;
case 4:
CONCAT2(inner_loop,4)
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
return;
case 5:
CONCAT2(inner_loop,5)
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
return;
case 6:
CONCAT2(inner_loop,6)
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
return;
}
}
#endif
/* reached only if no case above matched */
UTIL_FAIL("Incorrect vector parameters");
}

50
external/sharp/libsharp/sharp_core.h vendored Normal file
View file

@ -0,0 +1,50 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_core.h
* Interface for the computational core
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_CORE_H
#define PLANCK_SHARP_CORE_H
#include "sharp_internal.h"
#include "sharp_ylmgen_c.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Entry point of the computational core: selects the kernel matching
   job->nv and job->ntrans and forwards all arguments to it unchanged.
   Fails via UTIL_FAIL if no matching kernel exists. */
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *idx);
#ifdef __cplusplus
}
#endif
#endif

288
external/sharp/libsharp/sharp_core_inc.c vendored Normal file
View file

@ -0,0 +1,288 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_core_inc.c
* Type-dependent code for the computational core
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
/* The types below depend on the macro nvec that is set by the including
   file. NOTE(review): Y(...) and possibly Tb are presumably macros that
   decorate the names per nvec -- confirm in the including helper file. */
/* Block of nvec SIMD vectors. */
typedef struct
{ Tv v[nvec]; } Tb;
/* A vector block overlaid with its VLEN*nvec scalar lanes. */
typedef union
{ Tb b; double s[VLEN*nvec]; } Y(Tbu);
/* Real and imaginary parts, one vector block each. */
typedef struct
{ Tb r, i; } Y(Tbri);
/* Four vector blocks named qr, qi, ur, ui. */
typedef struct
{ Tb qr, qi, ur, ui; } Y(Tbqu);
/* Scalar-lane layout matching Tbri. */
typedef struct
{ double r[VLEN*nvec], i[VLEN*nvec]; } Y(Tsri);
/* Scalar-lane layout matching Tbqu. */
typedef struct
{ double qr[VLEN*nvec],qi[VLEN*nvec],ur[VLEN*nvec],ui[VLEN*nvec]; } Y(Tsqu);
/* Tbri overlaid with its scalar-lane layout. */
typedef union
{ Y(Tbri) b; Y(Tsri)s; } Y(Tburi);
/* Tbqu overlaid with its scalar-lane layout. */
typedef union
{ Y(Tbqu) b; Y(Tsqu)s; } Y(Tbuqu);
/* Returns a vector block in which every vector is vload(val). */
static inline Tb Y(Tbconst)(double val)
  {
  Tb out;
  Tv filled=vload(val);
  int k=0;
  while (k<nvec)
    {
    out.v[k]=filled;
    ++k;
    }
  return out;
  }
/* Multiplies every vector of *a in place by vload(b). */
static inline void Y(Tbmuleq1)(Tb * restrict a, double b)
  {
  Tv factor=vload(b);
  for (int k=0; k<nvec; ++k)
    vmuleq(a->v[k],factor);
  }
/* Returns the vector-by-vector product (via vmul) of the blocks a and b. */
static inline Tb Y(Tbprod)(Tb a, Tb b)
  {
  Tb prod;
  for (int k=0; k<nvec; ++k)
    prod.v[k]=vmul(a.v[k],b.v[k]);
  return prod;
  }
/* Multiplies every vector of *a in place by the corresponding vector of
   b. */
static inline void Y(Tbmuleq)(Tb * restrict a, Tb b)
  {
  for (int k=0; k<nvec; ++k)
    vmuleq(a->v[k],b.v[k]);
  }
/* Brings the magnitude of every nonzero lane of *val into the range
   [sharp_fsmall*maxval; maxval] and records the rescaling in *scale:
   - while a lane exceeds maxval in magnitude it is multiplied by
     sharp_fsmall and its scale entry is increased by one;
   - while a nonzero lane is smaller than sharp_fsmall*maxval in magnitude
     it is multiplied by sharp_fbig and its scale entry is decreased by one.
   NOTE(review): this assumes vblend(mask,x,y) picks x in lanes where mask
   is set and y elsewhere, and that vone/vzero are the constants 1 and 0 --
   confirm in sharp_vecsupport.h. */
static inline void Y(Tbnormalize) (Tb * restrict val, Tb * restrict scale,
double maxval)
{
const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
const Tv vfmin=vload(sharp_fsmall*maxval), vfmax=vload(maxval);
for (int i=0;i<nvec; ++i)
{
/* scale down lanes that are too large */
Tv mask = vgt(vabs(val->v[i]),vfmax);
while (vanyTrue(mask))
{
vmuleq(val->v[i],vblend(mask,vfsmall,vone));
vaddeq(scale->v[i],vblend(mask,vone,vzero));
mask = vgt(vabs(val->v[i]),vfmax);
}
/* scale up lanes that are too small; exact zeros are excluded, since they
   would never leave the loop */
mask = vand(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
while (vanyTrue(mask))
{
vmuleq(val->v[i],vblend(mask,vfbig,vone));
vsubeq(scale->v[i],vblend(mask,vone,vzero));
mask = vand(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
}
}
}
/* Raises every entry of val to the integer power npow using binary
   exponentiation (square-and-multiply), keeping the result in a
   "value plus scale counter" representation.
   val   base values (passed by value, modified locally)
   npow  exponent; NOTE(review): the loop shifts npow right until it is zero,
         so callers presumably pass only non-negative values - confirm
   resd  receives the normalized result values
   ress  receives the matching scale counters produced by Y(Tbnormalize)
   After every multiplication the operands are renormalized against
   sharp_fbighalf so that intermediate products stay in range. */
static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
Tb * restrict ress)
{
Tb scale=Y(Tbconst)(0.), scaleint=Y(Tbconst)(0.), res=Y(Tbconst)(1.);
Y(Tbnormalize)(&val,&scaleint,sharp_fbighalf);
do
{
/* current bit set: fold the current power of val into the result */
if (npow&1)
{
for (int i=0; i<nvec; ++i)
{
vmuleq(res.v[i],val.v[i]);
vaddeq(scale.v[i],scaleint.v[i]);
}
Y(Tbnormalize)(&res,&scale,sharp_fbighalf);
}
/* square val; its scale counter doubles accordingly */
for (int i=0; i<nvec; ++i)
{
vmuleq(val.v[i],val.v[i]);
vaddeq(scaleint.v[i],scaleint.v[i]);
}
Y(Tbnormalize)(&val,&scaleint,sharp_fbighalf);
}
while(npow>>=1);
*resd=res;
*ress=scale;
}
/* Checks every vector of *lam2 against sharp_ftol. Wherever the magnitude
   exceeds the tolerance, the matching lanes of both *lam1 and *lam2 are
   multiplied by sharp_fsmall and the matching lanes of *scale are
   incremented by one.
   Returns 1 if any lane was rescaled, 0 otherwise. */
static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
  Tb * restrict scale)
{
  int rescaled = 0;
  for (int k = 0; k < nvec; ++k)
  {
    const Tv over = vgt(vabs(lam2->v[k]),vload(sharp_ftol));
    if (!vanyTrue(over))
      continue;
    rescaled = 1;
    const Tv shrink = vblend(over,vload(sharp_fsmall),vone);
    vmuleq(lam1->v[k],shrink);
    vmuleq(lam2->v[k],shrink);
    vaddeq(scale->v[k],vblend(over,vone,vzero));
  }
  return rescaled;
}
/* Returns the result of vallTrue() on the combined "a < b" masks of all
   vectors in a, i.e. true only if every entry of a is less than b. */
static inline int Y(TballLt)(Tb a,double b)
{
  const Tv bound = vload(b);
  Tv verdict = vlt(a.v[0],bound);
  int k = 1;
  while (k < nvec)
  {
    verdict = vand(verdict,vlt(a.v[k],bound));
    ++k;
  }
  return vallTrue(verdict);
}
/* Returns the result of vallTrue() on the combined "a > b" masks of all
   vectors in a, i.e. true only if every entry of a is greater than b. */
static inline int Y(TballGt)(Tb a,double b)
{
  const Tv bound = vload(b);
  Tv verdict = vgt(a.v[0],bound);
  int k = 1;
  while (k < nvec)
  {
    verdict = vand(verdict,vgt(a.v[k],bound));
    ++k;
  }
  return vallTrue(verdict);
}
/* Returns the result of vallTrue() on the combined "a >= b" masks of all
   vectors in a, i.e. true only if every entry of a is at least b. */
static inline int Y(TballGe)(Tb a,double b)
{
  const Tv bound = vload(b);
  Tv verdict = vge(a.v[0],bound);
  int k = 1;
  while (k < nvec)
  {
    verdict = vand(verdict,vge(a.v[k],bound));
    ++k;
  }
  return vallTrue(verdict);
}
/* Translates per-entry scale counters into correction factors.
   scale   block of scale counters (stored as doubles)
   corfac  receives one factor per entry
   cf      lookup table, indexed by (integer scale - sharp_minscale)
   Entries whose scale is below sharp_minscale get a factor of 0. */
static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac,
  const double * restrict cf)
{
  Y(Tbu) in, out;
  in.b = scale;
  for (int k = 0; k < VLEN*nvec; ++k)
  {
    if (in.s[k]<sharp_minscale)
      out.s[k] = 0.;
    else
      out.s[k] = cf[(int)(in.s[k])-sharp_minscale];
  }
  *corfac = out.b;
}
/* Starts the two-term recurrence at l = gen->m and advances it, two steps at
   a time, until at least one entry of the scale block is no longer below
   sharp_limscale.
   sth, cth  blocks of per-entry input values (presumably sine and cosine of
             the ring colatitudes - confirm against callers)
   l_        receives the l value at which iteration stopped
   lam_1_, lam_2_, scale_  receive the recurrence state at that l
   gen       generator data providing m, lmax, mfac[] and rf[]
   If the recurrence would run past gen->lmax, *l_ is set to gen->lmax+1 and
   the function returns early; in that case lam_1_, lam_2_ and scale_ are NOT
   written, so callers must test *l_ before using them. */
static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
Tb * restrict lam_1_, Tb * restrict lam_2_, Tb * restrict scale_,
const sharp_Ylmgen_C * restrict gen)
{
int l=gen->m;
Tb lam_1=Y(Tbconst)(0.), lam_2, scale;
/* starting value: sth^m times mfac[m], with the sign flipped for odd m */
Y(mypow) (sth,l,&lam_2,&scale);
Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]);
Y(Tbnormalize)(&lam_2,&scale,sharp_ftol);
int below_limit = Y(TballLt)(scale,sharp_limscale);
while (below_limit)
{
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
Tv r0=vload(gen->rf[l].f[0]),r1=vload(gen->rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
r0=vload(gen->rf[l+1].f[0]); r1=vload(gen->rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
/* the limit test is only repeated when a rescale changed the scale block */
if (Y(rescale)(&lam_1,&lam_2,&scale))
below_limit = Y(TballLt)(scale,sharp_limscale);
l+=2;
}
*l_=l; *lam_1_=lam_1; *lam_2_=lam_2; *scale_=scale;
}
/* Performs one step of the three-coefficient recurrence on the "p" and "m"
   branches simultaneously:
     rxp <- (cth - f1) * f0 * ryp - f2 * rxp
     rxm <- (cth + f1) * f0 * rym - f2 * rxm
   where f0, f1, f2 are the three entries of fx. ryp and rym are only read. */
static inline void Y(rec_step) (Tb * restrict rxp, Tb * restrict rxm,
  Tb * restrict ryp, Tb * restrict rym, const Tb cth,
  const sharp_ylmgen_dbl3 fx)
{
  const Tv c0 = vload(fx.f[0]);
  const Tv c1 = vload(fx.f[1]);
  const Tv c2 = vload(fx.f[2]);
  int k = 0;
  while (k < nvec)
  {
    rxp->v[k] = vsub(vmul(vsub(cth.v[k],c1),vmul(c0,ryp->v[k])),
                     vmul(c2,rxp->v[k]));
    rxm->v[k] = vsub(vmul(vadd(cth.v[k],c1),vmul(c0,rym->v[k])),
                     vmul(c2,rxm->v[k]));
    ++k;
  }
}
/* Spin counterpart of Y(iter_to_ieee): sets up the starting values of the
   "p" and "m" recurrences and advances both, two steps at a time, until the
   two scale blocks are no longer both entirely below sharp_limscale.
   cth                 block of per-entry input values (presumably cosines of
                       the ring colatitudes - confirm against callers)
   l_                  receives the l value at which iteration stopped
   rec1p_ .. rec2m_    receive the recurrence state
   scalep_, scalem_    receive the matching scale counters
   gen                 generator data (fx, cosPow, sinPow, prefac, fscale, m,
                       mhi, s, preMinus_p, preMinus_m, lmax)
   If the recurrence would run past gen->lmax, *l_ is set to gen->lmax+1 and
   the function returns early WITHOUT writing the other outputs. */
static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_,
Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen)
{
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb cth2, sth2;
/* cth2 = sqrt((1+cth)/2), sth2 = sqrt((1-cth)/2), both clamped from below
   to 1e-15 */
for (int i=0; i<nvec; ++i)
{
cth2.v[i]=vsqrt(vmul(vadd(vone,cth.v[i]),vload(0.5)));
cth2.v[i]=vmax(cth2.v[i],vload(1e-15));
sth2.v[i]=vsqrt(vmul(vsub(vone,cth.v[i]),vload(0.5)));
sth2.v[i]=vmax(sth2.v[i],vload(1e-15));
}
/* scaled powers: cth2^cosPow, sth2^sinPow, cth2^sinPow, sth2^cosPow */
Tb ccp, ccps, ssp, ssps, csp, csps, scp, scps;
Y(mypow)(cth2,gen->cosPow,&ccp,&ccps); Y(mypow)(sth2,gen->sinPow,&ssp,&ssps);
Y(mypow)(cth2,gen->sinPow,&csp,&csps); Y(mypow)(sth2,gen->cosPow,&scp,&scps);
Tb rec2p, rec2m, scalep, scalem;
Tb rec1p=Y(Tbconst)(0.), rec1m=Y(Tbconst)(0.);
Tv prefac=vload(gen->prefac[gen->m]),
prescale=vload(gen->fscale[gen->m]);
/* first factor of the starting values, including the prefactor and its
   scale */
for (int i=0; i<nvec; ++i)
{
rec2p.v[i]=vmul(prefac,ccp.v[i]);
scalep.v[i]=vadd(prescale,ccps.v[i]);
rec2m.v[i]=vmul(prefac,csp.v[i]);
scalem.v[i]=vadd(prescale,csps.v[i]);
}
/* normalize before applying the second factor */
Y(Tbnormalize)(&rec2m,&scalem,sharp_fbighalf);
Y(Tbnormalize)(&rec2p,&scalep,sharp_fbighalf);
for (int i=0; i<nvec; ++i)
{
rec2p.v[i]=vmul(rec2p.v[i],ssp.v[i]);
scalep.v[i]=vadd(scalep.v[i],ssps.v[i]);
rec2m.v[i]=vmul(rec2m.v[i],scp.v[i]);
scalem.v[i]=vadd(scalem.v[i],scps.v[i]);
/* sign adjustments requested by the generator; note that the odd-s flip is
   applied to rec2p only */
if (gen->preMinus_p)
rec2p.v[i]=vneg(rec2p.v[i]);
if (gen->preMinus_m)
rec2m.v[i]=vneg(rec2m.v[i]);
if (gen->s&1)
rec2p.v[i]=vneg(rec2p.v[i]);
}
Y(Tbnormalize)(&rec2m,&scalem,sharp_ftol);
Y(Tbnormalize)(&rec2p,&scalep,sharp_ftol);
int l=gen->mhi;
int below_limit = Y(TballLt)(scalep,sharp_limscale)
&& Y(TballLt)(scalem,sharp_limscale);
while (below_limit)
{
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]);
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]);
/* non-short-circuit '|' so that both branches are always rescaled */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
below_limit = Y(TballLt)(scalep,sharp_limscale)
&& Y(TballLt)(scalem,sharp_limscale);
l+=2;
}
*l_=l;
*rec1p_=rec1p; *rec2p_=rec2p; *scalep_=scalep;
*rec1m_=rec1m; *rec2m_=rec2m; *scalem_=scalem;
}

View file

@ -0,0 +1,810 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_core_inc2.c
* Type-dependent code for the computational core
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
/* One r/i block pair per job; njobs is supplied by the including file. */
typedef struct
{ Y(Tbri) j[njobs]; } Z(Tbrij);
/* Overlay of the vector view (b) with a per-job plain-double view (j). */
typedef union
{ Z(Tbrij) b; Y(Tsri) j[njobs]; } Z(Tburij);
/* One qr/qi/ur/ui block set per job. */
typedef struct
{ Y(Tbqu) j[njobs]; } Z(Tbquj);
/* Overlay of the vector view (b) with a per-job plain-double view (j). */
typedef union
{ Z(Tbquj) b; Y(Tsqu) j[njobs]; } Z(Tbuquj);
/* Main accumulation loop of the alm->map direction for a fixed number of
   jobs. Continues the two-term recurrence from degree l up to lmax and adds
   lambda * alm to the accumulators.
   cth            block of per-entry input values used in the recurrence
   p1, p2         accumulators; p1 receives the terms at l, l+2, ... and p2
                  the terms at l+1, l+3, ...
   lam_1, lam_2   recurrence state on entry; lam_2 is the value paired with
                  degree l
   rf             recurrence coefficients, indexed by degree
   alm            coefficients, laid out as alm[njobs*degree + job]
   l, lmax        first and last degree to process */
static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
int l, int lmax)
{
#if (njobs>1)
/* variant processing four degrees per iteration; only compiled for more than
   one job */
while (l<lmax-2)
{
Tb lam_3, lam_4;
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_3.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_4.v[i] = vsub(vmul(vmul(cth.v[i],lam_3.v[i]),r0),vmul(lam_2.v[i],r1));
r0=vload(rf[l+2].f[0]);r1=vload(rf[l+2].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_4.v[i]),r0),vmul(lam_3.v[i],r1));
/* here lam_2, lam_3, lam_4, lam_1 belong to degrees l, l+1, l+2, l+3 */
for (int j=0; j<njobs; ++j)
{
Tv ar2=vload(creal(alm[njobs*l+j])),
ai2=vload(cimag(alm[njobs*l+j])),
ar4=vload(creal(alm[njobs*(l+2)+j])),
ai4=vload(cimag(alm[njobs*(l+2)+j]));
for (int i=0; i<nvec; ++i)
{
vfmaaeq(p1->j[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
vfmaaeq(p1->j[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
}
Tv ar3=vload(creal(alm[njobs*(l+1)+j])),
ai3=vload(cimag(alm[njobs*(l+1)+j])),
ar1=vload(creal(alm[njobs*(l+3)+j])),
ai1=vload(cimag(alm[njobs*(l+3)+j]));
for (int i=0; i<nvec; ++i)
{
vfmaaeq(p2->j[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
vfmaaeq(p2->j[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
}
}
r0=vload(rf[l+3].f[0]);r1=vload(rf[l+3].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_4.v[i],r1));
l+=4;
}
#endif
/* two degrees per iteration */
while (l<lmax)
{
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),
ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
vfmaeq(p1->j[j].r.v[i],lam_2.v[i],ar);
vfmaeq(p1->j[j].i.v[i],lam_2.v[i],ai);
}
ar=vload(creal(alm[njobs*(l+1)+j]));
ai=vload(cimag(alm[njobs*(l+1)+j]));
for (int i=0; i<nvec; ++i)
{
vfmaeq(p2->j[j].r.v[i],lam_1.v[i],ar);
vfmaeq(p2->j[j].i.v[i],lam_1.v[i],ai);
}
}
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
l+=2;
}
/* one degree left over: add its contribution without advancing further */
if (l==lmax)
{
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
vfmaeq(p1->j[j].r.v[i],lam_2.v[i],ar);
vfmaeq(p1->j[j].i.v[i],lam_2.v[i],ai);
}
}
}
}
/* Main accumulation loop of the map->alm direction for a fixed number of
   jobs. Continues the two-term recurrence from degree l up to lmax; for every
   degree it forms the sum over all entries of lambda * p and hands it to
   vhsum_cmplx2 / vhsum_cmplx for storage in alm.
   cth            block of per-entry input values used in the recurrence
   p1, p2         input blocks; p1 is combined with degrees l, l+2, ... and
                  p2 with degrees l+1, l+3, ...
   lam_1, lam_2   recurrence state on entry; lam_2 is paired with degree l
   rf             recurrence coefficients, indexed by degree
   alm            output coefficients, laid out as alm[degree*njobs + job]
   l, lmax        first and last degree to process */
static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
const Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax)
{
while (l<lmax)
{
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
for (int j=0; j<njobs; ++j)
{
Tv tr1=vzero, ti1=vzero, tr2=vzero, ti2=vzero;
for (int i=0; i<nvec; ++i)
{
vfmaeq(tr1,lam_2.v[i],p1->j[j].r.v[i]);
vfmaeq(ti1,lam_2.v[i],p1->j[j].i.v[i]);
}
for (int i=0; i<nvec; ++i)
{
vfmaeq(tr2,lam_1.v[i],p2->j[j].r.v[i]);
vfmaeq(ti2,lam_1.v[i],p2->j[j].i.v[i]);
}
/* presumably adds the two horizontal sums to the two alm entries, matching
   the explicit "+=" used for the last degree below - confirm in the vector
   support header */
vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]);
}
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
l+=2;
}
/* one degree left over */
if (l==lmax)
{
for (int j=0; j<njobs; ++j)
{
Tv tre=vzero, tim=vzero;
for (int i=0; i<nvec; ++i)
{
vfmaeq(tre,lam_2.v[i],p1->j[j].r.v[i]);
vfmaeq(tim,lam_2.v[i],p1->j[j].i.v[i]);
}
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
}
}
}
/* alm->map driver for one block of entries and the m currently prepared in
   gen.
   1. Y(iter_to_ieee) skips the degrees for which all entries are below
      sharp_limscale.
   2. While not every scale entry has reached sharp_minscale, the recurrence
      is advanced one degree at a time and each contribution is multiplied by
      the per-entry correction factor from Y(getCorfac).
   3. Once all entries satisfy scale >= sharp_minscale, the correction factor
      is folded into lam_1/lam_2 and the remaining degrees are handled by
      Z(alm2map_kernel).
   cth, sth  blocks of per-entry input values
   gen       generator data (lmax, m, rf, cf)
   job       provides almtmp (read) and opcnt (updated operation counter)
   p1, p2    accumulators, added to in place
   done      set to 1 when the recurrence reached lmax inside this function
             (steps 1 or 2); it is not written otherwise */
static void Z(calc_alm2map) (const Tb cth, const Tb sth,
const sharp_Ylmgen_C *gen, sharp_job *job, Z(Tbrij) * restrict p1,
Z(Tbrij) * restrict p2, int *done)
{
int l,lmax=gen->lmax;
Tb lam_1,lam_2,scale;
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scale,sharp_minscale);
while (!full_ieee)
{
/* contribution of degree l (lam_2) goes to p1 */
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
vfmaeq(p1->j[j].r.v[i],tmp,ar);
vfmaeq(p1->j[j].i.v[i],tmp,ai);
}
}
if (++l>lmax) break;
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
/* contribution of the next degree (lam_1) goes to p2 */
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
vfmaeq(p2->j[j].r.v[i],tmp,ar);
vfmaeq(p2->j[j].i.v[i],tmp,ai);
}
}
if (++l>lmax) break;
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
/* correction factors only need refreshing when the scale block changed */
if (Y(rescale)(&lam_1,&lam_2,&scale))
{
Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGe)(scale,sharp_minscale);
}
}
if (l>lmax) { *done=1; return; }
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax);
}
/* map->alm driver for one block of entries and the m currently prepared in
   gen. Mirrors Z(calc_alm2map):
   1. Y(iter_to_ieee) skips the degrees for which all entries are below
      sharp_limscale.
   2. While not every scale entry has reached sharp_minscale, the recurrence
      is advanced one degree at a time; each lambda is multiplied by the
      per-entry correction factor before being summed against p1/p2 and added
      to alm.
   3. Afterwards the correction factor is folded into lam_1/lam_2 and
      Z(map2alm_kernel) handles the remaining degrees.
   cth, sth  blocks of per-entry input values
   gen       generator data (lmax, m, rf, cf)
   job       provides almtmp (accumulated into) and opcnt (updated)
   p1, p2    input blocks
   done      set to 1 when the recurrence reached lmax inside this function;
             it is not written otherwise */
static void Z(calc_map2alm) (const Tb cth, const Tb sth,
const sharp_Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1,
const Z(Tbrij) * restrict p2, int *done)
{
int lmax=gen->lmax;
Tb lam_1,lam_2,scale;
/* this initial value is overwritten: Y(iter_to_ieee) stores to its l_
   argument on every return path */
int l=gen->m;
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scale,sharp_minscale);
while (!full_ieee)
{
/* degree l: lam_2 against p1 */
for (int j=0; j<njobs; ++j)
{
Tv tre=vzero, tim=vzero;
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
vfmaeq(tre,tmp,p1->j[j].r.v[i]);
vfmaeq(tim,tmp,p1->j[j].i.v[i]);
}
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
}
if (++l>lmax) { *done=1; return; }
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
/* next degree: lam_1 against p2 */
for (int j=0; j<njobs; ++j)
{
Tv tre=vzero, tim=vzero;
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
vfmaeq(tre,tmp,p2->j[j].r.v[i]);
vfmaeq(tim,tmp,p2->j[j].i.v[i]);
}
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
}
if (++l>lmax) { *done=1; return; }
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
/* correction factors only need refreshing when the scale block changed */
if (Y(rescale)(&lam_1,&lam_2,&scale))
{
Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGe)(scale,sharp_minscale);
}
}
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
Z(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax);
}
/* Adds the contribution of one degree to the spin accumulators.
   For every job the two coefficients alm[2*job] and alm[2*job+1] are read.
   px is updated with (rxp+rxm) times their real/imaginary parts, py with
   (rxm-rxp) times the cross-combined parts (see the sign pattern below). */
static inline void Z(saddstep) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py,
  const Tb rxp, const Tb rxm, const dcmplx * restrict alm)
{
  for (int job = 0; job < njobs; ++job)
  {
    const dcmplx first = alm[2*job];
    const dcmplx second = alm[2*job+1];
    Tv agr = vload(creal(first));
    Tv agi = vload(cimag(first));
    Tv acr = vload(creal(second));
    Tv aci = vload(cimag(second));

    /* symmetric combination goes to px */
    for (int k = 0; k < nvec; ++k)
    {
      Tv sum = vadd(rxp.v[k],rxm.v[k]);
      vfmaeq(px->j[job].qr.v[k],agr,sum);
      vfmaeq(px->j[job].qi.v[k],agi,sum);
      vfmaeq(px->j[job].ur.v[k],acr,sum);
      vfmaeq(px->j[job].ui.v[k],aci,sum);
    }

    /* antisymmetric combination goes to py */
    for (int k = 0; k < nvec; ++k)
    {
      Tv diff = vsub(rxm.v[k],rxp.v[k]);
      vfmseq(py->j[job].qr.v[k],aci,diff);
      vfmaeq(py->j[job].qi.v[k],acr,diff);
      vfmaeq(py->j[job].ur.v[k],agi,diff);
      vfmseq(py->j[job].ui.v[k],agr,diff);
    }
  }
}
/* Combined form of two consecutive accumulation steps: the contribution of
   the coefficients in alm1 (paired with r2p/r2m) and of those in alm2
   (paired with r1p/r1m) is added to p1 and p2 in a single pass per job. */
static inline void Z(saddstepb) (Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2,
  const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
  const dcmplx * restrict alm1, const dcmplx * restrict alm2)
{
  for (int job = 0; job < njobs; ++job)
  {
    Tv agr1 = vload(creal(alm1[2*job]));
    Tv agi1 = vload(cimag(alm1[2*job]));
    Tv acr1 = vload(creal(alm1[2*job+1]));
    Tv aci1 = vload(cimag(alm1[2*job+1]));
    Tv agr2 = vload(creal(alm2[2*job]));
    Tv agi2 = vload(cimag(alm2[2*job]));
    Tv acr2 = vload(creal(alm2[2*job+1]));
    Tv aci2 = vload(cimag(alm2[2*job+1]));

    for (int k = 0; k < nvec; ++k)
    {
      Tv sum2 = vadd(r2p.v[k],r2m.v[k]);
      Tv diff1 = vsub(r1m.v[k],r1p.v[k]);
      vfmaseq(p1->j[job].qr.v[k],agr1,sum2,aci2,diff1);
      vfmaaeq(p1->j[job].qi.v[k],agi1,sum2,acr2,diff1);
      vfmaaeq(p1->j[job].ur.v[k],acr1,sum2,agi2,diff1);
      vfmaseq(p1->j[job].ui.v[k],aci1,sum2,agr2,diff1);
    }

    for (int k = 0; k < nvec; ++k)
    {
      Tv diff2 = vsub(r2m.v[k],r2p.v[k]);
      Tv sum1 = vadd(r1p.v[k],r1m.v[k]);
      vfmaseq(p2->j[job].qr.v[k],agr2,sum1,aci1,diff2);
      vfmaaeq(p2->j[job].qi.v[k],agi2,sum1,acr1,diff2);
      vfmaaeq(p2->j[job].ur.v[k],acr2,sum1,agi1,diff2);
      vfmaseq(p2->j[job].ui.v[k],aci2,sum1,agr1,diff2);
    }
  }
}
/* map->alm counterpart of the accumulation step. For every job it sums
   (rxp+rxm) times the px components and (rxm-rxp) times the cross-combined
   py components over all nvec vectors, then passes the four partial sums to
   vhsum_cmplx2 together with the addresses of alm[2*job] and alm[2*job+1]. */
static inline void Z(saddstep2) (const Z(Tbquj) * restrict px,
  const Z(Tbquj) * restrict py, const Tb * restrict rxp,
  const Tb * restrict rxm, dcmplx * restrict alm)
{
  for (int job = 0; job < njobs; ++job)
  {
    Tv agr = vzero;
    Tv agi = vzero;
    Tv acr = vzero;
    Tv aci = vzero;

    /* symmetric combination against px */
    for (int k = 0; k < nvec; ++k)
    {
      Tv sum = vadd(rxp->v[k],rxm->v[k]);
      vfmaeq(agr,px->j[job].qr.v[k],sum);
      vfmaeq(agi,px->j[job].qi.v[k],sum);
      vfmaeq(acr,px->j[job].ur.v[k],sum);
      vfmaeq(aci,px->j[job].ui.v[k],sum);
    }

    /* antisymmetric combination against py */
    for (int k = 0; k < nvec; ++k)
    {
      Tv diff = vsub(rxm->v[k],rxp->v[k]);
      vfmseq(agr,py->j[job].ui.v[k],diff);
      vfmaeq(agi,py->j[job].ur.v[k],diff);
      vfmaeq(acr,py->j[job].qi.v[k],diff);
      vfmseq(aci,py->j[job].qr.v[k],diff);
    }

    vhsum_cmplx2(agr,agi,acr,aci,&alm[2*job],&alm[2*job+1]);
  }
}
/* Main accumulation loop of the spin alm->map direction. Advances the
   three-coefficient recurrence two degrees per iteration from l up to lmax
   and adds the contributions to p1/p2.
   cth                 block of per-entry input values used in the recurrence
   p1, p2              accumulators
   rec1p .. rec2m      recurrence state on entry; rec2p/rec2m are paired with
                       degree l
   fx                  recurrence coefficients; the step that produces the
                       values for degree l+1 uses fx[l+1]
   alm                 coefficients, 2*njobs entries per degree
   l, lmax             first and last degree to process */
static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
int lmax)
{
while (l<lmax)
{
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
fx2=vload(fx[l+1].f[2]);
for (int i=0; i<nvec; ++i)
{
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
vmul(fx2,rec1p.v[i]));
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
vmul(fx2,rec1m.v[i]));
}
/* with more than one job the two accumulation steps are done by the combined
   routine, otherwise by two separate calls */
#if (njobs>1)
Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
&alm[2*njobs*(l+1)]);
#else
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l]);
Z(saddstep)(p2, p1, rec1p, rec1m, &alm[2*njobs*(l+1)]);
#endif
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
fx2=vload(fx[l+2].f[2]);
for (int i=0; i<nvec; ++i)
{
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
vmul(fx2,rec2p.v[i]));
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
vmul(fx2,rec2m.v[i]));
}
l+=2;
}
/* one degree left over */
if (l==lmax)
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l]);
}
/* Main accumulation loop of the spin map->alm direction. Advances the
   three-coefficient recurrence two degrees per iteration from l up to lmax
   and, for each degree, lets Z(saddstep2) combine the recurrence values with
   p1/p2 and update the alm entries of that degree.
   cth                 block of per-entry input values used in the recurrence
   p1, p2              input blocks
   rec1p .. rec2m      recurrence state on entry; rec2p/rec2m are paired with
                       degree l
   fx                  recurrence coefficients
   alm                 output coefficients, 2*njobs entries per degree
   l, lmax             first and last degree to process */
static void Z(map2alm_spin_kernel) (Tb cth, const Z(Tbquj) * restrict p1,
const Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax)
{
while (l<lmax)
{
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
fx2=vload(fx[l+1].f[2]);
for (int i=0; i<nvec; ++i)
{
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
vmul(fx2,rec1p.v[i]));
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
vmul(fx2,rec1m.v[i]));
}
/* degree l uses rec2*, degree l+1 uses rec1* with p1 and p2 swapped */
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l]);
Z(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)]);
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
fx2=vload(fx[l+2].f[2]);
for (int i=0; i<nvec; ++i)
{
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
vmul(fx2,rec2p.v[i]));
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
vmul(fx2,rec2m.v[i]));
}
l+=2;
}
/* one degree left over */
if (l==lmax)
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l]);
}
/* Spin alm->map driver for one block of entries and the m currently prepared
   in gen.
   1. Y(iter_to_ieee_spin) skips the degrees for which both scale blocks are
      entirely below sharp_limscale.
   2. While not every entry of both scale blocks has reached sharp_minscale,
      the recurrence is advanced one degree at a time and the recurrence
      values are multiplied by the per-entry correction factors before being
      accumulated.
   3. Afterwards the correction factors are folded into the recurrence state
      and Z(alm2map_spin_kernel) handles the remaining degrees.
   cth     block of per-entry input values
   gen     generator data (lmax, m, fx, cf)
   job     provides almtmp (read) and opcnt (updated operation counter)
   p1, p2  accumulators, added to in place
   done    set to 1 when the recurrence reached lmax inside this function;
           it is not written otherwise */
static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
sharp_job *job, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, int *done)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax)
{ *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee)
{
Z(saddstep)(p1, p2,
Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), &alm[2*njobs*l]);
if (++l>lmax) break;
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
Z(saddstep)(p2, p1,
Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm), &alm[2*njobs*l]);
if (++l>lmax) break;
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
/* non-short-circuit '|' so that both branches are always rescaled */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
}
}
if (l>lmax)
{ *done=1; return; }
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
Z(alm2map_spin_kernel) (cth,p1,p2,
rec1p, rec1m, rec2p, rec2m, fx, alm, l, lmax);
}
/* Spin map->alm driver for one block of entries and the m currently prepared
   in gen. Mirrors Z(calc_alm2map_spin), but combines the recurrence values
   with the input blocks p1/p2 through Z(saddstep2), which updates alm.
   cth     block of per-entry input values
   gen     generator data (lmax, m, fx, cf)
   job     provides almtmp (updated) and opcnt (updated operation counter)
   p1, p2  input blocks
   done    set to 1 when the recurrence reached lmax inside this function;
           it is not written otherwise */
static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
sharp_job *job, const Z(Tbquj) * restrict p1, const Z(Tbquj) * restrict p2,
int *done)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee)
{
/* Z(saddstep2) takes pointers, so the corrected values need named
   temporaries */
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l]);
if (++l>lmax) { *done=1; return; }
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l]);
if (++l>lmax) { *done=1; return; }
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
/* non-short-circuit '|' so that both branches are always rescaled */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
}
}
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
Z(map2alm_spin_kernel) (cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax);
}
/* Accumulation step of the first-derivative transform. Only one coefficient
   per job (alm[job]) is read. (rxp+rxm) times its real/imaginary part is
   added to the qr/qi members of px, and (rxm-rxp) times its
   imaginary/real part is added to / subtracted from the ur/ui members of py.
   The remaining members of px and py are not touched. */
static inline void Z(saddstep_d) (Z(Tbquj) * restrict px,
  Z(Tbquj) * restrict py, const Tb rxp, const Tb rxm,
  const dcmplx * restrict alm)
{
  for (int job = 0; job < njobs; ++job)
  {
    Tv are = vload(creal(alm[job]));
    Tv aim = vload(cimag(alm[job]));

    for (int k = 0; k < nvec; ++k)
    {
      Tv sum = vadd(rxp.v[k],rxm.v[k]);
      vfmaeq(px->j[job].qr.v[k],are,sum);
      vfmaeq(px->j[job].qi.v[k],aim,sum);
    }

    for (int k = 0; k < nvec; ++k)
    {
      Tv diff = vsub(rxm.v[k],rxp.v[k]);
      vfmaeq(py->j[job].ur.v[k],aim,diff);
      vfmseq(py->j[job].ui.v[k],are,diff);
    }
  }
}
/* Main accumulation loop of the first-derivative alm->map direction. Same
   recurrence as the spin kernel, but the contributions are added by
   Z(saddstep_d) and alm holds njobs entries per degree (not 2*njobs).
   cth                 block of per-entry input values used in the recurrence
   p1, p2              accumulators
   rec1p .. rec2m      recurrence state on entry; rec2p/rec2m are paired with
                       degree l
   fx                  recurrence coefficients
   alm                 coefficients, laid out as alm[njobs*degree + job]
   l, lmax             first and last degree to process */
static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1,
Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
int lmax)
{
while (l<lmax)
{
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
fx2=vload(fx[l+1].f[2]);
for (int i=0; i<nvec; ++i)
{
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
vmul(fx2,rec1p.v[i]));
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
vmul(fx2,rec1m.v[i]));
}
/* degree l uses rec2*, degree l+1 uses rec1* with p1 and p2 swapped */
Z(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l]);
Z(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)]);
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
fx2=vload(fx[l+2].f[2]);
for (int i=0; i<nvec; ++i)
{
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
vmul(fx2,rec2p.v[i]));
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
vmul(fx2,rec2m.v[i]));
}
l+=2;
}
/* one degree left over */
if (l==lmax)
Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l]);
}
/* First-derivative alm->map driver for one block of entries and the m
   currently prepared in gen. Same three phases as Z(calc_alm2map_spin)
   (skip via Y(iter_to_ieee_spin), corrected one-degree steps until all scale
   entries reach sharp_minscale, then the unscaled kernel), but using
   Z(saddstep_d) / Z(alm2map_deriv1_kernel) and njobs alm entries per degree.
   cth     block of per-entry input values
   gen     generator data (lmax, m, fx, cf)
   job     provides almtmp (read) and opcnt (updated operation counter)
   p1, p2  accumulators, added to in place
   done    set to 1 when the recurrence reached lmax inside this function;
           it is not written otherwise */
static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
sharp_job *job, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, int *done)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax)
{ *done=1; return; }
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee)
{
Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
&alm[njobs*l]);
if (++l>lmax) break;
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
Z(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
&alm[njobs*l]);
if (++l>lmax) break;
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
/* non-short-circuit '|' so that both branches are always rescaled */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
}
}
if (l>lmax)
{ *done=1; return; }
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
Z(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
lmax);
}
/* Helper: zero-fills an object of any type; only defined for the function
   below and undefined again after it. */
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
/* Processes one m value (job->ainfo->mval[mi]) for the rings llim..ulim-1,
   in chunks of nval = nvec*VLEN rings, and moves data between job->phase and
   the computational kernels.
   job     transform description; type, spin, ainfo, phase, almtmp and opcnt
           are used here or by the callees
   ispair  per-ring flag; when set, the second phase slot of that ring is
           read/written as well
   cth_, sth_  per-ring input values, indexed by ring number (presumably
           cosine and sine of the colatitude - confirm against callers)
   llim, ulim  only their difference is used here, as the number of rings
   gen     generator, prepared for m at the start
   mi      index of the m value
   idx     maps the chunk position 0..ulim-llim-1 to the ring index used for
           cth_, sth_, ispair and phase
   The last chunk is padded by repeating the final ring; padded entries are
   computed but never stored to or loaded from job->phase.
   Job types other than the three handled below are ignored. */
static void Z(inner_loop) (sharp_job *job, const int *ispair,
const double *cth_, const double *sth_, int llim, int ulim,
sharp_Ylmgen_C *gen, int mi, const int *idx)
{
const int nval=nvec*VLEN;
const int m = job->ainfo->mval[mi];
sharp_Ylmgen_prepare (gen, m);
switch (job->type)
{
case SHARP_ALM2MAP:
case SHARP_ALM2MAP_DERIV1:
{
if (job->spin==0)
{
/* once a chunk reports done, all later chunks skip the computation and
   store zeros */
int done=0;
for (int ith=0; ith<ulim-llim; ith+=nval)
{
Z(Tburij) p1,p2; VZERO(p1); VZERO(p2);
if (!done)
{
Y(Tbu) cth, sth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* pad an incomplete chunk with the last ring */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
}
Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1.b,&p2.b,&done);
}
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
if (itot<ulim-llim)
{
itot=idx[itot];
for (int j=0; j<njobs; ++j)
{
/* two phase slots per (ring, m, job): sum first, difference second */
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
complex double r1 = p1.j[j].r[i] + p1.j[j].i[i]*_Complex_I,
r2 = p2.j[j].r[i] + p2.j[j].i[i]*_Complex_I;
job->phase[phas_idx] = r1+r2;
if (ispair[itot])
job->phase[phas_idx+1] = r1-r2;
}
}
}
}
}
else
{
int done=0;
for (int ith=0; ith<ulim-llim; ith+=nval)
{
Z(Tbuquj) p1,p2; VZERO(p1); VZERO(p2);
if (!done)
{
Y(Tbu) cth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* pad an incomplete chunk with the last ring */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot];
}
/* conditional expression used as a statement to pick the driver */
(job->type==SHARP_ALM2MAP) ?
Z(calc_alm2map_spin ) (cth.b,gen,job,&p1.b,&p2.b,&done) :
Z(calc_alm2map_deriv1) (cth.b,gen,job,&p1.b,&p2.b,&done);
}
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
if (itot<ulim-llim)
{
itot=idx[itot];
for (int j=0; j<njobs; ++j)
{
/* four phase slots per (ring, m, job): Q at +0/+1, U at +2/+3 */
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
complex double q1 = p1.j[j].qr[i] + p1.j[j].qi[i]*_Complex_I,
q2 = p2.j[j].qr[i] + p2.j[j].qi[i]*_Complex_I,
u1 = p1.j[j].ur[i] + p1.j[j].ui[i]*_Complex_I,
u2 = p2.j[j].ur[i] + p2.j[j].ui[i]*_Complex_I;
job->phase[phas_idx] = q1+q2;
job->phase[phas_idx+2] = u1+u2;
if (ispair[itot])
{
dcmplx *phQ = &(job->phase[phas_idx+1]),
*phU = &(job->phase[phas_idx+3]);
*phQ = q1-q2;
*phU = u1-u2;
/* sign of the second slot depends on the parity of mhi-m+s */
if ((gen->mhi-gen->m+gen->s)&1)
{ *phQ=-(*phQ); *phU=-(*phU); }
}
}
}
}
}
}
break;
}
case SHARP_MAP2ALM:
{
if (job->spin==0)
{
/* here the loop stops entirely as soon as a chunk reports done */
int done=0;
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
{
Z(Tburij) p1, p2; VZERO(p1); VZERO(p2);
Y(Tbu) cth, sth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* pad an incomplete chunk with the last ring; its p1/p2 entries stay zero */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
if (i+ith<ulim-llim)
{
for (int j=0; j<njobs; ++j)
{
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
dcmplx ph1=job->phase[phas_idx];
dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
p1.j[j].r[i]=creal(ph1+ph2); p1.j[j].i[i]=cimag(ph1+ph2);
p2.j[j].r[i]=creal(ph1-ph2); p2.j[j].i[i]=cimag(ph1-ph2);
}
}
}
Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1.b,&p2.b,&done);
}
}
else
{
int done=0;
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
{
Z(Tbuquj) p1, p2; VZERO(p1); VZERO(p2);
Y(Tbu) cth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* pad an incomplete chunk with the last ring; its p1/p2 entries stay zero */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot];
if (i+ith<ulim-llim)
{
for (int j=0; j<njobs; ++j)
{
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
dcmplx p1Q=job->phase[phas_idx],
p1U=job->phase[phas_idx+2],
p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
/* same parity-dependent sign as in the alm->map direction */
if ((gen->mhi-gen->m+gen->s)&1)
{ p2Q=-p2Q; p2U=-p2U; }
p1.j[j].qr[i]=creal(p1Q+p2Q); p1.j[j].qi[i]=cimag(p1Q+p2Q);
p1.j[j].ur[i]=creal(p1U+p2U); p1.j[j].ui[i]=cimag(p1U+p2U);
p2.j[j].qr[i]=creal(p1Q-p2Q); p2.j[j].qi[i]=cimag(p1Q-p2Q);
p2.j[j].ur[i]=creal(p1U-p2U); p2.j[j].ui[i]=cimag(p1U-p2U);
}
}
}
Z(calc_map2alm_spin) (cth.b,gen,job,&p1.b,&p2.b,&done);
}
}
break;
}
}
}
#undef VZERO

View file

@ -0,0 +1,800 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_core_inc3.c
* Type-dependent code for the computational core
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
/* Main accumulation loop of the alm->map direction with the number of jobs
   given at run time. Continues the two-term recurrence from degree l up to
   lmax and adds lambda * alm to the accumulators.
   cth            block of per-entry input values used in the recurrence
   p1, p2         arrays of njobs accumulators; p1 receives the terms at
                  l, l+2, ... and p2 the terms at l+1, l+3, ...
   lam_1, lam_2   recurrence state on entry; lam_2 is paired with degree l
   rf             recurrence coefficients, indexed by degree
   alm            coefficients, laid out as alm[njobs*degree + job]
   l, lmax        first and last degree to process
   njobs          number of jobs */
static void Y(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
int l, int lmax, int njobs)
{
/* four degrees per iteration */
while (l<lmax-2)
{
Tb lam_3, lam_4;
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_3.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_4.v[i] = vsub(vmul(vmul(cth.v[i],lam_3.v[i]),r0),vmul(lam_2.v[i],r1));
r0=vload(rf[l+2].f[0]);r1=vload(rf[l+2].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_4.v[i]),r0),vmul(lam_3.v[i],r1));
/* here lam_2, lam_3, lam_4, lam_1 belong to degrees l, l+1, l+2, l+3 */
for (int j=0; j<njobs; ++j)
{
Tv ar2=vload(creal(alm[njobs*l+j])),
ai2=vload(cimag(alm[njobs*l+j])),
ar4=vload(creal(alm[njobs*(l+2)+j])),
ai4=vload(cimag(alm[njobs*(l+2)+j]));
for (int i=0; i<nvec; ++i)
{
vfmaaeq(p1[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
vfmaaeq(p1[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
}
Tv ar3=vload(creal(alm[njobs*(l+1)+j])),
ai3=vload(cimag(alm[njobs*(l+1)+j])),
ar1=vload(creal(alm[njobs*(l+3)+j])),
ai1=vload(cimag(alm[njobs*(l+3)+j]));
for (int i=0; i<nvec; ++i)
{
vfmaaeq(p2[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
vfmaaeq(p2[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
}
}
r0=vload(rf[l+3].f[0]);r1=vload(rf[l+3].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_4.v[i],r1));
l+=4;
}
/* two degrees per iteration */
while (l<lmax)
{
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),
ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
}
ar=vload(creal(alm[njobs*(l+1)+j]));
ai=vload(cimag(alm[njobs*(l+1)+j]));
for (int i=0; i<nvec; ++i)
{
vfmaeq(p2[j].r.v[i],lam_1.v[i],ar);
vfmaeq(p2[j].i.v[i],lam_1.v[i],ai);
}
}
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
l+=2;
}
/* one degree left over */
if (l==lmax)
{
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
}
}
}
}
/* Main accumulation loop of the map->alm direction with the number of jobs
   given at run time. Continues the two-term recurrence from degree l up to
   lmax; for every degree it forms the sum over all entries of lambda * p and
   hands it to vhsum_cmplx2 / vhsum_cmplx for storage in alm.
   cth            block of per-entry input values used in the recurrence
   p1, p2         arrays of njobs input blocks; p1 is combined with degrees
                  l, l+2, ... and p2 with degrees l+1, l+3, ...
   lam_1, lam_2   recurrence state on entry; lam_2 is paired with degree l
   rf             recurrence coefficients, indexed by degree
   alm            output coefficients, laid out as alm[degree*njobs + job]
   l, lmax        first and last degree to process
   njobs          number of jobs */
static void Y(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
const Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax,
int njobs)
{
while (l<lmax)
{
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
for (int j=0; j<njobs; ++j)
{
Tv tr1=vzero, ti1=vzero, tr2=vzero, ti2=vzero;
for (int i=0; i<nvec; ++i)
{
vfmaeq(tr1,lam_2.v[i],p1[j].r.v[i]);
vfmaeq(ti1,lam_2.v[i],p1[j].i.v[i]);
}
for (int i=0; i<nvec; ++i)
{
vfmaeq(tr2,lam_1.v[i],p2[j].r.v[i]);
vfmaeq(ti2,lam_1.v[i],p2[j].i.v[i]);
}
/* presumably adds the two horizontal sums to the two alm entries, matching
   the explicit "+=" used for the last degree below - confirm in the vector
   support header */
vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]);
}
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
l+=2;
}
/* one degree left over */
if (l==lmax)
{
for (int j=0; j<njobs; ++j)
{
Tv tre=vzero, tim=vzero;
for (int i=0; i<nvec; ++i)
{
vfmaeq(tre,lam_2.v[i],p1[j].r.v[i]);
vfmaeq(tim,lam_2.v[i],p1[j].i.v[i]);
}
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
}
}
}
/* Scalar (spin 0) alm->map accumulation for one block of nvec*VLEN rings.
 *
 * cth, sth : per-lane cosine / sine of the ring colatitudes.
 * gen      : Ylm generator; provides lmax, m, the recursion factors rf and
 *            the correction-factor table cf.
 * job      : provides the input coefficients (almtmp) and receives the
 *            operation count (opcnt).
 * p1, p2   : njobs accumulators each.  p1 receives the terms of the l values
 *            visited first, third, ...; p2 those visited second, fourth, ...
 * done     : set to 1 when l exceeds lmax before the unscaled kernel is
 *            reached.
 */
static void Y(calc_alm2map) (const Tb cth, const Tb sth,
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
Y(Tbri) * restrict p2, int njobs, int *done)
{
int l,lmax=gen->lmax;
Tb lam_1,lam_2,scale;
/* NOTE(review): iter_to_ieee is defined outside this view; it sets l, lam_1,
   lam_2 and scale -- presumably the first l at which the values become
   representable.  Confirm against its definition. */
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scale,sharp_minscale);
/* Scaled phase: every lam value is multiplied by corfac before use.  The loop
   ends when TballGe reports that scale has reached sharp_minscale, or when l
   passes lmax. */
while (!full_ieee)
{
/* contribution of the current l (lam_2) goes to p1 */
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
vfmaeq(p1[j].r.v[i],tmp,ar);
vfmaeq(p1[j].i.v[i],tmp,ai);
}
}
if (++l>lmax) break;
/* recursion step: new lam_1 from lam_2 and old lam_1, factors rf[l-1] */
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
/* contribution of the next l (lam_1) goes to p2 */
for (int j=0; j<njobs; ++j)
{
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
vfmaeq(p2[j].r.v[i],tmp,ar);
vfmaeq(p2[j].i.v[i],tmp,ai);
}
}
if (++l>lmax) break;
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
/* a nonzero return from rescale means scale changed, so the correction
   factors and the termination test must be refreshed */
if (Y(rescale)(&lam_1,&lam_2,&scale))
{
Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGe)(scale,sharp_minscale);
}
}
if (l>lmax) { *done=1; return; }
/* fold the final correction factor into lam_1/lam_2 once, then let the
   kernel finish the remaining l range without per-step scaling */
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
Y(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax, njobs);
}
/* Scalar (spin 0) map->alm accumulation for one block of nvec*VLEN rings.
 *
 * cth, sth : per-lane cosine / sine of the ring colatitudes.
 * gen      : Ylm generator; provides lmax, m, the recursion factors rf and
 *            the correction-factor table cf.
 * job      : almtmp is the output array (added to); opcnt is incremented.
 * p1, p2   : njobs read-only inputs each; p1 is paired with lam_2 and p2 with
 *            lam_1.
 * done     : set to 1 when l exceeds lmax before the unscaled kernel is
 *            reached.
 */
static void Y(calc_map2alm) (const Tb cth, const Tb sth,
const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
const Y(Tbri) * restrict p2, int njobs, int *done)
{
int lmax=gen->lmax;
Tb lam_1,lam_2,scale;
int l=gen->m;
/* NOTE(review): iter_to_ieee is defined outside this view; it receives &l and
   sets lam_1, lam_2 and scale -- confirm its contract there. */
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf);
dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGe)(scale,sharp_minscale);
/* Scaled phase: every lam value is multiplied by corfac before use. */
while (!full_ieee)
{
/* current l: horizontal sum of (lam_2*corfac)*p1 is added to alm */
for (int j=0; j<njobs; ++j)
{
Tv tre=vzero, tim=vzero;
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
vfmaeq(tre,tmp,p1[j].r.v[i]);
vfmaeq(tim,tmp,p1[j].i.v[i]);
}
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
}
if (++l>lmax) { *done=1; return; }
/* recursion step: new lam_1 from lam_2 and old lam_1, factors rf[l-1] */
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
/* next l: horizontal sum of (lam_1*corfac)*p2 is added to alm */
for (int j=0; j<njobs; ++j)
{
Tv tre=vzero, tim=vzero;
for (int i=0; i<nvec; ++i)
{
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
vfmaeq(tre,tmp,p2[j].r.v[i]);
vfmaeq(tim,tmp,p2[j].i.v[i]);
}
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
}
if (++l>lmax) { *done=1; return; }
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
for (int i=0; i<nvec; ++i)
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
/* a nonzero return from rescale means scale changed, so the correction
   factors and the termination test must be refreshed */
if (Y(rescale)(&lam_1,&lam_2,&scale))
{
Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGe)(scale,sharp_minscale);
}
}
/* fold the final correction factor into lam_1/lam_2 once, then let the
   kernel finish the remaining l range without per-step scaling */
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
Y(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax, njobs);
}
/* One accumulation step of the spin alm->map transform for a single l.
 *
 * alm holds two complex coefficients per job (alm[2*j] and alm[2*j+1]).
 * The lane-wise sum rxp+rxm weights the update of px, the difference
 * rxm-rxp weights the update of py, with the coefficient/sign pattern
 * spelled out below.
 */
static inline void Y(saddstep) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
  const Tb rxp, const Tb rxm, const dcmplx * restrict alm, int njobs)
{
  for (int jb=0; jb<njobs; ++jb)
  {
    /* broadcast real/imaginary parts of both coefficients of this job */
    Tv re0=vload(creal(alm[2*jb])),   im0=vload(cimag(alm[2*jb]));
    Tv re1=vload(creal(alm[2*jb+1])), im1=vload(cimag(alm[2*jb+1]));

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv rsum=vadd(rxp.v[iv],rxm.v[iv]);
      vfmaeq(px[jb].qr.v[iv],re0,rsum);
      vfmaeq(px[jb].qi.v[iv],im0,rsum);
      vfmaeq(px[jb].ur.v[iv],re1,rsum);
      vfmaeq(px[jb].ui.v[iv],im1,rsum);
    }

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv rdiff=vsub(rxm.v[iv],rxp.v[iv]);
      vfmseq(py[jb].qr.v[iv],im1,rdiff);
      vfmaeq(py[jb].qi.v[iv],re1,rdiff);
      vfmaeq(py[jb].ur.v[iv],im0,rdiff);
      vfmseq(py[jb].ui.v[iv],re0,rdiff);
    }
  }
}
/* Fused accumulation step of the spin alm->map transform for two
 * consecutive l values.
 *
 * alm1 / alm2 each hold two complex coefficients per job.  r2p/r2m are
 * combined with the alm1 coefficients and r1p/r1m with the alm2
 * coefficients; every accumulator component receives one term from each of
 * the two l values through a single vfmaaeq / vfmaseq call.
 */
static inline void Y(saddstepb) (Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2,
  const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
  const dcmplx * restrict alm1, const dcmplx * restrict alm2, int njobs)
{
  for (int jb=0; jb<njobs; ++jb)
  {
    /* coefficients belonging to the first l */
    Tv g1r=vload(creal(alm1[2*jb])),   g1i=vload(cimag(alm1[2*jb]));
    Tv c1r=vload(creal(alm1[2*jb+1])), c1i=vload(cimag(alm1[2*jb+1]));
    /* coefficients belonging to the second l */
    Tv g2r=vload(creal(alm2[2*jb])),   g2i=vload(cimag(alm2[2*jb]));
    Tv c2r=vload(creal(alm2[2*jb+1])), c2i=vload(cimag(alm2[2*jb+1]));

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv sum_r2 =vadd(r2p.v[iv],r2m.v[iv]);
      Tv diff_r1=vsub(r1m.v[iv],r1p.v[iv]);
      vfmaseq(p1[jb].qr.v[iv],g1r,sum_r2,c2i,diff_r1);
      vfmaaeq(p1[jb].qi.v[iv],g1i,sum_r2,c2r,diff_r1);
      vfmaaeq(p1[jb].ur.v[iv],c1r,sum_r2,g2i,diff_r1);
      vfmaseq(p1[jb].ui.v[iv],c1i,sum_r2,g2r,diff_r1);
    }

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv diff_r2=vsub(r2m.v[iv],r2p.v[iv]);
      Tv sum_r1 =vadd(r1p.v[iv],r1m.v[iv]);
      vfmaseq(p2[jb].qr.v[iv],g2r,sum_r1,c1i,diff_r2);
      vfmaaeq(p2[jb].qi.v[iv],g2i,sum_r1,c1r,diff_r2);
      vfmaaeq(p2[jb].ur.v[iv],c2r,sum_r1,g1i,diff_r2);
      vfmaseq(p2[jb].ui.v[iv],c2i,sum_r1,g1r,diff_r2);
    }
  }
}
/* One accumulation step of the spin map->alm transform for a single l.
 *
 * For each job four lane-wise partial sums are built from px (weighted by
 * rxp+rxm) and py (weighted by rxm-rxp) and then passed to vhsum_cmplx2
 * together with the addresses of alm[2*j] and alm[2*j+1].
 */
static inline void Y(saddstep2) (const Y(Tbqu) * restrict px,
  const Y(Tbqu) * restrict py, const Tb * restrict rxp,
  const Tb * restrict rxm, dcmplx * restrict alm, int njobs)
{
  for (int jb=0; jb<njobs; ++jb)
  {
    Tv re0=vzero, im0=vzero, re1=vzero, im1=vzero;

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv rsum=vadd(rxp->v[iv],rxm->v[iv]);
      vfmaeq(re0,px[jb].qr.v[iv],rsum);
      vfmaeq(im0,px[jb].qi.v[iv],rsum);
      vfmaeq(re1,px[jb].ur.v[iv],rsum);
      vfmaeq(im1,px[jb].ui.v[iv],rsum);
    }

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv rdiff=vsub(rxm->v[iv],rxp->v[iv]);
      vfmseq(re0,py[jb].ui.v[iv],rdiff);
      vfmaeq(im0,py[jb].ur.v[iv],rdiff);
      vfmaeq(re1,py[jb].qi.v[iv],rdiff);
      vfmseq(im1,py[jb].qr.v[iv],rdiff);
    }

    vhsum_cmplx2(re0,im0,re1,im1,&alm[2*jb],&alm[2*jb+1]);
  }
}
/* Spin alm->map kernel for the l range that needs no scale correction.
 *
 * Two l values are processed per pass: rec1p/rec1m are advanced with the
 * factors fx[l+1], both l and l+1 are accumulated in one saddstepb call,
 * then rec2p/rec2m are advanced with fx[l+2].  A single remaining l==lmax
 * is handled by saddstep.  All Tb arguments are by-value working copies.
 */
static void Y(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
  Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
  int lmax, int njobs)
{
  for (; l<lmax; l+=2)
  {
    /* advance the "1" pair using fx[l+1] */
    Tv ca=vload(fx[l+1].f[0]), cb=vload(fx[l+1].f[1]), cc=vload(fx[l+1].f[2]);
    for (int iv=0; iv<nvec; ++iv)
    {
      rec1p.v[iv] = vsub(vmul(vsub(cth.v[iv],cb),vmul(ca,rec2p.v[iv])),
                         vmul(cc,rec1p.v[iv]));
      rec1m.v[iv] = vsub(vmul(vadd(cth.v[iv],cb),vmul(ca,rec2m.v[iv])),
                         vmul(cc,rec1m.v[iv]));
    }

    Y(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,
                 &alm[2*njobs*l],&alm[2*njobs*(l+1)],njobs);

    /* advance the "2" pair using fx[l+2] */
    ca=vload(fx[l+2].f[0]);
    cb=vload(fx[l+2].f[1]);
    cc=vload(fx[l+2].f[2]);
    for (int iv=0; iv<nvec; ++iv)
    {
      rec2p.v[iv] = vsub(vmul(vsub(cth.v[iv],cb),vmul(ca,rec1p.v[iv])),
                         vmul(cc,rec2p.v[iv]));
      rec2m.v[iv] = vsub(vmul(vadd(cth.v[iv],cb),vmul(ca,rec1m.v[iv])),
                         vmul(cc,rec2m.v[iv]));
    }
  }

  if (l==lmax)
    Y(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l], njobs);
}
/* Spin map->alm kernel for the l range that needs no scale correction.
 *
 * Per pass: rec1p/rec1m are advanced with fx[l+1]; l is accumulated from
 * (p1,p2) with rec2*, l+1 from (p2,p1) with rec1*; then rec2p/rec2m are
 * advanced with fx[l+2].  A single remaining l==lmax is accumulated from
 * (p1,p2) with rec2*.  All Tb arguments are by-value working copies.
 */
static void Y(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
  const Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
  const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax,
  int njobs)
{
  for (; l<lmax; l+=2)
  {
    /* advance the "1" pair using fx[l+1] */
    Tv ca=vload(fx[l+1].f[0]), cb=vload(fx[l+1].f[1]), cc=vload(fx[l+1].f[2]);
    for (int iv=0; iv<nvec; ++iv)
    {
      rec1p.v[iv] = vsub(vmul(vsub(cth.v[iv],cb),vmul(ca,rec2p.v[iv])),
                         vmul(cc,rec1p.v[iv]));
      rec1m.v[iv] = vsub(vmul(vadd(cth.v[iv],cb),vmul(ca,rec2m.v[iv])),
                         vmul(cc,rec1m.v[iv]));
    }

    /* note the swapped p1/p2 roles between the two l values */
    Y(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l],njobs);
    Y(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)],njobs);

    /* advance the "2" pair using fx[l+2] */
    ca=vload(fx[l+2].f[0]);
    cb=vload(fx[l+2].f[1]);
    cc=vload(fx[l+2].f[2]);
    for (int iv=0; iv<nvec; ++iv)
    {
      rec2p.v[iv] = vsub(vmul(vsub(cth.v[iv],cb),vmul(ca,rec1p.v[iv])),
                         vmul(cc,rec2p.v[iv]));
      rec2m.v[iv] = vsub(vmul(vadd(cth.v[iv],cb),vmul(ca,rec1m.v[iv])),
                         vmul(cc,rec2m.v[iv]));
    }
  }

  if (l==lmax)
    Y(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l], njobs);
}
/* Spin alm->map accumulation for one block of nvec*VLEN rings.
 *
 * cth    : per-lane cosine of the ring colatitudes.
 * gen    : Ylm generator; provides lmax, m, the recursion factors fx and the
 *          correction-factor table cf.
 * job    : provides the input coefficients (almtmp); opcnt is incremented.
 * p1, p2 : njobs accumulators each; their roles are swapped between
 *          consecutive l values (see the two saddstep calls).
 * done   : set to 1 when l exceeds lmax before the unscaled kernel is
 *          reached.
 */
static void Y(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, int njobs,
int *done)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
/* NOTE(review): iter_to_ieee_spin is defined outside this view; it sets l,
   the four recursion values and both scales -- confirm its contract there. */
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax)
{ *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
/* the unscaled kernel may only take over once BOTH scales pass the test */
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
/* Scaled phase: recursion values are multiplied by their correction factor
   (Tbprod) on the fly before being accumulated. */
while (!full_ieee)
{
Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
&alm[2*njobs*l],njobs);
if (++l>lmax) break;
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
Y(saddstep)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
&alm[2*njobs*l], njobs);
if (++l>lmax) break;
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
/* bitwise | (not ||) so that both rescale calls are always executed */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
}
}
if (l>lmax)
{ *done=1; return; }
/* fold the final correction factors into the recursion values once, then let
   the kernel finish the remaining l range without per-step scaling */
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
Y(alm2map_spin_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
lmax, njobs);
}
/* Spin map->alm accumulation for one block of nvec*VLEN rings.
 *
 * cth    : per-lane cosine of the ring colatitudes.
 * gen    : Ylm generator; provides lmax, m, the recursion factors fx and the
 *          correction-factor table cf.
 * job    : almtmp is the output array; opcnt is incremented.
 * p1, p2 : njobs read-only inputs each; their roles are swapped between
 *          consecutive l values (see the two saddstep2 calls).
 * done   : set to 1 when l exceeds lmax before the unscaled kernel is
 *          reached.
 */
static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2,
int njobs, int *done)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
/* NOTE(review): iter_to_ieee_spin is defined outside this view; it sets l,
   the four recursion values and both scales -- confirm its contract there. */
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax) { *done=1; return; }
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
dcmplx * restrict alm=job->almtmp;
/* the unscaled kernel may only take over once BOTH scales pass the test */
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee)
{
/* saddstep2 takes its recursion values by pointer, so the corrected
   products are materialised in the temporaries t1/t2 first */
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
Y(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l], njobs);
if (++l>lmax) { *done=1; return; }
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
Y(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l], njobs);
if (++l>lmax) { *done=1; return; }
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
/* bitwise | (not ||) so that both rescale calls are always executed */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
}
}
/* fold the final correction factors into the recursion values once, then let
   the kernel finish the remaining l range without per-step scaling */
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
Y(map2alm_spin_kernel)(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax,njobs);
}
/* One accumulation step of the first-derivative alm->map transform for a
 * single l.
 *
 * alm holds one complex coefficient per job.  Only the qr/qi components of
 * px (weighted by rxp+rxm) and the ur/ui components of py (weighted by
 * rxm-rxp) are updated.
 */
static inline void Y(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
  const Tb rxp, const Tb rxm, const dcmplx * restrict alm, int njobs)
{
  for (int jb=0; jb<njobs; ++jb)
  {
    Tv a_re=vload(creal(alm[jb])), a_im=vload(cimag(alm[jb]));

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv rsum=vadd(rxp.v[iv],rxm.v[iv]);
      vfmaeq(px[jb].qr.v[iv],a_re,rsum);
      vfmaeq(px[jb].qi.v[iv],a_im,rsum);
    }

    for (int iv=0; iv<nvec; ++iv)
    {
      Tv rdiff=vsub(rxm.v[iv],rxp.v[iv]);
      vfmaeq(py[jb].ur.v[iv],a_im,rdiff);
      vfmseq(py[jb].ui.v[iv],a_re,rdiff);
    }
  }
}
/* First-derivative alm->map kernel for the l range that needs no scale
 * correction.
 *
 * Per pass: rec1p/rec1m are advanced with fx[l+1]; l is accumulated into
 * (p1,p2) with rec2*, l+1 into (p2,p1) with rec1*; then rec2p/rec2m are
 * advanced with fx[l+2].  A single remaining l==lmax is accumulated into
 * (p1,p2) with rec2*.  One coefficient per job and l is read from alm.
 */
static void Y(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
  Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
  const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
  int lmax, int njobs)
{
  for (; l<lmax; l+=2)
  {
    /* advance the "1" pair using fx[l+1] */
    Tv ca=vload(fx[l+1].f[0]), cb=vload(fx[l+1].f[1]), cc=vload(fx[l+1].f[2]);
    for (int iv=0; iv<nvec; ++iv)
    {
      rec1p.v[iv] = vsub(vmul(vsub(cth.v[iv],cb),vmul(ca,rec2p.v[iv])),
                         vmul(cc,rec1p.v[iv]));
      rec1m.v[iv] = vsub(vmul(vadd(cth.v[iv],cb),vmul(ca,rec2m.v[iv])),
                         vmul(cc,rec1m.v[iv]));
    }

    /* note the swapped p1/p2 roles between the two l values */
    Y(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l],njobs);
    Y(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)],njobs);

    /* advance the "2" pair using fx[l+2] */
    ca=vload(fx[l+2].f[0]);
    cb=vload(fx[l+2].f[1]);
    cc=vload(fx[l+2].f[2]);
    for (int iv=0; iv<nvec; ++iv)
    {
      rec2p.v[iv] = vsub(vmul(vsub(cth.v[iv],cb),vmul(ca,rec1p.v[iv])),
                         vmul(cc,rec2p.v[iv]));
      rec2m.v[iv] = vsub(vmul(vadd(cth.v[iv],cb),vmul(ca,rec1m.v[iv])),
                         vmul(cc,rec2m.v[iv]));
    }
  }

  if (l==lmax)
    Y(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l], njobs);
}
/* First-derivative alm->map accumulation for one block of nvec*VLEN rings.
 *
 * Same control flow as the spin alm->map driver, but with one coefficient per
 * job and l (alm is indexed with njobs*l) and saddstep_d as accumulation step.
 *
 * cth    : per-lane cosine of the ring colatitudes.
 * gen    : Ylm generator; provides lmax, m, the recursion factors fx and the
 *          correction-factor table cf.
 * job    : provides the input coefficients (almtmp); opcnt is incremented.
 * p1, p2 : njobs accumulators each; roles are swapped between consecutive l.
 * done   : set to 1 when l exceeds lmax before the unscaled kernel is
 *          reached.
 */
static void Y(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, int njobs,
int *done)
{
int l, lmax=gen->lmax;
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
/* NOTE(review): iter_to_ieee_spin is defined outside this view; it sets l,
   the four recursion values and both scales -- confirm its contract there. */
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
if (l>lmax)
{ *done=1; return; }
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
Tb corfacp,corfacm;
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp;
/* the unscaled kernel may only take over once BOTH scales pass the test */
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
/* Scaled phase: recursion values are multiplied by their correction factor
   (Tbprod) on the fly before being accumulated. */
while (!full_ieee)
{
Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
&alm[njobs*l],njobs);
if (++l>lmax) break;
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
Y(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
&alm[njobs*l], njobs);
if (++l>lmax) break;
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
/* bitwise | (not ||) so that both rescale calls are always executed */
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
{
Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGe)(scalem,sharp_minscale);
}
}
if (l>lmax)
{ *done=1; return; }
/* fold the final correction factors into the recursion values once, then let
   the kernel finish the remaining l range without per-step scaling */
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
Y(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
lmax, njobs);
}
/* Zero-fills an object (or a whole array, including the variable-length
   arrays used below) by byte-wise memset over sizeof(var). */
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
/* Per-m driver: processes the ulim-llim ring entries of the current chunk in
 * blocks of nval=nvec*VLEN lanes and moves data between job->phase and the
 * vectorised calc_* routines.
 *
 * job          : transform description; type and spin select the code path.
 * ispair       : per ring entry, nonzero if a second phase slot is used.
 * cth_, sth_   : cosine / sine of the colatitude per ring entry.
 * llim, ulim   : only their difference (number of entries) is used here.
 * gen          : Ylm generator, prepared for m = job->ainfo->mval[mi].
 * mi           : index of the current m within the a_lm info.
 * idx          : maps the processing position to the ring-entry index used
 *                for cth_, sth_, ispair and job->phase.
 * njobs        : number of simultaneous accumulators per block.
 */
static void Y(inner_loop) (sharp_job *job, const int *ispair,
const double *cth_, const double *sth_, int llim, int ulim,
sharp_Ylmgen_C *gen, int mi, const int *idx, int njobs)
{
const int nval=nvec*VLEN;
const int m = job->ainfo->mval[mi];
sharp_Ylmgen_prepare (gen, m);
switch (job->type)
{
case SHARP_ALM2MAP:
case SHARP_ALM2MAP_DERIV1:
{
if (job->spin==0)
{
/* once a block reports done, later blocks skip the computation and store
   the zero-initialised accumulators */
int done=0;
for (int ith=0; ith<ulim-llim; ith+=nval)
{
Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
if (!done)
{
Y(Tbu) cth, sth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* lanes past the end of the range reuse the last valid entry; their
   results are not stored (see the itot<ulim-llim guard below) */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
}
Y(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
}
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
if (itot<ulim-llim)
{
itot=idx[itot];
for (int j=0; j<njobs; ++j)
{
/* two complex phase slots per (job, ring entry, m) */
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
complex double r1 = p1[j].s.r[i] + p1[j].s.i[i]*_Complex_I,
r2 = p2[j].s.r[i] + p2[j].s.i[i]*_Complex_I;
/* first slot gets the sum, the paired slot the difference */
job->phase[phas_idx] = r1+r2;
if (ispair[itot])
job->phase[phas_idx+1] = r1-r2;
}
}
}
}
}
else
{
int done=0;
for (int ith=0; ith<ulim-llim; ith+=nval)
{
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
if (!done)
{
Y(Tbu) cth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* lanes past the end of the range reuse the last valid entry */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot];
}
/* both job types share this branch; pick the matching driver */
(job->type==SHARP_ALM2MAP) ?
Y(calc_alm2map_spin )
(cth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done) :
Y(calc_alm2map_deriv1)
(cth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
}
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
if (itot<ulim-llim)
{
itot=idx[itot];
for (int j=0; j<njobs; ++j)
{
/* four complex phase slots per (job, ring entry, m):
   +0 / +2 for the first ring, +1 / +3 for the paired ring */
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
complex double q1 = p1[j].s.qr[i] + p1[j].s.qi[i]*_Complex_I,
q2 = p2[j].s.qr[i] + p2[j].s.qi[i]*_Complex_I,
u1 = p1[j].s.ur[i] + p1[j].s.ui[i]*_Complex_I,
u2 = p2[j].s.ur[i] + p2[j].s.ui[i]*_Complex_I;
job->phase[phas_idx] = q1+q2;
job->phase[phas_idx+2] = u1+u2;
if (ispair[itot])
{
dcmplx *phQ = &(job->phase[phas_idx+1]),
*phU = &(job->phase[phas_idx+3]);
*phQ = q1-q2;
*phU = u1-u2;
/* paired-ring values change sign when mhi-m+s is odd */
if ((gen->mhi-gen->m+gen->s)&1)
{ *phQ=-(*phQ); *phU=-(*phU); }
}
}
}
}
}
}
break;
}
case SHARP_MAP2ALM:
{
if (job->spin==0)
{
/* here the loop itself stops as soon as a block reports done */
int done=0;
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
{
Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
Y(Tbu) cth, sth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* padding lanes get a valid cth/sth but keep zeroed p1/p2 */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
if (i+ith<ulim-llim)
{
for (int j=0; j<njobs; ++j)
{
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
dcmplx ph1=job->phase[phas_idx];
/* an unpaired ring contributes zero for the second slot */
dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
p1[j].s.r[i]=creal(ph1+ph2); p1[j].s.i[i]=cimag(ph1+ph2);
p2[j].s.r[i]=creal(ph1-ph2); p2[j].s.i[i]=cimag(ph1-ph2);
}
}
}
Y(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
}
}
else
{
int done=0;
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
{
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
Y(Tbu) cth;
for (int i=0; i<nval; ++i)
{
int itot=i+ith;
/* padding lanes get a valid cth but keep zeroed p1/p2 */
if (itot>=ulim-llim) itot=ulim-llim-1;
itot=idx[itot];
cth.s[i]=cth_[itot];
if (i+ith<ulim-llim)
{
for (int j=0; j<njobs; ++j)
{
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
dcmplx p1Q=job->phase[phas_idx],
p1U=job->phase[phas_idx+2],
p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
/* same sign convention as on the alm->map side */
if ((gen->mhi-gen->m+gen->s)&1)
{ p2Q=-p2Q; p2U=-p2U; }
p1[j].s.qr[i]=creal(p1Q+p2Q); p1[j].s.qi[i]=cimag(p1Q+p2Q);
p1[j].s.ur[i]=creal(p1U+p2U); p1[j].s.ui[i]=cimag(p1U+p2U);
p2[j].s.qr[i]=creal(p1Q-p2Q); p2[j].s.qi[i]=cimag(p1Q-p2Q);
p2[j].s.ur[i]=creal(p1U-p2U); p2[j].s.ui[i]=cimag(p1U-p2U);
}
}
}
Y(calc_map2alm_spin) (cth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
}
}
break;
}
}
}
#undef VZERO

140
external/sharp/libsharp/sharp_cxx.h vendored Normal file
View file

@ -0,0 +1,140 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_cxx.h
* Spherical transform library
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_CXX_H
#define PLANCK_SHARP_CXX_H
#include "sharp_lowlevel.h"
#include "sharp_geomhelpers.h"
#include "sharp_almhelpers.h"
#include "xcomplex.h"
class sharp_base
{
protected:
sharp_alm_info *ainfo;
sharp_geom_info *ginfo;
public:
sharp_base()
: ainfo(0), ginfo(0) {}
~sharp_base()
{
sharp_destroy_geom_info(ginfo);
sharp_destroy_alm_info(ainfo);
}
void set_general_geometry (int nrings, const int *nph, const ptrdiff_t *ofs,
const int *stride, const double *phi0, const double *theta,
const double *weight)
{
sharp_make_geom_info (nrings, nph, ofs, stride, phi0, theta, weight,
&ginfo);
}
void set_ECP_geometry (int nrings, int nphi)
{ sharp_make_ecp_geom_info (nrings, nphi, 0., 1, nphi, &ginfo); }
void set_Gauss_geometry (int nrings, int nphi)
{ sharp_make_gauss_geom_info (nrings, nphi, 1, nphi, &ginfo); }
void set_Healpix_geometry (int nside)
{ sharp_make_healpix_geom_info (nside, 1, &ginfo); }
void set_weighted_Healpix_geometry (int nside, const double *weight)
{ sharp_make_weighted_healpix_geom_info (nside, 1, weight, &ginfo); }
void set_triangular_alm_info (int lmax, int mmax)
{ sharp_make_triangular_alm_info (lmax, mmax, 1, &ainfo); }
};
/*! Compile-time flag passed to sharp_execute() by sharp_cxxjob:
    \c val is 0 for \c float and 1 for \c double.  The primary template is
    deliberately empty, so any other element type fails to compile. */
template<typename T> struct cxxjobhelper__ {};

template<> struct cxxjobhelper__<float>
  {
  enum { val = 0 };
  };

template<> struct cxxjobhelper__<double>
  {
  enum { val = 1 };
  };
template<typename T> class sharp_cxxjob: public sharp_base
{
private:
static void *conv (xcomplex<T> *ptr)
{ return reinterpret_cast<void *>(ptr); }
static void *conv (const xcomplex<T> *ptr)
{ return const_cast<void *>(reinterpret_cast<const void *>(ptr)); }
static void *conv (T *ptr)
{ return reinterpret_cast<void *>(ptr); }
static void *conv (const T *ptr)
{ return const_cast<void *>(reinterpret_cast<const void *>(ptr)); }
public:
void alm2map (const xcomplex<T> *alm, T *map, bool add)
{
void *aptr=conv(alm), *mptr=conv(map);
sharp_execute (SHARP_ALM2MAP, 0, add, &aptr, &mptr, ginfo, ainfo, 1,
cxxjobhelper__<T>::val,0,0,0);
}
void alm2map_spin (const xcomplex<T> *alm1, const xcomplex<T> *alm2,
T *map1, T *map2, int spin, bool add)
{
void *aptr[2], *mptr[2];
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
mptr[0]=conv(map1); mptr[1]=conv(map2);
sharp_execute (SHARP_ALM2MAP, spin, add, aptr, mptr, ginfo, ainfo, 1,
cxxjobhelper__<T>::val,0,0,0);
}
void alm2map_der1 (const xcomplex<T> *alm, T *map1, T *map2, bool add)
{
void *aptr=conv(alm), *mptr[2];
mptr[0]=conv(map1); mptr[1]=conv(map2);
sharp_execute (SHARP_ALM2MAP_DERIV1, 1, add,&aptr, mptr, ginfo, ainfo,
1, cxxjobhelper__<T>::val,0,0,0);
}
void map2alm (const T *map, xcomplex<T> *alm, bool add)
{
void *aptr=conv(alm), *mptr=conv(map);
sharp_execute (SHARP_MAP2ALM, 0, add, &aptr, &mptr, ginfo, ainfo, 1,
cxxjobhelper__<T>::val,0,0,0);
}
void map2alm_spin (const T *map1, const T *map2, xcomplex<T> *alm1,
xcomplex<T> *alm2, int spin, bool add)
{
void *aptr[2], *mptr[2];
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
mptr[0]=conv(map1); mptr[1]=conv(map2);
sharp_execute (SHARP_MAP2ALM, spin, add, aptr, mptr, ginfo, ainfo, 1,
cxxjobhelper__<T>::val,0,0,0);
}
};
#endif

View file

@ -0,0 +1,222 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_geomhelpers.c
* Spherical transform library
*
* Copyright (C) 2006-2011 Max-Planck-Society
* \author Martin Reinecke
*/
#include <math.h>
#include "sharp_geomhelpers.h"
#include "c_utils.h"
/* Unweighted HEALPix geometry: builds a temporary array of 2*nside ring
   weights, sets it via SET_ARRAY(...,0,2*nside,1), and delegates to the
   weighted variant. */
void sharp_make_healpix_geom_info (int nside, int stride,
  sharp_geom_info **geom_info)
{
  const int nweights = 2*nside;
  double *unit_weight = RALLOC(double,nweights);

  SET_ARRAY(unit_weight,0,nweights,1);
  sharp_make_weighted_healpix_geom_info (nside, stride, unit_weight,
    geom_info);

  DEALLOC(unit_weight);
}
/* HEALPix geometry with per-ring weights.
 *
 * Fills, for each of the 4*nside-1 rings, the colatitude, pixel count,
 * azimuth of the first pixel, offset of the first pixel (scaled by stride)
 * and weight, then passes the arrays to sharp_make_geom_info().  Rings in
 * the southern half are derived from their northern mirror ring.
 * weight is indexed by (northern ring number - 1).
 */
void sharp_make_weighted_healpix_geom_info (int nside, int stride,
  const double *weight, sharp_geom_info **geom_info)
{
  const double pi=3.141592653589793238462643383279502884197;
  const ptrdiff_t npix=(ptrdiff_t)nside*nside*12;
  const ptrdiff_t ncap=2*(ptrdiff_t)nside*(nside-1);
  const int nrings=4*nside-1;
  /* factor of the cos(theta) formula used outside the polar caps */
  const double fact1 = (8.*nside)/npix;

  double *theta=RALLOC(double,nrings);
  double *ring_weight=RALLOC(double,nrings);
  int *nph=RALLOC(int,nrings);
  double *phi0=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *ring_stride=RALLOC(int,nrings);

  for (int iring=0; iring<nrings; ++iring)
  {
    const int ring=iring+1;
    /* rings beyond 2*nside mirror a northern ring */
    const int is_south = (ring>2*nside);
    const ptrdiff_t northring = is_south ? 4*nside-ring : ring;

    ring_stride[iring] = stride;

    if (northring < nside)
    {
      /* polar cap: ring length grows with the ring number */
      theta[iring] = 2*asin(northring/(sqrt(6.)*nside));
      nph[iring] = 4*northring;
      phi0[iring] = pi/nph[iring];
      ofs[iring] = 2*northring*(northring-1)*stride;
    }
    else
    {
      /* remaining rings: constant length 4*nside, alternating phi0 */
      double costheta = (2*nside-northring)*fact1;
      theta[iring] = acos(costheta);
      nph[iring] = 4*nside;
      if ((northring-nside) & 1)
        phi0[iring] = 0;
      else
        phi0[iring] = pi/nph[iring];
      ofs[iring] = (ncap + (northring-nside)*nph[iring])*stride;
    }

    if (is_south)
    {
      theta[iring] = pi-theta[iring];
      ofs[iring] = (npix - nph[iring])*stride - ofs[iring];
    }

    ring_weight[iring]=4.*pi/npix*weight[northring-1];
  }

  sharp_make_geom_info (nrings, nph, ofs, ring_stride, phi0, theta,
    ring_weight, geom_info);

  DEALLOC(theta);
  DEALLOC(ring_weight);
  DEALLOC(nph);
  DEALLOC(phi0);
  DEALLOC(ofs);
  DEALLOC(ring_stride);
}
static void gauleg (double x1, double x2, double *x, double *w, int n)
{
const double pi = 3.141592653589793238462643383279502884197;
const double eps = 3.0E-14;
int m = (n+1)/2;
double xm = 0.5*(x2+x1);
double xl = 0.5*(x2-x1);
for(int i=1; i<=m; ++i)
{
double z = cos(pi*(i-0.25)/(n+0.5));
double pp;
int dobreak=0;
while(1)
{
double p1 = 1.0, p2 = 0.0;
double z1 = z;
int j;
for(j=1; j<=n; ++j)
{
double p3 = p2;
p2 = p1;
p1 = ((2*j-1)*z*p2-(j-1)*p3)/j;
}
pp = n*(z*p1-p2)/(z*z-1);
z = z1 - p1/pp;
if (dobreak) break;
if (fabs(z-z1) <= eps) dobreak=1;
}
x[i-1] = xm - xl*z;
x[n-i] = xm + xl*z;
w[i-1] = w[n-i] = 2*xl/((1-z*z)*pp*pp);
}
}
static void makeweights (int bw, double *weights)
{
const double pi = 3.141592653589793238462643383279502884197;
const double fudge = pi/(4*bw);
for (int j=0; j<2*bw; ++j)
{
double tmpsum = 0;
for (int k=0; k<bw; ++k)
tmpsum += 1./(2*k+1) * sin((2*j+1)*(2*k+1)*fudge);
tmpsum *= sin((2*j+1)*fudge);
tmpsum *= 2./bw;
weights[j] = tmpsum;
/* weights[j + 2*bw] = tmpsum * sin((2*j+1)*fudge); */
}
}
/* Gaussian grid geometry: nrings rings of nphi pixels each.
 *
 * Ring colatitudes are acos(-x) for the Gauss-Legendre nodes x on [-1,1];
 * the quadrature weights are scaled by 2*pi/nphi.  Every ring starts at
 * azimuth 0, ring m starts at offset m*stride_lat and uses stride_lon
 * between neighbouring pixels.
 */
void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
  int stride_lat, sharp_geom_info **geom_info)
{
  const double pi=3.141592653589793238462643383279502884197;

  double *theta=RALLOC(double,nrings);
  double *weight=RALLOC(double,nrings);
  int *nph=RALLOC(int,nrings);
  double *phi0=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *ring_stride=RALLOC(int,nrings);

  /* theta temporarily holds the quadrature nodes (cosines) */
  gauleg(-1,1,theta,weight,nrings);

  for (int iring=0; iring<nrings; ++iring)
  {
    theta[iring] = acos(-theta[iring]);
    nph[iring]=nphi;
    phi0[iring]=0;
    ofs[iring]=(ptrdiff_t)iring*stride_lat;
    ring_stride[iring]=stride_lon;
    weight[iring]*=2*pi/nphi;
  }

  sharp_make_geom_info (nrings, nph, ofs, ring_stride, phi0, theta, weight,
    geom_info);

  DEALLOC(theta);
  DEALLOC(weight);
  DEALLOC(nph);
  DEALLOC(phi0);
  DEALLOC(ofs);
  DEALLOC(ring_stride);
}
/* Equidistant (ECP) grid geometry: nrings rings of nphi pixels each.
 *
 * Ring m lies at colatitude (m+0.5)*pi/nrings, starts at azimuth phi0 and
 * at offset m*stride_lat, and uses stride_lon between neighbouring pixels.
 * The ring weights come from makeweights(nrings/2) scaled by 2*pi/nphi.
 * nrings must be even; this is checked with UTIL_ASSERT.
 */
void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
  int stride_lon, int stride_lat, sharp_geom_info **geom_info)
{
  const double pi=3.141592653589793238462643383279502884197;

  double *theta=RALLOC(double,nrings);
  double *weight=RALLOC(double,nrings);
  int *nph=RALLOC(int,nrings);
  double *ring_phi0=RALLOC(double,nrings);
  ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
  int *ring_stride=RALLOC(int,nrings);

  UTIL_ASSERT((nrings&1)==0,
    "Even number of rings needed for equidistant grid!");

  makeweights(nrings/2,weight);

  for (int iring=0; iring<nrings; ++iring)
  {
    theta[iring] = (iring+0.5)*pi/nrings;
    nph[iring]=nphi;
    ring_phi0[iring]=phi0;
    ofs[iring]=(ptrdiff_t)iring*stride_lat;
    ring_stride[iring]=stride_lon;
    weight[iring]*=2*pi/nphi;
  }

  sharp_make_geom_info (nrings, nph, ofs, ring_stride, ring_phi0, theta,
    weight, geom_info);

  DEALLOC(theta);
  DEALLOC(weight);
  DEALLOC(nph);
  DEALLOC(ring_phi0);
  DEALLOC(ofs);
  DEALLOC(ring_stride);
}

View file

@ -0,0 +1,82 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_geomhelpers.h
* SHARP helper function for the creation of grid geometries
*
* Copyright (C) 2006-2011 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_GEOMHELPERS_H
#define PLANCK_SHARP_GEOMHELPERS_H
#include "sharp_lowlevel.h"
#ifdef __cplusplus
extern "C" {
#endif
/*! Creates a geometry description for a HEALPix map with resolution
parameter \a nside. \a stride is the index difference between two adjacent
pixels; it is applied to every ring and to the ring start offsets. The
result is returned through \a geom_info.
\ingroup geominfogroup */
void sharp_make_healpix_geom_info (int nside, int stride,
sharp_geom_info **geom_info);
/*! Creates a geometry description for a HEALPix map with resolution
parameter \a nside. \a weight contains the relative ring weights and must
have \a 2*nside entries; entry \a i is used for the ring number \a i+1
counted from the north pole and for its southern mirror ring. Each weight is
multiplied by the pixel area 4*pi/npix.
\ingroup geominfogroup */
void sharp_make_weighted_healpix_geom_info (int nside, int stride,
const double *weight, sharp_geom_info **geom_info);
/*! Creates a geometry description for a Gaussian map with \a nrings
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
pixel in each ring is 0. The index difference between two adjacent pixels
in an iso-latitude ring is \a stride_lon, the index difference between the
two start pixels in consecutive iso-latitude rings is \a stride_lat.
\ingroup geominfogroup */
void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
int stride_lat, sharp_geom_info **geom_info);
/*! Creates a geometry description for an ECP map with \a nrings
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
pixel in each ring is \a phi0 (in radians). The index difference between
two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
difference between the two start pixels in consecutive iso-latitude rings
is \a stride_lat.
\note The spacing of pixel centers is equidistant in colatitude and
longitude.
\note \a nrings must be an even number; this is enforced by an assertion.
\note The sphere is pixelized in a way that the colatitude of the first ring
is \a 0.5*(pi/nrings). There are no pixel centers at the poles.
\ingroup geominfogroup */
void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
int stride_lon, int stride_lat, sharp_geom_info **geom_info);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,57 @@
/* Instantiation driver for one value of nvec.
   The including file is expected to have defined nvec and MAXJOB_SPECIAL
   (NOTE(review): neither is defined in this file -- confirm at the include
   site).  Tb and Y() paste nvec onto a name so that each inclusion yields a
   distinct set of identifiers. */
#define Tb CONCAT2(Tb,nvec)
#define Y(arg) CONCAT2(arg,nvec)
/* code that depends on nvec only */
#include "sharp_core_inc.c"
/* sharp_core_inc3.c is left out only when all six specialised job counts
   below are compiled in */
#if (MAXJOB_SPECIAL<6)
#include "sharp_core_inc3.c"
#endif
/* Each block below includes sharp_core_inc2.c once with njobs fixed to a
   compile-time constant; Z() additionally pastes that constant onto the
   name.  Z and njobs are undefined again after every inclusion. */
#if (MAXJOB_SPECIAL>=1)
#define njobs 1
#define Z(arg) CONCAT3(arg,nvec,njobs)
#include "sharp_core_inc2.c"
#undef Z
#undef njobs
#endif
#if (MAXJOB_SPECIAL>=2)
#define njobs 2
#define Z(arg) CONCAT3(arg,nvec,njobs)
#include "sharp_core_inc2.c"
#undef Z
#undef njobs
#endif
#if (MAXJOB_SPECIAL>=3)
#define njobs 3
#define Z(arg) CONCAT3(arg,nvec,njobs)
#include "sharp_core_inc2.c"
#undef Z
#undef njobs
#endif
#if (MAXJOB_SPECIAL>=4)
#define njobs 4
#define Z(arg) CONCAT3(arg,nvec,njobs)
#include "sharp_core_inc2.c"
#undef Z
#undef njobs
#endif
#if (MAXJOB_SPECIAL>=5)
#define njobs 5
#define Z(arg) CONCAT3(arg,nvec,njobs)
#include "sharp_core_inc2.c"
#undef Z
#undef njobs
#endif
#if (MAXJOB_SPECIAL>=6)
#define njobs 6
#define Z(arg) CONCAT3(arg,nvec,njobs)
#include "sharp_core_inc2.c"
#undef Z
#undef njobs
#endif
/* leave no helper macros behind so the file can be included again with a
   different nvec */
#undef Y
#undef Tb

View file

@ -0,0 +1,66 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_internal.h
* Internally used functionality for the spherical transform library.
*
* Copyright (C) 2006-2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_INTERNAL_H
#define PLANCK_SHARP_INTERNAL_H
#ifdef __cplusplus
#error This header file cannot be included from C++, only from C
#endif
#include "sharp.h"
/*! Selector for the floating point precision of user-supplied arrays.
NOTE(review): presumably derived from the \a dp argument of sharp_execute()
(0 -> FLOAT, otherwise DOUBLE) - confirm in sharp.c. */
typedef enum { FLOAT, DOUBLE } sharp_fde;
/*! \internal
Bookkeeping structure describing one SHT job. It bundles the user-supplied
arguments of sharp_execute() with the temporary buffers and statistics
that are filled in while the job runs. */
typedef struct
{
/*! Kind of transform to perform. */
sharp_jobtype type;
/*! Spin of the transformed quantities. */
int spin;
/*! If 0, outputs are overwritten; otherwise results are added to them
(see the sharp_execute() documentation). */
int add_output;
/*! Number of map and a_lm arrays per transform.
NOTE(review): presumably 1 for spin 0 and 2 otherwise - confirm. */
int nmaps, nalm;
/*! Precision of the arrays behind \a map and \a alm. */
sharp_fde fde;
/*! User-supplied array of map pointers (element type depends on \a fde). */
void **map;
/*! User-supplied array of a_lm pointers (element type depends on \a fde). */
void **alm;
/*! Temporary phase buffer; allocated and released by the execute routines. */
complex double *phase;
/*! Normalisation factors obtained from sharp_Ylmgen_get_norm(). */
double *norm_l;
/*! Per-thread a_lm scratch buffer (see alloc_almtmp()/dealloc_almtmp()). */
complex double *almtmp;
/*! Geometry of the maps; not owned by the job. */
const sharp_geom_info *ginfo;
/*! Layout of the a_lm arrays; not owned by the job. */
const sharp_alm_info *ainfo;
/*! Internally used SHT parameter (see the \a nv argument of
sharp_execute()). */
int nv;
/*! Wall clock time of the last execution, in seconds. */
double time;
/*! Number of simultaneous transforms. */
int ntrans;
/*! Accumulated estimate of the floating point operation count. */
unsigned long long opcnt;
} sharp_job;
/*! NOTE(review): presumably returns the largest supported value of the
\a nv parameter - the definition is not part of this header. */
int sharp_get_nv_max (void);
/*! NOTE(review): presumably determines a suitable \a nv for the given job
parameters - the definition is not part of this header. */
int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans);
#endif

188
external/sharp/libsharp/sharp_lowlevel.h vendored Normal file
View file

@ -0,0 +1,188 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_lowlevel.h
* Low-level, portable interface for the spherical transform library.
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_LOWLEVEL_H
#define PLANCK_SHARP_LOWLEVEL_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*! \internal
Helper type containing information about a single ring.
The members mirror the per-ring arguments of sharp_make_geom_info():
\a theta is the colatitude and \a phi0 the azimuth of the first pixel (both
in radians), \a weight the pixel weight used for the ring, \a ofs the index
of the ring's first pixel in the map array, \a nph the number of pixels in
the ring and \a stride the index distance between consecutive pixels.
NOTE(review): \a cth and \a sth presumably cache cos(theta) and sin(theta) -
confirm in the implementation. */
typedef struct
{
double theta, phi0, weight, cth, sth;
ptrdiff_t ofs;
int nph, stride;
} sharp_ringinfo;
/*! \internal
Helper type containing information about a pair of rings with colatitudes
symmetric around the equator.
A ring without a partner is stored with \a r2.nph <= 0; code using this
type tests \a r2.nph>0 to find out whether the second ring exists. */
typedef struct
{
sharp_ringinfo r1,r2;
} sharp_ringpair;
/*! \internal
Type holding all required information about a map geometry:
an array \a pair with \a npairs ring pairs. */
typedef struct
{
sharp_ringpair *pair;
int npairs;
} sharp_geom_info;
/*! \defgroup almgroup Helpers for dealing with a_lm */
/*! \{ */
/*! \internal
Helper type for index calculation in a_lm arrays. */
typedef struct
{
/*! Maximum \a l index of the array */
int lmax;
/*! Number of different \a m values in this object */
int nm;
/*! Array with \a nm entries containing the individual m values */
int *mval;
/*! Array with \a nm entries containing the (hypothetical) indices of
the coefficients with quantum numbers 0,\a mval[i].
The index is "hypothetical" because it refers to \a l = 0 even for
\a m > 0; sharp_test_mpi.c, for example, sets it to the offset of the
first stored coefficient of that \a m minus \a m. */
ptrdiff_t *mvstart;
/*! Stride between a_lm and a_(l+1),m */
ptrdiff_t stride;
} sharp_alm_info;
/*! Creates an a_lm information data structure from the following parameters:
\param lmax maximum \a l quantum number (>=0)
\param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax)
\param stride the stride between consecutive a_lm entries
\param mstart the index of the (hypothetical) coefficient with the
quantum numbers 0,\a m. Must have \a mmax+1 entries.
\param alm_info will hold a pointer to the newly created data structure;
release it with sharp_destroy_alm_info().
*/
void sharp_make_alm_info (int lmax, int mmax, int stride,
const ptrdiff_t *mstart, sharp_alm_info **alm_info);
/*! Creates an a_lm information data structure from the following parameters:
\param lmax maximum \a l quantum number (>=0)
\param nm number of different \a m (<=\a lmax+1)
\param stride the stride between consecutive a_lm entries
\param mval array with \a nm entries containing the individual m values
\param mvstart array with \a nm entries containing the (hypothetical)
indices of the coefficients with the quantum numbers 0,\a mval[i]
\param alm_info will hold a pointer to the newly created data structure;
release it with sharp_destroy_alm_info().
*/
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
const ptrdiff_t *mvstart, sharp_alm_info **alm_info);
/*! Returns the index of the coefficient with quantum numbers \a l,
\a mval[mi]. Note that \a mi is a position in the \a mval array of \a self,
not the \a m value itself. */
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi);
/*! Deallocates the a_lm info object. */
void sharp_destroy_alm_info (sharp_alm_info *info);
/*! \} */
/*! \defgroup geominfogroup Functions for dealing with geometry information */
/*! \{ */
/*! Creates a geometry information object from a set of ring descriptions.
All arrays passed to this function must have \a nrings elements.
\param nrings the number of rings in the map
\param nph the number of pixels in each ring
\param ofs the index of the first pixel in each ring in the map array
\param stride the stride between consecutive pixels
\param phi0 the azimuth (in radians) of the first pixel in each ring
\param theta the colatitude (in radians) of each ring
\param weight the pixel weight to be used for the ring
\param geom_info will hold a pointer to the newly created data structure;
release it with sharp_destroy_geom_info().
*/
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
const int *stride, const double *phi0, const double *theta,
const double *weight, sharp_geom_info **geom_info);
/*! Deallocates the geometry information in \a info. */
void sharp_destroy_geom_info (sharp_geom_info *info);
/*! \} */
/*! \defgroup lowlevelgroup Low-level libsharp SHT interface */
/*! \{ */
/*! Enumeration of SHARP job types. */
typedef enum { SHARP_MAP2ALM, /*!< analysis */
SHARP_ALM2MAP, /*!< synthesis */
SHARP_ALM2MAP_DERIV1 /*!< synthesis of first derivatives */
} sharp_jobtype;
/*! Performs a libsharp SHT job. The interface deliberately does not use
the C99 "complex" data type, in order to be callable from C++ (this header
is wrapped in an extern "C" block for that purpose); the array arguments
are therefore passed as untyped pointers.
\param type the type of SHT
\param spin the spin of the quantities to be transformed
\param add_output if 0, the output arrays will be overwritten,
else the result will be added to the output arrays.
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
depends on the \a dp parameter.
\param map contains pointers to the maps. If \a spin==0,
map[0] points to the map of the first SHT, map[1] to that of the second
etc. If \a spin>0, map[0] and map[1] point to the maps of the first SHT,
map[2] and map[3] to those of the second, etc. The exact data type of \a map
depends on the \a dp parameter.
\param geom_info A \c sharp_geom_info object compatible with the provided
\a map arrays.
\param alm_info A \c sharp_alm_info object compatible with the provided
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
exactly once.
\param ntrans the number of simultaneous SHTs
\param dp if 0, the \a alm is expected to have the type "complex float **"
and \a map is expected to have the type "float **"; otherwise the expected
types are "complex double **" and "double **", respectively.
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
doing.
\param time If not NULL, the wall clock time required for this SHT
(in seconds) will be written here.
\param opcnt If not NULL, a conservative estimate of the total floating point
operation count for this SHT will be written here. */
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
int ntrans, int dp, int nv, double *time, unsigned long long *opcnt);
/*! \} */
#ifdef __cplusplus
}
#endif
#endif

300
external/sharp/libsharp/sharp_mpi.c vendored Normal file
View file

@ -0,0 +1,300 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_mpi.c
* Functionality only needed for MPI-parallel transforms
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifdef USE_MPI
#include "sharp_mpi.h"
/* Communication layout of one MPI-parallel SHT job. Filled by
   sharp_make_mpi_info() and released by sharp_destroy_mpi_info().
   Arrays described as "on every task" have ntasks entries, the "accumulated"
   ones have ntasks+1 entries (exclusive prefix sums, last entry = total). */
typedef struct
{
int ntasks; /* number of tasks */
int mytask; /* own task number */
MPI_Comm comm; /* communicator to use */
int *nm; /* number of m values on every task */
int *ofs_m; /* accumulated nm */
int nmtotal; /* total number of m values (must be mmax+1) */
int *mval; /* array containing all m values of task 0, task 1 etc. */
int mmax; /* result of sharp_get_mmax() for the gathered m values */
int nph; /* 2*nmaps*ntrans: entries per (ring pair, m) in the phase buffers */
int *npair; /* number of ring pairs on every task */
int *ofs_pair; /* accumulated npair */
int npairtotal; /* total number of ring pairs */
double *theta; /* theta of first ring of every pair on task 0, task 1 etc. */
int *ispair; /* is this really a pair? (nonzero if the second ring has pixels) */
/* send/receive counts and displacements for all2all communication,
   expressed in units of MPI_DOUBLE; the *disp arrays have ntasks+1 entries */
int *almcount, *almdisp, *mapcount, *mapdisp; /* for all2all communication */
} sharp_mpi_info;
/* Collects, on every task, the global distribution of m values and ring
   pairs of an MPI-parallel job and derives the count/displacement tables
   needed for the later MPI_Alltoallv exchanges.
   comm  : communicator over which the job is distributed
   job   : local job description (only ainfo, ginfo, nmaps and ntrans are read)
   minfo : uninitialised structure that is filled in; release it with
           sharp_destroy_mpi_info().
   This routine is collective: every task of comm has to call it. */
static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job,
sharp_mpi_info *minfo)
{
minfo->comm = comm;
MPI_Comm_size (comm, &minfo->ntasks);
MPI_Comm_rank (comm, &minfo->mytask);
/* gather the number of m values held by every task
   (the cast only removes constness; the send buffer is not modified) */
minfo->nm=RALLOC(int,minfo->ntasks);
MPI_Allgather ((int *)(&job->ainfo->nm),1,MPI_INT,minfo->nm,1,MPI_INT,comm);
/* exclusive prefix sum of nm: ofs_m[t] is where task t's m values start */
minfo->ofs_m=RALLOC(int,minfo->ntasks+1);
minfo->ofs_m[0]=0;
for (int i=1; i<=minfo->ntasks; ++i)
minfo->ofs_m[i] = minfo->ofs_m[i-1]+minfo->nm[i-1];
minfo->nmtotal=minfo->ofs_m[minfo->ntasks];
/* gather the m values themselves, ordered by task */
minfo->mval=RALLOC(int,minfo->nmtotal);
MPI_Allgatherv(job->ainfo->mval, job->ainfo->nm, MPI_INT, minfo->mval,
minfo->nm, minfo->ofs_m, MPI_INT, comm);
minfo->mmax=sharp_get_mmax(minfo->mval,minfo->nmtotal);
/* same procedure for the ring pairs: counts first, then prefix sums */
minfo->npair=RALLOC(int,minfo->ntasks);
MPI_Allgather ((int *)(&job->ginfo->npairs), 1, MPI_INT, minfo->npair, 1,
MPI_INT, comm);
minfo->ofs_pair=RALLOC(int,minfo->ntasks+1);
minfo->ofs_pair[0]=0;
for (int i=1; i<=minfo->ntasks; ++i)
minfo->ofs_pair[i] = minfo->ofs_pair[i-1]+minfo->npair[i-1];
minfo->npairtotal=minfo->ofs_pair[minfo->ntasks];
/* pack the local per-pair data (colatitude of the first ring, and whether
   the second ring of the pair has any pixels) into contiguous send buffers */
double *theta_tmp=RALLOC(double,job->ginfo->npairs);
int *ispair_tmp=RALLOC(int,job->ginfo->npairs);
for (int i=0; i<job->ginfo->npairs; ++i)
{
theta_tmp[i]=job->ginfo->pair[i].r1.theta;
ispair_tmp[i]=job->ginfo->pair[i].r2.nph>0;
}
/* ... and make the per-pair data of all tasks known everywhere */
minfo->theta=RALLOC(double,minfo->npairtotal);
minfo->ispair=RALLOC(int,minfo->npairtotal);
MPI_Allgatherv(theta_tmp, job->ginfo->npairs, MPI_DOUBLE, minfo->theta,
minfo->npair, minfo->ofs_pair, MPI_DOUBLE, comm);
MPI_Allgatherv(ispair_tmp, job->ginfo->npairs, MPI_INT, minfo->ispair,
minfo->npair, minfo->ofs_pair, MPI_INT, comm);
DEALLOC(theta_tmp);
DEALLOC(ispair_tmp);
/* number of phase entries per (ring pair, m) combination */
minfo->nph=2*job->nmaps*job->ntrans;
minfo->almcount=RALLOC(int,minfo->ntasks);
minfo->almdisp=RALLOC(int,minfo->ntasks+1);
minfo->mapcount=RALLOC(int,minfo->ntasks);
minfo->mapdisp=RALLOC(int,minfo->ntasks+1);
minfo->almdisp[0]=minfo->mapdisp[0]=0;
/* Counts are used with MPI_DOUBLE in the Alltoallv calls.
   NOTE(review): the leading factor 2 presumably accounts for the real and
   imaginary part of every complex phase entry - confirm.
   a_lm side: own m values x ring pairs of task i;
   map side : m values of task i x own ring pairs. */
for (int i=0; i<minfo->ntasks; ++i)
{
minfo->almcount[i] = 2*minfo->nph*minfo->nm[minfo->mytask]*minfo->npair[i];
minfo->almdisp[i+1] = minfo->almdisp[i]+minfo->almcount[i];
minfo->mapcount[i] = 2*minfo->nph*minfo->nm[i]*minfo->npair[minfo->mytask];
minfo->mapdisp[i+1] = minfo->mapdisp[i]+minfo->mapcount[i];
}
}
/* Releases every array owned by *minfo (the counterpart of
   sharp_make_mpi_info()). The structure itself is not freed. */
static void sharp_destroy_mpi_info (sharp_mpi_info *minfo)
  {
  /* all2all count/displacement tables */
  DEALLOC(minfo->mapdisp);
  DEALLOC(minfo->mapcount);
  DEALLOC(minfo->almdisp);
  DEALLOC(minfo->almcount);

  /* gathered ring pair information */
  DEALLOC(minfo->ispair);
  DEALLOC(minfo->theta);
  DEALLOC(minfo->ofs_pair);
  DEALLOC(minfo->npair);

  /* gathered m value information */
  DEALLOC(minfo->mval);
  DEALLOC(minfo->ofs_m);
  DEALLOC(minfo->nm);
  }
/* Redistributes the phase buffer for the synthesis direction.
   On entry *ph holds the data in "a_lm side" layout (own m values, all ring
   pairs); it is exchanged with MPI_Alltoallv, released, re-allocated and
   refilled so that on exit it holds the own ring pairs for all m values,
   addressed as nph*(th*(mmax+1)+m). */
static void sharp_communicate_alm2map (const sharp_mpi_info *minfo, dcmplx **ph)
  {
  const int nph = minfo->nph;
  const int npair_own = minfo->npair[minfo->mytask];

  /* mapdisp[] counts doubles, the buffer holds complex values */
  dcmplx *recvbuf = RALLOC(dcmplx,minfo->mapdisp[minfo->ntasks]/2);
  MPI_Alltoallv (*ph,minfo->almcount,minfo->almdisp,MPI_DOUBLE,recvbuf,
    minfo->mapcount,minfo->mapdisp,MPI_DOUBLE,minfo->comm);

  DEALLOC(*ph);
  ALLOC(*ph,dcmplx,nph*npair_own*minfo->nmtotal);

  /* scatter the chunk received from every task to its final position */
  for (int task=0; task<minfo->ntasks; ++task)
    {
    const int nm_task = minfo->nm[task];
    for (int th=0; th<npair_own; ++th)
      for (int mi=0; mi<nm_task; ++mi)
        {
        const int m = minfo->mval[mi+minfo->ofs_m[task]];
        const int dst = nph*(th*(minfo->mmax+1) + m);
        const int src = minfo->mapdisp[task]/2+nph*(mi+th*nm_task);
        for (int k=0; k<nph; ++k)
          (*ph)[dst+k] = recvbuf[src+k];
        }
    }

  DEALLOC(recvbuf);
  }
/* Redistributes the phase buffer for the analysis direction.
   On entry *ph holds the own ring pairs for all m values, addressed as
   nph*(th*(mmax+1)+m). The data are packed per destination task, *ph is
   released and re-allocated, and MPI_Alltoallv delivers the own m values
   for all ring pairs into it. */
static void sharp_communicate_map2alm (const sharp_mpi_info *minfo, dcmplx **ph)
  {
  const int nph = minfo->nph;
  const int npair_own = minfo->npair[minfo->mytask];

  /* mapdisp[] counts doubles, the buffer holds complex values */
  dcmplx *sendbuf = RALLOC(dcmplx,minfo->mapdisp[minfo->ntasks]/2);

  /* gather, for every destination task, the m values that task owns */
  for (int task=0; task<minfo->ntasks; ++task)
    {
    const int nm_task = minfo->nm[task];
    for (int th=0; th<npair_own; ++th)
      for (int mi=0; mi<nm_task; ++mi)
        {
        const int m = minfo->mval[mi+minfo->ofs_m[task]];
        const int dst = minfo->mapdisp[task]/2+nph*(mi+th*nm_task);
        const int src = nph*(th*(minfo->mmax+1) + m);
        for (int k=0; k<nph; ++k)
          sendbuf[dst+k] = (*ph)[src+k];
        }
    }

  DEALLOC(*ph);
  ALLOC(*ph,dcmplx,nph*minfo->nm[minfo->mytask]*minfo->npairtotal);

  MPI_Alltoallv (sendbuf,minfo->mapcount,minfo->mapdisp,MPI_DOUBLE,
    *ph,minfo->almcount,minfo->almdisp,MPI_DOUBLE,minfo->comm);

  DEALLOC(sendbuf);
  }
/* Allocates job->phase for an MPI job.
   For SHARP_MAP2ALM the buffer is sized for nmfull x ntheta entries,
   for all other job types for nm x nthetafull entries; in both cases the
   entry count is multiplied by 2*ntrans*nmaps. */
static void alloc_phase_mpi (sharp_job *job, int nm, int ntheta,
  int nmfull, int nthetafull)
  {
  ptrdiff_t nentries;
  if (job->type==SHARP_MAP2ALM)
    nentries = (ptrdiff_t)(nmfull)*ntheta;
  else
    nentries = (ptrdiff_t)(nm)*nthetafull;

  job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nentries);
  }
/* Performs the phase exchange needed by the synthesis job types;
   does nothing for SHARP_MAP2ALM. */
static void alm2map_comm (sharp_job *job, const sharp_mpi_info *minfo)
  {
  if (job->type == SHARP_MAP2ALM)
    return;

  sharp_communicate_alm2map (minfo,&job->phase);
  }
/* Performs the phase exchange needed by the analysis job type;
   does nothing unless the job is SHARP_MAP2ALM. */
static void map2alm_comm (sharp_job *job, const sharp_mpi_info *minfo)
  {
  if (job->type != SHARP_MAP2ALM)
    return;

  sharp_communicate_map2alm (minfo,&job->phase);
  }
/* Executes an already built SHT job distributed over the tasks of comm.
   Every task supplies its own subset of ring pairs (job->ginfo) and of
   m values (job->ainfo). With a single task the call is forwarded to the
   non-MPI routine sharp_execute_job().
   On return job->time holds the elapsed wall clock time measured in this
   function and job->opcnt has been incremented by the operation counts of
   all OpenMP threads.
   The routine is collective: every task of comm has to call it. */
static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
{
double timer=wallTime();
int ntasks;
MPI_Comm_size(comm, &ntasks);
if (ntasks==1) /* fall back to scalar implementation */
{ sharp_execute_job (job); return; }
int lmax = job->ainfo->lmax;
job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
/* exchange the global m/ring distribution between all tasks */
sharp_mpi_info minfo;
sharp_make_mpi_info(comm, job, &minfo);
/* clear output arrays if requested */
init_output (job);
alloc_phase_mpi (job,job->ainfo->nm,job->ginfo->npairs,minfo.mmax+1,
minfo.npairtotal);
/* cos/sin of the colatitude of ALL ring pairs (not only the local ones),
   because the inner loop below runs over minfo.npairtotal pairs */
double *cth = RALLOC(double,minfo.npairtotal),
*sth = RALLOC(double,minfo.npairtotal);
idxhelper *stmp = RALLOC(idxhelper,minfo.npairtotal);
for (int i=0; i<minfo.npairtotal; ++i)
{
cth[i] = cos(minfo.theta[i]);
sth[i] = sin(minfo.theta[i]);
stmp[i].s=sth[i];
stmp[i].i=i;
}
/* idx[] lists the ring pair indices in the order produced by idx_compare
   NOTE(review): presumably ascending sin(theta) - confirm in idx_compare */
qsort (stmp,minfo.npairtotal,sizeof(idxhelper),idx_compare);
int *idx = RALLOC(int,minfo.npairtotal);
for (int i=0; i<minfo.npairtotal; ++i)
idx[i]=stmp[i].i;
DEALLOC(stmp);
/* map->phase where necessary */
map2phase (job, minfo.mmax, 0, job->ginfo->npairs);
/* analysis only: hand the phases over to the tasks owning the m values */
map2alm_comm (job, &minfo);
#pragma omp parallel
{
/* every thread works on a private copy of the job with its own
   Ylm generator and a_lm scratch buffer */
sharp_job ljob = *job;
sharp_Ylmgen_C generator;
sharp_Ylmgen_init (&generator,lmax,minfo.mmax,ljob.spin);
alloc_almtmp(&ljob,lmax);
#pragma omp for schedule(dynamic,1)
for (int mi=0; mi<job->ainfo->nm; ++mi)
{
/* alm->alm_tmp where necessary */
alm2almtmp (&ljob, lmax, mi);
/* inner conversion loop */
inner_loop (&ljob, minfo.ispair, cth, sth, 0, minfo.npairtotal,
&generator, mi, idx);
/* alm_tmp->alm where necessary */
almtmp2alm (&ljob, lmax, mi);
}
sharp_Ylmgen_destroy(&generator);
dealloc_almtmp(&ljob);
/* fold the per-thread operation count into the shared job; the critical
   section serialises the update of job->opcnt */
#pragma omp critical
job->opcnt+=ljob.opcnt;
} /* end of parallel region */
/* synthesis only: hand the phases over to the tasks owning the rings */
alm2map_comm (job, &minfo);
/* phase->map where necessary */
phase2map (job, minfo.mmax, 0, job->ginfo->npairs);
DEALLOC(cth);
DEALLOC(sth);
DEALLOC(idx);
DEALLOC(job->norm_l);
dealloc_phase (job);
sharp_destroy_mpi_info(&minfo);
job->time=wallTime()-timer;
}
/* Public entry point for MPI-parallel SHTs: builds a job from the
   arguments, runs it on comm and reports the statistics through the
   optional output pointers time and opcnt (either may be NULL). */
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
  int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
  const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time,
  unsigned long long *opcnt)
  {
  sharp_job job;

  sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
    alm_info, ntrans, dp, nv);
  sharp_execute_job_mpi (&job, comm);

  if (time)
    *time = job.time;
  if (opcnt)
    *opcnt = job.opcnt;
  }
#endif

85
external/sharp/libsharp/sharp_mpi.h vendored Normal file
View file

@ -0,0 +1,85 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_mpi.h
* Interface for the spherical transform library with MPI support.
*
* Copyright (C) 2011,2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_MPI_H
#define PLANCK_SHARP_MPI_H
#include <mpi.h>
#include "sharp.h"
#ifdef __cplusplus
extern "C" {
#endif
/*! Performs an MPI parallel libsharp SHT job. The interface deliberately does
not use the C99 "complex" data type, in order to be callable from C++ (this
header is wrapped in an extern "C" block for that purpose); the array
arguments are therefore passed as untyped pointers.
\param comm the MPI communicator to be used for this SHT
\param type the type of SHT
\param spin the spin of the quantities to be transformed
\param add_output if 0, the output arrays will be overwritten,
else the result will be added to the output arrays.
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
depends on the \a dp parameter.
\param map contains pointers to the maps. If \a spin==0,
map[0] points to the map of the first SHT, map[1] to that of the second
etc. If \a spin>0, map[0] and map[1] point to the maps of the first SHT,
map[2] and map[3] to those of the second, etc. The exact data type of \a map
depends on the \a dp parameter.
\param geom_info A \c sharp_geom_info object compatible with the provided
\a map arrays. The total map geometry is the union of all \a geom_info
objects over the participating MPI tasks.
\param alm_info A \c sharp_alm_info object compatible with the provided
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
exactly once in the union of all \a alm_info objects over the participating
MPI tasks.
\param ntrans the number of simultaneous SHTs
\param dp if 0, the \a alm is expected to have the type "complex float **"
and \a map is expected to have the type "float **"; otherwise the expected
types are "complex double **" and "double **", respectively.
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
doing.
\param time If not NULL, the wall clock time required for this SHT
(in seconds) will be written here.
\param opcnt If not NULL, a conservative estimate of the total floating point
operation count for this SHT will be written here. */
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time,
unsigned long long *opcnt);
#ifdef __cplusplus
}
#endif
#endif

249
external/sharp/libsharp/sharp_test.c vendored Normal file
View file

@ -0,0 +1,249 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_test.c
Accuracy test for libsharp's map analysis.
This program first generates a_lm coefficients up to
a user-specified lmax (with mmax=lmax); where applicable, the
real and imaginary parts of the coefficients are uniform
random numbers of the interval [-1;1[.
Afterwards, the random a_lm are converted to a map.
This map is analyzed (optionally using an iterative scheme
with a user-supplied number of steps).
After every iteration, the code then outputs the RMS of the residual a_lm
(i.e. the difference between the current and original a_lm), divided by
the RMS of the original a_lm, as well as the maximum absolute change of any
real or imaginary part between the current and original a_lm.
This operation can be performed for several different pixelisations:
- a Gaussian with the minimal number of rings for exact analysis
and a user-defined ring resolution
- an ECP grid with the minimal number of rings for exact analysis
and a user-defined ring resolution
- a Healpix grid with a user-defined Nside parameter.
The user can specify the spin of the desired transform.
Copyright (C) 2006-2012 Max-Planck-Society
\author Martin Reinecke
*/
#include <stdio.h>
#include <string.h>
#ifdef USE_MPI
#include "mpi.h"
#endif
#include "sharp.h"
#include "sharp_geomhelpers.h"
#include "sharp_almhelpers.h"
#include "c_utils.h"
#include "sharp_announce.h"
#include "sharp_core.h"
#include "memusage.h"
typedef complex double dcmplx;
/* Returns a pseudo-random number from the half-open interval [min;max[,
   drawn from the C library generator rand(). */
static double drand (double min, double max)
  {
  const double span = max-min;
  return min + span*rand()/(RAND_MAX+1.0);
  }
/* Fills alm with random coefficients for all (l,m) described by helper.
   Entries with both l<spin and m<spin are set to zero; all other entries
   get a real part from drand(-1,1) and, for m!=0, an imaginary part from
   drand(-1,1) (the imaginary part is zero for m==0). */
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin)
  {
  for (int mi=0; mi<helper->nm; ++mi)
    {
    const int m = helper->mval[mi];
    for (int l=m; l<=helper->lmax; ++l)
      {
      dcmplx coeff = 0.;
      if ((l>=spin)||(m>=spin))
        {
        /* the real part is always drawn before the imaginary part */
        double re = drand(-1,1);
        double im = (m==0) ? 0 : drand(-1,1);
        coeff = re+_Complex_I*im;
        }
      alm[sharp_alm_index(helper,l,mi)] = coeff;
      }
    }
  }
/* Compares two sets of a_lm arrays and prints, for each of the ncomp
   components, the RMS of the difference divided by the RMS of the reference
   (alm) as well as the largest absolute difference of any real or
   imaginary part. Each array is expected to hold nalms coefficients. */
static void measure_errors (dcmplx **alm, dcmplx **alm2,
  ptrdiff_t nalms, int ncomp)
  {
  for (int comp=0; comp<ncomp; ++comp)
    {
    const dcmplx *ref = alm[comp];
    const dcmplx *other = alm2[comp];
    double diff_sq=0, ref_sq=0, worst=0;

    for (ptrdiff_t k=0; k<nalms; ++k)
      {
      const double re = creal(ref[k]), im = cimag(ref[k]);
      const double dre = re-creal(other[k]),
                   dim = im-cimag(other[k]);
      diff_sq += dre*dre+dim*dim;
      ref_sq += re*re+im*im;
      if (fabs(dre)>worst) worst=fabs(dre);
      if (fabs(dim)>worst) worst=fabs(dim);
      }

    const double rms_diff = sqrt(diff_sq/nalms);
    const double rms_ref = sqrt(ref_sq/nalms);
    printf("component %i: rms %e, maxerr %e\n",comp, rms_diff/rms_ref, worst);
    }
  }
/* Analyses map into alm and then performs niter refinement steps.
   Each step synthesises a map from the current alm, subtracts it from the
   input map and adds the analysis of that residual to alm. Timings and the
   deviation from alm_orig are printed after the initial analysis and after
   every step. */
static void map2alm_iter (sharp_geom_info *tinfo, double **map,
  dcmplx **alm_orig, dcmplx **alm, int lmax, int mmax,
  ptrdiff_t npix, ptrdiff_t nalms, int spin, int ntrans, int niter)
  {
  const int ncomp = ntrans*((spin==0) ? 1 : 2);
  double elapsed;
  unsigned long long nops;
  sharp_alm_info *ainfo;

  sharp_make_triangular_alm_info(lmax,mmax,1,&ainfo);

  /* initial analysis */
  sharp_execute(SHARP_MAP2ALM,spin,0,alm,map,tinfo,ainfo,ntrans,1,0,
    &elapsed,&nops);
  printf("wall time for map2alm: %fs\n",elapsed);
  printf("Performance: %fGFLOPs/s\n",1e-9*nops/elapsed);
  measure_errors(alm_orig,alm,nalms,ncomp);

  int step=0;
  while (step<niter)
    {
    ++step;
    double **resid;
    ALLOC2D(resid,double,ncomp,npix);
    printf ("\niteration %i:\n", step);

    /* synthesise the map belonging to the current a_lm ... */
    sharp_execute(SHARP_ALM2MAP,spin,0,alm,resid,tinfo,ainfo,ntrans,1,0,
      &elapsed,&nops);
    printf("wall time for alm2map: %fs\n",elapsed);
    printf("Performance: %fGFLOPs/s\n",1e-9*nops/elapsed);

    /* ... turn it into the residual with respect to the input map ... */
    for (int comp=0; comp<ncomp; ++comp)
      for (ptrdiff_t pix=0; pix<npix; ++pix)
        resid[comp][pix] = map[comp][pix]-resid[comp][pix];

    /* ... and add the analysed residual to the a_lm (add_output=1) */
    sharp_execute(SHARP_MAP2ALM,spin,1,alm,resid,tinfo,ainfo,ntrans,1,0,
      &elapsed,&nops);
    printf("wall time for map2alm: %fs\n",elapsed);
    printf("Performance: %fGFLOPs/s\n",1e-9*nops/elapsed);

    DEALLOC2D(resid);
    measure_errors(alm_orig,alm,nalms,ncomp);
    }

  sharp_destroy_alm_info(ainfo);
  }
/* Runs one complete accuracy test on the geometry tinfo: creates random
   a_lm (random generator seeded with 4), synthesises a map from them and
   passes the map on to map2alm_iter(), which analyses it and reports the
   errors. */
static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
  ptrdiff_t mmax, ptrdiff_t npix, int spin, int ntrans, int niter)
  {
  const ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
  const int ncomp = ntrans*((spin==0) ? 1 : 2);

  double **map;
  dcmplx **alm, **alm2;
  sharp_alm_info *ainfo;

  ALLOC2D(map,double,ncomp,npix);
  sharp_make_triangular_alm_info(lmax,mmax,1,&ainfo);

  /* reference coefficients */
  srand(4);
  ALLOC2D(alm,dcmplx,ncomp,nalms);
  for (int comp=0; comp<ncomp; ++comp)
    random_alm(alm[comp],ainfo,spin);

  /* storage for the recovered coefficients */
  ALLOC2D(alm2,dcmplx,ncomp,nalms);

  double elapsed;
  unsigned long long nops;
  printf ("\niteration 0:\n");
  sharp_execute(SHARP_ALM2MAP,spin,0,alm,map,tinfo,ainfo,ntrans,1,0,
    &elapsed,&nops);
  printf("wall time for alm2map: %fs\n",elapsed);
  printf("Performance: %fGFLOPs/s\n",1e-9*nops/elapsed);

  map2alm_iter(tinfo,map,alm,alm2,lmax,mmax,npix,nalms,spin,ntrans,niter);

  DEALLOC2D(map);
  DEALLOC2D(alm);
  DEALLOC2D(alm2);
  sharp_destroy_alm_info(ainfo);
  }
/* Test driver.
   Command line: <healpix|ecp|gauss> <lmax> <nside|nphi> <niter> <spin> <ntrans>
   Builds the requested pixelisation, runs check_accuracy() on it and prints
   the memory high water mark. An unknown grid name ends in UTIL_FAIL. */
int main(int argc, char **argv)
  {
#ifdef USE_MPI
  MPI_Init(NULL,NULL);
#endif
  sharp_module_startup("sharp_test",argc,7,
    "<healpix|ecp|gauss> <lmax> <nside|nphi> <niter> <spin> <ntrans>",1);

  const char *grid = argv[1];
  const int lmax=atoi(argv[2]);
  const int niter=atoi(argv[4]);
  const int spin=atoi(argv[5]);
  const int ntrans=atoi(argv[6]);

  printf("Testing map analysis accuracy.\n");
  printf("lmax=%d, %d iterations, spin=%d\n", lmax, niter, spin);

  sharp_geom_info *tinfo;
  if (!strcmp(grid,"gauss"))
    {
    /* Gaussian grid: lmax+1 rings */
    const int nrings=lmax+1;
    const int ppring=atoi(argv[3]);
    const ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
    printf("\nTesting Gaussian grid (%d rings, %d pixels/ring, %ld pixels)\n",
      nrings,ppring,(long)npix);
    sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
    check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
    sharp_destroy_geom_info(tinfo);
    }
  else if (!strcmp(grid,"ecp"))
    {
    /* ECP grid: 2*lmax+2 rings */
    const int nrings=2*lmax+2;
    const int ppring=atoi(argv[3]);
    const ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
    printf("\nTesting ECP grid (%d rings, %d pixels/ring, %ld pixels)\n",
      nrings,ppring,(long)npix);
    sharp_make_ecp_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
    check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
    sharp_destroy_geom_info(tinfo);
    }
  else if (!strcmp(grid,"healpix"))
    {
    /* Healpix grid: Nside values below 1 are replaced by 1 */
    int nside=atoi(argv[3]);
    nside = (nside<1) ? 1 : nside;
    const ptrdiff_t npix=12*(ptrdiff_t)nside*nside;
    printf("\nTesting Healpix grid (nside=%d, %ld pixels)\n",
      nside,(long)npix);
    sharp_make_healpix_geom_info (nside, 1, &tinfo);
    check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
    sharp_destroy_geom_info(tinfo);
    }
  else
    UTIL_FAIL("unknown grid geometry");

  printf("\nMemory high water mark: %.2f MB\n",VmHWM()/(1<<20));

#ifdef USE_MPI
  MPI_Finalize();
#endif
  return 0;
  }

359
external/sharp/libsharp/sharp_test_mpi.c vendored Normal file
View file

@ -0,0 +1,359 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_test_mpi.c
Accuracy test for libsharp's map analysis with MPI support.
This program first generates a_lm coefficients up to
a user-specified lmax (with mmax=lmax); where applicable, the
real and imaginary parts of the coefficients are uniform
random numbers of the interval [-1;1[.
Afterwards, the random a_lm are converted to a map.
This map is analyzed (optionally using an iterative scheme
with a user-supplied number of steps).
After every iteration, the code then outputs the RMS of the residual a_lm
(i.e. the difference between the current and original a_lm), divided by
the RMS of the original a_lm, as well as the maximum absolute change of any
real or imaginary part between the current and original a_lm.
This operation can be performed for several different pixelisations:
- a Gaussian with the minimal number of rings for exact analysis
and a user-defined ring resolution
- an ECP grid with the minimal number of rings for exact analysis
and a user-defined ring resolution
- a Healpix grid with a user-defined Nside parameter.
The user can specify the spin of the desired transform.
Copyright (C) 2006-2012 Max-Planck-Society
\author Martin Reinecke
*/
#ifdef USE_MPI
#include <stdio.h>
#include <string.h>
#include "sharp_mpi.h"
#include "sharp_geomhelpers.h"
#include "sharp_almhelpers.h"
#include "c_utils.h"
#include "walltime_c.h"
#include "sharp_announce.h"
#include "sharp_core.h"
typedef complex double dcmplx;
int ntasks, mytask;
/* Returns the sum of val over all tasks of MPI_COMM_WORLD (collective). */
static unsigned long long totalops (unsigned long long val)
  {
  unsigned long long total;
  MPI_Allreduce (&val, &total, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM,
    MPI_COMM_WORLD);
  return total;
  }
/* Returns the maximum of val over all tasks of MPI_COMM_WORLD (collective). */
static double maxTime (double val)
  {
  double slowest;
  MPI_Allreduce (&val, &slowest, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
  return slowest;
  }
/* Returns a pseudo-random number from the half-open interval [min;max[,
   drawn from the C library generator rand(). */
static double drand (double min, double max)
  {
  const double span = max-min;
  return min + span*rand()/(RAND_MAX+1.0);
  }
/* Returns the number of a_lm coefficients described by ainfo, counting
   lmax-m+1 coefficients for every m value it lists. */
static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
  {
  ptrdiff_t total=0;
  int mi=0;
  while (mi<ainfo->nm)
    {
    total += ainfo->lmax-ainfo->mval[mi]+1;
    ++mi;
    }
  return total;
  }
/* Returns the number of pixels described by ginfo: the pixels of the first
   ring of every pair plus those of the second ring where it has any. */
static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
  {
  ptrdiff_t total=0;
  int ip=0;
  while (ip<ginfo->npairs)
    {
    total += ginfo->pair[ip].r1.nph;
    /* a second ring with nph<=0 does not exist */
    total += (ginfo->pair[ip].r2.nph>0) ? ginfo->pair[ip].r2.nph : 0;
    ++ip;
    }
  return total;
  }
/* Restricts ainfo in place to the share of the calling task: starting at
   entry mytask, every ntasks-th m value is kept. The kept m values are moved
   to the front, their mvstart entries are recomputed for a densely packed
   local array (running offset minus m), and nm is reduced accordingly. */
static void reduce_alm_info(sharp_alm_info *ainfo)
  {
  int kept=0;
  ptrdiff_t next_ofs=0;
  int src=mytask;
  while (src<ainfo->nm)
    {
    const int m = ainfo->mval[src];
    ainfo->mval[kept]=m;
    ainfo->mvstart[kept]=next_ofs-m;
    next_ofs+=ainfo->lmax-m+1;
    src+=ntasks;
    ++kept;
    }
  ainfo->nm=kept;
  }
/* Restricts ginfo, in place, to the ring pairs owned by this task: pairs
   mytask, mytask+ntasks, ... are compacted to the front, their pixel
   offsets are recomputed for a densely packed local map, and npairs is
   updated. */
static void reduce_geom_info(sharp_geom_info *ginfo)
{
  int kept=0;
  ptrdiff_t offset=0;
  int src=mytask;
  while (src<ginfo->npairs)
  {
    ginfo->pair[kept]=ginfo->pair[src];
    ginfo->pair[kept].r1.ofs=offset;
    offset+=ginfo->pair[kept].r1.nph;
    /* r2.ofs is assigned even when the second ring is absent (nph<=0) */
    ginfo->pair[kept].r2.ofs=offset;
    if (ginfo->pair[kept].r2.nph>0)
      offset+=ginfo->pair[kept].r2.nph;
    src+=ntasks;
    ++kept;
  }
  ginfo->npairs=kept;
}
/* Fills alm with pseudo-random coefficients for every (l,m) listed in helper.
   The generator is reseeded for each m from the call counter and m only, so
   the values for a given m do not depend on which task owns that m.
   Real and imaginary parts are drawn from [-1,1); the imaginary part is 0 for
   m==0. Entries with l<spin and m<spin are set to 0. */
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin)
{
/* number of calls so far; makes successive components use different seeds */
static int cnt=0;
++cnt;
for (int mi=0;mi<helper->nm; ++mi)
{
int m=helper->mval[mi];
/* seed depends only on (call number, m) */
srand(1234567*cnt+8912*m);
for (int l=m;l<=helper->lmax; ++l)
{
if ((l<spin)&&(m<spin))
alm[sharp_alm_index(helper,l,mi)] = 0.;
else
{
double rv = drand(-1,1);
double iv = (m==0) ? 0 : drand(-1,1);
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
}
}
}
}
/* Compares the reference coefficients alm with alm2, component by
   component, across all tasks. For each of the ncomp components task 0
   prints the RMS of the difference divided by the RMS of the reference,
   and the largest absolute difference of any real or imaginary part.
   Collective: every task must call this. */
static void measure_errors (dcmplx **alm, dcmplx **alm2,
  const sharp_alm_info *ainfo, int ncomp)
{
  long nalms=get_nalms(ainfo), nalms_tot;
  MPI_Allreduce(&nalms,&nalms_tot,1,MPI_LONG,MPI_SUM,MPI_COMM_WORLD);

  for (int comp=0; comp<ncomp; ++comp)
  {
    dcmplx *ref=alm[comp], *other=alm2[comp];
    double err_sq=0, ref_sq=0, worst=0;

    /* local accumulation over the coefficients owned by this task */
    for (int mi=0; mi<ainfo->nm; ++mi)
      for (int l=ainfo->mval[mi]; l<=ainfo->lmax; ++l)
      {
        ptrdiff_t idx=sharp_alm_index(ainfo,l,mi);
        double re=creal(ref[idx]), im=cimag(ref[idx]);
        double dre=re-creal(other[idx]), dim=im-cimag(other[idx]);
        err_sq+=dre*dre+dim*dim;
        ref_sq+=re*re+im*im;
        if (fabs(dre)>worst) worst=fabs(dre);
        if (fabs(dim)>worst) worst=fabs(dim);
      }

    /* global reduction */
    double err_tot, ref_tot, worst_tot;
    MPI_Allreduce(&err_sq,&err_tot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    MPI_Allreduce(&ref_sq,&ref_tot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    MPI_Allreduce(&worst,&worst_tot,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);

    err_tot=sqrt(err_tot/nalms_tot);
    ref_tot=sqrt(ref_tot/nalms_tot);
    if (mytask==0)
      printf("component %i: rms %e, maxerr %e\n",comp, err_tot/ref_tot, worst_tot);
  }
}
/* Analyses map into alm and optionally refines the result iteratively.
   tinfo     : (task-local) pixelisation
   map       : input map, ncomp components of npix pixels each
   alm_orig  : reference coefficients used only for the error report
   alm       : output coefficients
   lmax,mmax : band limits of the triangular a_lm layout
   npix      : number of task-local pixels per component
   spin      : spin of the transform (ncomp is 2*ntrans for spin!=0)
   ntrans    : number of simultaneous transforms
   niter     : number of refinement iterations
   After the initial analysis and after every iteration the timing, the
   operation rate and the residual errors are printed by task 0.
   Collective: every task must call this. */
static void map2alm_iter (sharp_geom_info *tinfo, double **map,
  dcmplx **alm_orig, dcmplx **alm, int lmax, int mmax,
  ptrdiff_t npix, int spin, int ntrans, int niter)
{
  int ncomp = ntrans*((spin==0) ? 1 : 2);

  sharp_alm_info *alms;
  sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
  reduce_alm_info(alms);

  double jtime;
  unsigned long long jopcnt;

  sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,0,&alm[0],&map[0],
    tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
  unsigned long long opcnt=totalops(jopcnt);
  double timer=maxTime(jtime);
  if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
  if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
  measure_errors(alm_orig,alm,alms,ncomp);

  for (int iter=0; iter<niter; ++iter)
  {
    double **map2;
    ALLOC2D(map2,double,ncomp,npix);
    if (mytask==0) printf ("\niteration %i:\n", iter+1);

    /* synthesise a map from the current estimate ... */
    sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],
      tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
    opcnt=totalops(jopcnt);
    timer=maxTime(jtime);
    if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
    if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);

    /* ... form the residual map ... */
    for (int i=0; i<ncomp; ++i)
      for (ptrdiff_t m=0; m<npix; ++m)
        map2[i][m] = map[i][m]-map2[i][m];

    /* ... and analyse the residual. The fourth argument is 1 here,
       presumably requesting accumulation onto the existing alm
       (TODO confirm against sharp_execute_mpi). */
    sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],
      tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
    opcnt=totalops(jopcnt);
    timer=maxTime(jtime);
    /* timer already holds the elapsed time of the call (maximum over all
       tasks); it must be printed as is, not subtracted from wallTime(). */
    if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
    if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
    DEALLOC2D(map2);
    measure_errors(alm_orig,alm,alms,ncomp);
  }

  sharp_destroy_alm_info(alms);
}
/* Runs one accuracy test on the (task-local) geometry tinfo:
   random a_lm are generated, synthesised into a map (timing and operation
   rate are printed by task 0), and the map is then analysed again by
   map2alm_iter(), which reports the errors with respect to the original a_lm.
   lmax/mmax: band limits; npix: task-local pixel count per component;
   spin, ntrans: transform parameters; niter: number of refinement iterations.
   Collective: every task must call this. */
static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
ptrdiff_t mmax, ptrdiff_t npix, int spin, int ntrans, int niter)
{
/* spin-0 transforms have one component each, all others have two */
int ncomp = ntrans*((spin==0) ? 1 : 2);
double **map;
ALLOC2D(map,double,ncomp,npix);
double jtime;
unsigned long long jopcnt;
sharp_alm_info *alms;
ptrdiff_t nalms;
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
/* keep only the m values owned by this task */
reduce_alm_info(alms);
nalms=get_nalms(alms);
dcmplx **alm;
ALLOC2D(alm,dcmplx,ncomp,nalms);
/* note: random_alm() reseeds the generator itself for every m */
srand(4);
for (int i=0; i<ncomp; ++i)
random_alm(alm[i],alms,spin);
/* alm2 receives the coefficients recovered from the map */
dcmplx **alm2;
ALLOC2D(alm2,dcmplx,ncomp,nalms);
if (mytask==0) printf ("\niteration 0:\n");
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map[0],
tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
unsigned long long opcnt=totalops(jopcnt);
double timer=maxTime(jtime);
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
map2alm_iter(tinfo, map, alm, alm2, lmax, mmax, npix, spin, ntrans, niter);
DEALLOC2D(map);
DEALLOC2D(alm);
DEALLOC2D(alm2);
sharp_destroy_alm_info(alms);
}
/* Entry point of the MPI accuracy test.
   Command line: <healpix|ecp|gauss> <lmax> <nside|nphi> <niter> <spin> <ntrans>
   Builds the requested pixelisation, keeps only the ring pairs owned by this
   task and runs check_accuracy() on it. */
int main(int argc, char **argv)
{
MPI_Init(NULL,NULL);
MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
/* presumably validates the argument count (7) and prints the usage text on
   task 0 only - TODO confirm against sharp_module_startup() */
sharp_module_startup("sharp_test_mpi",argc,7,
"<healpix|ecp|gauss> <lmax> <nside|nphi> <niter> <spin> <ntrans>",
mytask==0);
int lmax=atoi(argv[2]);
int niter=atoi(argv[4]);
int spin=atoi(argv[5]);
int ntrans=atoi(argv[6]);
if (mytask==0)
{
printf("Testing map analysis accuracy.\n");
printf("lmax=%d, %d iterations, spin=%d\n", lmax, niter, spin);
}
sharp_geom_info *tinfo;
if (strcmp(argv[1],"gauss")==0)
{
/* Gaussian grid: lmax+1 rings, argv[3] pixels per ring */
int nrings=lmax+1;
int ppring=atoi(argv[3]);
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
if (mytask==0)
printf("\nTesting Gaussian grid (%d rings, %d pixels/ring, %ld pixels)\n",
nrings,ppring,(long)npix);
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
reduce_geom_info(tinfo);
/* from here on npix is the task-local pixel count */
npix=get_npix(tinfo);
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
sharp_destroy_geom_info(tinfo);
}
else if (strcmp(argv[1],"ecp")==0)
{
/* ECP grid: 2*lmax+2 rings, argv[3] pixels per ring */
int nrings=2*lmax+2;
int ppring=atoi(argv[3]);
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
if (mytask==0)
printf("\nTesting ECP grid (%d rings, %d pixels/ring, %ld pixels)\n",
nrings,ppring,(long)npix);
sharp_make_ecp_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
reduce_geom_info(tinfo);
/* from here on npix is the task-local pixel count */
npix=get_npix(tinfo);
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
sharp_destroy_geom_info(tinfo);
}
else if (strcmp(argv[1],"healpix")==0)
{
/* Healpix grid: argv[3] is Nside (clamped to at least 1), 12*Nside^2 pixels */
int nside=atoi(argv[3]);
if (nside<1) nside=1;
ptrdiff_t npix=12*(ptrdiff_t)nside*nside;
if (mytask==0)
printf("\nTesting Healpix grid (nside=%d, %ld pixels)\n",
nside,(long)npix);
sharp_make_healpix_geom_info (nside, 1, &tinfo);
reduce_geom_info(tinfo);
/* from here on npix is the task-local pixel count */
npix=get_npix(tinfo);
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
sharp_destroy_geom_info(tinfo);
}
else
UTIL_FAIL("unknown grid geometry");
MPI_Finalize();
return 0;
}
#else
#include "c_utils.h"
/* Fallback entry point used when the file is compiled without USE_MPI:
   reports the missing MPI support through UTIL_FAIL. */
int main(void)
{ UTIL_FAIL("MPI support not enabled."); return 1; }
#endif

View file

@ -0,0 +1,174 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/* \file sharp_vecsupport.h
* Convenience functions for vector arithmetics
*
* Copyright (C) 2012 Max-Planck-Society
* Author: Martin Reinecke
*/
#ifndef SHARP_VECSUPPORT_H
#define SHARP_VECSUPPORT_H
#include <math.h>
#include "sharp_vecutil.h"
typedef double Ts;
#if (VLEN==1)
typedef double Tv;
#define vadd(a,b) ((a)+(b))
#define vaddeq(a,b) ((a)+=(b))
#define vsub(a,b) ((a)-(b))
#define vsubeq(a,b) ((a)-=(b))
#define vmul(a,b) ((a)*(b))
#define vmuleq(a,b) ((a)*=(b))
#define vfmaeq(a,b,c) ((a)+=(b)*(c))
#define vfmseq(a,b,c) ((a)-=(b)*(c))
#define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
#define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
#define vneg(a) (-(a))
#define vload(a) (a)
#define vabs(a) fabs(a)
#define vsqrt(a) sqrt(a)
#define vlt(a,b) (((a)<(b))?1.:0.)
#define vgt(a,b) (((a)>(b))?1.:0.)
#define vge(a,b) (((a)>=(b))?1.:0.)
#define vne(a,b) (((a)!=(b))?1.:0.)
#define vand(a,b) ((((a)*(b))!=0.)?1.:0.)
#define vor(a,b) ((((a)+(b))!=0.)?1.:0.)
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
#define vanyTrue(a) ((a)!=0.)
#define vallTrue(a) ((a)!=0.)
#define vblend(m,a,b) (((m)!=0.) ? (a) : (b))
#define vzero 0.
#define vone 1.
#endif
/* SSE2 implementation (VLEN==2): Tv holds two doubles in an __m128d.
   Comparison macros return the bit masks produced by the SSE compare
   intrinsics; the *eq macros assign the result back to their first
   argument. */
#if (VLEN==2)
#include <emmintrin.h>
#if defined (__SSE3__)
#include <pmmintrin.h>
#endif
#if defined (__SSE4_1__)
#include <smmintrin.h>
#endif
typedef __m128d Tv;
#define vadd(a,b) _mm_add_pd(a,b)
#define vaddeq(a,b) a=_mm_add_pd(a,b)
#define vsub(a,b) _mm_sub_pd(a,b)
#define vsubeq(a,b) a=_mm_sub_pd(a,b)
#define vmul(a,b) _mm_mul_pd(a,b)
#define vmuleq(a,b) a=_mm_mul_pd(a,b)
/* a += b*c  and  a -= b*c */
#define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
#define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
/* a += b*c + d*e */
#define vfmaaeq(a,b,c,d,e) \
a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
/* a += b*c - d*e */
#define vfmaseq(a,b,c,d,e) \
a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
/* flip / clear the sign bit */
#define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
/* broadcast a scalar to both lanes */
#define vload(a) _mm_set1_pd(a)
#define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
#define vsqrt(a) _mm_sqrt_pd(a)
#define vlt(a,b) _mm_cmplt_pd(a,b)
#define vgt(a,b) _mm_cmpgt_pd(a,b)
#define vge(a,b) _mm_cmpge_pd(a,b)
#define vne(a,b) _mm_cmpneq_pd(a,b)
#define vand(a,b) _mm_and_pd(a,b)
#define vor(a,b) _mm_or_pd(a,b)
#define vmin(a,b) _mm_min_pd(a,b)
/* no trailing semicolon, so that vmax() can be used inside expressions */
#define vmax(a,b) _mm_max_pd(a,b)
#define vanyTrue(a) (_mm_movemask_pd(a)!=0)
#define vallTrue(a) (_mm_movemask_pd(a)==3)
/* selects a where the mask m is set, b otherwise */
#if defined(__SSE4_1__)
#define vblend(m,a,b) _mm_blendv_pd(b,a,m)
#else
static inline Tv vblend(Tv m, Tv a, Tv b)
{ return _mm_or_pd(_mm_and_pd(a,m),_mm_andnot_pd(m,b)); }
#endif
#define vzero _mm_setzero_pd()
#define vone _mm_set1_pd(1.)
#endif
/* AVX implementation (VLEN==4): Tv holds four doubles in an __m256d.
   FMA4 instructions are used for the fused operations when __FMA4__ is
   defined; otherwise they are composed from separate multiply and add. */
#if (VLEN==4)
#include <immintrin.h>
#ifdef __FMA4__
#include <x86intrin.h>
#endif
typedef __m256d Tv;
#define vadd(a,b) _mm256_add_pd(a,b)
#define vaddeq(a,b) a=_mm256_add_pd(a,b)
#define vsub(a,b) _mm256_sub_pd(a,b)
#define vsubeq(a,b) a=_mm256_sub_pd(a,b)
#define vmul(a,b) _mm256_mul_pd(a,b)
#define vmuleq(a,b) a=_mm256_mul_pd(a,b)
/* vfmaeq: a += b*c; vfmseq: a -= b*c;
   vfmaaeq: a += b*c + d*e; vfmaseq: a += b*c - d*e */
#ifdef __FMA4__
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
#else
#define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
#define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
#define vfmaaeq(a,b,c,d,e) \
a=_mm256_add_pd(a,_mm256_add_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
#define vfmaseq(a,b,c,d,e) \
a=_mm256_add_pd(a,_mm256_sub_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
#endif
#define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
/* broadcast a scalar to all four lanes */
#define vload(a) _mm256_set1_pd(a)
#define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
#define vsqrt(a) _mm256_sqrt_pd(a)
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
#define vgt(a,b) _mm256_cmp_pd(a,b,_CMP_GT_OQ)
#define vge(a,b) _mm256_cmp_pd(a,b,_CMP_GE_OQ)
/* NOTE(review): _CMP_NEQ_OQ is an ordered predicate, while the scalar (!=)
   and SSE2 (_mm_cmpneq_pd) variants of vne look like unordered comparisons;
   results may differ when an operand is NaN - confirm which is intended. */
#define vne(a,b) _mm256_cmp_pd(a,b,_CMP_NEQ_OQ)
#define vand(a,b) _mm256_and_pd(a,b)
#define vor(a,b) _mm256_or_pd(a,b)
#define vmin(a,b) _mm256_min_pd(a,b)
#define vmax(a,b) _mm256_max_pd(a,b)
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
/* selects a where the mask m is set, b otherwise */
#define vblend(m,a,b) _mm256_blendv_pd(b,a,m)
#define vzero _mm256_setzero_pd()
#define vone _mm256_set1_pd(1.)
#endif
#endif

43
external/sharp/libsharp/sharp_vecutil.h vendored Normal file
View file

@ -0,0 +1,43 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_vecutil.h
* Functionality related to vector instruction support
*
* Copyright (C) 2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef SHARP_VECUTIL_H
#define SHARP_VECUTIL_H
/* VLEN is the number of doubles per vector, chosen from the instruction set
   the compiler targets: 4 with AVX, 2 with SSE2, 1 (scalar) otherwise. */
#if (defined (__AVX__))
#define VLEN 4
#elif (defined (__SSE2__))
#define VLEN 2
#else
#define VLEN 1
#endif
#endif

230
external/sharp/libsharp/sharp_ylmgen_c.c vendored Normal file
View file

@ -0,0 +1,230 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*
* Helper code for efficient calculation of Y_lm(theta,phi=0)
*
* Copyright (C) 2005-2012 Max-Planck-Society
* Author: Martin Reinecke
*/
#include <math.h>
#include <stdlib.h>
#include "sharp_ylmgen_c.h"
#include "c_utils.h"
/* Rescales *val into the window [xfmax*sharp_fsmall, xfmax] by repeated
   multiplication with sharp_fsmall or sharp_fbig; *scale is incremented for
   every downscaling step and decremented for every upscaling step.
   A value of exactly 0 is never upscaled. */
static inline void normalize (double *val, int *scale, double xfmax)
{
  double v = *val;
  int s = *scale;

  for (; fabs(v)>xfmax; ++s)
    v *= sharp_fsmall;

  if (v!=0.)
  {
    const double lower = xfmax*sharp_fsmall;
    for (; fabs(v)<lower; --s)
      v *= sharp_fbig;
  }

  *val = v;
  *scale = s;
}
/* Initialises gen for band limits l_max/m_max and the given spin (>=0).
   Allocates and fills all lookup tables that do not depend on m; the
   m-dependent data is set up later by sharp_Ylmgen_prepare().
   Everything allocated here is released by sharp_Ylmgen_destroy(). */
void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
{
const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
gen->lmax = l_max;
gen->mmax = m_max;
UTIL_ASSERT(spin>=0,"incorrect spin");
gen->s = spin;
UTIL_ASSERT((sharp_minscale<=0)&&(sharp_maxscale>0),
"bad value for min/maxscale");
/* cf[i] is the factor belonging to scale i+sharp_minscale: 1 for scale 0,
   multiplied by sharp_fsmall per step below and by sharp_fbig per step above */
gen->cf=RALLOC(double,sharp_maxscale-sharp_minscale+1);
gen->cf[-sharp_minscale]=1.;
for (int m=-sharp_minscale-1; m>=0; --m)
gen->cf[m]=gen->cf[m+1]*sharp_fsmall;
for (int m=-sharp_minscale+1; m<(sharp_maxscale-sharp_minscale+1); ++m)
gen->cf[m]=gen->cf[m-1]*sharp_fbig;
/* no m prepared yet */
gen->m = -1;
if (spin==0)
{
/* rf is filled per m by sharp_Ylmgen_prepare() */
gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+1);
/* mfac[m] = 1/sqrt(4 pi) * prod_{k=1..m} sqrt((2k+1)/(2k)) */
gen->mfac = RALLOC(double,gen->mmax+1);
gen->mfac[0] = inv_sqrt4pi;
for (int m=1; m<=gen->mmax; ++m)
gen->mfac[m] = gen->mfac[m-1]*sqrt((2*m+1.)/(2*m));
/* root[i] = sqrt(i), iroot[i] = 1/sqrt(i) (with iroot[0] = 0) */
gen->root = RALLOC(double,2*gen->lmax+5);
gen->iroot = RALLOC(double,2*gen->lmax+5);
for (int m=0; m<2*gen->lmax+5; ++m)
{
gen->root[m] = sqrt(m);
gen->iroot[m] = (m==0) ? 0. : 1./gen->root[m];
}
}
else
{
/* sentinel values: no (m, mlo, mhi) combination has been prepared yet */
gen->m=gen->mlo=gen->mhi=-1234567890;
/* fx is filled per (m,s) by sharp_Ylmgen_prepare(); start from zeros */
ALLOC(gen->fx,sharp_ylmgen_dbl3,gen->lmax+2);
for (int m=0; m<gen->lmax+2; ++m)
gen->fx[m].f[0]=gen->fx[m].f[1]=gen->fx[m].f[2]=0.;
/* inv[i] = 1/i (with inv[0] = 0) */
ALLOC(gen->inv,double,gen->lmax+1);
gen->inv[0]=0;
for (int m=1; m<gen->lmax+1; ++m) gen->inv[m]=1./m;
/* flm1[i] = sqrt(1/(i+1)), flm2[i] = sqrt(i/(i+1)) */
ALLOC(gen->flm1,double,2*gen->lmax+1);
ALLOC(gen->flm2,double,2*gen->lmax+1);
for (int m=0; m<2*gen->lmax+1; ++m)
{
gen->flm1[m] = sqrt(1./(m+1.));
gen->flm2[m] = sqrt(m/(m+1.));
}
ALLOC(gen->prefac,double,gen->mmax+1);
ALLOC(gen->fscale,int,gen->mmax+1);
/* temporary table: fac[i]*(scale factor for facscale[i]) = sqrt(i!),
   kept in range by normalize() */
double *fac = RALLOC(double,2*gen->lmax+1);
int *facscale = RALLOC(int,2*gen->lmax+1);
fac[0]=1; facscale[0]=0;
for (int m=1; m<2*gen->lmax+1; ++m)
{
fac[m]=fac[m-1]*sqrt(m);
facscale[m]=facscale[m-1];
normalize(&fac[m],&facscale[m],sharp_fbighalf);
}
/* prefac[m]/fscale[m]: scaled value of
   fac[2*mhi]/(fac[mhi+mlo]*fac[mhi-mlo]) with mhi=max(m,s), mlo=min(m,s).
   NOTE(review): fac is indexed up to 2*mhi, which assumes
   max(spin, m_max) <= l_max - confirm that callers guarantee this. */
for (int m=0; m<=gen->mmax; ++m)
{
int mlo=gen->s, mhi=m;
if (mhi<mlo) SWAP(mhi,mlo,int);
double tfac=fac[2*mhi]/fac[mhi+mlo];
int tscale=facscale[2*mhi]-facscale[mhi+mlo];
normalize(&tfac,&tscale,sharp_fbighalf);
tfac/=fac[mhi-mlo];
tscale-=facscale[mhi-mlo];
normalize(&tfac,&tscale,sharp_fbighalf);
gen->prefac[m]=tfac;
gen->fscale[m]=tscale;
}
DEALLOC(fac);
DEALLOC(facscale);
}
}
/* Releases every table allocated by sharp_Ylmgen_init(). Which tables
   exist depends on whether the generator was created for spin 0. */
void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
{
  if (gen->s!=0)
  {
    /* tables of the spin!=0 code path */
    DEALLOC(gen->inv);
    DEALLOC(gen->flm2);
    DEALLOC(gen->flm1);
    DEALLOC(gen->fscale);
    DEALLOC(gen->prefac);
    DEALLOC(gen->fx);
  }
  else
  {
    /* tables of the scalar code path */
    DEALLOC(gen->iroot);
    DEALLOC(gen->root);
    DEALLOC(gen->mfac);
    DEALLOC(gen->rf);
  }
  /* shared by both code paths */
  DEALLOC(gen->cf);
}
/* Prepares gen for computations at the given m (>=0).
   Does nothing if m is the value already prepared. For spin 0 the table rf
   is recomputed for l=m..lmax; for spin!=0 the table fx is recomputed (only
   if the pair (min(m,s), max(m,s)) changed) and the power/sign bookkeeping
   fields are updated. */
void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
{
if (m==gen->m) return;
UTIL_ASSERT(m>=0,"incorrect m");
gen->m = m;
if (gen->s==0)
{
/* coefficients for l=m, then for l=m+1..lmax */
gen->rf[m].f[0] = gen->root[2*m+3];
gen->rf[m].f[1] = 0.;
for (int l=m+1; l<=gen->lmax; ++l)
{
double tmp=gen->root[2*l+3]*gen->iroot[l+1+m]*gen->iroot[l+1-m];
gen->rf[l].f[0] = tmp*gen->root[2*l+1];
gen->rf[l].f[1] = tmp*gen->root[l+m]*gen->root[l-m]*gen->iroot[2*l-1];
}
}
else
{
/* mlo_/mhi_: smaller/larger of m and the spin */
int mlo_=m, mhi_=gen->s;
if (mhi_<mlo_) SWAP(mhi_,mlo_,int);
/* fx depends on m and s only through products that are symmetric in the
   two, so it can be reused when (mlo,mhi) is unchanged */
int ms_similar = ((gen->mhi==mhi_) && (gen->mlo==mlo_));
gen->mlo = mlo_; gen->mhi = mhi_;
if (!ms_similar)
{
for (int l=gen->mhi; l<gen->lmax; ++l)
{
double t = gen->flm1[l+gen->m]*gen->flm1[l-gen->m]
*gen->flm1[l+gen->s]*gen->flm1[l-gen->s];
double lt = 2*l+1;
double l1 = l+1;
gen->fx[l+1].f[0]=l1*lt*t;
gen->fx[l+1].f[1]=gen->m*gen->s*gen->inv[l]*gen->inv[l+1];
t = gen->flm2[l+gen->m]*gen->flm2[l-gen->m]
*gen->flm2[l+gen->s]*gen->flm2[l-gen->s];
gen->fx[l+1].f[2]=t*l1*gen->inv[l];
}
}
/* exponents and sign flags; which formula applies depends on whether m or
   the spin is the larger of the two */
gen->preMinus_p = gen->preMinus_m = 0;
if (gen->mhi==gen->m)
{
gen->cosPow = gen->mhi+gen->s; gen->sinPow = gen->mhi-gen->s;
gen->preMinus_p = gen->preMinus_m = ((gen->mhi-gen->s)&1);
}
else
{
gen->cosPow = gen->mhi+gen->m; gen->sinPow = gen->mhi-gen->m;
gen->preMinus_m = ((gen->mhi+gen->m)&1);
}
}
}
/* Returns a newly allocated array of lmax+1 normalisation factors for the
   given spin: all ones for spin 0; otherwise 0 for l<spin and
   +-0.5*sqrt((2l+1)/(4 pi)) for l>=spin, the sign being fixed by the
   spin's sign and parity. */
double *sharp_Ylmgen_get_norm (int lmax, int spin)
{
  const double pi = 3.141592653589793238462643383279502884197;
  double *res=RALLOC(double,lmax+1);
  /* sign convention for H=1 (LensPix paper) */
#if 1
  double spinsign = (spin>0) ? -1.0 : 1.0;
#else
  double spinsign = 1.0;
#endif

  if (spin==0)
  {
    for (int l=0; l<=lmax; ++l)
      res[l]=1.;
  }
  else
  {
    if (spin&1) spinsign = -spinsign;
    const double amp = spinsign*0.5;
    const double four_pi = 4*pi;
    int l=0;
    /* multipoles below the spin carry no signal */
    for (; (l<spin)&&(l<=lmax); ++l)
      res[l] = 0.;
    for (; l<=lmax; ++l)
      res[l] = amp*sqrt((2*l+1)/four_pi);
  }
  return res;
}
/* Returns a newly allocated array of lmax+1 normalisation factors for first
   derivatives: 0 for l=0 and 0.5*sqrt(l(l+1)(2l+1)/(4 pi)) for l>=1. */
double *sharp_Ylmgen_get_d1norm (int lmax)
{
  const double pi = 3.141592653589793238462643383279502884197;
  double *res=RALLOC(double,lmax+1);
  if (lmax>=0)
    res[0] = 0.;
  for (int l=1; l<=lmax; ++l)
    res[l] = 0.5*sqrt(l*(l+1.)*(2*l+1.)/(4*pi));
  return res;
}

100
external/sharp/libsharp/sharp_ylmgen_c.h vendored Normal file
View file

@ -0,0 +1,100 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sharp_ylmgen_c.h
* Code for efficient calculation of Y_lm(phi=0,theta)
*
* Copyright (C) 2005-2012 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef SHARP_YLMGEN_C_H
#define SHARP_YLMGEN_C_H
#ifdef __cplusplus
extern "C" {
#endif
/* Range of the integer scale exponents; sharp_Ylmgen_init() allocates a table
   (cf) with sharp_maxscale-sharp_minscale+1 entries, one per scale. */
enum { sharp_minscale=0, sharp_limscale=1, sharp_maxscale=1 };
/* Scaling factors 2^800 and 2^-800 applied per scale step. */
static const double sharp_fbig=0x1p+800,sharp_fsmall=0x1p-800;
/* 2^-60 */
static const double sharp_ftol=0x1p-60;
/* 2^400; used as the upper bound passed to the rescaling helper in
   sharp_ylmgen_c.c */
static const double sharp_fbighalf=0x1p+400;
/* Small fixed-size tuples of doubles used for the per-l coefficient tables. */
typedef struct { double f[2]; } sharp_ylmgen_dbl2;
typedef struct { double f[3]; } sharp_ylmgen_dbl3;
/* State of a Y_lm helper-data generator. Set up by sharp_Ylmgen_init(),
   updated for a given m by sharp_Ylmgen_prepare() and released by
   sharp_Ylmgen_destroy(). Only the tables matching the spin (s==0 or s!=0)
   are allocated. */
typedef struct
{
/* for public use; immutable during lifetime */
/* band limits and spin as passed to sharp_Ylmgen_init() */
int lmax, mmax, s;
/* scale factor table with sharp_maxscale-sharp_minscale+1 entries */
double *cf;
/* for public use; will typically change after call to Ylmgen_prepare() */
/* m value of the last sharp_Ylmgen_prepare() call (negative before the first) */
int m;
/* used if s==0 */
/* mmax+1 entries */
double *mfac;
/* lmax+1 entries, recomputed for every new m */
sharp_ylmgen_dbl2 *rf;
/* used if s!=0 */
int sinPow, cosPow, preMinus_p, preMinus_m;
/* mmax+1 entries each: scaled prefactor and its scale exponent */
double *prefac;
int *fscale;
/* lmax+2 entries, recomputed when (mlo,mhi) changes */
sharp_ylmgen_dbl3 *fx;
/* internal usage only */
/* used if s==0 */
/* 2*lmax+5 entries each: sqrt(i) and 1/sqrt(i) (iroot[0] is 0) */
double *root, *iroot;
/* used if s!=0 */
/* flm1, flm2: 2*lmax+1 entries each; inv: lmax+1 entries holding 1/i */
double *flm1, *flm2, *inv;
/* smaller and larger of (m, s) from the last sharp_Ylmgen_prepare() call */
int mlo, mhi;
} sharp_Ylmgen_C;
/*! Creates a generator which will calculate helper data for Y_lm calculation
up to \a l=l_max and \a m=m_max for the given \a spin (which must be >=0).
The generator must be released with sharp_Ylmgen_destroy(). */
void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin);
/*! Deallocates a generator previously initialised by sharp_Ylmgen_init(). */
void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen);
/*! Prepares the object for the calculation at \a m (which must be >=0).
Calling it again with the same \a m is a no-op. */
void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m);
/*! Returns a pointer to an array with \a lmax+1 entries containing
normalisation factors that must be applied to Y_lm values computed for
\a spin. The array must be deallocated (using free()) by the user. */
double *sharp_Ylmgen_get_norm (int lmax, int spin);
/*! Returns a pointer to an array with \a lmax+1 entries containing
normalisation factors that must be applied to Y_lm values computed for
first derivatives. The array must be deallocated (using free()) by the
user. */
double *sharp_Ylmgen_get_d1norm (int lmax);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1,5 +1,9 @@
SET(tolink ${GSL_LIBRARIES} CosmoTool)
include_directories(${CMAKE_SOURCE_DIR}/src ${NETCDF_INCLUDE_PATH} ${GSL_INCLUDE_PATH})
SET(tolink ${GSL_LIBRARIES} CosmoTool ${CosmoTool_LIBS})
include_directories(${CMAKE_SOURCE_DIR}/src ${FFTW3_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIRS} ${NETCDF_INCLUDE_PATH} ${GSL_INCLUDE_PATH})
IF(SHARP_INCLUDE_PATH)
include_directories(BEFORE ${SHARP_INCLUDE_PATH})
ENDIF(SHARP_INCLUDE_PATH)
add_executable(testBQueue testBQueue.cpp)
target_link_libraries(testBQueue ${tolink})
@ -35,4 +39,19 @@ add_executable(testEskow testEskow.cpp)
target_link_libraries(testEskow ${tolink})
add_executable(testAlgo testAlgo.cpp)
target_link_libraries(testAlgo ${tolink})
target_link_libraries(testAlgo ${tolink})
add_executable(testBSP testBSP.cpp)
target_link_libraries(testBSP ${tolink})
# The Euclidean FFT sample needs both FFTW3 and Eigen3.
if(FFTW3_FOUND AND EIGEN3_FOUND)
  add_executable(test_fft_calls test_fft_calls.cpp)
  target_link_libraries(test_fft_calls ${tolink} ${FFTW3_LIBRARIES})
endif()
# The HEALPix sample needs the bundled libsharp and Eigen3.
if(SHARP_LIBRARY AND SHARP_INCLUDE_PATH AND EIGEN3_FOUND)
  include_directories(${SHARP_INCLUDE_PATH})
  add_executable(test_healpix_calls test_healpix_calls.cpp)
  target_link_libraries(test_healpix_calls ${tolink} ${SHARP_LIBRARIES})
  # Quote the flags: OpenMP_CXX_FLAGS is empty when OpenMP was not detected,
  # and an unquoted empty expansion removes the property value, which makes
  # set_target_properties() abort with an argument-count error.
  set_target_properties(test_healpix_calls PROPERTIES
    COMPILE_FLAGS "${OpenMP_CXX_FLAGS}"
    LINK_FLAGS "${OpenMP_CXX_FLAGS}")
endif()

View file

@ -1,3 +1,4 @@
#if 0
#include "bsp_simple.hpp"
int main(int argc, char **argv)
@ -10,3 +11,5 @@ int main(int argc, char **argv)
return 0;
}
#endif
int main() {}

View file

@ -50,6 +50,8 @@ int main(int argc, char **argv)
chol.cholesky(M, M.N, norm_E);
cout << "norm_E = " << norm_E << endl;
for (int i = 0; i < M.N; i++)
{
for (int j = 0; j < M.N; j++)

View file

@ -9,7 +9,7 @@ using namespace std;
int main () {
char* filename = "lss_read_hdf5_chk_0000";
const char* filename = "lss_read_hdf5_chk_0000";
SimuData* data = CosmoTool::loadFlashMulti(filename, 0, 0);

20
sample/test_fft_calls.cpp Normal file
View file

@ -0,0 +1,20 @@
#include <iostream>
#include "fourier/euclidian.hpp"
using namespace CosmoTool;
using namespace std;
// Round-trip check of the 2-D Euclidean transform: fills a 128x128 real map
// with random values and prints the map's dot product with itself before
// the transform, in Fourier space (scaled by the number of cells) and after
// transforming back.
int main()
{
  EuclidianFourierTransform_2d<double> dft(128,128,1.0,1.0);
  double volume = 128*128;

  dft.realSpace().eigen().setRandom();
  dft.analysis();

  const double map_power = dft.realSpace().dot_product(dft.realSpace());
  cout << "Map dot-product = " << map_power << endl;

  const double fourier_power =
    dft.fourierSpace().dot_product(dft.fourierSpace()).real()*volume;
  cout << "Fourier dot-product = " << fourier_power << endl;

  dft.synthesis();

  const double resynth_power = dft.realSpace().dot_product(dft.realSpace());
  cout << "Resynthesis dot-product = " << resynth_power << endl;

  return 0;
}

View file

@ -0,0 +1,20 @@
#include <iostream>
#include "fourier/healpix.hpp"
using namespace CosmoTool;
using namespace std;
// Round-trip check of the HEALPix transform: fills the real-space map with
// random values and prints the map's dot product with itself before the
// transform, in harmonic space (scaled by Npix/(4 pi)) and after
// transforming back.
int main()
{
  HealpixFourierTransform<double> dft(128,3*128,3*128, 40);
  long Npix = dft.realSpace().size();

  dft.realSpace().eigen().setRandom();
  dft.analysis();

  const double map_power = dft.realSpace().dot_product(dft.realSpace());
  cout << "Map dot-product = " << map_power << endl;

  const double fourier_power =
    dft.fourierSpace().dot_product(dft.fourierSpace()).real()*Npix/(4*M_PI);
  cout << "Fourier dot-product = " << fourier_power << endl;

  dft.synthesis();

  const double resynth_power = dft.realSpace().dot_product(dft.realSpace());
  cout << "Resynthesis dot-product = " << resynth_power << endl;

  return 0;
}

View file

@ -6,11 +6,19 @@ SET(CosmoTool_SRCS
loadRamses.cpp
octTree.cpp
powerSpectrum.cpp
yorick.cpp
miniargs.cpp
growthFactor.cpp
)
# Add the yorick backend matching the detected NetCDF C++ API; the NetCDF-3
# flavour takes precedence when both flags are set.
if(FOUND_NETCDF3)
  list(APPEND CosmoTool_SRCS yorick_nc3.cpp)
elseif(FOUND_NETCDF4)
  list(APPEND CosmoTool_SRCS yorick_nc4.cpp)
endif()
if (HDF5_FOUND)
set(CosmoTool_SRCS ${CosmoTool_SRCS}
h5_readFlash.cpp
@ -46,14 +54,16 @@ SET(CosmoTool_SRCS ${CosmoTool_SRCS}
growthFactor.hpp
)
include_directories(${GSL_INCLUDE_PATH} ${NETCDF_INCLUDE_PATH})
include_directories(${GSL_INCLUDE_PATH} ${NETCDF_INCLUDE_PATH} ${NETCDFCPP_INCLUDE_PATH} ${CMAKE_BINARY_DIR}/src)
set(CosmoTool_LIBS ${NETCDF_LIBRARY} ${NETCDFCPP_LIBRARY} ${GSL_LIBRARIES})
set(CosmoTool_LIBS ${NETCDFCPP_LIBRARY} ${NETCDF_LIBRARY} ${GSL_LIBRARIES})
# HDF5 is optional: add its libraries and headers only when it was found.
if(HDF5_FOUND)
  include_directories(${HDF5_INCLUDE_DIRS})
  list(APPEND CosmoTool_LIBS ${HDF5_CXX_LIBRARIES} ${HDF5_LIBRARIES})
endif()
set(CosmoTool_LIBS ${CosmoTool_LIBS} PARENT_SCOPE)
if (BUILD_SHARED_LIBS)
add_library(CosmoTool SHARED ${CosmoTool_SRCS})
target_link_libraries(CosmoTool ${CosmoTool_LIBS})
@ -62,7 +72,7 @@ if (BUILD_SHARED_LIBS)
add_library(CosmoTool_static STATIC ${CosmoTool_SRCS})
endif(BUILD_STATIC_LIBS)
else (BUILD_SHARED_LIBS)
add_library(CosmoTool STATIC ${CosmoTool_LIBS})
add_library(CosmoTool STATIC ${CosmoTool_SRCS})
endif (BUILD_SHARED_LIBS)
install(TARGETS CosmoTool

View file

@ -154,6 +154,8 @@ namespace CosmoTool
f.data = data;
insert(f);
}
bool inside(const typename space_t::coord_t& p) const;
};
};

View file

@ -98,7 +98,19 @@ namespace CosmoTool
*(*i) = current;
allocated.push(current);
}
}
// NOTE(review): unfinished stub - this definition cannot compile as written:
// the do-body is empty, `while()` has no condition, the trailing `current`
// expression lacks a semicolon and nothing is returned from a function
// declared to return bool. Presumably it is meant to walk the tree from
// `root` and report whether point `p` lies inside - TODO implement or remove.
template<typename T, typename PType, int N>
bool BSP<T,PType,N>::inside(const typename space_t::coord_t& p) const
{
node_t *current = root;
do
{
}
while();
current
}
};

136
src/fourier/base_types.hpp Normal file
View file

@ -0,0 +1,136 @@
#ifndef __BASE_FOURIER_TYPES_HPP
#define __BASE_FOURIER_TYPES_HPP
#include <gsl/gsl_rng.h>
#include <boost/shared_ptr.hpp>
#include <string>
#include <Eigen/Dense>
#include <complex>
#include <exception>
namespace CosmoTool
{
// Exception type for operations on maps that are not compatible.
// The base must be inherited publicly: with the class-default private
// inheritance a `catch (const std::exception&)` handler cannot catch it.
class IncompatibleMap: public virtual std::exception {};
template<typename T> class FourierMap;
// Abstract interface of a power spectrum acting on Fourier-space maps with
// coefficients of type std::complex<T>.
template<typename T>
class SpectrumFunction
{
protected:
  SpectrumFunction() {}
public:
  typedef Eigen::Array<T, 1, Eigen::Dynamic> VecType;
  typedef Eigen::Map<VecType, Eigen::Aligned> MapType;
  typedef Eigen::Map<VecType const, Eigen::Aligned> ConstMapType;
  typedef FourierMap<std::complex<T> > FourierMapType;

  // Polymorphic base: implementations may be destroyed through a pointer to
  // SpectrumFunction, which requires a virtual destructor.
  virtual ~SpectrumFunction() {}

  // Returns a newly created random Fourier map, drawn with `rng`, that has
  // the same layout as `like_map`.
  virtual boost::shared_ptr<FourierMapType>
  newRandomFourier(gsl_rng *rng, const FourierMapType& like_map) const = 0;

  // Applies the spectrum to `m` in place.
  virtual void mul(FourierMap<std::complex<T> >& m) const = 0;
};
// Abstract base of all maps (real-space or Fourier-space) with elements of
// type T. Concrete classes provide the storage (data()/size()), the dot
// product and mimick(); element-wise arithmetic is implemented here on top
// of an Eigen view of the storage.
template<typename T>
class FourierMap
{
protected:
  FourierMap() {}
public:
  typedef Eigen::Array<T, 1, Eigen::Dynamic> VecType;
  typedef Eigen::Map<VecType, Eigen::Aligned> MapType;
  typedef Eigen::Map<VecType const, Eigen::Aligned> ConstMapType;

  virtual ~FourierMap() {}

  // Raw access to the size() contiguous elements of the map.
  virtual const T* data() const = 0;
  virtual T* data() = 0;
  virtual long size() const = 0;

  // Eigen views of the storage; no data is copied. The views are declared
  // Eigen::Aligned, so data() must return suitably aligned memory.
  MapType eigen()
  {
    return MapType(data(), size());
  }

  ConstMapType eigen() const
  {
    return ConstMapType(data(), size());
  }

  // Replaces every element by its square root.
  void sqrt()
  {
    MapType m = eigen();
    m = m.sqrt();
  }

  // Multiplies every element by `factor`.
  void scale(const T& factor)
  {
    MapType m(data(), size());
    m *= factor;
  }

  // Multiplies element-wise by `map2`, which must have the same size.
  void scale(const FourierMap<T> *map2)
  {
    assert(size() == map2->size());
    // map2 is const, so it must be viewed through the const overload of
    // eigen(); a mutable MapType cannot be built from a const T*.
    eigen() *= map2->eigen();
  }

  // Adds `factor` to every element.
  void add(const T& factor)
  {
    eigen() += factor;
  }

  // Adds `map2` element-wise; it must have the same size.
  void add(const FourierMap<T> *map2)
  {
    assert(size() == map2->size());
    eigen() += map2->eigen();
  }

  // Returns a newly allocated map of the same kind holding a copy of the
  // data; the caller owns the result.
  virtual FourierMap<T> *copy() const
  {
    FourierMap<T> *m = this->mimick();
    m->eigen() = this->eigen();
    return m;
  }

  // Dot product with `second`; implementations throw std::bad_cast when
  // `second` is not compatible with this map.
  virtual T dot_product(const FourierMap<T>& second) const
    throw(std::bad_cast) = 0;

  // Returns a newly allocated map with the same layout; the contents are
  // left to the implementation. The caller owns the result.
  virtual FourierMap<T> *mimick() const = 0;
};
// Abstract interface of a transform between a real-space map (elements of
// type T) and its Fourier-space counterpart (elements std::complex<T>).
// The object exposes both maps; the transform methods take no arguments and
// return nothing, so they presumably operate between these two maps.
template<typename T>
class FourierTransform
{
protected:
FourierTransform() {}
public:
virtual ~FourierTransform() { }
// access to the Fourier-space map
virtual const FourierMap<std::complex<T> >& fourierSpace() const = 0;
virtual FourierMap<std::complex<T> >& fourierSpace() = 0;
// access to the real-space map
virtual const FourierMap<T>& realSpace() const = 0;
virtual FourierMap<T>& realSpace() = 0;
// returns a new transform object; presumably with the same configuration
// and owned by the caller - TODO confirm in the implementations
virtual FourierTransform<T> *mimick() const = 0;
// the sample programs call analysis() after filling realSpace() and
// synthesis() to get back to real space
virtual void analysis() = 0;
virtual void synthesis() = 0;
// NOTE(review): presumably the adjoint variants of the two operations above -
// confirm against the implementations
virtual void analysis_conjugate() = 0;
virtual void synthesis_conjugate() = 0;
};
};
#endif

407
src/fourier/euclidian.hpp Normal file
View file

@ -0,0 +1,407 @@
#ifndef __COSMOTOOL_FOURIER_EUCLIDIAN_HPP
#define __COSMOTOOL_FOURIER_EUCLIDIAN_HPP
#include <boost/function.hpp>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <gsl/gsl_randist.h>
#include "base_types.hpp"
#include "fft/fftw_calls.hpp"
#include "../algo.hpp"
namespace CosmoTool
{
// Spectrum on Euclidean grids described by a user-supplied function of one
// argument of type T returning T (presumably P(k) as a function of the mode
// norm - TODO confirm in the implementation of newRandomFourier()/mul()).
template<typename T>
class EuclidianSpectrum_1D: public SpectrumFunction<T>
{
public:
typedef boost::function1<T, T> Function;
protected:
// the wrapped function
Function f;
public:
typedef typename SpectrumFunction<T>::FourierMapType FourierMapType;
typedef boost::shared_ptr<FourierMapType> ptr_map;
// Stores a copy of P.
EuclidianSpectrum_1D(Function P)
: f(P)
{
}
// Implementations of the SpectrumFunction interface (defined elsewhere).
ptr_map newRandomFourier(gsl_rng *rng, const FourierMapType& like_map) const;
void mul(FourierMap<std::complex<T> >& m) const;
};
// Storage shared by the real and complex Euclidean maps: a reference-counted
// buffer plus the grid dimensions. size() is the product of all dimensions.
template<typename T>
class EuclidianFourierMapBase: public FourierMap<T>
{
public:
  typedef std::vector<int> DimArray;

private:
  boost::shared_ptr<T> m_data;
  DimArray m_dims;
  long m_size;

public:
  // Wraps `indata`, which must hold at least prod(indims) elements.
  EuclidianFourierMapBase(boost::shared_ptr<T> indata, const DimArray& indims)
    : m_data(indata), m_dims(indims), m_size(1)
  {
    for (DimArray::const_iterator it = m_dims.begin(); it != m_dims.end(); ++it)
      m_size *= *it;
  }

  virtual ~EuclidianFourierMapBase()
  {
  }

  const DimArray& getDims() const { return m_dims; }

  virtual const T *data() const { return m_data.get(); }
  virtual T *data() { return m_data.get(); }
  virtual long size() const { return m_size; }

  // Returns a newly allocated map of the same kind with a copy of the data.
  virtual FourierMap<T> *copy() const
  {
    FourierMap<T> *duplicate = this->mimick();
    duplicate->eigen() = this->eigen();
    return duplicate;
  }
};
// Real-space map on a Euclidean grid.
template<typename T>
class EuclidianFourierMapReal: public EuclidianFourierMapBase<T>
{
public:
  typedef typename EuclidianFourierMapBase<T>::DimArray DimArray;

  EuclidianFourierMapReal(boost::shared_ptr<T> indata, const DimArray& indims)
    : EuclidianFourierMapBase<T>(indata, indims)
  {}

  // Returns a new map with the same dimensions backed by a fresh,
  // uninitialised buffer from fftw_malloc (released with fftw_free).
  virtual FourierMap<T> *mimick() const
  {
    T *raw = (T *)fftw_malloc(sizeof(T)*this->size());
    boost::shared_ptr<T> storage(raw, std::ptr_fun(fftw_free));
    return new EuclidianFourierMapReal<T>(storage, this->getDims());
  }

  // Sum over all elements of the element-wise product with `other`.
  // Throws std::bad_cast if `other` is not an EuclidianFourierMapReal<T>
  // or has a different size.
  virtual T dot_product(const FourierMap<T>& other) const
    throw(std::bad_cast)
  {
    const EuclidianFourierMapReal<T>& rhs =
      dynamic_cast<const EuclidianFourierMapReal<T>&>(other);
    if (this->size() == rhs.size())
      return (this->eigen()*rhs.eigen()).sum();
    throw std::bad_cast();
  }
};
/**
 * Complex-valued (half-complex) map on a Euclidian grid.
 *
 * The first dimension is the fastest-varying one in memory (linear index
 * = q0 + dims[0]*p, see dot_product()). `delta_k` holds the mode spacing
 * along each dimension, and `plane_size` is the product of all extents but
 * the first.
 */
template<typename T>
class EuclidianFourierMapComplex: public EuclidianFourierMapBase<std::complex<T> >
{
protected:
  typedef boost::shared_ptr<std::complex<T> > ptr_t;
  std::vector<double> delta_k;
  long plane_size;
public:
  typedef typename EuclidianFourierMapBase<std::complex<T> >::DimArray DimArray;

  // Wrap `indata` (shape `indims`) with mode spacing `dk`, one value per
  // dimension.
  EuclidianFourierMapComplex(ptr_t indata,
                             const DimArray& indims,
                             const std::vector<double>& dk)
    : EuclidianFourierMapBase<std::complex<T> >(indata, indims), delta_k(dk)
  {
    assert(dk.size() == indims.size());
    plane_size = 1;
    for (std::size_t q = 1; q < indims.size(); q++)
      plane_size *= indims[q];
  }

  // Allocate a new map with the same shape and mode spacing. The buffer
  // comes from fftw_malloc and is left uninitialized.
  virtual FourierMap<std::complex<T> > *mimick() const
  {
    return
      new EuclidianFourierMapComplex<T>(
        ptr_t((std::complex<T> *)
              fftw_malloc(sizeof(std::complex<T>)*this->size()),
              std::ptr_fun(fftw_free)),
        this->getDims(),
        this->delta_k);
  }

  // Modulus of the wavevector for the multi-index `ik` (one integer per
  // dimension). Indices above dims[q]/2 are wrapped to negative
  // frequencies.
  template<typename Array>
  double get_K(const Array& ik) const
  {
    const DimArray& dims = this->getDims();
    assert(ik.size() == dims.size());
    double k2 = 0;
    for (std::size_t q = 0; q < ik.size(); q++)
      {
        // Fixed: the original read `int dk = ik;`, i.e. it tried to assign
        // the whole index array instead of its q-th component.
        int dk = ik[q];

        if (dk > dims[q]/2)
          dk = dk - dims[q];

        k2 += CosmoTool::square(delta_k[q]*dk);
      }
    return std::sqrt(k2);
  }

  // Modulus of the wavevector for the linear element index `p`. The index
  // is unravelled with the first dimension varying fastest.
  double get_K(long p) const
  {
    const DimArray& dims = this->getDims();
    DimArray d(delta_k.size());
    for (std::size_t q = 0; q < d.size(); q++)
      {
        d[q] = p%dims[q];
        p = (p-d[q])/dims[q];
      }
    return get_K(d);
  }

  // Hermitian dot product of two half-complex maps. Elements with first
  // index strictly between 0 and dims[0]-1 are counted twice (real part
  // only); the two boundary planes are counted once.
  // NOTE(review): counting the last plane once presumably assumes an even
  // number of real-space samples along the first dimension — confirm.
  // Throws std::bad_cast when `other` has a different dynamic type or size.
  virtual std::complex<T> dot_product(const FourierMap<std::complex<T> >& other) const
    throw(std::bad_cast)
  {
    const EuclidianFourierMapComplex<T>& m2 = dynamic_cast<const EuclidianFourierMapComplex<T>&>(other);
    if (this->size() != m2.size())
      throw std::bad_cast();

    const std::complex<T> *d1 = this->data();
    const std::complex<T> *d2 = m2.data();
    const DimArray& dims = this->getDims();
    int N0 = dims[0];
    std::complex<T> result = 0;

    // Interior of the first dimension: each mode stands for itself and its
    // conjugate partner.
    for (long q0 = 1; q0 < N0-1; q0++)
      {
        for (long p = 0; p < plane_size; p++)
          {
            long idx = q0+N0*p;
            assert(idx < this->size());
            result += 2*(conj(d1[idx]) * d2[idx]).real();
          }
      }

    // First and last element of every line along the first dimension.
    for (long p = 0; p < plane_size; p++)
      {
        long q0 = N0*p, q1 = (p+1)*N0-1;
        result += conj(d1[q0]) * d2[q0];
        result += conj(d1[q1]) * d2[q1];
      }
    return result;
  }
};
/**
 * Real <-> half-complex FFT on a regular Euclidian grid, backed by FFTW.
 *
 * `dims` gives the number of real-space samples per dimension and `L` the
 * physical size of the box along each dimension. In this class dims[0] is
 * the fastest-varying dimension and the one that is halved (dims[0]/2+1)
 * in Fourier space.
 *
 * NOTE(review): the object owns two raw map pointers and two FFTW plans;
 * copying an instance would release them twice.
 */
template<typename T>
class EuclidianFourierTransform: public FourierTransform<T>
{
public:
  typedef typename EuclidianFourierMapBase<T>::DimArray DimArray;
private:
  typedef FFTW_Calls<T> calls;
  EuclidianFourierMapReal<T> *realMap;
  EuclidianFourierMapComplex<T> *fourierMap;
  typename calls::plan_type m_analysis, m_synthesis;
  double volume;
  long N;
  DimArray m_dims, m_dims_hc;
  std::vector<double> m_L;
public:
  EuclidianFourierTransform(const DimArray& dims, const std::vector<double>& L)
  {
    assert(!dims.empty());
    assert(L.size() == dims.size());
    std::vector<double> dk(L.size());

    // FFTW describes arrays in row-major order: the LAST entry of its
    // dimension array is the fastest-varying one and the one halved by the
    // r2c transform. This class uses the opposite convention (dims[0] is
    // fastest and halved), so the extents are reversed for the planner.
    // The original passed `dims` unchanged, which is only equivalent when
    // all extents are equal.
    DimArray fftw_dims(dims.rbegin(), dims.rend());

    m_dims = dims;
    m_dims_hc = dims;
    m_dims_hc[0] = dims[0]/2+1;
    m_L = L;

    N = 1;
    volume = 1;
    for (std::size_t i = 0; i < dims.size(); i++)
      {
        N *= dims[i];
        volume *= L[i];
        dk[i] = 2*M_PI/L[i];
      }

    realMap = new EuclidianFourierMapReal<T>(
                    boost::shared_ptr<T>(calls::alloc_real(N),
                                         std::ptr_fun(calls::free)),
                    m_dims);
    // N complex elements are allocated, which is more than the
    // half-complex layout strictly needs.
    fourierMap = new EuclidianFourierMapComplex<T>(
                    boost::shared_ptr<std::complex<T> >((std::complex<T>*)calls::alloc_complex(N),
                                                        std::ptr_fun(calls::free)),
                    m_dims_hc, dk);

    m_analysis = calls::plan_dft_r2c(fftw_dims.size(), &fftw_dims[0],
                                     realMap->data(), (typename calls::complex_type *)fourierMap->data(),
                                     FFTW_MEASURE);
    m_synthesis = calls::plan_dft_c2r(fftw_dims.size(), &fftw_dims[0],
                                      (typename calls::complex_type *)fourierMap->data(), realMap->data(),
                                      FFTW_MEASURE);
  }

  virtual ~EuclidianFourierTransform()
  {
    delete realMap;
    delete fourierMap;
    calls::destroy_plan(m_synthesis);
    calls::destroy_plan(m_analysis);
  }

  // Fourier -> real space, then scale the real map by 1/volume.
  void synthesis()
  {
    calls::execute(m_synthesis);
    realMap->scale(1/volume);
  }

  // Real -> Fourier space, then scale the Fourier map by volume/N.
  void analysis()
  {
    calls::execute(m_analysis);
    fourierMap->scale(volume/N);
  }

  // Runs the real -> Fourier plan, scaled by 1/volume.
  void synthesis_conjugate()
  {
    calls::execute(m_analysis);
    fourierMap->scale(1/volume);
  }

  // Runs the Fourier -> real plan, scaled by volume/N.
  void analysis_conjugate()
  {
    calls::execute(m_synthesis);
    realMap->scale(volume/N);
  }

  const FourierMap<std::complex<T> >& fourierSpace() const
  {
    return *fourierMap;
  }

  FourierMap<std::complex<T> >& fourierSpace()
  {
    return *fourierMap;
  }

  const FourierMap<T>& realSpace() const
  {
    return *realMap;
  }

  FourierMap<T>& realSpace()
  {
    return *realMap;
  }

  // New, independent transform with the same grid shape and box size.
  FourierTransform<T> *mimick() const
  {
    return new EuclidianFourierTransform(m_dims, m_L);
  }
};
/**
 * Convenience wrapper: two-dimensional Euclidian transform built from
 * scalar extents (Nx, Ny) and box sizes (Lx, Ly).
 */
template<typename T>
class EuclidianFourierTransform_2d: public EuclidianFourierTransform<T>
{
private:
  // Pack two values into a vector, in the order given.
  template<typename T2>
  static std::vector<T2> make_2d_vector(T2 a, T2 b)
  {
    std::vector<T2> packed(2);

    packed[0] = a;
    packed[1] = b;
    return packed;
  }
public:
  EuclidianFourierTransform_2d(int Nx, int Ny, double Lx, double Ly)
    : EuclidianFourierTransform<T>(make_2d_vector<int>(Nx, Ny),
                                   make_2d_vector<double>(Lx, Ly))
  {
  }

  virtual ~EuclidianFourierTransform_2d() {}
};
/**
 * Convenience wrapper: three-dimensional Euclidian transform built from
 * scalar extents (Nx, Ny, Nz) and box sizes (Lx, Ly, Lz).
 */
template<typename T>
class EuclidianFourierTransform_3d: public EuclidianFourierTransform<T>
{
private:
  // Pack three values into a vector, in the order given.
  // Fixed: the original declared a two-element array, initialized it with
  // three values and read up to &arr[3].
  template<typename T2>
  static std::vector<T2> make_3d_vector(T2 a, T2 b, T2 c)
  {
    T2 arr[3] = { a, b, c };
    return std::vector<T2>(&arr[0], &arr[0] + 3);
  }

public:
  EuclidianFourierTransform_3d(int Nx, int Ny, int Nz, double Lx, double Ly, double Lz)
    : EuclidianFourierTransform<T>(make_3d_vector<int>(Nx, Ny, Nz), make_3d_vector<double>(Lx, Ly, Lz))
  {
  }

  virtual ~EuclidianFourierTransform_3d() {}
};
/**
 * Draw a random half-complex map with the same layout as `like_map`.
 *
 * Every element p >= 1 gets independent Gaussian real and imaginary parts
 * with standard deviation sqrt(0.5*f(K)), K being get_K(p). Element 0 is
 * set to sqrt(f(0)). The last element along the first dimension is then
 * adjusted (made real, or conjugate-paired across the plane).
 *
 * Throws std::bad_cast when `like_map` is not an
 * EuclidianFourierMapComplex<T>.
 *
 * Fixes over the original: the cast kept `like_map` const-correct, the map
 * returned by mimick() is down-cast explicitly and owned by the returned
 * shared pointer from the start, every path returns the map, and the final
 * fix-up targets index dims[0]-1 instead of dims[0].
 */
template<typename T>
typename EuclidianSpectrum_1D<T>::ptr_map
EuclidianSpectrum_1D<T>::newRandomFourier(gsl_rng *rng, const FourierMapType& like_map) const
{
  typedef EuclidianFourierMapComplex<T> MapT;
  typedef typename MapT::DimArray DimArray;

  const MapT& m_c = dynamic_cast<const MapT&>(like_map);
  // Hand ownership to the shared pointer immediately so that nothing leaks
  // if a later step throws.
  ptr_map ret_map(m_c.mimick());
  MapT& rand_map = dynamic_cast<MapT&>(*ret_map);

  std::complex<T> *d = rand_map.data();
  const DimArray& dims = rand_map.getDims();

  for (long p = 1; p < m_c.size(); p++)
    {
      double A_k = std::sqrt(0.5*f(rand_map.get_K(p)));
      // Two separate statements: the draw order (real part first) does not
      // depend on the compiler's argument evaluation order.
      const T re = gsl_ran_gaussian(rng, A_k);
      const T im = gsl_ran_gaussian(rng, A_k);

      d[p] = std::complex<T>(re, im);
    }
  // Generate the mean value
  d[0] = std::complex<T>(std::sqrt(f(0)), 0);

  // Correct the Nyquist plane
  const long idx = dims[0]-1; // Stick to the last element of the first dimension

  // 1D is special case
  if (dims.size() == 1)
    {
      d[idx] = std::complex<T>(d[idx].real() + d[idx].imag(), 0);
      return ret_map;
    }

  long plane_size = 1;
  for (std::size_t q = 1; q < dims.size(); q++)
    {
      plane_size *= dims[q];
    }

  // Pair each element of the last plane with its mirror.
  // NOTE(review): mirroring with the flattened index (p <-> plane_size-p)
  // presumably only matches the true conjugate partner for two-dimensional
  // maps — confirm for higher dimensions.
  for (long p = 1; p < plane_size/2; p++)
    {
      long q = (p+1)*dims[0]-1;
      long q2 = (plane_size-p+1)*dims[0]-1;
      assert(q < plane_size*dims[0]);
      assert(q2 < plane_size*dims[0]);
      d[q] = conj(d[q2]);
    }

  // The p = 0 element of the last plane is its own mirror and must be real.
  // The original wrote to index dims[0], which is the first element of the
  // next line rather than the last element of this one.
  d[idx] = std::complex<T>(d[idx].real() + d[idx].imag(), 0);

  return ret_map;
}
/**
 * Multiply every element of `m`, in place, by the spectrum function
 * evaluated at that element's wavenumber modulus.
 *
 * Throws std::bad_cast when `m` is not an EuclidianFourierMapComplex<T>.
 */
template<typename T>
void EuclidianSpectrum_1D<T>::mul(FourierMap<std::complex<T> >& m) const
{
  EuclidianFourierMapComplex<T>& m_c = dynamic_cast<EuclidianFourierMapComplex<T>&>(m);
  std::complex<T> *d = m_c.data();
  const long num_modes = m_c.size();

  // Fixed: get_K is a member of the Euclidian map, not of the generic
  // FourierMap interface, so it has to be called on the cast reference.
  for (long p = 0; p < num_modes; p++)
    d[p] *= f(m_c.get_K(p));
}
};
#endif

View file

@ -0,0 +1,75 @@
#ifndef __FFTW_UNIFIED_CALLS_HPP
#define __FFTW_UNIFIED_CALLS_HPP
#include <fftw3.h>
namespace CosmoTool
{
// Import FFTW wisdom: first the system-wide wisdom, then the file
// "fft_wisdom" (relative path). Both results are ignored, so missing wisdom
// is not an error. Only the double-precision (fftw_) interface is used.
static inline void init_fftw_wisdom()
{
  const char *local_wisdom = "fft_wisdom";

  (void)fftw_import_system_wisdom();
  (void)fftw_import_wisdom_from_filename(local_wisdom);
}
// Export the accumulated double-precision FFTW wisdom to the file
// "fft_wisdom" (relative path). The result of the export is ignored.
static inline void save_fftw_wisdom()
{
  const char *local_wisdom = "fft_wisdom";

  (void)fftw_export_wisdom_to_filename(local_wisdom);
}
template<typename T> class FFTW_Calls {};
#define FFTW_CALLS_BASE(rtype, prefix) \
template<> \
class FFTW_Calls<rtype> { \
public: \
typedef rtype real_type; \
typedef prefix ## _complex complex_type; \
typedef prefix ## _plan plan_type; \
\
static complex_type *alloc_complex(int N) { return prefix ## _alloc_complex(N); } \
static real_type *alloc_real(int N) { return prefix ## _alloc_real(N); } \
static void free(void *p) { fftw_free(p); } \
\
static void execute(plan_type p) { prefix ## _execute(p); } \
static plan_type plan_dft_r2c_2d(int Nx, int Ny, \
real_type *in, complex_type *out, \
unsigned flags) \
{ \
return prefix ## _plan_dft_r2c_2d(Nx, Ny, in, out, \
flags); \
} \
static plan_type plan_dft_c2r_2d(int Nx, int Ny, \
complex_type *in, real_type *out, \
unsigned flags) \
{ \
return prefix ## _plan_dft_c2r_2d(Nx, Ny, in, out, \
flags); \
} \
static plan_type plan_dft_r2c_3d(int Nx, int Ny, int Nz, \
real_type *in, complex_type *out, \
unsigned flags) \
{ \
return prefix ## _plan_dft_r2c_3d(Nx, Ny, Nz, in, out, flags); \
} \
static plan_type plan_dft_r2c(int rank, const int *n, real_type *in, \
complex_type *out, unsigned flags) \
{ \
return prefix ## _plan_dft_r2c(rank, n, in, out, flags); \
} \
static plan_type plan_dft_c2r(int rank, const int *n, complex_type *in, \
real_type *out, unsigned flags) \
{ \
return prefix ## _plan_dft_c2r(rank, n, in, out, flags); \
} \
static void destroy_plan(plan_type plan) { prefix ## _destroy_plan(plan); } \
}
FFTW_CALLS_BASE(double, fftw);
FFTW_CALLS_BASE(float, fftwf);
};
#endif

268
src/fourier/healpix.hpp Normal file
View file

@ -0,0 +1,268 @@
#ifndef __COSMOTOOL_FOURIER_HEALPIX_HPP
#define __COSMOTOOL_FOURIER_HEALPIX_HPP
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <cmath>
#include <vector>
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <exception>
#include "base_types.hpp"
#include <sharp_lowlevel.h>
#include <sharp_geomhelpers.h>
#include <sharp_almhelpers.h>
namespace CosmoTool
{
/**
 * Spectrum stored as one coefficient per multipole, for l = 0..Lmax.
 */
template<typename T>
class HealpixSpectrum: public SpectrumFunction<T>
{
public:
  typedef typename SpectrumFunction<T>::FourierMapType FourierMapType;
  typedef boost::shared_ptr<FourierMapType> ptr_map;

protected:
  // cls[l] is the coefficient of multipole l; the vector holds Lmax+1 values.
  std::vector<T> cls;

public:
  // Allocate Lmax+1 value-initialized coefficients.
  HealpixSpectrum(long Lmax)
    : cls(Lmax+1) {}

  // Direct access to the coefficient array and its length.
  T *data() { return &cls[0]; }
  const T *data() const { return &cls[0]; }
  long size() const { return cls.size(); }

  // Declared pure here although a definition is provided out of line.
  // NOTE(review): this keeps the class abstract — confirm it is intended.
  ptr_map newRandomFourier(gsl_rng *rng, const FourierMapType& like_map) const = 0;
};
/**
 * Real-space map with 12*Nside*Nside pixels, stored in a buffer obtained
 * from Eigen::aligned_allocator.
 *
 * NOTE(review): the buffer is a raw owning pointer and no copy
 * constructor/assignment is defined; copying an instance would release the
 * buffer twice.
 */
template<typename T>
class HealpixFourierMap: public FourierMap<T>
{
private:
  T *m_data;
  long Npix, m_Nside;
  Eigen::aligned_allocator<T> alloc;

public:
  // Allocate an uninitialized map for the given Nside.
  HealpixFourierMap(long nSide)
    : Npix(12*nSide*nSide), m_Nside(nSide)
  {
    m_data = alloc.allocate(Npix);
  }

  virtual ~HealpixFourierMap()
  {
    alloc.deallocate(m_data, Npix);
  }

  long Nside() const { return m_Nside; }

  virtual const T* data() const { return m_data; }
  virtual T *data() { return m_data; }
  virtual long size() const { return Npix; }

  // Sum over pixels of the product of the two maps.
  // Throws std::bad_cast when `other` is not a HealpixFourierMap<T> or has
  // a different number of pixels.
  virtual T dot_product(const FourierMap<T>& other) const
    throw(std::bad_cast)
  {
    typedef typename FourierMap<T>::MapType MapType;
    typedef HealpixFourierMap<T> Self;

    const Self& rhs = dynamic_cast<const Self&>(other);
    if (rhs.size() != Npix)
      throw std::bad_cast();

    MapType lhs_view(m_data, Npix);
    MapType rhs_view(rhs.m_data, rhs.Npix);

    return (lhs_view*rhs_view).sum();
  }

  // New uninitialized map with the same Nside.
  virtual FourierMap<T> *mimick() const
  {
    return new HealpixFourierMap<T>(m_Nside);
  }
};
/**
 * Harmonic coefficients a_lm for l <= Lmax and m <= Mmax, stored m-major:
 * the coefficient (l, m) lives at index_l0(m) + l, so the first Lmax+1
 * entries are the m = 0 coefficients.
 *
 * NOTE(review): the buffer is a raw owning pointer and no copy
 * constructor/assignment is defined; copying an instance would release the
 * buffer twice.
 */
template<typename T>
class HealpixFourierALM: public FourierMap<std::complex<T> >
{
private:
  std::complex<T> *alms;
  long m_size;
  long Lmax_, Mmax_, TVal_;
  Eigen::aligned_allocator<std::complex<T> > alloc;

public:
  typedef unsigned long LType;

  LType Lmax() const { return Lmax_; }
  LType Mmax() const { return Mmax_; }

  // Number of stored coefficients: a triangle for l <= Mmax plus a
  // rectangle for Mmax < l <= Lmax.
  LType Num_Alms() const
  {
    const long triangle = ((Mmax_+1)*(Mmax_+2))/2;
    const long rectangle = (Mmax_+1)*(Lmax_-Mmax_);

    return triangle + rectangle;
  }

  // Offset such that index_l0(m) + l is the position of (l, m).
  LType index_l0(LType m) const
  {
    return ((m*(TVal_-m))/2);
  }

  // Position of coefficient (l, m) in the buffer.
  LType index(LType l, LType m) const
  {
    return index_l0(m) + l;
  }

  // Allocate an uninitialized coefficient set. TVal_ caches 2*lmax+1.
  HealpixFourierALM(LType lmax, LType mmax)
    : Lmax_(lmax), Mmax_(mmax), TVal_(2*lmax+1)
  {
    m_size = Num_Alms();
    alms = alloc.allocate(m_size);
  }

  virtual ~HealpixFourierALM()
  {
    alloc.deallocate(alms, m_size);
  }

  virtual const std::complex<T>* data() const { return alms; }
  virtual std::complex<T> * data() { return alms;}
  virtual long size() const { return m_size; }

  // New uninitialized coefficient set with the same Lmax and Mmax.
  virtual FourierMap<std::complex<T> > *mimick() const
  {
    return new HealpixFourierALM<T>(Lmax_, Mmax_);
  }

  // Hermitian dot product: the first Lmax+1 entries (m = 0) are counted
  // once, all remaining entries twice.
  // Throws std::bad_cast when `other` is not a HealpixFourierALM<T> or does
  // not hold the same number of coefficients.
  virtual std::complex<T> dot_product(const FourierMap<std::complex<T> >& other) const
    throw(std::bad_cast)
  {
    typedef typename FourierMap<std::complex<T> >::MapType MapType;
    typedef HealpixFourierALM<T> Self;

    const Self& rhs = dynamic_cast<const Self&>(other);
    if (rhs.m_size != m_size)
      throw std::bad_cast();

    MapType lhs_view(alms, m_size);
    MapType rhs_view(rhs.alms, rhs.m_size);

    const long m0_count = Lmax_+1;
    const long rest_count = m_size-1-Lmax_;

    std::complex<T> total =
      (lhs_view.block(0,0,1,m0_count).conjugate() * rhs_view.block(0,0,1,m0_count)).sum();
    total += std::complex<T>(2,0)*(lhs_view.block(0,m0_count,1,rest_count).conjugate() * rhs_view.block(0,m0_count,1,rest_count)).sum();
    return total;
  }
};
// Compile-time selector whose `val` is passed to sharp_execute by
// HealpixFourierTransform: 1 for double, 0 for float. The primary template
// is empty, so any other element type fails to compile where `val` is used.
template<typename T> struct HealpixJobHelper__ {};
template<> struct HealpixJobHelper__<double>
{ enum {val=1}; };
template<> struct HealpixJobHelper__<float>
{ enum {val=0}; };
/**
 * Map <-> a_lm transform on a HEALPix grid, backed by libsharp.
 *
 * `iterate` is the number of refinement passes analysis() runs after the
 * first map -> a_lm transform.
 *
 * NOTE(review): the object owns the two libsharp info handles and no copy
 * constructor/assignment is defined; copying an instance would destroy
 * them twice.
 */
template<typename T>
class HealpixFourierTransform: public FourierTransform<T>
{
private:
  sharp_alm_info *ainfo;
  sharp_geom_info *ginfo;
  HealpixFourierMap<T> realMap;
  HealpixFourierALM<T> fourierMap;
  int m_iterate;
public:
  // The initializer list follows the declaration order of the members,
  // which is the order in which they are actually constructed.
  HealpixFourierTransform(long nSide, long Lmax, long Mmax, int iterate = 0)
    : ainfo(0), ginfo(0), realMap(nSide), fourierMap(Lmax, Mmax), m_iterate(iterate)
  {
    sharp_make_healpix_geom_info (nSide, 1, &ginfo);
    sharp_make_triangular_alm_info (Lmax, Mmax, 1, &ainfo);
  }

  virtual ~HealpixFourierTransform()
  {
    sharp_destroy_geom_info(ginfo);
    sharp_destroy_alm_info(ainfo);
  }

  virtual const FourierMap<std::complex<T> >& fourierSpace() const { return fourierMap; }
  virtual FourierMap<std::complex<T> >& fourierSpace() { return fourierMap; }

  virtual const FourierMap<T>& realSpace() const { return realMap; }
  virtual FourierMap<T>& realSpace() { return realMap; }

  // New, independent transform with the same Nside, Lmax, Mmax and — fixed
  // here, the original silently reset it to 0 — the same iteration count.
  virtual FourierTransform<T> *mimick() const
  {
    return new HealpixFourierTransform<T>(realMap.Nside(), fourierMap.Lmax(), fourierMap.Mmax(), m_iterate);
  }

  // Map -> a_lm. After the first transform, each extra pass synthesizes a
  // map from the current a_lm, subtracts it from the input map and
  // transforms the residual with the third sharp_execute argument set to 1.
  // NOTE(review): that argument presumably asks libsharp to accumulate into
  // the existing a_lm — confirm against the libsharp documentation.
  virtual void analysis()
  {
    void *aptr=reinterpret_cast<void *>(fourierMap.data()), *mptr=reinterpret_cast<void *>(realMap.data());

    sharp_execute (SHARP_MAP2ALM, 0, 0, &aptr, &mptr, ginfo, ainfo, 1,
                   HealpixJobHelper__<T>::val,0,0,0);
    for (int i = 0; i < m_iterate; i++)
      {
        HealpixFourierMap<T> tmp_map(realMap.Nside());
        void *tmp_ptr=reinterpret_cast<void *>(tmp_map.data());
        typename HealpixFourierMap<T>::MapType m0 = tmp_map.eigen();
        typename HealpixFourierMap<T>::MapType m1 = realMap.eigen();

        sharp_execute (SHARP_ALM2MAP, 0, 0, &aptr, &tmp_ptr, ginfo, ainfo, 1,
                       HealpixJobHelper__<T>::val,0,0,0);
        // Residual between the input map and the re-synthesized one.
        m0 = m1 - m0;
        sharp_execute (SHARP_MAP2ALM, 0, 1, &aptr, &tmp_ptr, ginfo, ainfo, 1,
                       HealpixJobHelper__<T>::val,0,0,0);
      }
  }

  // a_lm -> map.
  virtual void synthesis()
  {
    void *aptr=reinterpret_cast<void *>(fourierMap.data()), *mptr=reinterpret_cast<void *>(realMap.data());

    sharp_execute (SHARP_ALM2MAP, 0, 0, &aptr, &mptr, ginfo, ainfo, 1,
                   HealpixJobHelper__<T>::val,0,0,0);
  }

  // synthesis() followed by a scaling of the map by 4*pi/Npix.
  virtual void analysis_conjugate()
  {
    synthesis();
    realMap.scale(4*M_PI/realMap.size());
  }

  // analysis() followed by a scaling of the a_lm by Npix/(4*pi).
  virtual void synthesis_conjugate()
  {
    analysis();
    fourierMap.scale(realMap.size()/(4*M_PI));
  }
};
/**
 * Draw a random set of a_lm with the same Lmax/Mmax as `like_map`.
 *
 * For every multipole l up to min(cls.size()-1, Lmax): a_l0 is a real
 * Gaussian of standard deviation sqrt(cls[l]); for 1 <= m <= min(l, Mmax)
 * the real and imaginary parts of a_lm are independent Gaussians of
 * standard deviation sqrt(cls[l]/2). Every other coefficient is zero.
 *
 * Throws std::bad_cast when `like_map` is not a HealpixFourierALM<T>.
 *
 * Fixes over the original: the last multipole and m = Mmax are now
 * generated; m never exceeds l (index(l, m) with m > l aliases another
 * coefficient); coefficients are assigned as whole complex values instead
 * of through `c.real() = ...`; the buffer is zeroed first so ungenerated
 * coefficients are not left uninitialized.
 */
template<typename T>
typename HealpixSpectrum<T>::ptr_map
HealpixSpectrum<T>::newRandomFourier(gsl_rng *rng, const FourierMapType& like_map) const
{
  typedef HealpixFourierALM<T> AlmT;
  typedef typename AlmT::LType LType;

  const AlmT& alms = dynamic_cast<const AlmT&>(like_map);
  AlmT *new_alms = new AlmT(alms.Lmax(), alms.Mmax());
  ptr_map r(new_alms);
  std::complex<T> *new_data = new_alms->data();

  // The allocator hands back raw memory: start from an all-zero set.
  const long num_alms = new_alms->size();
  for (long i = 0; i < num_alms; i++)
    new_data[i] = std::complex<T>(0, 0);

  if (cls.empty())
    return r;

  // Explicit template argument: size_t and LType need not be the same type.
  const LType lmaxGen = std::min<LType>(cls.size()-1, alms.Lmax());

  for (LType l = 0; l <= lmaxGen; l++)
    {
      double Al = std::sqrt(cls[l]);

      new_data[alms.index(l,0)] = gsl_ran_gaussian(rng, Al);
      Al *= M_SQRT1_2;

      const LType mmaxGen = std::min<LType>(l, alms.Mmax());
      for (LType m = 1; m <= mmaxGen; m++)
        {
          // Real part is drawn first, then the imaginary part.
          const T re = gsl_ran_gaussian(rng, Al);
          const T im = gsl_ran_gaussian(rng, Al);

          new_data[alms.index(l,m)] = std::complex<T>(re, im);
        }
    }
  return r;
}
};
#endif

View file

@ -256,7 +256,7 @@ SimuData *CosmoTool::loadGadgetMulti(const char *fname, int id, int loadflags, i
}
catch (const InvalidUnformattedAccess& e)
{
cerr << "Invalid formatted while reading ID" << endl;
cerr << "Invalid unformatted access while reading ID" << endl;
delete f;
delete data;
return 0;

View file

@ -1,5 +1,11 @@
#include "ctool_netcdf_ver.hpp"
#include "config.hpp"
// Select the netCDF C++ binding according to ctool_netcdf_ver.hpp:
// the netCDF-4 C++ API (<netcdf>, namespace netCDF) when NETCDFCPP4 is
// defined, the legacy API (<netcdfcpp.h>) otherwise.
// Fixed: the using-directive was missing its terminating semicolon.
#ifdef NETCDFCPP4
#include <netcdf>
using namespace netCDF;
#else
#include <netcdfcpp.h>
#endif
#include <fstream>
#include "yorick.hpp"
#include <assert.h>

237
src/yorick_nc4.cpp Normal file
View file

@ -0,0 +1,237 @@
#include "ctool_netcdf_ver.hpp"
#include "config.hpp"
#include <netcdf>
using namespace netCDF;
#include <fstream>
#include "yorick.hpp"
#include <assert.h>
using namespace CosmoTool;
using namespace std;
// State shared by the progressive reader and writer: the open file, the
// variable being accessed and the position of the next element.
// NOTE(review): outFile is deleted by the destructor and the class is
// copyable; copying an instance would delete the file twice.
class NetCDF_handle
{
public:
  NcFile *outFile;               // owned; deleted in the destructor
  NcVar curVar;                  // variable being read or written
  vector<size_t> curPos, counts; // next element index / per-call extent
  vector<NcDim> dimList;         // dimensions used for the carry logic
  uint32_t rank;                 // number of dimensions

  NetCDF_handle(NcFile *f, NcVar v, vector<NcDim>& dimList, uint32_t rank);
  virtual ~NetCDF_handle();
};
// Take ownership of `f`, remember the variable and its dimensions, and
// start at position (0, ..., 0) with one element per access
// (counts = (1, ..., 1)).
NetCDF_handle::NetCDF_handle(NcFile *f, NcVar v, vector<NcDim>& dimList, uint32_t rank)
  : outFile(f), curVar(v),
    curPos(rank, 0), counts(rank, 1),
    dimList(dimList), rank(rank)
{
}
// Release the file object handed to the constructor.
NetCDF_handle::~NetCDF_handle()
{
  delete this->outFile;
}
// Element-by-element reader over the variable held by NetCDF_handle.
template<typename T>
class InputGenCDF: public NetCDF_handle, public ProgressiveInputImpl<T>
{
public:
  InputGenCDF(NcFile *f, NcVar v, vector<NcDim>& dimList, uint32_t rank)
    : NetCDF_handle(f,v,dimList,rank)
  {}

  virtual ~InputGenCDF() {}

  // Read the element at the current position, then advance the position
  // by one with the last index varying fastest.
  // NOTE(review): rank == 0 is not handled (curPos[rank-1] would be out of
  // range) — confirm callers never pass a scalar variable.
  virtual T read()
  {
    T value;

    curVar.getVar(curPos, counts, &value);

    ++curPos[rank-1];
    // Propagate carries from the last index towards the first. The first
    // index is never wrapped.
    for (long d = rank-1; d >= 1; d--)
      {
        if (curPos[d] != dimList[d].getSize())
          continue;
        curPos[d] = 0;
        ++curPos[d-1];
      }
    return value;
  }

  // Move to the position `pos`, given in reversed dimension order:
  // pos[0] addresses the last dimension of the variable.
  virtual void seek(uint32_t *pos)
  {
    for (uint32_t k = 0; k < rank; k++)
      curPos[rank-1-k] = pos[k];
  }
};
// Element-by-element writer over the variable held by NetCDF_handle.
template<typename T>
class OutputGenCDF: public NetCDF_handle, public ProgressiveOutputImpl<T>
{
public:
  OutputGenCDF(NcFile *f, NcVar v, vector<NcDim>& dimList, uint32_t rank)
    : NetCDF_handle(f,v,dimList,rank)
  {}

  virtual ~OutputGenCDF() {}

  // Write `a` at the current position, then advance the position by one
  // with the last index varying fastest.
  // NOTE(review): rank == 0 is not handled (curPos[rank-1] would be out of
  // range) — confirm callers never pass a scalar variable.
  virtual void put(T a)
  {
    curVar.putVar(curPos, counts, &a);

    ++curPos[rank-1];
    // Propagate carries from the last index towards the first. The first
    // index is never wrapped.
    for (long d = rank-1; d >= 1; d--)
      {
        if (curPos[d] != dimList[d].getSize())
          continue;
        curPos[d] = 0;
        ++curPos[d-1];
      }
  }
};
// Map a C++ element type to the netCDF type object used to declare
// variables. Only the specializations below exist; any other type fails at
// link time.
template<typename T> NcType& get_NetCDF_type();

// Define the specialization of get_NetCDF_type for one C++ type.
#define IMPL_TYPE(CxxType,NcTypeObject) \
  template<> \
  NcType& get_NetCDF_type<CxxType>() \
  { \
    return NcTypeObject; \
  }

IMPL_TYPE(int,ncInt);
// unsigned int is stored with the same (signed) netCDF type as int.
IMPL_TYPE(unsigned int,ncInt);
IMPL_TYPE(double,ncDouble);
IMPL_TYPE(float,ncFloat);
namespace CosmoTool {
/**
 * Create `fname` (replacing any existing file) with one variable named
 * "array" and return a writer that fills it element by element.
 *
 * `dimList` holds `rank` extents in caller order; the file dimensions are
 * created in the reverse order, so dimList[0] is the last (fastest)
 * dimension of the variable.
 *
 * Fixed: the writer used to receive the dimensions re-reversed into caller
 * order, while its carry logic compares the position along the i-th
 * dimension of the variable with the i-th entry of that list. For arrays
 * whose extents are not all equal the position wrapped at the wrong place.
 * The writer now receives the dimensions in the order of the variable.
 */
template<typename T>
ProgressiveOutput<T>
ProgressiveOutput<T>::saveArrayProgressive(const std::string& fname, uint32_t *dimList,
                                           uint32_t rank)
{
  NcFile *f = new NcFile(fname, NcFile::replace);

  vector<NcDim> dimArray;
  for (uint32_t i = 0; i < rank; i++)
    {
      char dimName[255];

      sprintf(dimName, "dim%d", i);
      // The i-th dimension of the variable takes the extent dimList[rank-1-i].
      dimArray.push_back(f->addDim(dimName, dimList[rank-1-i]));
    }

  NcVar v = f->addVar("array", get_NetCDF_type<T>(), dimArray);

  // The implementation takes ownership of `f`.
  OutputGenCDF<T> *impl = new OutputGenCDF<T>(f, v, dimArray, rank);
  return ProgressiveOutput<T>(impl);
}
/**
 * Open `fname` for reading and return a reader over its "array" variable.
 *
 * On return `rank` is the number of dimensions of the variable and
 * `dimList` points to a new[]-allocated array of `rank` extents in
 * reversed order (dimList[0] is the extent of the last dimension of the
 * variable). The code never frees that array, so the caller has to.
 */
template<typename T>
ProgressiveInput<T>
ProgressiveInput<T>::loadArrayProgressive(const std::string& fname, uint32_t *&dimList,
                                          uint32_t& rank)
{
  NcFile *file = new NcFile(fname, NcFile::read);
  NcVar var = file->getVar("array");

  rank = var.getDimCount();
  vector<NcDim> fileDims = var.getDims();

  dimList = new uint32_t[rank];
  for (uint32_t k = 0; k < rank; k++)
    dimList[rank-1-k] = fileDims[k].getSize();

  // The implementation takes ownership of `file`.
  return ProgressiveInput<T>(new InputGenCDF<T>(file, var, fileDims, rank));
}
template<typename T>
void saveArray(const std::string& fname,
T *array, uint32_t *dimList, uint32_t rank)
{
NcFile f(fname.c_str(), NcFile::replace);
vector<NcDim> dimArray;
for (uint32_t i = 0; i < rank; i++)
{
char dimName[255];
sprintf(dimName, "dim%d", i);
dimArray.push_back(f.addDim(dimName, dimList[i]));
}
NcVar v = f.addVar("array", get_NetCDF_type<T>(), dimArray);
v.putVar(array);
}
template<typename T>
void loadArray(const std::string& fname,
T*&array, uint32_t *&dimList, uint32_t& rank)
throw (NoSuchFileException)
{
NcFile f(fname.c_str(), NcFile::read);
//if (!f.is_valid())
// throw NoSuchFileException(fname);
NcVar v = f.getVar("array");
vector<NcDim> dims = v.getDims();
rank = v.getDimCount();
uint32_t fullSize = 1;
dimList = new uint32_t[rank];
for (int i = 0; i < rank; i++)
{
dimList[i] = dims[i].getSize();
fullSize *= dimList[i];
}
if (fullSize != 0) {
array = new T[fullSize];
v.getVar(array);
}
}
// Explicit instantiations: the templates above are defined in this
// translation unit only, so every element type offered to callers (int,
// float, double) has to be instantiated here.
template class ProgressiveInput<int>;
template class ProgressiveInput<float>;
template class ProgressiveInput<double>;
template class ProgressiveOutput<int>;
template class ProgressiveOutput<float>;
template class ProgressiveOutput<double>;
template void loadArray<int>(const std::string& fname,
int*& array, uint32_t *&dimList, uint32_t& rank);
template void loadArray<float>(const std::string& fname,
float*& array, uint32_t *&dimList, uint32_t& rank);
template void loadArray<double>(const std::string& fname,
double*& array, uint32_t *&dimList, uint32_t& rank);
template void saveArray<int>(const std::string& fname,
int *array, uint32_t *dimList, uint32_t rank);
template void saveArray<float>(const std::string& fname,
float *array, uint32_t *dimList, uint32_t rank);
template void saveArray<double>(const std::string& fname,
double *array, uint32_t *dimList, uint32_t rank);
}