Updated libsharp to commit 0787838ab3ec8afc0c28b98479a321ffba388980
This commit is contained in:
parent
a933430c60
commit
23aa450a77
49
external/sharp/Makefile
vendored
49
external/sharp/Makefile
vendored
@ -19,6 +19,8 @@ include libfftpack/planck.make
|
||||
include libsharp/planck.make
|
||||
include docsrc/planck.make
|
||||
|
||||
CYTHON_MODULES=python/libsharp/libsharp.so $(if $(MPI_CFLAGS), python/libsharp/libsharp_mpi.so)
|
||||
|
||||
$(all_lib): %: | $(LIBDIR)_mkdir
|
||||
@echo "# creating library $*"
|
||||
$(ARCREATE) $@ $^
|
||||
@ -38,18 +40,39 @@ hdrcopy: | $(INCDIR)_mkdir
|
||||
$(notdir $(all_cbin)) : % : $(BINDIR)/%
|
||||
|
||||
test: compile_all
|
||||
$(BINDIR)/sharp_acctest && \
|
||||
$(BINDIR)/sharp_test healpix 2048 1024 1 0 1 && \
|
||||
$(BINDIR)/sharp_test ecp 2047 4096 0 2 1 && \
|
||||
$(BINDIR)/sharp_test gauss 2047 4096 0 0 2
|
||||
$(BINDIR)/sharp_testsuite acctest && \
|
||||
$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test fejer1 2047 -1 -1 4096 2 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 2
|
||||
|
||||
perftest: compile_all
|
||||
$(BINDIR)/sharp_test healpix 2048 1024 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 63 128 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 127 256 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 255 512 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 511 1024 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 1023 2048 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 2047 4096 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 4095 8192 0 0 1 && \
|
||||
$(BINDIR)/sharp_test gauss 8191 16384 0 0 1
|
||||
$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 63 -1 -1 128 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 127 -1 -1 256 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 255 -1 -1 512 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 511 -1 -1 1024 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 1023 -1 -1 2048 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
|
||||
|
||||
%.c: %.c.in
|
||||
# Only do this if the md5sum changed, in order to avoid Python and Jinja
|
||||
# dependency when not modifying the c.in file
|
||||
grep `md5sum $< | cut -d ' ' -f 1` $@ || ./runjinja.py < $< > $@
|
||||
|
||||
genclean:
|
||||
rm libsharp/sharp_legendre.c || exit 0
|
||||
|
||||
$(CYTHON_MODULES): %.so: %.pyx
|
||||
ifndef PIC_CFLAGS
|
||||
$(error Python extension must be built using the --enable-pic configure option.)
|
||||
endif
|
||||
cython $<
|
||||
$(CC) $(DEBUG_CFLAGS) $(OPENMP_CFLAGS) $(PIC_CFLAGS) `python-config --cflags` -I$(INCDIR) -o $(<:.pyx=.o) -c $(<:.pyx=.c)
|
||||
$(CL) -shared $(<:.pyx=.o) $(OPENMP_CFLAGS) $(CYTHON_OBJ) -L$(LIBDIR) -lsharp -lfftpack -lc_utils -L`python-config --prefix`/lib `python-config --ldflags` -o $@
|
||||
|
||||
python: $(all_lib) hdrcopy $(CYTHON_MODULES)
|
||||
|
||||
pytest: python
|
||||
cd python && nosetests --nocapture libsharp/tests/test_sht.py
|
||||
|
13
external/sharp/README.compilation
vendored
13
external/sharp/README.compilation
vendored
@ -1,13 +0,0 @@
|
||||
GNU make and GNU gcc (version 4.x) are required for compilation.
|
||||
|
||||
Simply run "./configure"; if this fails, please refer to the output of
|
||||
"./configure --help" for additional hints and, if necessary, provide
|
||||
additional flags to the configure script.
|
||||
Once the script finishes successfully, run "make"
|
||||
(or "gmake"). This should install the compilation products in the
|
||||
subdirectory "auto/".
|
||||
|
||||
Documentation can be created by the command "(g)make doc".
|
||||
However this requires the doxygen application to be installed
|
||||
on your system.
|
||||
The documentation will be created in the subdirectory doc/.
|
43
external/sharp/README.md
vendored
Normal file
43
external/sharp/README.md
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
# Libsharp
|
||||
|
||||
*IMPORTANT NOTE*: It appears that the default branch upon cloning from
|
||||
github.com/dagss/libsharp was an outdated 'dagss' branch instead of
|
||||
the 'master' branch. To get the latest copy,
|
||||
please do `git checkout master; git pull`. New clones are no longer affected.
|
||||
|
||||
## Paper
|
||||
|
||||
https://arxiv.org/abs/1303.4945
|
||||
|
||||
## Compilation
|
||||
|
||||
GNU make is required for compilation.
|
||||
|
||||
Libsharp compilation has been successfully tested with GNU and Intel compilers.
|
||||
When using gcc, version 4.x is required [1].
|
||||
Since libsharp was written in standard C99, other compilers should work fine,
|
||||
but SSE2/AVX support will most likely be deactivated.
|
||||
|
||||
If you obtained libsharp directly from the git repository, you will also
|
||||
need a copy of the GNU autotools. In this case, run "autoconf" in libsharp's
|
||||
main directory before any other steps.
|
||||
For libsharp releases distributed as a .tar.gz file, this step is not necessary.
|
||||
|
||||
Afterwards, simply run "./configure"; if this fails, please refer to the output
|
||||
of "./configure --help" for additional hints and, if necessary, provide
|
||||
additional flags to the configure script.
|
||||
Once the script finishes successfully, run "make"
|
||||
(or "gmake"). This should install the compilation products in the
|
||||
subdirectory "auto/".
|
||||
|
||||
Documentation can be created by the command "(g)make doc".
|
||||
However this requires the doxygen application to be installed
|
||||
on your system.
|
||||
The documentation will be created in the subdirectory doc/.
|
||||
|
||||
|
||||
[1] Some versions of the gcc 4.4.x release series contain a bug which causes
|
||||
the compiler to crash during libsharp compilation. This appears to be fixed
|
||||
in the gcc 4.4.7 release. It is possible to work around this problem by adding
|
||||
the compiler flag "-fno-tree-fre" after the other optimization flags - the
|
||||
configure script should do this automatically.
|
3974
external/sharp/autom4te.cache/output.0
vendored
3974
external/sharp/autom4te.cache/output.0
vendored
File diff suppressed because it is too large
Load Diff
79
external/sharp/autom4te.cache/requests
vendored
79
external/sharp/autom4te.cache/requests
vendored
@ -1,79 +0,0 @@
|
||||
# This file was generated by Autom4te Sun Nov 6 20:57:04 UTC 2011.
|
||||
# It contains the lists of macros which have been traced.
|
||||
# It can be safely removed.
|
||||
|
||||
@request = (
|
||||
bless( [
|
||||
'0',
|
||||
1,
|
||||
[
|
||||
'/usr/share/autoconf'
|
||||
],
|
||||
[
|
||||
'/usr/share/autoconf/autoconf/autoconf.m4f',
|
||||
'configure.ac'
|
||||
],
|
||||
{
|
||||
'AM_PROG_F77_C_O' => 1,
|
||||
'_LT_AC_TAGCONFIG' => 1,
|
||||
'm4_pattern_forbid' => 1,
|
||||
'AC_INIT' => 1,
|
||||
'AC_CANONICAL_TARGET' => 1,
|
||||
'_AM_COND_IF' => 1,
|
||||
'AC_CONFIG_LIBOBJ_DIR' => 1,
|
||||
'AC_SUBST' => 1,
|
||||
'AC_CANONICAL_HOST' => 1,
|
||||
'AC_FC_SRCEXT' => 1,
|
||||
'AC_DEFUN' => 1,
|
||||
'AC_PROG_LIBTOOL' => 1,
|
||||
'AM_INIT_AUTOMAKE' => 1,
|
||||
'AC_CONFIG_SUBDIRS' => 1,
|
||||
'AM_PATH_GUILE' => 1,
|
||||
'AM_AUTOMAKE_VERSION' => 1,
|
||||
'LT_CONFIG_LTDL_DIR' => 1,
|
||||
'AC_CONFIG_LINKS' => 1,
|
||||
'AC_REQUIRE_AUX_FILE' => 1,
|
||||
'LT_SUPPORTED_TAG' => 1,
|
||||
'm4_sinclude' => 1,
|
||||
'AM_MAINTAINER_MODE' => 1,
|
||||
'AC_DEFUN_ONCE' => 1,
|
||||
'AM_NLS' => 1,
|
||||
'AM_GNU_GETTEXT_INTL_SUBDIR' => 1,
|
||||
'_m4_warn' => 1,
|
||||
'AM_MAKEFILE_INCLUDE' => 1,
|
||||
'AM_PROG_CXX_C_O' => 1,
|
||||
'_AM_MAKEFILE_INCLUDE' => 1,
|
||||
'_AM_COND_ENDIF' => 1,
|
||||
'AM_ENABLE_MULTILIB' => 1,
|
||||
'AM_SILENT_RULES' => 1,
|
||||
'AM_PROG_MOC' => 1,
|
||||
'AC_CONFIG_FILES' => 1,
|
||||
'LT_INIT' => 1,
|
||||
'include' => 1,
|
||||
'AM_GNU_GETTEXT' => 1,
|
||||
'AM_PROG_AR' => 1,
|
||||
'AC_LIBSOURCE' => 1,
|
||||
'AC_CANONICAL_BUILD' => 1,
|
||||
'AM_PROG_FC_C_O' => 1,
|
||||
'AC_FC_FREEFORM' => 1,
|
||||
'AH_OUTPUT' => 1,
|
||||
'AC_CONFIG_AUX_DIR' => 1,
|
||||
'_AM_SUBST_NOTMAKE' => 1,
|
||||
'm4_pattern_allow' => 1,
|
||||
'_AM_AUTOCONF_VERSION' => 1,
|
||||
'AM_PROG_CC_C_O' => 1,
|
||||
'sinclude' => 1,
|
||||
'AM_CONDITIONAL' => 1,
|
||||
'AC_CANONICAL_SYSTEM' => 1,
|
||||
'AM_XGETTEXT_OPTION' => 1,
|
||||
'AC_CONFIG_HEADERS' => 1,
|
||||
'AC_DEFINE_TRACE_LITERAL' => 1,
|
||||
'AM_POT_TOOLS' => 1,
|
||||
'm4_include' => 1,
|
||||
'_AM_COND_ELSE' => 1,
|
||||
'AU_DEFUN' => 1,
|
||||
'AC_SUBST_TRACE' => 1
|
||||
}
|
||||
], 'Autom4te::Request' )
|
||||
);
|
||||
|
229
external/sharp/autom4te.cache/traces.0
vendored
229
external/sharp/autom4te.cache/traces.0
vendored
@ -1,229 +0,0 @@
|
||||
m4trace:configure.ac:1: -1- AC_INIT([config/config.auto.in])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_forbid([^_?A[CHUM]_])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_forbid([_AC_])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_forbid([^LIBOBJS$], [do not use LIBOBJS directly, use AC_LIBOBJ (see section `AC_LIBOBJ vs LIBOBJS'])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^AS_FLAGS$])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_forbid([^_?m4_])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_forbid([^dnl$])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_forbid([^_?AS_])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([SHELL])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([SHELL])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^SHELL$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PATH_SEPARATOR])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PATH_SEPARATOR])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PATH_SEPARATOR$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_NAME], [m4_ifdef([AC_PACKAGE_NAME], ['AC_PACKAGE_NAME'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_NAME])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_NAME$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_TARNAME], [m4_ifdef([AC_PACKAGE_TARNAME], ['AC_PACKAGE_TARNAME'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_TARNAME])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_TARNAME$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_VERSION], [m4_ifdef([AC_PACKAGE_VERSION], ['AC_PACKAGE_VERSION'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_VERSION])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_VERSION$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_STRING], [m4_ifdef([AC_PACKAGE_STRING], ['AC_PACKAGE_STRING'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_STRING])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_STRING$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_BUGREPORT], [m4_ifdef([AC_PACKAGE_BUGREPORT], ['AC_PACKAGE_BUGREPORT'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_BUGREPORT])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_BUGREPORT$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([PACKAGE_URL], [m4_ifdef([AC_PACKAGE_URL], ['AC_PACKAGE_URL'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([PACKAGE_URL])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_URL$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([exec_prefix], [NONE])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([exec_prefix])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^exec_prefix$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([prefix], [NONE])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([prefix])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^prefix$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([program_transform_name], [s,x,x,])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([program_transform_name])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^program_transform_name$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([bindir], ['${exec_prefix}/bin'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([bindir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^bindir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([sbindir], ['${exec_prefix}/sbin'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([sbindir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^sbindir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([libexecdir], ['${exec_prefix}/libexec'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([libexecdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^libexecdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([datarootdir], ['${prefix}/share'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([datarootdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^datarootdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([datadir], ['${datarootdir}'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([datadir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^datadir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([sysconfdir], ['${prefix}/etc'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([sysconfdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^sysconfdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([sharedstatedir], ['${prefix}/com'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([sharedstatedir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^sharedstatedir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([localstatedir], ['${prefix}/var'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([localstatedir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^localstatedir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([includedir], ['${prefix}/include'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([includedir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^includedir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([oldincludedir], ['/usr/include'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([oldincludedir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^oldincludedir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([docdir], [m4_ifset([AC_PACKAGE_TARNAME],
|
||||
['${datarootdir}/doc/${PACKAGE_TARNAME}'],
|
||||
['${datarootdir}/doc/${PACKAGE}'])])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([docdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^docdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([infodir], ['${datarootdir}/info'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([infodir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^infodir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([htmldir], ['${docdir}'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([htmldir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^htmldir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([dvidir], ['${docdir}'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([dvidir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^dvidir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([pdfdir], ['${docdir}'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([pdfdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^pdfdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([psdir], ['${docdir}'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([psdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^psdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([libdir], ['${exec_prefix}/lib'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([libdir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^libdir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([localedir], ['${datarootdir}/locale'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([localedir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^localedir$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([mandir], ['${datarootdir}/man'])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([mandir])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^mandir$])
|
||||
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_NAME])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_NAME$])
|
||||
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_NAME], [/* Define to the full name of this package. */
|
||||
@%:@undef PACKAGE_NAME])
|
||||
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_TARNAME])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_TARNAME$])
|
||||
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_TARNAME], [/* Define to the one symbol short name of this package. */
|
||||
@%:@undef PACKAGE_TARNAME])
|
||||
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_VERSION])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_VERSION$])
|
||||
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_VERSION], [/* Define to the version of this package. */
|
||||
@%:@undef PACKAGE_VERSION])
|
||||
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_STRING])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_STRING$])
|
||||
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_STRING], [/* Define to the full name and version of this package. */
|
||||
@%:@undef PACKAGE_STRING])
|
||||
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_BUGREPORT])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_BUGREPORT$])
|
||||
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_BUGREPORT], [/* Define to the address where bug reports for this package should be sent. */
|
||||
@%:@undef PACKAGE_BUGREPORT])
|
||||
m4trace:configure.ac:1: -1- AC_DEFINE_TRACE_LITERAL([PACKAGE_URL])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^PACKAGE_URL$])
|
||||
m4trace:configure.ac:1: -1- AH_OUTPUT([PACKAGE_URL], [/* Define to the home page for this package. */
|
||||
@%:@undef PACKAGE_URL])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([DEFS])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([DEFS])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^DEFS$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([ECHO_C])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([ECHO_C])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^ECHO_C$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([ECHO_N])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([ECHO_N])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^ECHO_N$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([ECHO_T])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([ECHO_T])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^ECHO_T$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([LIBS])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([LIBS])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^LIBS$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([build_alias])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([build_alias])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^build_alias$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([host_alias])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([host_alias])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^host_alias$])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST([target_alias])
|
||||
m4trace:configure.ac:1: -1- AC_SUBST_TRACE([target_alias])
|
||||
m4trace:configure.ac:1: -1- m4_pattern_allow([^target_alias$])
|
||||
m4trace:configure.ac:3: -1- AC_SUBST([uname_found])
|
||||
m4trace:configure.ac:3: -1- AC_SUBST_TRACE([uname_found])
|
||||
m4trace:configure.ac:3: -1- m4_pattern_allow([^uname_found$])
|
||||
m4trace:configure.ac:12: -1- _m4_warn([obsolete], [The macro `AC_TRY_COMPILE' is obsolete.
|
||||
You should run autoupdate.], [../../lib/autoconf/general.m4:2615: AC_TRY_COMPILE is expanded from...
|
||||
configure.ac:12: the top level])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CC])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CFLAGS])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CFLAGS])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CFLAGS$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([LDFLAGS])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([LDFLAGS])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^LDFLAGS$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([LIBS])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([LIBS])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^LIBS$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CPPFLAGS])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CPPFLAGS])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CPPFLAGS$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CC])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CC])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CC])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([CC])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([CC])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^CC$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([ac_ct_CC])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([ac_ct_CC])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^ac_ct_CC$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([EXEEXT], [$ac_cv_exeext])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([EXEEXT])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^EXEEXT$])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST([OBJEXT], [$ac_cv_objext])
|
||||
m4trace:configure.ac:12: -1- AC_SUBST_TRACE([OBJEXT])
|
||||
m4trace:configure.ac:12: -1- m4_pattern_allow([^OBJEXT$])
|
||||
m4trace:configure.ac:20: -1- AC_SUBST([OPENMP_CFLAGS])
|
||||
m4trace:configure.ac:20: -1- AC_SUBST_TRACE([OPENMP_CFLAGS])
|
||||
m4trace:configure.ac:20: -1- m4_pattern_allow([^OPENMP_CFLAGS$])
|
||||
m4trace:configure.ac:55: -1- AC_SUBST([GCCVERSION])
|
||||
m4trace:configure.ac:55: -1- AC_SUBST_TRACE([GCCVERSION])
|
||||
m4trace:configure.ac:55: -1- m4_pattern_allow([^GCCVERSION$])
|
||||
m4trace:configure.ac:106: -1- AC_SUBST([SILENT_RULE])
|
||||
m4trace:configure.ac:106: -1- AC_SUBST_TRACE([SILENT_RULE])
|
||||
m4trace:configure.ac:106: -1- m4_pattern_allow([^SILENT_RULE$])
|
||||
m4trace:configure.ac:107: -1- AC_SUBST([CC])
|
||||
m4trace:configure.ac:107: -1- AC_SUBST_TRACE([CC])
|
||||
m4trace:configure.ac:107: -1- m4_pattern_allow([^CC$])
|
||||
m4trace:configure.ac:108: -1- AC_SUBST([CCFLAGS_NO_C])
|
||||
m4trace:configure.ac:108: -1- AC_SUBST_TRACE([CCFLAGS_NO_C])
|
||||
m4trace:configure.ac:108: -1- m4_pattern_allow([^CCFLAGS_NO_C$])
|
||||
m4trace:configure.ac:109: -1- AC_SUBST([LDCCFLAGS])
|
||||
m4trace:configure.ac:109: -1- AC_SUBST_TRACE([LDCCFLAGS])
|
||||
m4trace:configure.ac:109: -1- m4_pattern_allow([^LDCCFLAGS$])
|
||||
m4trace:configure.ac:110: -1- AC_SUBST([ARCREATE])
|
||||
m4trace:configure.ac:110: -1- AC_SUBST_TRACE([ARCREATE])
|
||||
m4trace:configure.ac:110: -1- m4_pattern_allow([^ARCREATE$])
|
||||
m4trace:configure.ac:112: -1- AC_CONFIG_FILES([config/config.auto])
|
||||
m4trace:configure.ac:112: -1- _m4_warn([obsolete], [AC_OUTPUT should be used without arguments.
|
||||
You should run autoupdate.], [])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST([LIB@&t@OBJS], [$ac_libobjs])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([LIB@&t@OBJS])
|
||||
m4trace:configure.ac:112: -1- m4_pattern_allow([^LIB@&t@OBJS$])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST([LTLIBOBJS], [$ac_ltlibobjs])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([LTLIBOBJS])
|
||||
m4trace:configure.ac:112: -1- m4_pattern_allow([^LTLIBOBJS$])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([top_builddir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([top_build_prefix])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([srcdir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_srcdir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([top_srcdir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_top_srcdir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([builddir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_builddir])
|
||||
m4trace:configure.ac:112: -1- AC_SUBST_TRACE([abs_top_builddir])
|
6
external/sharp/c_utils/c_utils.h
vendored
6
external/sharp/c_utils/c_utils.h
vendored
@ -113,10 +113,10 @@ void util_free_ (void *ptr);
|
||||
#define ALLOC2D(ptr,type,num1,num2) \
|
||||
do { \
|
||||
size_t cnt_, num1_=(num1), num2_=(num2); \
|
||||
ALLOC(ptr,type *,num1_); \
|
||||
ALLOC(ptr[0],type,num1_*num2_); \
|
||||
ALLOC((ptr),type *,num1_); \
|
||||
ALLOC((ptr)[0],type,num1_*num2_); \
|
||||
for (cnt_=1; cnt_<num1_; ++cnt_) \
|
||||
ptr[cnt_]=ptr[cnt_-1]+num2_; \
|
||||
(ptr)[cnt_]=(ptr)[cnt_-1]+num2_; \
|
||||
} while(0)
|
||||
#define DEALLOC2D(ptr) \
|
||||
do { if(ptr) DEALLOC((ptr)[0]); DEALLOC(ptr); } while(0)
|
||||
|
7
external/sharp/c_utils/memusage.c
vendored
7
external/sharp/c_utils/memusage.c
vendored
@ -38,7 +38,8 @@ double residentSetSize(void)
|
||||
FILE *statm = fopen("/proc/self/statm","r");
|
||||
double res;
|
||||
if (!statm) return -1.0;
|
||||
fscanf(statm,"%*f %lf",&res);
|
||||
if (fscanf(statm,"%*f %lf",&res))
|
||||
{ fclose(statm); return -1.0; }
|
||||
fclose(statm);
|
||||
return (res*4096);
|
||||
}
|
||||
@ -55,8 +56,8 @@ double VmHWM(void)
|
||||
{ fclose(f); return -1.0; }
|
||||
if (!strncmp(word, "VmHWM:", 6))
|
||||
{
|
||||
fscanf(f,"%lf",&res);
|
||||
fscanf(f,"%2s",word);
|
||||
if (fscanf(f,"%lf%2s",&res,word)<0)
|
||||
{ fclose(f); return -1.0; }
|
||||
if (strncmp(word, "kB", 2))
|
||||
{ fclose(f); return -1.0; }
|
||||
res *=1024;
|
||||
|
5
external/sharp/config/config.auto.in
vendored
5
external/sharp/config/config.auto.in
vendored
@ -5,5 +5,8 @@ CL=@CC@
|
||||
CCFLAGS_NO_C=@CCFLAGS_NO_C@
|
||||
CCFLAGS=$(CCFLAGS_NO_C) -c
|
||||
CLFLAGS=-L. -L$(LIBDIR) @LDCCFLAGS@ -lm
|
||||
|
||||
DEBUG_CFLAGS=@DEBUG_CFLAGS@
|
||||
MPI_CFLAGS=@MPI_CFLAGS@
|
||||
OPENMP_CFLAGS=@OPENMP_CFLAGS@
|
||||
PIC_CFLAGS=@PIC_CFLAGS@
|
||||
ARCREATE=@ARCREATE@
|
||||
|
2
external/sharp/config/rules.common
vendored
2
external/sharp/config/rules.common
vendored
@ -4,6 +4,7 @@ BINDIR = $(PREFIX)/bin
|
||||
INCDIR = $(PREFIX)/include
|
||||
LIBDIR = $(PREFIX)/lib
|
||||
DOCDIR = $(SRCROOT)/doc
|
||||
PYTHONDIR = $(SRCROOT)/python/libsharp
|
||||
|
||||
# do not use any suffix rules
|
||||
.SUFFIXES:
|
||||
@ -26,6 +27,7 @@ $(BLDROOT)/%.o : $(SRCROOT)/%.cc | echo_config
|
||||
|
||||
clean:
|
||||
rm -rf $(BLDROOT) $(PREFIX) $(DOCDIR) autom4te.cache/ config.log config.status
|
||||
rm -rf $(PYTHONDIR)/*.c $(PYTHONDIR)/*.o $(PYTHONDIR)/*.so
|
||||
|
||||
distclean: clean
|
||||
rm -f config/config.auto
|
||||
|
3974
external/sharp/configure
vendored
3974
external/sharp/configure
vendored
File diff suppressed because it is too large
Load Diff
27
external/sharp/configure.ac
vendored
27
external/sharp/configure.ac
vendored
@ -53,19 +53,16 @@ case $CCTYPE in
|
||||
GCCVERSION="`$CC -dumpversion 2>&1`"
|
||||
echo "Using gcc version $GCCVERSION"
|
||||
AC_SUBST(GCCVERSION)
|
||||
case $system in
|
||||
Darwin-*)
|
||||
;;
|
||||
*)
|
||||
CCFLAGS="$CCFLAGS -ffunction-sections -fdata-sections"
|
||||
;;
|
||||
esac
|
||||
changequote(,)
|
||||
gcc43=`echo $GCCVERSION | grep -c '4\.[3456789]'`
|
||||
gcc43=`echo $GCCVERSION | grep -c '^4\.[3456789]'`
|
||||
gcc44=`echo $GCCVERSION | grep -c '^4\.4'`
|
||||
changequote([,])
|
||||
if test $gcc43 -gt 0; then
|
||||
CCFLAGS="$CCFLAGS -march=native"
|
||||
fi
|
||||
if test $gcc44 -gt 0; then
|
||||
CCFLAGS="$CCFLAGS -fno-tree-fre"
|
||||
fi
|
||||
;;
|
||||
icc)
|
||||
CCFLAGS="-O3 -xHOST -std=c99 -ip -Wbrief -Wall -vec-report0 -openmp-report0 -wd383,981,1419,1572"
|
||||
@ -85,20 +82,20 @@ case $system in
|
||||
;;
|
||||
esac
|
||||
|
||||
CCFLAGS="$CCFLAGS $OPENMP_CFLAGS"
|
||||
|
||||
if test $ENABLE_DEBUG = yes; then
|
||||
CCFLAGS="$CCFLAGS -g"
|
||||
DEBUG_CFLAGS="-g"
|
||||
fi
|
||||
|
||||
if test $ENABLE_PIC = yes; then
|
||||
CCFLAGS="$CCFLAGS -fPIC"
|
||||
PIC_CFLAGS="-fPIC"
|
||||
fi
|
||||
|
||||
if test $ENABLE_MPI = yes; then
|
||||
CCFLAGS="$CCFLAGS -DUSE_MPI"
|
||||
MPI_CFLAGS="-DUSE_MPI"
|
||||
fi
|
||||
|
||||
CCFLAGS="$CCFLAGS $DEBUG_CFLAGS $OPENMP_CFLAGS $PIC_CFLAGS $MPI_CFLAGS"
|
||||
|
||||
CCFLAGS_NO_C="$CCFLAGS $CPPFLAGS"
|
||||
|
||||
LDCCFLAGS="$LDFLAGS $CCFLAGS"
|
||||
@ -107,6 +104,10 @@ AC_SUBST(SILENT_RULE)
|
||||
AC_SUBST(CC)
|
||||
AC_SUBST(CCFLAGS_NO_C)
|
||||
AC_SUBST(LDCCFLAGS)
|
||||
AC_SUBST(DEBUG_CFLAGS)
|
||||
AC_SUBST(MPI_CFLAGS)
|
||||
AC_SUBST(OPENMP_CFLAGS)
|
||||
AC_SUBST(PIC_CFLAGS)
|
||||
AC_SUBST(ARCREATE)
|
||||
|
||||
AC_OUTPUT(config/config.auto)
|
||||
|
286
external/sharp/fortran/sharp.f90
vendored
Normal file
286
external/sharp/fortran/sharp.f90
vendored
Normal file
@ -0,0 +1,286 @@
|
||||
module sharp
|
||||
use iso_c_binding
|
||||
implicit none
|
||||
! alm_info flags
|
||||
integer, parameter :: SHARP_PACKED = 1
|
||||
|
||||
! sharp job types
|
||||
enum, bind(c)
|
||||
enumerator :: SHARP_YtW = 0
|
||||
enumerator :: SHARP_Y = 1
|
||||
enumerator :: SHARP_Yt = 2
|
||||
enumerator :: SHARP_WY = 3
|
||||
enumerator :: SHARP_ALM2MAP_DERIV1 = 4
|
||||
end enum
|
||||
|
||||
! sharp job flags
|
||||
integer, parameter :: SHARP_DP = ISHFT(1, 4)
|
||||
integer, parameter :: SHARP_ADD = ISHFT(1, 5)
|
||||
integer, parameter :: SHARP_REAL_HARMONICS = ISHFT(1, 6)
|
||||
integer, parameter :: SHARP_NO_FFT = ISHFT(1, 7)
|
||||
|
||||
type sharp_geom_info
|
||||
type(c_ptr) :: handle
|
||||
integer(c_intptr_t) :: n_local
|
||||
end type sharp_geom_info
|
||||
|
||||
type sharp_alm_info
|
||||
type(c_ptr) :: handle
|
||||
integer(c_intptr_t) :: n_local
|
||||
end type sharp_alm_info
|
||||
|
||||
interface
|
||||
|
||||
! alm_info
|
||||
subroutine sharp_make_general_alm_info( &
|
||||
lmax, nm, stride, mval, mvstart, flags, alm_info) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: lmax, nm, stride, flags
|
||||
integer(c_int), intent(in) :: mval(nm)
|
||||
integer(c_intptr_t), intent(in) :: mvstart(nm)
|
||||
type(c_ptr), intent(out) :: alm_info
|
||||
end subroutine sharp_make_general_alm_info
|
||||
|
||||
subroutine c_sharp_make_mmajor_real_packed_alm_info( &
|
||||
lmax, stride, nm, ms, alm_info) bind(c, name='sharp_make_mmajor_real_packed_alm_info')
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: lmax, nm, stride
|
||||
integer(c_int), intent(in), optional :: ms(nm)
|
||||
type(c_ptr), intent(out) :: alm_info
|
||||
end subroutine c_sharp_make_mmajor_real_packed_alm_info
|
||||
|
||||
function c_sharp_alm_count(alm_info) bind(c, name='sharp_alm_count')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t) :: c_sharp_alm_count
|
||||
type(c_ptr), value, intent(in) :: alm_info
|
||||
end function c_sharp_alm_count
|
||||
|
||||
subroutine c_sharp_destroy_alm_info(alm_info) bind(c, name='sharp_destroy_alm_info')
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: alm_info
|
||||
end subroutine c_sharp_destroy_alm_info
|
||||
|
||||
! geom_info
|
||||
subroutine sharp_make_subset_healpix_geom_info ( &
|
||||
nside, stride, nrings, rings, weight, geom_info) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: nside, stride, nrings
|
||||
integer(c_int), intent(in), optional :: rings(nrings)
|
||||
real(c_double), intent(in), optional :: weight(2 * nside)
|
||||
type(c_ptr), intent(out) :: geom_info
|
||||
end subroutine sharp_make_subset_healpix_geom_info
|
||||
|
||||
subroutine c_sharp_destroy_geom_info(geom_info) bind(c, name='sharp_destroy_geom_info')
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: geom_info
|
||||
end subroutine c_sharp_destroy_geom_info
|
||||
|
||||
function c_sharp_map_size(info) bind(c, name='sharp_map_size')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t) :: c_sharp_map_size
|
||||
type(c_ptr), value :: info
|
||||
end function c_sharp_map_size
|
||||
|
||||
|
||||
! execute
|
||||
subroutine c_sharp_execute(type, spin, alm, map, geom_info, alm_info, ntrans, &
|
||||
flags, time, opcnt) bind(c, name='sharp_execute')
|
||||
use iso_c_binding
|
||||
integer(c_int), value :: type, spin, ntrans, flags
|
||||
type(c_ptr), value :: alm_info, geom_info
|
||||
real(c_double), intent(out), optional :: time
|
||||
integer(c_long_long), intent(out), optional :: opcnt
|
||||
type(c_ptr), intent(in) :: alm(*), map(*)
|
||||
end subroutine c_sharp_execute
|
||||
|
||||
subroutine c_sharp_execute_mpi(comm, type, spin, alm, map, geom_info, alm_info, ntrans, &
|
||||
flags, time, opcnt) bind(c, name='sharp_execute_mpi_fortran')
|
||||
use iso_c_binding
|
||||
integer(c_int), value :: comm, type, spin, ntrans, flags
|
||||
type(c_ptr), value :: alm_info, geom_info
|
||||
real(c_double), intent(out), optional :: time
|
||||
integer(c_long_long), intent(out), optional :: opcnt
|
||||
type(c_ptr), intent(in) :: alm(*), map(*)
|
||||
end subroutine c_sharp_execute_mpi
|
||||
|
||||
! Legendre transforms
|
||||
subroutine c_sharp_legendre_transform(bl, recfac, lmax, x, out, nx) &
|
||||
bind(c, name='sharp_legendre_transform')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t), value :: lmax, nx
|
||||
real(c_double) :: bl(lmax + 1), x(nx), out(nx)
|
||||
real(c_double), optional :: recfac(lmax + 1)
|
||||
end subroutine c_sharp_legendre_transform
|
||||
|
||||
subroutine c_sharp_legendre_transform_s(bl, recfac, lmax, x, out, nx) &
|
||||
bind(c, name='sharp_legendre_transform_s')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t), value :: lmax, nx
|
||||
real(c_float) :: bl(lmax + 1), x(nx), out(nx)
|
||||
real(c_float), optional :: recfac(lmax + 1)
|
||||
end subroutine c_sharp_legendre_transform_s
|
||||
end interface
|
||||
|
||||
interface sharp_execute
|
||||
module procedure sharp_execute_d
|
||||
end interface
|
||||
|
||||
interface sharp_legendre_transform
|
||||
module procedure sharp_legendre_transform_d, sharp_legendre_transform_s
|
||||
end interface sharp_legendre_transform
|
||||
|
||||
contains
|
||||
! alm info
|
||||
|
||||
! if ms is not passed, we default to using m=0..lmax.
|
||||
subroutine sharp_make_mmajor_real_packed_alm_info(lmax, ms, alm_info)
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: lmax
|
||||
integer(c_int), intent(in), optional :: ms(:)
|
||||
type(sharp_alm_info), intent(out) :: alm_info
|
||||
!--
|
||||
integer(c_int), allocatable :: ms_copy(:)
|
||||
integer(c_int) :: nm
|
||||
|
||||
if (present(ms)) then
|
||||
nm = size(ms)
|
||||
allocate(ms_copy(nm))
|
||||
ms_copy = ms
|
||||
call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, nm, ms_copy, alm_info=alm_info%handle)
|
||||
deallocate(ms_copy)
|
||||
else
|
||||
call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, lmax + 1, alm_info=alm_info%handle)
|
||||
end if
|
||||
alm_info%n_local = c_sharp_alm_count(alm_info%handle)
|
||||
end subroutine sharp_make_mmajor_real_packed_alm_info
|
||||
|
||||
subroutine sharp_destroy_alm_info(alm_info)
|
||||
use iso_c_binding
|
||||
type(sharp_alm_info), intent(inout) :: alm_info
|
||||
call c_sharp_destroy_alm_info(alm_info%handle)
|
||||
alm_info%handle = c_null_ptr
|
||||
end subroutine sharp_destroy_alm_info
|
||||
|
||||
|
||||
! geom info
|
||||
subroutine sharp_make_healpix_geom_info(nside, rings, weight, geom_info)
|
||||
integer(c_int), value :: nside
|
||||
integer(c_int), optional :: rings(:)
|
||||
real(c_double), intent(in), optional :: weight(2 * nside)
|
||||
type(sharp_geom_info), intent(out) :: geom_info
|
||||
!--
|
||||
integer(c_int) :: nrings
|
||||
integer(c_int), allocatable :: rings_copy(:)
|
||||
|
||||
if (present(rings)) then
|
||||
nrings = size(rings)
|
||||
allocate(rings_copy(nrings))
|
||||
rings_copy = rings
|
||||
call sharp_make_subset_healpix_geom_info(nside, 1, nrings, rings_copy, &
|
||||
weight, geom_info%handle)
|
||||
deallocate(rings_copy)
|
||||
else
|
||||
call sharp_make_subset_healpix_geom_info(nside, 1, nrings=4 * nside - 1, &
|
||||
weight=weight, geom_info=geom_info%handle)
|
||||
end if
|
||||
geom_info%n_local = c_sharp_map_size(geom_info%handle)
|
||||
end subroutine sharp_make_healpix_geom_info
|
||||
|
||||
subroutine sharp_destroy_geom_info(geom_info)
|
||||
use iso_c_binding
|
||||
type(sharp_geom_info), intent(inout) :: geom_info
|
||||
call c_sharp_destroy_geom_info(geom_info%handle)
|
||||
geom_info%handle = c_null_ptr
|
||||
end subroutine sharp_destroy_geom_info
|
||||
|
||||
|
||||
! Currently the only mode supported is stacked (not interleaved) maps.
|
||||
!
|
||||
! Note that passing the exact dimension of alm/map is necesarry, it
|
||||
! prevents the caller from doing too crazy slicing prior to pass array
|
||||
! in...
|
||||
!
|
||||
! Usage:
|
||||
!
|
||||
! The alm array must have shape exactly alm(alm_info%n_local, nmaps)
|
||||
! The maps array must have shape exactly map(map_info%n_local, nmaps).
|
||||
subroutine sharp_execute_d(type, spin, nmaps, alm, alm_info, map, geom_info, &
|
||||
add, time, opcnt, comm)
|
||||
use iso_c_binding
|
||||
use mpi
|
||||
implicit none
|
||||
integer(c_int), value :: type, spin, nmaps
|
||||
integer(c_int), optional :: comm
|
||||
logical, value, optional :: add ! should add instead of replace out
|
||||
|
||||
type(sharp_alm_info) :: alm_info
|
||||
type(sharp_geom_info) :: geom_info
|
||||
real(c_double), intent(out), optional :: time
|
||||
integer(c_long_long), intent(out), optional :: opcnt
|
||||
real(c_double), target, intent(inout) :: alm(0:alm_info%n_local - 1, 1:nmaps)
|
||||
real(c_double), target, intent(inout) :: map(0:geom_info%n_local - 1, 1:nmaps)
|
||||
!--
|
||||
integer(c_int) :: mod_flags, ntrans, k
|
||||
type(c_ptr), target :: alm_ptr(nmaps)
|
||||
type(c_ptr), target :: map_ptr(nmaps)
|
||||
|
||||
mod_flags = SHARP_DP
|
||||
if (present(add) .and. add) then
|
||||
mod_flags = or(mod_flags, SHARP_ADD)
|
||||
end if
|
||||
|
||||
if (spin == 0) then
|
||||
ntrans = nmaps
|
||||
else
|
||||
ntrans = nmaps / 2
|
||||
end if
|
||||
|
||||
! Set up pointer table to access maps
|
||||
alm_ptr(:) = c_null_ptr
|
||||
map_ptr(:) = c_null_ptr
|
||||
do k = 1, nmaps
|
||||
if (alm_info%n_local > 0) alm_ptr(k) = c_loc(alm(0, k))
|
||||
if (geom_info%n_local > 0) map_ptr(k) = c_loc(map(0, k))
|
||||
end do
|
||||
|
||||
if (present(comm)) then
|
||||
call c_sharp_execute_mpi(comm, type, spin, alm_ptr, map_ptr, &
|
||||
geom_info=geom_info%handle, &
|
||||
alm_info=alm_info%handle, &
|
||||
ntrans=ntrans, &
|
||||
flags=mod_flags, &
|
||||
time=time, &
|
||||
opcnt=opcnt)
|
||||
else
|
||||
call c_sharp_execute(type, spin, alm_ptr, map_ptr, &
|
||||
geom_info=geom_info%handle, &
|
||||
alm_info=alm_info%handle, &
|
||||
ntrans=ntrans, &
|
||||
flags=mod_flags, &
|
||||
time=time, &
|
||||
opcnt=opcnt)
|
||||
end if
|
||||
end subroutine sharp_execute_d
|
||||
|
||||
subroutine sharp_legendre_transform_d(bl, x, out)
|
||||
use iso_c_binding
|
||||
real(c_double) :: bl(:)
|
||||
real(c_double) :: x(:), out(size(x))
|
||||
!--
|
||||
integer(c_intptr_t) :: lmax, nx
|
||||
call c_sharp_legendre_transform(bl, lmax=int(size(bl) - 1, c_intptr_t), &
|
||||
x=x, out=out, nx=int(size(x), c_intptr_t))
|
||||
end subroutine sharp_legendre_transform_d
|
||||
|
||||
subroutine sharp_legendre_transform_s(bl, x, out)
|
||||
use iso_c_binding
|
||||
real(c_float) :: bl(:)
|
||||
real(c_float) :: x(:), out(size(x))
|
||||
!--
|
||||
integer(c_intptr_t) :: lmax, nx
|
||||
call c_sharp_legendre_transform_s(bl, lmax=int(size(bl) - 1, c_intptr_t), &
|
||||
x=x, out=out, nx=int(size(x), c_intptr_t))
|
||||
end subroutine sharp_legendre_transform_s
|
||||
|
||||
|
||||
end module
|
84
external/sharp/fortran/test_sharp.f90
vendored
Normal file
84
external/sharp/fortran/test_sharp.f90
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
program test_sharp
|
||||
use mpi
|
||||
use sharp
|
||||
use iso_c_binding, only : c_ptr, c_double
|
||||
implicit none
|
||||
|
||||
integer, parameter :: lmax = 2, nside = 2
|
||||
type(sharp_alm_info) :: alm_info
|
||||
type(sharp_geom_info) :: geom_info
|
||||
|
||||
real(c_double), dimension(0:(lmax + 1)**2 - 1, 1:1) :: alm
|
||||
real(c_double), dimension(0:12*nside**2 - 1, 1:1) :: map
|
||||
|
||||
integer(c_int), dimension(1:lmax + 1) :: ms
|
||||
integer(c_int), dimension(1:4 * nside - 1) :: rings
|
||||
integer(c_int) :: nm, m, nrings, iring
|
||||
integer :: nodecount, rank, ierr
|
||||
|
||||
call MPI_Init(ierr)
|
||||
call MPI_Comm_size(MPI_COMM_WORLD, nodecount, ierr)
|
||||
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
|
||||
|
||||
nm = 0
|
||||
do m = rank, lmax, nodecount
|
||||
nm = nm + 1
|
||||
ms(nm) = m
|
||||
end do
|
||||
|
||||
nrings = 0
|
||||
do iring = rank + 1, 4 * nside - 1, nodecount
|
||||
nrings = nrings + 1
|
||||
rings(nrings) = iring
|
||||
end do
|
||||
|
||||
alm = 0
|
||||
map = 0
|
||||
if (rank == 0) then
|
||||
alm(0, 1) = 1
|
||||
end if
|
||||
|
||||
print *, ms(1:nm)
|
||||
call sharp_make_mmajor_real_packed_alm_info(lmax, ms=ms(1:nm), alm_info=alm_info)
|
||||
print *, 'alm_info%n_local', alm_info%n_local
|
||||
call sharp_make_healpix_geom_info(nside, rings=rings(1:nrings), geom_info=geom_info)
|
||||
print *, 'geom_info%n_local', geom_info%n_local
|
||||
print *, 'execute'
|
||||
call sharp_execute(SHARP_Y, 0, 1, alm, alm_info, map, geom_info, comm=MPI_COMM_WORLD)
|
||||
|
||||
print *, alm
|
||||
print *, map
|
||||
|
||||
call sharp_destroy_alm_info(alm_info)
|
||||
call sharp_destroy_geom_info(geom_info)
|
||||
print *, 'DONE'
|
||||
call MPI_Finalize(ierr)
|
||||
|
||||
print *, 'LEGENDRE TRANSFORMS'
|
||||
|
||||
call test_legendre_transforms()
|
||||
|
||||
contains
|
||||
subroutine test_legendre_transforms()
|
||||
integer, parameter :: lmax = 20, nx=10
|
||||
real(c_double) :: bl(0:lmax)
|
||||
real(c_double) :: x(nx), out(nx)
|
||||
real(c_float) :: out_s(nx)
|
||||
!--
|
||||
integer :: l, i
|
||||
|
||||
do l = 0, lmax
|
||||
bl(l) = 1.0 / real(l + 1, c_double)
|
||||
end do
|
||||
do i = 1, nx
|
||||
x(i) = 1 / real(i, c_double)
|
||||
end do
|
||||
out = 0
|
||||
call sharp_legendre_transform(bl, x, out)
|
||||
print *, out
|
||||
call sharp_legendre_transform(real(bl, c_float), real(x, c_float), out_s)
|
||||
print *, out_s
|
||||
end subroutine test_legendre_transforms
|
||||
|
||||
|
||||
end program test_sharp
|
5
external/sharp/libfftpack/ls_fft.h
vendored
5
external/sharp/libfftpack/ls_fft.h
vendored
@ -122,7 +122,7 @@ void kill_real_plan (real_plan plan);
|
||||
- on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt>
|
||||
(a total of \a length values). */
|
||||
void real_plan_forward_fftpack (real_plan plan, double *data);
|
||||
/*! Computes a real forward FFT on \a data, using \a plan
|
||||
/*! Computes a real backward FFT on \a data, using \a plan
|
||||
and assuming the FFTPACK storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, r1, i1, r2, i2, ...</tt>
|
||||
(a total of \a length values);
|
||||
@ -143,8 +143,7 @@ void real_plan_backward_fftw (real_plan plan, double *data);
|
||||
- on entry, \a data has the form <tt>r0, [ignored], r1, [ignored], ...,
|
||||
r[length-1], [ignored]</tt>;
|
||||
- on exit, it has the form <tt>r0, i0, r1, i1, ...,
|
||||
r[length-1], i[length-1]</tt>.
|
||||
*/
|
||||
r[length-1], i[length-1]</tt>. */
|
||||
void real_plan_forward_c (real_plan plan, double *data);
|
||||
/*! Computes a real backward FFT on \a data, using \a plan
|
||||
and assuming a full-complex storage scheme:
|
||||
|
19
external/sharp/libsharp/libsharp.dox
vendored
19
external/sharp/libsharp/libsharp.dox
vendored
@ -7,10 +7,9 @@
|
||||
|
||||
/*! \page introduction Introduction to libsharp
|
||||
|
||||
"SHARP" is an acronym for <i>Performant Spherical Harmonic Transforms</i>.
|
||||
"SHARP" is an acronym for <i>Spherical HARmonic Package</i>.
|
||||
All user-visible data types and functions in this library start with
|
||||
the prefix "sharp_", or with "sharps_" and "sharpd_" for single- and
|
||||
double precision variants, respectively.
|
||||
the prefix "sharp_" to avoid pollution of the global C namespace.
|
||||
|
||||
<i>libsharp</i>'s main functionality is the conversion between <i>maps</i>
|
||||
on the sphere and <i>spherical harmonic coefficients</i> (or <i>a_lm</i>).
|
||||
@ -57,7 +56,7 @@
|
||||
for generating often-used pixelisations like ECP grids, Gaussian grids,
|
||||
and Healpix grids.
|
||||
|
||||
Currently, SHARP supports the following kinds of transforms:
|
||||
Currently, libsharp supports the following kinds of transforms:
|
||||
<ul>
|
||||
<li>scalar a_lm to map</li>
|
||||
<li>scalar map to a_lm</li>
|
||||
@ -68,10 +67,10 @@
|
||||
<li>scalar a_lm to maps of first derivatives</li>
|
||||
</ul>
|
||||
|
||||
SHARP supports shared-memory parallelisation via OpenMP; this feature will
|
||||
libsharp supports shared-memory parallelisation via OpenMP; this feature will
|
||||
be automatically enabled if the compiler supports it.
|
||||
|
||||
SHARP will also make use of SSE2 and AVX instructions when compiled for a
|
||||
Libsharp will also make use of SSE2 and AVX instructions when compiled for a
|
||||
platform known to support them.
|
||||
|
||||
Support for MPI-parallel transforms is also available; in this mode,
|
||||
@ -83,12 +82,4 @@
|
||||
single-precision transforms will most likely not be faster than their
|
||||
double-precision counterparts, but they will require significantly less
|
||||
memory.
|
||||
|
||||
Two example and benchmark programs are distributed with SHARP:
|
||||
<ul>
|
||||
<li>sharp_test.c checks the accuracy of the (iterative) map analysis
|
||||
algorithm</li>
|
||||
<li>sharp_bench.c determines the quickest transform strategy for a given
|
||||
SHT</li>
|
||||
</ul>
|
||||
*/
|
||||
|
8
external/sharp/libsharp/planck.make
vendored
8
external/sharp/libsharp/planck.make
vendored
@ -7,14 +7,14 @@ FULL_INCLUDE+= -I$(SD)
|
||||
|
||||
HDR_$(PKG):=$(SD)/*.h
|
||||
LIB_$(PKG):=$(LIBDIR)/libsharp.a
|
||||
BIN:=sharp_test sharp_acctest sharp_test_mpi sharp_bench sharp_bench2
|
||||
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o
|
||||
ALLOBJ:=$(LIBOBJ) sharp_test.o sharp_acctest.o sharp_test_mpi.o sharp_bench.o sharp_bench2.o
|
||||
BIN:=sharp_testsuite
|
||||
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o sharp_legendre.o sharp_legendre_roots.o
|
||||
ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
|
||||
LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
|
||||
ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
|
||||
|
||||
ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
|
||||
$(OD)/sharp_core.o: $(SD)/sharp_inchelper1.inc.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c $(SD)/sharp_core_inc3.c
|
||||
$(OD)/sharp_core.o: $(SD)/sharp_core_inchelper.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c
|
||||
$(OD)/sharp.o: $(SD)/sharp_mpi.c
|
||||
BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
|
||||
|
||||
|
725
external/sharp/libsharp/sharp.c
vendored
725
external/sharp/libsharp/sharp.c
vendored
File diff suppressed because it is too large
Load Diff
2
external/sharp/libsharp/sharp.h
vendored
2
external/sharp/libsharp/sharp.h
vendored
@ -39,5 +39,7 @@
|
||||
#include <complex.h>
|
||||
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp_legendre.h"
|
||||
#include "sharp_legendre_roots.h"
|
||||
|
||||
#endif
|
||||
|
267
external/sharp/libsharp/sharp_acctest.c
vendored
267
external/sharp/libsharp/sharp_acctest.c
vendored
@ -1,267 +0,0 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_acctest.c
|
||||
Systematic accuracy test for libsharp.
|
||||
|
||||
Copyright (C) 2006-2012 Max-Planck-Society
|
||||
\author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
#include "sharp.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_core.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
static double drand (double min, double max)
|
||||
{ return min + (max-min)*rand()/(RAND_MAX+1.0); }
|
||||
|
||||
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin)
|
||||
{
|
||||
for (int mi=0;mi<helper->nm; ++mi)
|
||||
{
|
||||
int m=helper->mval[mi];
|
||||
for (int l=m;l<=helper->lmax; ++l)
|
||||
{
|
||||
if ((l<spin)&&(m<spin))
|
||||
alm[sharp_alm_index(helper,l,mi)] = 0.;
|
||||
else
|
||||
{
|
||||
double rv = drand(-1,1);
|
||||
double iv = (m==0) ? 0 : drand(-1,1);
|
||||
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_errors (dcmplx **alm, dcmplx **alm2,
|
||||
ptrdiff_t nalms, int ncomp)
|
||||
{
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
{
|
||||
double sum=0, sum2=0, maxdiff=0;
|
||||
for (ptrdiff_t m=0; m<nalms; ++m)
|
||||
{
|
||||
double x=creal(alm[i][m])-creal(alm2[i][m]),
|
||||
y=cimag(alm[i][m])-cimag(alm2[i][m]);
|
||||
sum+=x*x+y*y;
|
||||
sum2+=creal(alm[i][m])*creal(alm[i][m])+cimag(alm[i][m])*cimag(alm[i][m]);
|
||||
if (fabs(x)>maxdiff) maxdiff=fabs(x);
|
||||
if (fabs(y)>maxdiff) maxdiff=fabs(y);
|
||||
}
|
||||
sum=sqrt(sum/nalms);
|
||||
sum2=sqrt(sum2/nalms);
|
||||
UTIL_ASSERT((maxdiff<1e-10)&&(sum/sum2<1e-10),"error");
|
||||
}
|
||||
}
|
||||
|
||||
static void check_sign_scale(void)
|
||||
{
|
||||
int lmax=50;
|
||||
int mmax=lmax;
|
||||
sharp_geom_info *tinfo;
|
||||
int nrings=lmax+1;
|
||||
int ppring=2*lmax+2;
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
|
||||
|
||||
/* flip theta to emulate the "old" Gaussian grid geometry */
|
||||
for (int i=0; i<tinfo->npairs; ++i)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
|
||||
tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
|
||||
tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
|
||||
tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
|
||||
}
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
|
||||
for (int ntrans=1; ntrans<10; ++ntrans)
|
||||
{
|
||||
double **map;
|
||||
ALLOC2D(map,double,2*ntrans,npix);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,2*ntrans,nalms);
|
||||
for (int i=0; i<2*ntrans; ++i)
|
||||
for (int j=0; j<nalms; ++j)
|
||||
alm[i][j]=1.+_Complex_I;
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,0,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,NULL,
|
||||
NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[it][npix/2], 4.042209792157496651e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,1,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,NULL,
|
||||
NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2], 3.137704477368562905e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-8.405730859837063917e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-2.398026536095463346e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-4.961140548331700728e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.412765834230440021e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,2,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,NULL,
|
||||
NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.456676000884031197e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.516249174408820863e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-3.173406200299964119e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-5.831327404513146462e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.863257892248353897e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,0,&alm[0],&map[0],tinfo,alms,ntrans,1,
|
||||
0,NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.103947835973212364e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.092463246472086439e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-1.411433220713928165e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-1.146122859381925082e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1], 7.821618677689795049e+02,1e-12),
|
||||
"error");
|
||||
}
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
}
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
|
||||
static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
||||
ptrdiff_t mmax, ptrdiff_t npix, int spin, int ntrans, int nv)
|
||||
{
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
|
||||
srand(4);
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
random_alm(alm[i],alms,spin);
|
||||
|
||||
dcmplx **alm2;
|
||||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,
|
||||
NULL,NULL);
|
||||
sharp_execute(SHARP_MAP2ALM,spin,0,&alm2[0],&map[0],tinfo,alms,ntrans,1,nv,
|
||||
NULL,NULL);
|
||||
measure_errors(alm,alm2,nalms,ncomp);
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
DEALLOC2D(alm2);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
MPI_Init(NULL,NULL);
|
||||
#endif
|
||||
sharp_module_startup("sharp_acctest",1,1,"",1);
|
||||
|
||||
int lmax=127;
|
||||
|
||||
printf("Checking signs and scales.\n");
|
||||
check_sign_scale();
|
||||
printf("Passed.\n\n");
|
||||
|
||||
printf("Testing map analysis accuracy.\n");
|
||||
|
||||
sharp_geom_info *tinfo;
|
||||
int nrings=lmax+1;
|
||||
int ppring=2*lmax+2;
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
|
||||
for (int nv=1; nv<=6; ++nv)
|
||||
for (int ntrans=1; ntrans<=6; ++ntrans)
|
||||
{
|
||||
check_accuracy(tinfo,lmax,lmax,npix,0,ntrans,nv);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,1,ntrans,nv);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,2,ntrans,nv);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,3,ntrans,nv);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,30,ntrans,nv);
|
||||
}
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
printf("Passed.\n\n");
|
||||
|
||||
#ifdef USE_MPI
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
30
external/sharp/libsharp/sharp_almhelpers.c
vendored
30
external/sharp/libsharp/sharp_almhelpers.c
vendored
@ -25,7 +25,7 @@
|
||||
/*! \file sharp_almhelpers.c
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2008-2011 Max-Planck-Society
|
||||
* Copyright (C) 2008-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -41,7 +41,8 @@ void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
|
||||
info->mval = RALLOC(int,mmax+1);
|
||||
info->mvstart = RALLOC(ptrdiff_t,mmax+1);
|
||||
info->stride = stride;
|
||||
int tval = 2*lmax+1;
|
||||
info->flags = 0;
|
||||
ptrdiff_t tval = 2*lmax+1;
|
||||
for (ptrdiff_t m=0; m<=mmax; ++m)
|
||||
{
|
||||
info->mval[m] = m;
|
||||
@ -59,6 +60,7 @@ void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
|
||||
info->mval = RALLOC(int,mmax+1);
|
||||
info->mvstart = RALLOC(ptrdiff_t,mmax+1);
|
||||
info->stride = stride;
|
||||
info->flags = 0;
|
||||
for (ptrdiff_t m=0; m<=mmax; ++m)
|
||||
{
|
||||
info->mval[m] = m;
|
||||
@ -66,3 +68,27 @@ void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
|
||||
}
|
||||
*alm_info = info;
|
||||
}
|
||||
|
||||
void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
|
||||
int nm, const int *ms, sharp_alm_info **alm_info)
|
||||
{
|
||||
ptrdiff_t idx;
|
||||
int f;
|
||||
sharp_alm_info *info = RALLOC(sharp_alm_info,1);
|
||||
info->lmax = lmax;
|
||||
info->nm = nm;
|
||||
info->mval = RALLOC(int,nm);
|
||||
info->mvstart = RALLOC(ptrdiff_t,nm);
|
||||
info->stride = stride;
|
||||
info->flags = SHARP_PACKED | SHARP_REAL_HARMONICS;
|
||||
idx = 0; /* tracks the number of 'consumed' elements so far; need to correct by m */
|
||||
for (int im=0; im!=nm; ++im)
|
||||
{
|
||||
int m=(ms==NULL)?im:ms[im];
|
||||
f = (m==0) ? 1 : 2;
|
||||
info->mval[im] = m;
|
||||
info->mvstart[im] = stride * (idx - f * m);
|
||||
idx += f * (lmax + 1 - m);
|
||||
}
|
||||
*alm_info = info;
|
||||
}
|
||||
|
8
external/sharp/libsharp/sharp_almhelpers.h
vendored
8
external/sharp/libsharp/sharp_almhelpers.h
vendored
@ -50,6 +50,14 @@ void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
|
||||
void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
|
||||
sharp_alm_info **alm_info);
|
||||
|
||||
/*! Initialises alm_info for mmajor, real, packed spherical harmonics.
|
||||
Pass \a mmax + 1 to nm and NULL to \a ms in order to use everything;
|
||||
otherwise you can pick a subset of m to process (should only be used
|
||||
for MPI parallelization).
|
||||
\ingroup almgroup */
|
||||
void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
|
||||
int nm, const int *ms, sharp_alm_info **alm_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
149
external/sharp/libsharp/sharp_bench.c
vendored
149
external/sharp/libsharp/sharp_bench.c
vendored
@ -1,149 +0,0 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_bench.c
|
||||
Copyright (C) 2012 Max-Planck-Society
|
||||
\author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
#include "sharp.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_core.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
static void bench_sht (int spin, int nv, sharp_jobtype type,
|
||||
int ntrans, double *time, unsigned long long *opcnt)
|
||||
{
|
||||
int lmax=2047;
|
||||
int mmax=128;
|
||||
int nrings=512;
|
||||
int ppring=1024;
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
sharp_geom_info *tinfo;
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
|
||||
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
SET_ARRAY(map[0],0,npix*ncomp,0.);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
SET_ARRAY(alm[0],0,nalms*ncomp,0.);
|
||||
|
||||
int nruns=0;
|
||||
*time=1e30;
|
||||
*opcnt=1000000000000000;
|
||||
do
|
||||
{
|
||||
double jtime;
|
||||
unsigned long long jopcnt;
|
||||
sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,&jtime,
|
||||
&jopcnt);
|
||||
|
||||
if (jopcnt<*opcnt) *opcnt=jopcnt;
|
||||
if (jtime<*time) *time=jtime;
|
||||
}
|
||||
while (++nruns < 4);
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
MPI_Init(NULL,NULL);
|
||||
#endif
|
||||
sharp_module_startup("sharp_bench",1,1,"",1);
|
||||
|
||||
printf("Benchmarking SHTs.\n\n");
|
||||
FILE *fp=fopen("sharp_oracle.inc","w");
|
||||
UTIL_ASSERT(fp, "failed to open oracle file for writing");
|
||||
fprintf(fp,"static const int maxtr = 6;\n");
|
||||
fprintf(fp,"static const int nv_opt[6][2][3] = {\n");
|
||||
|
||||
const char *shtname[]={"map2alm","alm2map","a2mder1"};
|
||||
|
||||
for (int ntr=1; ntr<=6; ++ntr)
|
||||
{
|
||||
fprintf(fp,"{");
|
||||
for (int spin=0; spin<=2; spin+=2)
|
||||
{
|
||||
fprintf(fp,"{");
|
||||
for (sharp_jobtype type=SHARP_MAP2ALM; type<=SHARP_ALM2MAP_DERIV1; ++type)
|
||||
{
|
||||
if ((type==SHARP_ALM2MAP_DERIV1) && (spin==0))
|
||||
fprintf(fp,"-1");
|
||||
else
|
||||
{
|
||||
int nvbest=-1, nvoracle=sharp_nv_oracle(type,spin,ntr);
|
||||
unsigned long long opmin=1000000000000000, op;
|
||||
double tmin=1e30;
|
||||
double *time=RALLOC(double,sharp_get_nv_max()+1);
|
||||
for (int nv=1; nv<=sharp_get_nv_max(); ++nv)
|
||||
{
|
||||
bench_sht (spin,nv,type,ntr,&time[nv],&op);
|
||||
if (op<opmin) opmin=op;
|
||||
if (time[nv]<tmin)
|
||||
{ tmin=time[nv]; nvbest=nv; }
|
||||
}
|
||||
printf("nt: %d %s spin: %d nv: %d time: %6.3f perf: %6.3f"
|
||||
" dev[%d]: %6.2f%%\n",ntr,shtname[type],
|
||||
spin,nvbest,tmin,opmin/tmin*1e-9,nvoracle,
|
||||
(time[nvoracle]-tmin)/tmin*100.);
|
||||
DEALLOC(time);
|
||||
fprintf(fp,"%d",nvbest);
|
||||
}
|
||||
if (type!=SHARP_ALM2MAP_DERIV1) fprintf(fp,",");
|
||||
}
|
||||
fprintf(fp,(spin==0)?"},":"}");
|
||||
printf("\n");
|
||||
}
|
||||
fprintf(fp,(ntr<6)?"},\n":"}\n");
|
||||
}
|
||||
fprintf(fp,"};\n");
|
||||
fclose(fp);
|
||||
#ifdef USE_MPI
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
223
external/sharp/libsharp/sharp_bench2.c
vendored
223
external/sharp/libsharp/sharp_bench2.c
vendored
@ -1,223 +0,0 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_bench2.c
|
||||
Copyright (C) 2012 Max-Planck-Society
|
||||
\author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#if (defined(_OPENMP) && defined(USE_MPI))
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <omp.h>
|
||||
#include <mpi.h>
|
||||
#include "sharp_mpi.h"
|
||||
#include "sharp.h"
|
||||
#include "sharp_vecutil.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_core.h"
|
||||
#include "memusage.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
int ntasks, mytask;
|
||||
|
||||
static unsigned long long totalops (unsigned long long val)
|
||||
{
|
||||
unsigned long long tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double maxTime (double val)
|
||||
{
|
||||
double tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double totalMem (double val)
|
||||
{
|
||||
double tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static void reduce_alm_info(sharp_alm_info *ainfo)
|
||||
{
|
||||
int nmnew=0;
|
||||
ptrdiff_t ofs = 0;
|
||||
for (int i=mytask; i<ainfo->nm; i+=ntasks,++nmnew)
|
||||
{
|
||||
ainfo->mval[nmnew]=ainfo->mval[i];
|
||||
ainfo->mvstart[nmnew]=ofs-ainfo->mval[nmnew];
|
||||
ofs+=ainfo->lmax-ainfo->mval[nmnew]+1;
|
||||
}
|
||||
ainfo->nm=nmnew;
|
||||
}
|
||||
|
||||
static void reduce_geom_info(sharp_geom_info *ginfo)
|
||||
{
|
||||
int npairsnew=0;
|
||||
ptrdiff_t ofs = 0;
|
||||
for (int i=mytask; i<ginfo->npairs; i+=ntasks,++npairsnew)
|
||||
{
|
||||
ginfo->pair[npairsnew]=ginfo->pair[i];
|
||||
ginfo->pair[npairsnew].r1.ofs=ofs;
|
||||
ofs+=ginfo->pair[npairsnew].r1.nph;
|
||||
ginfo->pair[npairsnew].r2.ofs=ofs;
|
||||
if (ginfo->pair[npairsnew].r2.nph>0) ofs+=ginfo->pair[npairsnew].r2.nph;
|
||||
}
|
||||
ginfo->npairs=npairsnew;
|
||||
}
|
||||
|
||||
static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
|
||||
{
|
||||
ptrdiff_t res=0;
|
||||
for (int i=0; i<ainfo->nm; ++i)
|
||||
res += ainfo->lmax-ainfo->mval[i]+1;
|
||||
return res;
|
||||
}
|
||||
|
||||
static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
|
||||
{
|
||||
ptrdiff_t res=0;
|
||||
for (int i=0; i<ginfo->npairs; ++i)
|
||||
{
|
||||
res += ginfo->pair[i].r1.nph;
|
||||
if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
MPI_Init(NULL,NULL);
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
|
||||
int master=(mytask==0);
|
||||
|
||||
sharp_module_startup("sharp_bench2",argc,7,
|
||||
"<healpix|ecp|gauss> <lmax> <nside|nphi> <a2m/m2a> <spin> <ntrans>",0);
|
||||
|
||||
int lmax=atoi(argv[2]);
|
||||
sharp_jobtype jtype = (strcmp(argv[4],"a2m")==0) ?
|
||||
SHARP_ALM2MAP : SHARP_MAP2ALM;
|
||||
int spin=atoi(argv[5]);
|
||||
int ntrans=atoi(argv[6]);
|
||||
|
||||
sharp_geom_info *tinfo;
|
||||
ptrdiff_t npix=0;
|
||||
int geom2=0;
|
||||
if (strcmp(argv[1],"gauss")==0)
|
||||
{
|
||||
int nrings=geom2=lmax+1;
|
||||
int ppring=atoi(argv[3]);
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
|
||||
}
|
||||
else if (strcmp(argv[1],"ecp")==0)
|
||||
{
|
||||
int nrings=geom2=2*lmax+2;
|
||||
int ppring=atoi(argv[3]);
|
||||
sharp_make_ecp_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
|
||||
}
|
||||
else if (strcmp(argv[1],"healpix")==0)
|
||||
{
|
||||
int nside=atoi(argv[3]);
|
||||
if (nside<1) nside=1;
|
||||
geom2=4*nside-1;
|
||||
sharp_make_healpix_geom_info (nside, 1, &tinfo);
|
||||
}
|
||||
else
|
||||
UTIL_FAIL("unknown grid geometry");
|
||||
|
||||
reduce_geom_info(tinfo);
|
||||
npix=get_npix(tinfo);
|
||||
|
||||
int mmax=lmax;
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
|
||||
reduce_alm_info(alms);
|
||||
ptrdiff_t nalms=get_nalms(alms);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
|
||||
for (int n=0; n<ncomp; ++n)
|
||||
{
|
||||
for (int i=0; i<npix; ++i) map[n][i]=1;
|
||||
for (int i=0; i<nalms; ++i) alm[n][i]=1;
|
||||
}
|
||||
|
||||
double time=1e20;
|
||||
unsigned long long opcnt=0;
|
||||
for (int ntries=0; (ntries<2)||(ntries*time<5); ++ntries)
|
||||
{
|
||||
double ltime;
|
||||
unsigned long long lopcnt;
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,jtype,spin,0,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,1,0,<ime,&lopcnt);
|
||||
|
||||
ltime=maxTime(ltime);
|
||||
if (ltime<time) { time=ltime; opcnt=totalops(lopcnt); }
|
||||
}
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
|
||||
double mHWM=totalMem(VmHWM());
|
||||
|
||||
int nomp=omp_get_max_threads();
|
||||
|
||||
if (master)
|
||||
printf("%-12s %-7s %-3s %2d %d %2d %3d %5d %5d %1d %.2e %7.2f %9.2f\n",
|
||||
getenv("HOST"),argv[1],argv[4],spin,VLEN,nomp,ntasks,lmax,geom2,ntrans,
|
||||
time,opcnt/(time*1e9),mHWM/(1<<20));
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include "c_utils.h"
|
||||
|
||||
int main(void)
|
||||
{ UTIL_FAIL("Need OpenMP and MPI"); return 1; }
|
||||
|
||||
#endif
|
16
external/sharp/libsharp/sharp_complex_hacks.h
vendored
16
external/sharp/libsharp/sharp_complex_hacks.h
vendored
@ -25,7 +25,7 @@
|
||||
/* \file sharp_complex_hacks.h
|
||||
* support for converting vector types and complex numbers
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012,2013 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -132,4 +132,18 @@ static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==8)
|
||||
|
||||
static inline complex double vhsum_cmplx(Tv a, Tv b)
|
||||
{ return _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b); }
|
||||
|
||||
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
|
||||
complex double * restrict c1, complex double * restrict c2)
|
||||
{
|
||||
*c1 += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b);
|
||||
*c2 += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
121
external/sharp/libsharp/sharp_core.c
vendored
121
external/sharp/libsharp/sharp_core.c
vendored
@ -25,7 +25,7 @@
|
||||
/*! \file sharp_core.c
|
||||
* Computational core
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -41,6 +41,7 @@
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
// must be in the range [0;6]
|
||||
#define MAXJOB_SPECIAL 2
|
||||
|
||||
#define XCONCAT2(a,b) a##_##b
|
||||
@ -49,188 +50,188 @@ typedef complex double dcmplx;
|
||||
#define CONCAT3(a,b,c) XCONCAT3(a,b,c)
|
||||
|
||||
#define nvec 1
|
||||
#include "sharp_inchelper1.inc.c"
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 2
|
||||
#include "sharp_inchelper1.inc.c"
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 3
|
||||
#include "sharp_inchelper1.inc.c"
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 4
|
||||
#include "sharp_inchelper1.inc.c"
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 5
|
||||
#include "sharp_inchelper1.inc.c"
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 6
|
||||
#include "sharp_inchelper1.inc.c"
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *idx)
|
||||
const int *mlim)
|
||||
{
|
||||
int njobs=job->ntrans;
|
||||
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||
if (njobs<=MAXJOB_SPECIAL)
|
||||
{
|
||||
switch (njobs*16+job->nv)
|
||||
switch (njobs*16+nv)
|
||||
{
|
||||
#if (MAXJOB_SPECIAL>=1)
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
case 0x11:
|
||||
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x12:
|
||||
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x13:
|
||||
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x14:
|
||||
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x15:
|
||||
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x16:
|
||||
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if (MAXJOB_SPECIAL>=2)
|
||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||
case 0x21:
|
||||
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x22:
|
||||
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x23:
|
||||
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x24:
|
||||
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x25:
|
||||
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x26:
|
||||
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if (MAXJOB_SPECIAL>=3)
|
||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||
case 0x31:
|
||||
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x32:
|
||||
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x33:
|
||||
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x34:
|
||||
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x35:
|
||||
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x36:
|
||||
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if (MAXJOB_SPECIAL>=4)
|
||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||
case 0x41:
|
||||
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x42:
|
||||
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x43:
|
||||
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x44:
|
||||
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x45:
|
||||
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x46:
|
||||
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if (MAXJOB_SPECIAL>=5)
|
||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||
case 0x51:
|
||||
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x52:
|
||||
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x53:
|
||||
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x54:
|
||||
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x55:
|
||||
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x56:
|
||||
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if (MAXJOB_SPECIAL>=6)
|
||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||
case 0x61:
|
||||
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x62:
|
||||
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x63:
|
||||
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x64:
|
||||
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x65:
|
||||
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x66:
|
||||
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,idx);
|
||||
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if (MAXJOB_SPECIAL<6)
|
||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
else
|
||||
{
|
||||
switch (job->nv)
|
||||
switch (nv)
|
||||
{
|
||||
case 1:
|
||||
CONCAT2(inner_loop,1)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 2:
|
||||
CONCAT2(inner_loop,2)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 3:
|
||||
CONCAT2(inner_loop,3)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 4:
|
||||
CONCAT2(inner_loop,4)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 5:
|
||||
CONCAT2(inner_loop,5)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 6:
|
||||
CONCAT2(inner_loop,6)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,idx,job->ntrans);
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
4
external/sharp/libsharp/sharp_core.h
vendored
4
external/sharp/libsharp/sharp_core.h
vendored
@ -25,7 +25,7 @@
|
||||
/*! \file sharp_core.h
|
||||
* Interface for the computational core
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -41,7 +41,7 @@ extern "C" {
|
||||
|
||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *idx);
|
||||
const int *mlim);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
47
external/sharp/libsharp/sharp_core_inc.c
vendored
47
external/sharp/libsharp/sharp_core_inc.c
vendored
@ -70,31 +70,31 @@ static inline Tb Y(Tbprod)(Tb a, Tb b)
|
||||
static inline void Y(Tbmuleq)(Tb * restrict a, Tb b)
|
||||
{ for (int i=0; i<nvec; ++i) vmuleq(a->v[i],b.v[i]); }
|
||||
|
||||
static inline void Y(Tbnormalize) (Tb * restrict val, Tb * restrict scale,
|
||||
static void Y(Tbnormalize) (Tb * restrict val, Tb * restrict scale,
|
||||
double maxval)
|
||||
{
|
||||
const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
|
||||
const Tv vfmin=vload(sharp_fsmall*maxval), vfmax=vload(maxval);
|
||||
for (int i=0;i<nvec; ++i)
|
||||
{
|
||||
Tv mask = vgt(vabs(val->v[i]),vfmax);
|
||||
Tm mask = vgt(vabs(val->v[i]),vfmax);
|
||||
while (vanyTrue(mask))
|
||||
{
|
||||
vmuleq(val->v[i],vblend(mask,vfsmall,vone));
|
||||
vaddeq(scale->v[i],vblend(mask,vone,vzero));
|
||||
vmuleq_mask(mask,val->v[i],vfsmall);
|
||||
vaddeq_mask(mask,scale->v[i],vone);
|
||||
mask = vgt(vabs(val->v[i]),vfmax);
|
||||
}
|
||||
mask = vand(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
|
||||
mask = vand_mask(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
|
||||
while (vanyTrue(mask))
|
||||
{
|
||||
vmuleq(val->v[i],vblend(mask,vfbig,vone));
|
||||
vsubeq(scale->v[i],vblend(mask,vone,vzero));
|
||||
mask = vand(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
|
||||
vmuleq_mask(mask,val->v[i],vfbig);
|
||||
vsubeq_mask(mask,scale->v[i],vone);
|
||||
mask = vand_mask(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
|
||||
static void Y(mypow) (Tb val, int npow, Tb * restrict resd,
|
||||
Tb * restrict ress)
|
||||
{
|
||||
Tb scale=Y(Tbconst)(0.), scaleint=Y(Tbconst)(0.), res=Y(Tbconst)(1.);
|
||||
@ -131,13 +131,13 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
|
||||
int did_scale=0;
|
||||
for (int i=0;i<nvec; ++i)
|
||||
{
|
||||
Tv mask = vgt(vabs(lam2->v[i]),vload(sharp_ftol));
|
||||
Tm mask = vgt(vabs(lam2->v[i]),vload(sharp_ftol));
|
||||
if (vanyTrue(mask))
|
||||
{
|
||||
did_scale=1;
|
||||
Tv fact = vblend(mask,vload(sharp_fsmall),vone);
|
||||
vmuleq(lam1->v[i],fact); vmuleq(lam2->v[i],fact);
|
||||
vaddeq(scale->v[i],vblend(mask,vone,vzero));
|
||||
vmuleq_mask(mask,lam1->v[i],vload(sharp_fsmall));
|
||||
vmuleq_mask(mask,lam2->v[i],vload(sharp_fsmall));
|
||||
vaddeq_mask(mask,scale->v[i],vone);
|
||||
}
|
||||
}
|
||||
return did_scale;
|
||||
@ -146,29 +146,29 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
|
||||
static inline int Y(TballLt)(Tb a,double b)
|
||||
{
|
||||
Tv vb=vload(b);
|
||||
Tv res=vlt(a.v[0],vb);
|
||||
Tm res=vlt(a.v[0],vb);
|
||||
for (int i=1; i<nvec; ++i)
|
||||
res=vand(res,vlt(a.v[i],vb));
|
||||
res=vand_mask(res,vlt(a.v[i],vb));
|
||||
return vallTrue(res);
|
||||
}
|
||||
static inline int Y(TballGt)(Tb a,double b)
|
||||
{
|
||||
Tv vb=vload(b);
|
||||
Tv res=vgt(a.v[0],vb);
|
||||
Tm res=vgt(a.v[0],vb);
|
||||
for (int i=1; i<nvec; ++i)
|
||||
res=vand(res,vgt(a.v[i],vb));
|
||||
res=vand_mask(res,vgt(a.v[i],vb));
|
||||
return vallTrue(res);
|
||||
}
|
||||
static inline int Y(TballGe)(Tb a,double b)
|
||||
{
|
||||
Tv vb=vload(b);
|
||||
Tv res=vge(a.v[0],vb);
|
||||
Tm res=vge(a.v[0],vb);
|
||||
for (int i=1; i<nvec; ++i)
|
||||
res=vand(res,vge(a.v[i],vb));
|
||||
res=vand_mask(res,vge(a.v[i],vb));
|
||||
return vallTrue(res);
|
||||
}
|
||||
|
||||
static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac,
|
||||
static void Y(getCorfac)(Tb scale, Tb * restrict corfac,
|
||||
const double * restrict cf)
|
||||
{
|
||||
Y(Tbu) sc, corf;
|
||||
@ -220,7 +220,7 @@ static inline void Y(rec_step) (Tb * restrict rxp, Tb * restrict rxm,
|
||||
}
|
||||
}
|
||||
|
||||
static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
|
||||
static void Y(iter_to_ieee_spin) (const Tb cth, const Tb sth, int *l_,
|
||||
Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_,
|
||||
Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen)
|
||||
{
|
||||
@ -232,6 +232,11 @@ static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
|
||||
cth2.v[i]=vmax(cth2.v[i],vload(1e-15));
|
||||
sth2.v[i]=vsqrt(vmul(vsub(vone,cth.v[i]),vload(0.5)));
|
||||
sth2.v[i]=vmax(sth2.v[i],vload(1e-15));
|
||||
Tm mask=vlt(sth.v[i],vzero);
|
||||
Tm cmask=vand_mask(mask,vlt(cth.v[i],vzero));
|
||||
vmuleq_mask(cmask,cth2.v[i],vload(-1.));
|
||||
Tm smask=vand_mask(mask,vgt(cth.v[i],vzero));
|
||||
vmuleq_mask(smask,sth2.v[i],vload(-1.));
|
||||
}
|
||||
|
||||
Tb ccp, ccps, ssp, ssps, csp, csps, scp, scps;
|
||||
|
419
external/sharp/libsharp/sharp_core_inc2.c
vendored
419
external/sharp/libsharp/sharp_core_inc2.c
vendored
@ -25,25 +25,17 @@
|
||||
/*! \file sharp_core_inc2.c
|
||||
* Type-dependent code for the computational core
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{ Y(Tbri) j[njobs]; } Z(Tbrij);
|
||||
typedef union
|
||||
{ Z(Tbrij) b; Y(Tsri) j[njobs]; } Z(Tburij);
|
||||
typedef struct
|
||||
{ Y(Tbqu) j[njobs]; } Z(Tbquj);
|
||||
typedef union
|
||||
{ Z(Tbquj) b; Y(Tsqu) j[njobs]; } Z(Tbuquj);
|
||||
|
||||
static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
|
||||
Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
static void Z(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
|
||||
Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
|
||||
int l, int lmax)
|
||||
int l, int lmax NJ1)
|
||||
{
|
||||
if (njobs>1)
|
||||
{
|
||||
#if (njobs>1)
|
||||
while (l<lmax-2)
|
||||
{
|
||||
Tb lam_3, lam_4;
|
||||
@ -64,8 +56,8 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
|
||||
ai4=vload(cimag(alm[njobs*(l+2)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaaeq(p1->j[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
|
||||
vfmaaeq(p1->j[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
|
||||
vfmaaeq(p1[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
|
||||
vfmaaeq(p1[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
|
||||
}
|
||||
Tv ar3=vload(creal(alm[njobs*(l+1)+j])),
|
||||
ai3=vload(cimag(alm[njobs*(l+1)+j])),
|
||||
@ -73,8 +65,8 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
|
||||
ai1=vload(cimag(alm[njobs*(l+3)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaaeq(p2->j[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
|
||||
vfmaaeq(p2->j[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
|
||||
vfmaaeq(p2[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
|
||||
vfmaaeq(p2[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
|
||||
}
|
||||
}
|
||||
r0=vload(rf[l+3].f[0]);r1=vload(rf[l+3].f[1]);
|
||||
@ -82,7 +74,7 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_4.v[i],r1));
|
||||
l+=4;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
@ -94,15 +86,15 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
|
||||
ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p1->j[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1->j[j].i.v[i],lam_2.v[i],ai);
|
||||
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
|
||||
}
|
||||
ar=vload(creal(alm[njobs*(l+1)+j]));
|
||||
ai=vload(cimag(alm[njobs*(l+1)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p2->j[j].r.v[i],lam_1.v[i],ar);
|
||||
vfmaeq(p2->j[j].i.v[i],lam_1.v[i],ai);
|
||||
vfmaeq(p2[j].r.v[i],lam_1.v[i],ar);
|
||||
vfmaeq(p2[j].i.v[i],lam_1.v[i],ai);
|
||||
}
|
||||
}
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
@ -117,16 +109,17 @@ static void Z(alm2map_kernel) (const Tb cth, Z(Tbrij) * restrict p1,
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p1->j[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1->j[j].i.v[i],lam_2.v[i],ai);
|
||||
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
|
||||
const Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax)
|
||||
static void Z(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
|
||||
const Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax
|
||||
NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
@ -138,13 +131,13 @@ static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
|
||||
Tv tr1=vzero, ti1=vzero, tr2=vzero, ti2=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tr1,lam_2.v[i],p1->j[j].r.v[i]);
|
||||
vfmaeq(ti1,lam_2.v[i],p1->j[j].i.v[i]);
|
||||
vfmaeq(tr1,lam_2.v[i],p1[j].r.v[i]);
|
||||
vfmaeq(ti1,lam_2.v[i],p1[j].i.v[i]);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tr2,lam_1.v[i],p2->j[j].r.v[i]);
|
||||
vfmaeq(ti2,lam_1.v[i],p2->j[j].i.v[i]);
|
||||
vfmaeq(tr2,lam_1.v[i],p2[j].r.v[i]);
|
||||
vfmaeq(ti2,lam_1.v[i],p2[j].i.v[i]);
|
||||
}
|
||||
vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]);
|
||||
}
|
||||
@ -160,8 +153,8 @@ static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tre,lam_2.v[i],p1->j[j].r.v[i]);
|
||||
vfmaeq(tim,lam_2.v[i],p1->j[j].i.v[i]);
|
||||
vfmaeq(tre,lam_2.v[i],p1[j].r.v[i]);
|
||||
vfmaeq(tim,lam_2.v[i],p1[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
@ -169,14 +162,14 @@ static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1,
|
||||
}
|
||||
|
||||
static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Z(Tbrij) * restrict p1,
|
||||
Z(Tbrij) * restrict p2, int *done)
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
|
||||
Y(Tbri) * restrict p2 NJ1)
|
||||
{
|
||||
int l,lmax=gen->lmax;
|
||||
Tb lam_1,lam_2,scale;
|
||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||
if (l>lmax) { *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||
|
||||
Tb corfac;
|
||||
@ -192,8 +185,8 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
|
||||
vfmaeq(p1->j[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p1->j[j].i.v[i],tmp,ai);
|
||||
vfmaeq(p1[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p1[j].i.v[i],tmp,ai);
|
||||
}
|
||||
}
|
||||
if (++l>lmax) break;
|
||||
@ -206,8 +199,8 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
|
||||
vfmaeq(p2->j[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p2->j[j].i.v[i],tmp,ai);
|
||||
vfmaeq(p2[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p2[j].i.v[i],tmp,ai);
|
||||
}
|
||||
}
|
||||
if (++l>lmax) break;
|
||||
@ -220,22 +213,22 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
}
|
||||
}
|
||||
if (l>lmax) { *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
|
||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||
Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax);
|
||||
Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1,
|
||||
const Z(Tbrij) * restrict p2, int *done)
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
|
||||
const Y(Tbri) * restrict p2 NJ1)
|
||||
{
|
||||
int lmax=gen->lmax;
|
||||
Tb lam_1,lam_2,scale;
|
||||
int l=gen->m;
|
||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||
if (l>lmax) { *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
||||
@ -251,12 +244,12 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
|
||||
vfmaeq(tre,tmp,p1->j[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p1->j[j].i.v[i]);
|
||||
vfmaeq(tre,tmp,p1[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p1[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
if (++l>lmax) return;
|
||||
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
@ -266,12 +259,12 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
|
||||
vfmaeq(tre,tmp,p2->j[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p2->j[j].i.v[i]);
|
||||
vfmaeq(tre,tmp,p2[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p2[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
if (++l>lmax) return;
|
||||
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
@ -283,11 +276,11 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||
}
|
||||
|
||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||
Z(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax);
|
||||
Z(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
|
||||
}
|
||||
|
||||
static inline void Z(saddstep) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm)
|
||||
static inline void Z(saddstep) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
@ -296,25 +289,25 @@ static inline void Z(saddstep) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px->j[j].qr.v[i],agr,lw);
|
||||
vfmaeq(px->j[j].qi.v[i],agi,lw);
|
||||
vfmaeq(px->j[j].ur.v[i],acr,lw);
|
||||
vfmaeq(px->j[j].ui.v[i],aci,lw);
|
||||
vfmaeq(px[j].qr.v[i],agr,lw);
|
||||
vfmaeq(px[j].qi.v[i],agi,lw);
|
||||
vfmaeq(px[j].ur.v[i],acr,lw);
|
||||
vfmaeq(px[j].ui.v[i],aci,lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmseq(py->j[j].qr.v[i],aci,lx);
|
||||
vfmaeq(py->j[j].qi.v[i],acr,lx);
|
||||
vfmaeq(py->j[j].ur.v[i],agi,lx);
|
||||
vfmseq(py->j[j].ui.v[i],agr,lx);
|
||||
vfmseq(py[j].qr.v[i],aci,lx);
|
||||
vfmaeq(py[j].qi.v[i],acr,lx);
|
||||
vfmaeq(py[j].ur.v[i],agi,lx);
|
||||
vfmseq(py[j].ui.v[i],agr,lx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Z(saddstepb) (Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2,
|
||||
static inline void Z(saddstepb) (Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2,
|
||||
const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
|
||||
const dcmplx * restrict alm1, const dcmplx * restrict alm2)
|
||||
const dcmplx * restrict alm1, const dcmplx * restrict alm2 NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
@ -326,26 +319,26 @@ static inline void Z(saddstepb) (Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2,
|
||||
{
|
||||
Tv lw1=vadd(r2p.v[i],r2m.v[i]);
|
||||
Tv lx2=vsub(r1m.v[i],r1p.v[i]);
|
||||
vfmaseq(p1->j[j].qr.v[i],agr1,lw1,aci2,lx2);
|
||||
vfmaaeq(p1->j[j].qi.v[i],agi1,lw1,acr2,lx2);
|
||||
vfmaaeq(p1->j[j].ur.v[i],acr1,lw1,agi2,lx2);
|
||||
vfmaseq(p1->j[j].ui.v[i],aci1,lw1,agr2,lx2);
|
||||
vfmaseq(p1[j].qr.v[i],agr1,lw1,aci2,lx2);
|
||||
vfmaaeq(p1[j].qi.v[i],agi1,lw1,acr2,lx2);
|
||||
vfmaaeq(p1[j].ur.v[i],acr1,lw1,agi2,lx2);
|
||||
vfmaseq(p1[j].ui.v[i],aci1,lw1,agr2,lx2);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx1=vsub(r2m.v[i],r2p.v[i]);
|
||||
Tv lw2=vadd(r1p.v[i],r1m.v[i]);
|
||||
vfmaseq(p2->j[j].qr.v[i],agr2,lw2,aci1,lx1);
|
||||
vfmaaeq(p2->j[j].qi.v[i],agi2,lw2,acr1,lx1);
|
||||
vfmaaeq(p2->j[j].ur.v[i],acr2,lw2,agi1,lx1);
|
||||
vfmaseq(p2->j[j].ui.v[i],aci2,lw2,agr1,lx1);
|
||||
vfmaseq(p2[j].qr.v[i],agr2,lw2,aci1,lx1);
|
||||
vfmaaeq(p2[j].qi.v[i],agi2,lw2,acr1,lx1);
|
||||
vfmaaeq(p2[j].ur.v[i],acr2,lw2,agi1,lx1);
|
||||
vfmaseq(p2[j].ui.v[i],aci2,lw2,agr1,lx1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Z(saddstep2) (const Z(Tbquj) * restrict px,
|
||||
const Z(Tbquj) * restrict py, const Tb * restrict rxp,
|
||||
const Tb * restrict rxm, dcmplx * restrict alm)
|
||||
static inline void Z(saddstep2) (const Y(Tbqu) * restrict px,
|
||||
const Y(Tbqu) * restrict py, const Tb * restrict rxp,
|
||||
const Tb * restrict rxm, dcmplx * restrict alm NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
@ -353,27 +346,27 @@ static inline void Z(saddstep2) (const Z(Tbquj) * restrict px,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp->v[i],rxm->v[i]);
|
||||
vfmaeq(agr,px->j[j].qr.v[i],lw);
|
||||
vfmaeq(agi,px->j[j].qi.v[i],lw);
|
||||
vfmaeq(acr,px->j[j].ur.v[i],lw);
|
||||
vfmaeq(aci,px->j[j].ui.v[i],lw);
|
||||
vfmaeq(agr,px[j].qr.v[i],lw);
|
||||
vfmaeq(agi,px[j].qi.v[i],lw);
|
||||
vfmaeq(acr,px[j].ur.v[i],lw);
|
||||
vfmaeq(aci,px[j].ui.v[i],lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm->v[i],rxp->v[i]);
|
||||
vfmseq(agr,py->j[j].ui.v[i],lx);
|
||||
vfmaeq(agi,py->j[j].ur.v[i],lx);
|
||||
vfmaeq(acr,py->j[j].qi.v[i],lx);
|
||||
vfmseq(aci,py->j[j].qr.v[i],lx);
|
||||
vfmseq(agr,py[j].ui.v[i],lx);
|
||||
vfmaeq(agi,py[j].ur.v[i],lx);
|
||||
vfmaeq(acr,py[j].qi.v[i],lx);
|
||||
vfmseq(aci,py[j].qr.v[i],lx);
|
||||
}
|
||||
vhsum_cmplx2(agr,agi,acr,aci,&alm[2*j],&alm[2*j+1]);
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
|
||||
Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
static void Z(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax)
|
||||
int lmax NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
@ -386,13 +379,8 @@ static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
#if (njobs>1)
|
||||
Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
|
||||
&alm[2*njobs*(l+1)]);
|
||||
#else
|
||||
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l]);
|
||||
Z(saddstep)(p2, p1, rec1p, rec1m, &alm[2*njobs*(l+1)]);
|
||||
#endif
|
||||
&alm[2*njobs*(l+1)] NJ2);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
@ -405,12 +393,13 @@ static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1,
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l]);
|
||||
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l] NJ2);
|
||||
}
|
||||
|
||||
static void Z(map2alm_spin_kernel) (Tb cth, const Z(Tbquj) * restrict p1,
|
||||
const Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax)
|
||||
static void Z(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
|
||||
const Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax
|
||||
NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
@ -423,8 +412,8 @@ static void Z(map2alm_spin_kernel) (Tb cth, const Z(Tbquj) * restrict p1,
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l]);
|
||||
Z(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)]);
|
||||
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
|
||||
Z(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)] NJ2);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
@ -437,18 +426,19 @@ static void Z(map2alm_spin_kernel) (Tb cth, const Z(Tbquj) * restrict p1,
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l]);
|
||||
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
sharp_job *job, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, int *done)
|
||||
static void Z(calc_alm2map_spin) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2 NJ1)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
Y(iter_to_ieee_spin)
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
@ -460,12 +450,12 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Z(saddstep)(p1, p2,
|
||||
Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), &alm[2*njobs*l]);
|
||||
Z(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
|
||||
&alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
Z(saddstep)(p2, p1,
|
||||
Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm), &alm[2*njobs*l]);
|
||||
Z(saddstep)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
|
||||
&alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
@ -477,24 +467,24 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Z(alm2map_spin_kernel) (cth,p1,p2,
|
||||
rec1p, rec1m, rec2p, rec2m, fx, alm, l, lmax);
|
||||
Z(alm2map_spin_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
||||
sharp_job *job, const Z(Tbquj) * restrict p1, const Z(Tbquj) * restrict p2,
|
||||
int *done)
|
||||
static void Z(calc_map2alm_spin) (Tb cth, Tb sth,
|
||||
const sharp_Ylmgen_C * restrict gen, sharp_job *job,
|
||||
const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2 NJ1)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
Y(iter_to_ieee_spin)
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) { *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
@ -507,12 +497,12 @@ static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
||||
while (!full_ieee)
|
||||
{
|
||||
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
|
||||
Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l]);
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) return;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
|
||||
Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l]);
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) return;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
@ -525,12 +515,11 @@ static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Z(map2alm_spin_kernel) (cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax);
|
||||
Z(map2alm_spin_kernel)(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax NJ2);
|
||||
}
|
||||
|
||||
static inline void Z(saddstep_d) (Z(Tbquj) * restrict px,
|
||||
Z(Tbquj) * restrict py, const Tb rxp, const Tb rxm,
|
||||
const dcmplx * restrict alm)
|
||||
static inline void Z(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
@ -538,22 +527,22 @@ static inline void Z(saddstep_d) (Z(Tbquj) * restrict px,
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px->j[j].qr.v[i],ar,lw);
|
||||
vfmaeq(px->j[j].qi.v[i],ai,lw);
|
||||
vfmaeq(px[j].qr.v[i],ar,lw);
|
||||
vfmaeq(px[j].qi.v[i],ai,lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmaeq(py->j[j].ur.v[i],ai,lx);
|
||||
vfmseq(py->j[j].ui.v[i],ar,lx);
|
||||
vfmaeq(py[j].ur.v[i],ai,lx);
|
||||
vfmseq(py[j].ui.v[i],ar,lx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1,
|
||||
Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
static void Z(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax)
|
||||
int lmax NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
@ -566,8 +555,8 @@ static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1,
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Z(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l]);
|
||||
Z(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)]);
|
||||
Z(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l] NJ2);
|
||||
Z(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)] NJ2);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
@ -580,18 +569,19 @@ static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1,
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l]);
|
||||
Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l] NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
sharp_job *job, Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, int *done)
|
||||
static void Z(calc_alm2map_deriv1) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2 NJ1)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
Y(iter_to_ieee_spin)
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
@ -604,11 +594,11 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
while (!full_ieee)
|
||||
{
|
||||
Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
|
||||
&alm[njobs*l]);
|
||||
&alm[njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
Z(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
|
||||
&alm[njobs*l]);
|
||||
&alm[njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
@ -620,20 +610,20 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
if (l>lmax) return;
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Z(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax);
|
||||
lmax NJ2);
|
||||
}
|
||||
|
||||
|
||||
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
||||
|
||||
static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||
const double *cth_, const double *sth_, int llim, int ulim,
|
||||
sharp_Ylmgen_C *gen, int mi, const int *idx)
|
||||
sharp_Ylmgen_C *gen, int mi, const int *mlim NJ1)
|
||||
{
|
||||
const int nval=nvec*VLEN;
|
||||
const int m = job->ainfo->mval[mi];
|
||||
@ -646,35 +636,32 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||
{
|
||||
if (job->spin==0)
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Z(Tburij) p1,p2; VZERO(p1); VZERO(p2);
|
||||
if (!done)
|
||||
{
|
||||
Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
|
||||
int skip=1;
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
}
|
||||
Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1.b,&p2.b,&done);
|
||||
}
|
||||
if (!skip)
|
||||
Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
itot=idx[itot];
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
complex double r1 = p1.j[j].r[i] + p1.j[j].i[i]*_Complex_I,
|
||||
r2 = p2.j[j].r[i] + p2.j[j].i[i]*_Complex_I;
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 2*j;
|
||||
complex double r1 = p1[j].s.r[i] + p1[j].s.i[i]*_Complex_I,
|
||||
r2 = p2[j].s.r[i] + p2[j].s.i[i]*_Complex_I;
|
||||
job->phase[phas_idx] = r1+r2;
|
||||
if (ispair[itot])
|
||||
job->phase[phas_idx+1] = r1-r2;
|
||||
@ -685,39 +672,38 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||
}
|
||||
else
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Z(Tbuquj) p1,p2; VZERO(p1); VZERO(p2);
|
||||
if (!done)
|
||||
{
|
||||
Y(Tbu) cth;
|
||||
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
cth.s[i]=cth_[itot];
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
}
|
||||
if (!skip)
|
||||
(job->type==SHARP_ALM2MAP) ?
|
||||
Z(calc_alm2map_spin ) (cth.b,gen,job,&p1.b,&p2.b,&done) :
|
||||
Z(calc_alm2map_deriv1) (cth.b,gen,job,&p1.b,&p2.b,&done);
|
||||
}
|
||||
Z(calc_alm2map_spin )
|
||||
(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2) :
|
||||
Z(calc_alm2map_deriv1)
|
||||
(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
itot=idx[itot];
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
complex double q1 = p1.j[j].qr[i] + p1.j[j].qi[i]*_Complex_I,
|
||||
q2 = p2.j[j].qr[i] + p2.j[j].qi[i]*_Complex_I,
|
||||
u1 = p1.j[j].ur[i] + p1.j[j].ui[i]*_Complex_I,
|
||||
u2 = p2.j[j].ur[i] + p2.j[j].ui[i]*_Complex_I;
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 4*j;
|
||||
complex double q1 = p1[j].s.qr[i] + p1[j].s.qi[i]*_Complex_I,
|
||||
q2 = p2[j].s.qr[i] + p2[j].s.qi[i]*_Complex_I,
|
||||
u1 = p1[j].s.ur[i] + p1[j].s.ui[i]*_Complex_I,
|
||||
u2 = p2[j].s.ur[i] + p2[j].s.ui[i]*_Complex_I;
|
||||
job->phase[phas_idx] = q1+q2;
|
||||
job->phase[phas_idx+2] = u1+u2;
|
||||
if (ispair[itot])
|
||||
@ -740,70 +726,77 @@ static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||
{
|
||||
if (job->spin==0)
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Z(Tburij) p1, p2; VZERO(p1); VZERO(p2);
|
||||
Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
if ((i+ith<ulim-llim)&&(mlim[itot]>=m))
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 2*j;
|
||||
dcmplx ph1=job->phase[phas_idx];
|
||||
dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
|
||||
p1[j].s.r[i]=creal(ph1+ph2); p1[j].s.i[i]=cimag(ph1+ph2);
|
||||
p2[j].s.r[i]=creal(ph1-ph2); p2[j].s.i[i]=cimag(ph1-ph2);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!skip)
|
||||
Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
if (i+ith<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
dcmplx ph1=job->phase[phas_idx];
|
||||
dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
|
||||
p1.j[j].r[i]=creal(ph1+ph2); p1.j[j].i[i]=cimag(ph1+ph2);
|
||||
p2.j[j].r[i]=creal(ph1-ph2); p2.j[j].i[i]=cimag(ph1-ph2);
|
||||
}
|
||||
}
|
||||
}
|
||||
Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1.b,&p2.b,&done);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
|
||||
{
|
||||
Z(Tbuquj) p1, p2; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
cth.s[i]=cth_[itot];
|
||||
if (i+ith<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 4*j;
|
||||
dcmplx p1Q=job->phase[phas_idx],
|
||||
p1U=job->phase[phas_idx+2],
|
||||
p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
|
||||
p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ p2Q=-p2Q; p2U=-p2U; }
|
||||
p1.j[j].qr[i]=creal(p1Q+p2Q); p1.j[j].qi[i]=cimag(p1Q+p2Q);
|
||||
p1.j[j].ur[i]=creal(p1U+p2U); p1.j[j].ui[i]=cimag(p1U+p2U);
|
||||
p2.j[j].qr[i]=creal(p1Q-p2Q); p2.j[j].qi[i]=cimag(p1Q-p2Q);
|
||||
p2.j[j].ur[i]=creal(p1U-p2U); p2.j[j].ui[i]=cimag(p1U-p2U);
|
||||
p1[j].s.qr[i]=creal(p1Q+p2Q); p1[j].s.qi[i]=cimag(p1Q+p2Q);
|
||||
p1[j].s.ur[i]=creal(p1U+p2U); p1[j].s.ui[i]=cimag(p1U+p2U);
|
||||
p2[j].s.qr[i]=creal(p1Q-p2Q); p2[j].s.qi[i]=cimag(p1Q-p2Q);
|
||||
p2[j].s.ur[i]=creal(p1U-p2U); p2[j].s.ui[i]=cimag(p1U-p2U);
|
||||
}
|
||||
}
|
||||
}
|
||||
Z(calc_map2alm_spin) (cth.b,gen,job,&p1.b,&p2.b,&done);
|
||||
if (!skip)
|
||||
Z(calc_map2alm_spin) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
UTIL_FAIL("must not happen");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
800
external/sharp/libsharp/sharp_core_inc3.c
vendored
800
external/sharp/libsharp/sharp_core_inc3.c
vendored
@ -1,800 +0,0 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_core_inc3.c
|
||||
* Type-dependent code for the computational core
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
static void Y(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
|
||||
Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
|
||||
int l, int lmax, int njobs)
|
||||
{
|
||||
while (l<lmax-2)
|
||||
{
|
||||
Tb lam_3, lam_4;
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_3.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_4.v[i] = vsub(vmul(vmul(cth.v[i],lam_3.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
r0=vload(rf[l+2].f[0]);r1=vload(rf[l+2].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_4.v[i]),r0),vmul(lam_3.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar2=vload(creal(alm[njobs*l+j])),
|
||||
ai2=vload(cimag(alm[njobs*l+j])),
|
||||
ar4=vload(creal(alm[njobs*(l+2)+j])),
|
||||
ai4=vload(cimag(alm[njobs*(l+2)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaaeq(p1[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
|
||||
vfmaaeq(p1[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
|
||||
}
|
||||
Tv ar3=vload(creal(alm[njobs*(l+1)+j])),
|
||||
ai3=vload(cimag(alm[njobs*(l+1)+j])),
|
||||
ar1=vload(creal(alm[njobs*(l+3)+j])),
|
||||
ai1=vload(cimag(alm[njobs*(l+3)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaaeq(p2[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
|
||||
vfmaaeq(p2[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
|
||||
}
|
||||
}
|
||||
r0=vload(rf[l+3].f[0]);r1=vload(rf[l+3].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_4.v[i],r1));
|
||||
l+=4;
|
||||
}
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),
|
||||
ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
|
||||
}
|
||||
ar=vload(creal(alm[njobs*(l+1)+j]));
|
||||
ai=vload(cimag(alm[njobs*(l+1)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p2[j].r.v[i],lam_1.v[i],ar);
|
||||
vfmaeq(p2[j].i.v[i],lam_1.v[i],ai);
|
||||
}
|
||||
}
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Y(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
|
||||
const Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax,
|
||||
int njobs)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tr1=vzero, ti1=vzero, tr2=vzero, ti2=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tr1,lam_2.v[i],p1[j].r.v[i]);
|
||||
vfmaeq(ti1,lam_2.v[i],p1[j].i.v[i]);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tr2,lam_1.v[i],p2[j].r.v[i]);
|
||||
vfmaeq(ti2,lam_1.v[i],p2[j].i.v[i]);
|
||||
}
|
||||
vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]);
|
||||
}
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tre,lam_2.v[i],p1[j].r.v[i]);
|
||||
vfmaeq(tim,lam_2.v[i],p1[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Y(calc_alm2map) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
|
||||
Y(Tbri) * restrict p2, int njobs, int *done)
|
||||
{
|
||||
int l,lmax=gen->lmax;
|
||||
Tb lam_1,lam_2,scale;
|
||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||
if (l>lmax) { *done=1; return; }
|
||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||
|
||||
Tb corfac;
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
|
||||
vfmaeq(p1[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p1[j].i.v[i],tmp,ai);
|
||||
}
|
||||
}
|
||||
if (++l>lmax) break;
|
||||
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
|
||||
vfmaeq(p2[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p2[j].i.v[i],tmp,ai);
|
||||
}
|
||||
}
|
||||
if (++l>lmax) break;
|
||||
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
if (Y(rescale)(&lam_1,&lam_2,&scale))
|
||||
{
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
}
|
||||
}
|
||||
if (l>lmax) { *done=1; return; }
|
||||
|
||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||
Y(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax, njobs);
|
||||
}
|
||||
|
||||
static void Y(calc_map2alm) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
|
||||
const Y(Tbri) * restrict p2, int njobs, int *done)
|
||||
{
|
||||
int lmax=gen->lmax;
|
||||
Tb lam_1,lam_2,scale;
|
||||
int l=gen->m;
|
||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||
if (l>lmax) { *done=1; return; }
|
||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
||||
Tb corfac;
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
|
||||
vfmaeq(tre,tmp,p1[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p1[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
|
||||
vfmaeq(tre,tmp,p2[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p2[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
if (Y(rescale)(&lam_1,&lam_2,&scale))
|
||||
{
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||
Y(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax, njobs);
|
||||
}
|
||||
|
||||
static inline void Y(saddstep) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm, int njobs)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv agr=vload(creal(alm[2*j])), agi=vload(cimag(alm[2*j])),
|
||||
acr=vload(creal(alm[2*j+1])), aci=vload(cimag(alm[2*j+1]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px[j].qr.v[i],agr,lw);
|
||||
vfmaeq(px[j].qi.v[i],agi,lw);
|
||||
vfmaeq(px[j].ur.v[i],acr,lw);
|
||||
vfmaeq(px[j].ui.v[i],aci,lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmseq(py[j].qr.v[i],aci,lx);
|
||||
vfmaeq(py[j].qi.v[i],acr,lx);
|
||||
vfmaeq(py[j].ur.v[i],agi,lx);
|
||||
vfmseq(py[j].ui.v[i],agr,lx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Y(saddstepb) (Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2,
|
||||
const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
|
||||
const dcmplx * restrict alm1, const dcmplx * restrict alm2, int njobs)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv agr1=vload(creal(alm1[2*j])), agi1=vload(cimag(alm1[2*j])),
|
||||
acr1=vload(creal(alm1[2*j+1])), aci1=vload(cimag(alm1[2*j+1]));
|
||||
Tv agr2=vload(creal(alm2[2*j])), agi2=vload(cimag(alm2[2*j])),
|
||||
acr2=vload(creal(alm2[2*j+1])), aci2=vload(cimag(alm2[2*j+1]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw1=vadd(r2p.v[i],r2m.v[i]);
|
||||
Tv lx2=vsub(r1m.v[i],r1p.v[i]);
|
||||
vfmaseq(p1[j].qr.v[i],agr1,lw1,aci2,lx2);
|
||||
vfmaaeq(p1[j].qi.v[i],agi1,lw1,acr2,lx2);
|
||||
vfmaaeq(p1[j].ur.v[i],acr1,lw1,agi2,lx2);
|
||||
vfmaseq(p1[j].ui.v[i],aci1,lw1,agr2,lx2);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx1=vsub(r2m.v[i],r2p.v[i]);
|
||||
Tv lw2=vadd(r1p.v[i],r1m.v[i]);
|
||||
vfmaseq(p2[j].qr.v[i],agr2,lw2,aci1,lx1);
|
||||
vfmaaeq(p2[j].qi.v[i],agi2,lw2,acr1,lx1);
|
||||
vfmaaeq(p2[j].ur.v[i],acr2,lw2,agi1,lx1);
|
||||
vfmaseq(p2[j].ui.v[i],aci2,lw2,agr1,lx1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Y(saddstep2) (const Y(Tbqu) * restrict px,
|
||||
const Y(Tbqu) * restrict py, const Tb * restrict rxp,
|
||||
const Tb * restrict rxm, dcmplx * restrict alm, int njobs)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv agr=vzero, agi=vzero, acr=vzero, aci=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp->v[i],rxm->v[i]);
|
||||
vfmaeq(agr,px[j].qr.v[i],lw);
|
||||
vfmaeq(agi,px[j].qi.v[i],lw);
|
||||
vfmaeq(acr,px[j].ur.v[i],lw);
|
||||
vfmaeq(aci,px[j].ui.v[i],lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm->v[i],rxp->v[i]);
|
||||
vfmseq(agr,py[j].ui.v[i],lx);
|
||||
vfmaeq(agi,py[j].ur.v[i],lx);
|
||||
vfmaeq(acr,py[j].qi.v[i],lx);
|
||||
vfmseq(aci,py[j].qr.v[i],lx);
|
||||
}
|
||||
vhsum_cmplx2(agr,agi,acr,aci,&alm[2*j],&alm[2*j+1]);
|
||||
}
|
||||
}
|
||||
|
||||
static void Y(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax, int njobs)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Y(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
|
||||
&alm[2*njobs*(l+1)], njobs);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Y(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l], njobs);
|
||||
}
|
||||
|
||||
static void Y(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
|
||||
const Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax,
|
||||
int njobs)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Y(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l],njobs);
|
||||
Y(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)],njobs);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Y(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l], njobs);
|
||||
}
|
||||
|
||||
static void Y(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, int njobs,
|
||||
int *done)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
|
||||
&alm[2*njobs*l],njobs);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
Y(saddstep)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
|
||||
&alm[2*njobs*l], njobs);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Y(alm2map_spin_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax, njobs);
|
||||
}
|
||||
|
||||
static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
|
||||
sharp_job *job, const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2,
|
||||
int njobs, int *done)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) { *done=1; return; }
|
||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
|
||||
Y(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l], njobs);
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
|
||||
Y(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l], njobs);
|
||||
if (++l>lmax) { *done=1; return; }
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Y(map2alm_spin_kernel)(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax,njobs);
|
||||
}
|
||||
|
||||
static inline void Y(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm, int njobs)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[j])), ai=vload(cimag(alm[j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px[j].qr.v[i],ar,lw);
|
||||
vfmaeq(px[j].qi.v[i],ai,lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmaeq(py[j].ur.v[i],ai,lx);
|
||||
vfmseq(py[j].ui.v[i],ar,lx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Y(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax, int njobs)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Y(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l],njobs);
|
||||
Y(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)],njobs);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Y(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l], njobs);
|
||||
}
|
||||
|
||||
static void Y(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
|
||||
sharp_job *job, Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2, int njobs,
|
||||
int *done)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
|
||||
&alm[njobs*l],njobs);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
Y(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
|
||||
&alm[njobs*l], njobs);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax)
|
||||
{ *done=1; return; }
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Y(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax, njobs);
|
||||
}
|
||||
|
||||
|
||||
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
||||
|
||||
static void Y(inner_loop) (sharp_job *job, const int *ispair,
|
||||
const double *cth_, const double *sth_, int llim, int ulim,
|
||||
sharp_Ylmgen_C *gen, int mi, const int *idx, int njobs)
|
||||
{
|
||||
const int nval=nvec*VLEN;
|
||||
const int m = job->ainfo->mval[mi];
|
||||
sharp_Ylmgen_prepare (gen, m);
|
||||
|
||||
switch (job->type)
|
||||
{
|
||||
case SHARP_ALM2MAP:
|
||||
case SHARP_ALM2MAP_DERIV1:
|
||||
{
|
||||
if (job->spin==0)
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
if (!done)
|
||||
{
|
||||
Y(Tbu) cth, sth;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
}
|
||||
Y(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
|
||||
}
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
itot=idx[itot];
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
complex double r1 = p1[j].s.r[i] + p1[j].s.i[i]*_Complex_I,
|
||||
r2 = p2[j].s.r[i] + p2[j].s.i[i]*_Complex_I;
|
||||
job->phase[phas_idx] = r1+r2;
|
||||
if (ispair[itot])
|
||||
job->phase[phas_idx+1] = r1-r2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
if (!done)
|
||||
{
|
||||
Y(Tbu) cth;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
cth.s[i]=cth_[itot];
|
||||
}
|
||||
(job->type==SHARP_ALM2MAP) ?
|
||||
Y(calc_alm2map_spin )
|
||||
(cth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done) :
|
||||
Y(calc_alm2map_deriv1)
|
||||
(cth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
|
||||
}
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
itot=idx[itot];
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
complex double q1 = p1[j].s.qr[i] + p1[j].s.qi[i]*_Complex_I,
|
||||
q2 = p2[j].s.qr[i] + p2[j].s.qi[i]*_Complex_I,
|
||||
u1 = p1[j].s.ur[i] + p1[j].s.ui[i]*_Complex_I,
|
||||
u2 = p2[j].s.ur[i] + p2[j].s.ui[i]*_Complex_I;
|
||||
job->phase[phas_idx] = q1+q2;
|
||||
job->phase[phas_idx+2] = u1+u2;
|
||||
if (ispair[itot])
|
||||
{
|
||||
dcmplx *phQ = &(job->phase[phas_idx+1]),
|
||||
*phU = &(job->phase[phas_idx+3]);
|
||||
*phQ = q1-q2;
|
||||
*phU = u1-u2;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ *phQ=-(*phQ); *phU=-(*phU); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SHARP_MAP2ALM:
|
||||
{
|
||||
if (job->spin==0)
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
|
||||
{
|
||||
Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
if (i+ith<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 2*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
dcmplx ph1=job->phase[phas_idx];
|
||||
dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
|
||||
p1[j].s.r[i]=creal(ph1+ph2); p1[j].s.i[i]=cimag(ph1+ph2);
|
||||
p2[j].s.r[i]=creal(ph1-ph2); p2[j].s.i[i]=cimag(ph1-ph2);
|
||||
}
|
||||
}
|
||||
}
|
||||
Y(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int done=0;
|
||||
for (int ith=0; (ith<ulim-llim)&&(!done); ith+=nval)
|
||||
{
|
||||
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
itot=idx[itot];
|
||||
cth.s[i]=cth_[itot];
|
||||
if (i+ith<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = 4*(j+njobs*(itot*job->ainfo->nm+mi));
|
||||
dcmplx p1Q=job->phase[phas_idx],
|
||||
p1U=job->phase[phas_idx+2],
|
||||
p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
|
||||
p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ p2Q=-p2Q; p2U=-p2U; }
|
||||
p1[j].s.qr[i]=creal(p1Q+p2Q); p1[j].s.qi[i]=cimag(p1Q+p2Q);
|
||||
p1[j].s.ur[i]=creal(p1U+p2U); p1[j].s.ui[i]=cimag(p1U+p2U);
|
||||
p2[j].s.qr[i]=creal(p1Q-p2Q); p2[j].s.qi[i]=cimag(p1Q-p2Q);
|
||||
p2[j].s.ur[i]=creal(p1U-p2U); p2[j].s.ui[i]=cimag(p1U-p2U);
|
||||
}
|
||||
}
|
||||
}
|
||||
Y(calc_map2alm_spin) (cth.b,gen,job,&p1[0].b,&p2[0].b,njobs,&done);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef VZERO
|
@ -1,11 +1,21 @@
|
||||
#define Tb CONCAT2(Tb,nvec)
|
||||
#define Y(arg) CONCAT2(arg,nvec)
|
||||
#include "sharp_core_inc.c"
|
||||
#if (MAXJOB_SPECIAL<6)
|
||||
#include "sharp_core_inc3.c"
|
||||
|
||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
#define NJ1 , int njobs
|
||||
#define NJ2 , njobs
|
||||
#define Z(arg) CONCAT2(arg,nvec)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef NJ1
|
||||
#undef NJ2
|
||||
#endif
|
||||
|
||||
#if (MAXJOB_SPECIAL>=1)
|
||||
#define NJ1
|
||||
#define NJ2
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
#define njobs 1
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
@ -13,7 +23,7 @@
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if (MAXJOB_SPECIAL>=2)
|
||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||
#define njobs 2
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
@ -21,7 +31,7 @@
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if (MAXJOB_SPECIAL>=3)
|
||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||
#define njobs 3
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
@ -29,7 +39,7 @@
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if (MAXJOB_SPECIAL>=4)
|
||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||
#define njobs 4
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
@ -37,7 +47,7 @@
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if (MAXJOB_SPECIAL>=5)
|
||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||
#define njobs 5
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
@ -45,7 +55,7 @@
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if (MAXJOB_SPECIAL>=6)
|
||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||
#define njobs 6
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
@ -53,5 +63,8 @@
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#undef NJ1
|
||||
#undef NJ2
|
||||
|
||||
#undef Y
|
||||
#undef Tb
|
78
external/sharp/libsharp/sharp_cxx.h
vendored
78
external/sharp/libsharp/sharp_cxx.h
vendored
@ -25,7 +25,7 @@
|
||||
/*! \file sharp_cxx.h
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012-2015 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -35,7 +35,6 @@
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "xcomplex.h"
|
||||
|
||||
class sharp_base
|
||||
{
|
||||
@ -54,32 +53,50 @@ class sharp_base
|
||||
|
||||
void set_general_geometry (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
const int *stride, const double *phi0, const double *theta,
|
||||
const double *weight)
|
||||
const double *wgt)
|
||||
{
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride, phi0, theta, weight,
|
||||
&ginfo);
|
||||
if (ginfo) sharp_destroy_geom_info(ginfo);
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride, phi0, theta, wgt, &ginfo);
|
||||
}
|
||||
|
||||
void set_ECP_geometry (int nrings, int nphi)
|
||||
{ sharp_make_ecp_geom_info (nrings, nphi, 0., 1, nphi, &ginfo); }
|
||||
{
|
||||
if (ginfo) sharp_destroy_geom_info(ginfo);
|
||||
sharp_make_ecp_geom_info (nrings, nphi, 0., 1, nphi, &ginfo);
|
||||
}
|
||||
|
||||
void set_Gauss_geometry (int nrings, int nphi)
|
||||
{ sharp_make_gauss_geom_info (nrings, nphi, 1, nphi, &ginfo); }
|
||||
{
|
||||
if (ginfo) sharp_destroy_geom_info(ginfo);
|
||||
sharp_make_gauss_geom_info (nrings, nphi, 0., 1, nphi, &ginfo);
|
||||
}
|
||||
|
||||
void set_Healpix_geometry (int nside)
|
||||
{ sharp_make_healpix_geom_info (nside, 1, &ginfo); }
|
||||
{
|
||||
if (ginfo) sharp_destroy_geom_info(ginfo);
|
||||
sharp_make_healpix_geom_info (nside, 1, &ginfo);
|
||||
}
|
||||
|
||||
void set_weighted_Healpix_geometry (int nside, const double *weight)
|
||||
{ sharp_make_weighted_healpix_geom_info (nside, 1, weight, &ginfo); }
|
||||
{
|
||||
if (ginfo) sharp_destroy_geom_info(ginfo);
|
||||
sharp_make_weighted_healpix_geom_info (nside, 1, weight, &ginfo);
|
||||
}
|
||||
|
||||
void set_triangular_alm_info (int lmax, int mmax)
|
||||
{ sharp_make_triangular_alm_info (lmax, mmax, 1, &ainfo); }
|
||||
{
|
||||
if (ainfo) sharp_destroy_alm_info(ainfo);
|
||||
sharp_make_triangular_alm_info (lmax, mmax, 1, &ainfo);
|
||||
}
|
||||
|
||||
const sharp_geom_info* get_geom_info() const { return ginfo; }
|
||||
const sharp_alm_info* get_alm_info() const { return ainfo; }
|
||||
};
|
||||
|
||||
template<typename T> struct cxxjobhelper__ {};
|
||||
|
||||
template<> struct cxxjobhelper__<double>
|
||||
{ enum {val=1}; };
|
||||
{ enum {val=SHARP_DP}; };
|
||||
|
||||
template<> struct cxxjobhelper__<float>
|
||||
{ enum {val=0}; };
|
||||
@ -88,52 +105,49 @@ template<> struct cxxjobhelper__<float>
|
||||
template<typename T> class sharp_cxxjob: public sharp_base
|
||||
{
|
||||
private:
|
||||
static void *conv (xcomplex<T> *ptr)
|
||||
{ return reinterpret_cast<void *>(ptr); }
|
||||
static void *conv (const xcomplex<T> *ptr)
|
||||
{ return const_cast<void *>(reinterpret_cast<const void *>(ptr)); }
|
||||
static void *conv (T *ptr)
|
||||
{ return reinterpret_cast<void *>(ptr); }
|
||||
static void *conv (const T *ptr)
|
||||
{ return const_cast<void *>(reinterpret_cast<const void *>(ptr)); }
|
||||
|
||||
public:
|
||||
void alm2map (const xcomplex<T> *alm, T *map, bool add)
|
||||
void alm2map (const T *alm, T *map, bool add)
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
sharp_execute (SHARP_ALM2MAP, 0, add, &aptr, &mptr, ginfo, ainfo, 1,
|
||||
cxxjobhelper__<T>::val,0,0,0);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP, 0, &aptr, &mptr, ginfo, ainfo, 1,
|
||||
flags,0,0);
|
||||
}
|
||||
void alm2map_spin (const xcomplex<T> *alm1, const xcomplex<T> *alm2,
|
||||
T *map1, T *map2, int spin, bool add)
|
||||
void alm2map_spin (const T *alm1, const T *alm2, T *map1, T *map2,
|
||||
int spin, bool add)
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
sharp_execute (SHARP_ALM2MAP, spin, add, aptr, mptr, ginfo, ainfo, 1,
|
||||
cxxjobhelper__<T>::val,0,0,0);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP,spin,aptr,mptr,ginfo,ainfo,1,flags,0,0);
|
||||
}
|
||||
void alm2map_der1 (const xcomplex<T> *alm, T *map1, T *map2, bool add)
|
||||
void alm2map_der1 (const T *alm, T *map1, T *map2, bool add)
|
||||
{
|
||||
void *aptr=conv(alm), *mptr[2];
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
sharp_execute (SHARP_ALM2MAP_DERIV1, 1, add,&aptr, mptr, ginfo, ainfo,
|
||||
1, cxxjobhelper__<T>::val,0,0,0);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP_DERIV1,1,&aptr,mptr,ginfo,ainfo,1,flags,0,0);
|
||||
}
|
||||
void map2alm (const T *map, xcomplex<T> *alm, bool add)
|
||||
void map2alm (const T *map, T *alm, bool add)
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
sharp_execute (SHARP_MAP2ALM, 0, add, &aptr, &mptr, ginfo, ainfo, 1,
|
||||
cxxjobhelper__<T>::val,0,0,0);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,0,&aptr,&mptr,ginfo,ainfo,1,flags,0,0);
|
||||
}
|
||||
void map2alm_spin (const T *map1, const T *map2, xcomplex<T> *alm1,
|
||||
xcomplex<T> *alm2, int spin, bool add)
|
||||
void map2alm_spin (const T *map1, const T *map2, T *alm1, T *alm2,
|
||||
int spin, bool add)
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
sharp_execute (SHARP_MAP2ALM, spin, add, aptr, mptr, ginfo, ainfo, 1,
|
||||
cxxjobhelper__<T>::val,0,0,0);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,spin,aptr,mptr,ginfo,ainfo,1,flags,0,0);
|
||||
}
|
||||
};
|
||||
|
||||
|
309
external/sharp/libsharp/sharp_geomhelpers.c
vendored
309
external/sharp/libsharp/sharp_geomhelpers.c
vendored
@ -25,30 +25,24 @@
|
||||
/*! \file sharp_geomhelpers.c
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2006-2011 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* Copyright (C) 2006-2012 Max-Planck-Society<br>
|
||||
* Copyright (C) 2007-2008 Pavel Holoborodko (for gauss_legendre_tbl)
|
||||
* \author Martin Reinecke \author Pavel Holoborodko
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_legendre_roots.h"
|
||||
#include "c_utils.h"
|
||||
#include "ls_fft.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void sharp_make_healpix_geom_info (int nside, int stride,
|
||||
sharp_geom_info **geom_info)
|
||||
{
|
||||
double *weight=RALLOC(double,2*nside);
|
||||
SET_ARRAY(weight,0,2*nside,1);
|
||||
sharp_make_weighted_healpix_geom_info (nside, stride, weight, geom_info);
|
||||
DEALLOC(weight);
|
||||
}
|
||||
|
||||
void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
const double *weight, sharp_geom_info **geom_info)
|
||||
void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
|
||||
const int *rings, const double *weight, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
ptrdiff_t npix=(ptrdiff_t)nside*nside*12;
|
||||
ptrdiff_t ncap=2*(ptrdiff_t)nside*(nside-1);
|
||||
int nrings=4*nside-1;
|
||||
|
||||
double *theta=RALLOC(double,nrings);
|
||||
double *weight_=RALLOC(double,nrings);
|
||||
@ -56,9 +50,10 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
double *phi0=RALLOC(double,nrings);
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
ptrdiff_t curofs=0, checkofs; /* checkofs used for assertion introduced when adding rings arg */
|
||||
for (int m=0; m<nrings; ++m)
|
||||
{
|
||||
int ring=m+1;
|
||||
int ring = (rings==NULL)? (m+1) : rings[m];
|
||||
ptrdiff_t northring = (ring>2*nside) ? 4*nside-ring : ring;
|
||||
stride_[m] = stride;
|
||||
if (northring < nside)
|
||||
@ -66,7 +61,7 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
theta[m] = 2*asin(northring/(sqrt(6.)*nside));
|
||||
nph[m] = 4*northring;
|
||||
phi0[m] = pi/nph[m];
|
||||
ofs[m] = 2*northring*(northring-1)*stride;
|
||||
checkofs = 2*northring*(northring-1)*stride;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -78,14 +73,21 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
phi0[m] = 0;
|
||||
else
|
||||
phi0[m] = pi/nph[m];
|
||||
ofs[m] = (ncap + (northring-nside)*nph[m])*stride;
|
||||
checkofs = (ncap + (northring-nside)*nph[m])*stride;
|
||||
ofs[m] = curofs;
|
||||
}
|
||||
if (northring != ring) /* southern hemisphere */
|
||||
{
|
||||
theta[m] = pi-theta[m];
|
||||
ofs[m] = (npix - nph[m])*stride - ofs[m];
|
||||
checkofs = (npix - nph[m])*stride - checkofs;
|
||||
ofs[m] = curofs;
|
||||
}
|
||||
weight_[m]=4.*pi/npix*weight[northring-1];
|
||||
weight_[m]=4.*pi/npix*((weight==NULL) ? 1. : weight[northring-1]);
|
||||
if (rings==NULL) {
|
||||
UTIL_ASSERT(curofs==checkofs, "Bug in computing ofs[m]");
|
||||
}
|
||||
ofs[m] = curofs;
|
||||
curofs+=nph[m];
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight_,
|
||||
@ -99,93 +101,13 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
||||
static void gauleg (double x1, double x2, double *x, double *w, int n)
|
||||
void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
const double *weight, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi = 3.141592653589793238462643383279502884197;
|
||||
const double eps = 3.0E-14;
|
||||
|
||||
int m = (n+1)/2;
|
||||
double xm = 0.5*(x2+x1);
|
||||
double xl = 0.5*(x2-x1);
|
||||
for(int i=1; i<=m; ++i)
|
||||
{
|
||||
double z = cos(pi*(i-0.25)/(n+0.5));
|
||||
double pp;
|
||||
int dobreak=0;
|
||||
while(1)
|
||||
{
|
||||
double p1 = 1.0, p2 = 0.0;
|
||||
double z1 = z;
|
||||
int j;
|
||||
for(j=1; j<=n; ++j)
|
||||
{
|
||||
double p3 = p2;
|
||||
p2 = p1;
|
||||
p1 = ((2*j-1)*z*p2-(j-1)*p3)/j;
|
||||
}
|
||||
pp = n*(z*p1-p2)/(z*z-1);
|
||||
z = z1 - p1/pp;
|
||||
if (dobreak) break;
|
||||
if (fabs(z-z1) <= eps) dobreak=1;
|
||||
}
|
||||
x[i-1] = xm - xl*z;
|
||||
x[n-i] = xm + xl*z;
|
||||
w[i-1] = w[n-i] = 2*xl/((1-z*z)*pp*pp);
|
||||
}
|
||||
sharp_make_subset_healpix_geom_info(nside, stride, 4 * nside - 1, NULL, weight, geom_info);
|
||||
}
|
||||
|
||||
static void makeweights (int bw, double *weights)
|
||||
{
|
||||
const double pi = 3.141592653589793238462643383279502884197;
|
||||
const double fudge = pi/(4*bw);
|
||||
for (int j=0; j<2*bw; ++j)
|
||||
{
|
||||
double tmpsum = 0;
|
||||
for (int k=0; k<bw; ++k)
|
||||
tmpsum += 1./(2*k+1) * sin((2*j+1)*(2*k+1)*fudge);
|
||||
tmpsum *= sin((2*j+1)*fudge);
|
||||
tmpsum *= 2./bw;
|
||||
weights[j] = tmpsum;
|
||||
/* weights[j + 2*bw] = tmpsum * sin((2*j+1)*fudge); */
|
||||
}
|
||||
}
|
||||
|
||||
void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
|
||||
int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
|
||||
double *theta=RALLOC(double,nrings);
|
||||
double *weight=RALLOC(double,nrings);
|
||||
int *nph=RALLOC(int,nrings);
|
||||
double *phi0=RALLOC(double,nrings);
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
|
||||
gauleg(-1,1,theta,weight,nrings);
|
||||
|
||||
for (int m=0; m<nrings; ++m)
|
||||
{
|
||||
theta[m] = acos(-theta[m]);
|
||||
nph[m]=nphi;
|
||||
phi0[m]=0;
|
||||
ofs[m]=(ptrdiff_t)m*stride_lat;
|
||||
stride_[m]=stride_lon;
|
||||
weight[m]*=2*pi/nphi;
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0, theta, weight,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
DEALLOC(weight);
|
||||
DEALLOC(nph);
|
||||
DEALLOC(phi0);
|
||||
DEALLOC(ofs);
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
||||
void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
||||
void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
@ -197,12 +119,10 @@ void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
|
||||
UTIL_ASSERT((nrings&1)==0,
|
||||
"Even number of rings needed for equidistant grid!");
|
||||
makeweights(nrings/2,weight);
|
||||
sharp_legendre_roots(nrings,theta,weight);
|
||||
for (int m=0; m<nrings; ++m)
|
||||
{
|
||||
theta[m] = (m+0.5)*pi/nrings;
|
||||
theta[m] = acos(-theta[m]);
|
||||
nph[m]=nphi;
|
||||
phi0_[m]=phi0;
|
||||
ofs[m]=(ptrdiff_t)m*stride_lat;
|
||||
@ -220,3 +140,178 @@ void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
||||
DEALLOC(ofs);
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
||||
/* Weights from Waldvogel 2006: BIT Numerical Mathematics 46, p. 195 */
|
||||
void sharp_make_fejer1_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
|
||||
double *theta=RALLOC(double,nrings);
|
||||
double *weight=RALLOC(double,nrings);
|
||||
int *nph=RALLOC(int,nrings);
|
||||
double *phi0_=RALLOC(double,nrings);
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
|
||||
weight[0]=2.;
|
||||
for (int k=1; k<=(nrings-1)/2; ++k)
|
||||
{
|
||||
weight[2*k-1]=2./(1.-4.*k*k)*cos((k*pi)/nrings);
|
||||
weight[2*k ]=2./(1.-4.*k*k)*sin((k*pi)/nrings);
|
||||
}
|
||||
if ((nrings&1)==0) weight[nrings-1]=0.;
|
||||
real_plan plan = make_real_plan(nrings);
|
||||
real_plan_backward_fftpack(plan,weight);
|
||||
kill_real_plan(plan);
|
||||
|
||||
for (int m=0; m<(nrings+1)/2; ++m)
|
||||
{
|
||||
theta[m]=pi*(m+0.5)/nrings;
|
||||
theta[nrings-1-m]=pi-theta[m];
|
||||
nph[m]=nph[nrings-1-m]=ppring;
|
||||
phi0_[m]=phi0_[nrings-1-m]=phi0;
|
||||
ofs[m]=(ptrdiff_t)m*stride_lat;
|
||||
ofs[nrings-1-m]=(ptrdiff_t)((nrings-1-m)*stride_lat);
|
||||
stride_[m]=stride_[nrings-1-m]=stride_lon;
|
||||
weight[m]=weight[nrings-1-m]=weight[m]*2*pi/(nrings*nph[m]);
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
DEALLOC(weight);
|
||||
DEALLOC(nph);
|
||||
DEALLOC(phi0_);
|
||||
DEALLOC(ofs);
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
||||
/* Weights from Waldvogel 2006: BIT Numerical Mathematics 46, p. 195 */
|
||||
void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
|
||||
double *theta=RALLOC(double,nrings);
|
||||
double *weight=RALLOC(double,nrings);
|
||||
int *nph=RALLOC(int,nrings);
|
||||
double *phi0_=RALLOC(double,nrings);
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
|
||||
int n=nrings-1;
|
||||
SET_ARRAY(weight,0,nrings,0.);
|
||||
double dw=-1./(n*n-1.+(n&1));
|
||||
weight[0]=2.+dw;
|
||||
for (int k=1; k<=(n/2-1); ++k)
|
||||
weight[2*k-1]=2./(1.-4.*k*k) + dw;
|
||||
weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1. -dw*((2-(n&1))*n-1);
|
||||
real_plan plan = make_real_plan(n);
|
||||
real_plan_backward_fftpack(plan,weight);
|
||||
kill_real_plan(plan);
|
||||
weight[n]=weight[0];
|
||||
|
||||
for (int m=0; m<(nrings+1)/2; ++m)
|
||||
{
|
||||
theta[m]=pi*m/(nrings-1.);
|
||||
if (theta[m]<1e-15) theta[m]=1e-15;
|
||||
theta[nrings-1-m]=pi-theta[m];
|
||||
nph[m]=nph[nrings-1-m]=ppring;
|
||||
phi0_[m]=phi0_[nrings-1-m]=phi0;
|
||||
ofs[m]=(ptrdiff_t)m*stride_lat;
|
||||
ofs[nrings-1-m]=(ptrdiff_t)((nrings-1-m)*stride_lat);
|
||||
stride_[m]=stride_[nrings-1-m]=stride_lon;
|
||||
weight[m]=weight[nrings-1-m]=weight[m]*2*pi/(n*nph[m]);
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
DEALLOC(weight);
|
||||
DEALLOC(nph);
|
||||
DEALLOC(phi0_);
|
||||
DEALLOC(ofs);
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
||||
/* Weights from Waldvogel 2006: BIT Numerical Mathematics 46, p. 195 */
|
||||
void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
|
||||
double *theta=RALLOC(double,nrings);
|
||||
double *weight=RALLOC(double,nrings+1);
|
||||
int *nph=RALLOC(int,nrings);
|
||||
double *phi0_=RALLOC(double,nrings);
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
|
||||
int n=nrings+1;
|
||||
SET_ARRAY(weight,0,n,0.);
|
||||
weight[0]=2.;
|
||||
for (int k=1; k<=(n/2-1); ++k)
|
||||
weight[2*k-1]=2./(1.-4.*k*k);
|
||||
weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1.;
|
||||
real_plan plan = make_real_plan(n);
|
||||
real_plan_backward_fftpack(plan,weight);
|
||||
kill_real_plan(plan);
|
||||
for (int m=0; m<nrings; ++m)
|
||||
weight[m]=weight[m+1];
|
||||
|
||||
for (int m=0; m<(nrings+1)/2; ++m)
|
||||
{
|
||||
theta[m]=pi*(m+1)/(nrings+1.);
|
||||
theta[nrings-1-m]=pi-theta[m];
|
||||
nph[m]=nph[nrings-1-m]=ppring;
|
||||
phi0_[m]=phi0_[nrings-1-m]=phi0;
|
||||
ofs[m]=(ptrdiff_t)m*stride_lat;
|
||||
ofs[nrings-1-m]=(ptrdiff_t)((nrings-1-m)*stride_lat);
|
||||
stride_[m]=stride_[nrings-1-m]=stride_lon;
|
||||
weight[m]=weight[nrings-1-m]=weight[m]*2*pi/(n*nph[m]);
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, weight,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
DEALLOC(weight);
|
||||
DEALLOC(nph);
|
||||
DEALLOC(phi0_);
|
||||
DEALLOC(ofs);
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
||||
void sharp_make_mw_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
|
||||
double *theta=RALLOC(double,nrings);
|
||||
int *nph=RALLOC(int,nrings);
|
||||
double *phi0_=RALLOC(double,nrings);
|
||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||
int *stride_=RALLOC(int,nrings);
|
||||
|
||||
for (int m=0; m<nrings; ++m)
|
||||
{
|
||||
theta[m]=pi*(2.*m+1.)/(2.*nrings-1.);
|
||||
if (theta[m]>pi-1e-15) theta[m]=pi-1e-15;
|
||||
nph[m]=ppring;
|
||||
phi0_[m]=phi0;
|
||||
ofs[m]=(ptrdiff_t)m*stride_lat;
|
||||
stride_[m]=stride_lon;
|
||||
}
|
||||
|
||||
sharp_make_geom_info (nrings, nph, ofs, stride_, phi0_, theta, NULL,
|
||||
geom_info);
|
||||
|
||||
DEALLOC(theta);
|
||||
DEALLOC(nph);
|
||||
DEALLOC(phi0_);
|
||||
DEALLOC(ofs);
|
||||
DEALLOC(stride_);
|
||||
}
|
||||
|
95
external/sharp/libsharp/sharp_geomhelpers.h
vendored
95
external/sharp/libsharp/sharp_geomhelpers.h
vendored
@ -25,7 +25,7 @@
|
||||
/*! \file sharp_geomhelpers.h
|
||||
* SHARP helper function for the creation of grid geometries
|
||||
*
|
||||
* Copyright (C) 2006-2011 Max-Planck-Society
|
||||
* Copyright (C) 2006-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -39,26 +39,41 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/*! Creates a geometry information describing a HEALPix map with an
|
||||
Nside parameter \a nside.
|
||||
Nside parameter \a nside. \a weight contains the relative ring
|
||||
weights and must have \a 2*nside entries. The rings array contains
|
||||
the indices of the rings, with 1 being the first ring at the north
|
||||
pole; if NULL then we take them to be sequential. Pass 4 * nside - 1
|
||||
as nrings and NULL to rings to get the full HEALPix grid.
|
||||
\note if \a weight is a null pointer, all weights are assumed to be 1.
|
||||
\note if \a rings is a null pointer, take all rings
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_healpix_geom_info (int nside, int stride,
|
||||
sharp_geom_info **geom_info);
|
||||
void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
|
||||
const int *rings, const double *weight, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Creates a geometry information describing a HEALPix map with an
|
||||
Nside parameter \a nside. \a weight contains the relative ring
|
||||
weights and must have \a 2*nside entries.
|
||||
\note if \a weight is a null pointer, all weights are assumed to be 1.
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||
const double *weight, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Creates a geometry information describing a HEALPix map with an
|
||||
Nside parameter \a nside.
|
||||
\ingroup geominfogroup */
|
||||
static inline void sharp_make_healpix_geom_info (int nside, int stride,
|
||||
sharp_geom_info **geom_info)
|
||||
{ sharp_make_weighted_healpix_geom_info (nside, stride, NULL, geom_info); }
|
||||
|
||||
/*! Creates a geometry information describing a Gaussian map with \a nrings
|
||||
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
|
||||
pixel in each ring is 0. The index difference between two adjacent pixels
|
||||
in an iso-latitude ring is \a stride_lon, the index difference between the
|
||||
two start pixels in consecutive iso-latitude rings is \a stride_lat.
|
||||
pixel in each ring is \a phi0 (in radians). The index difference between
|
||||
two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
|
||||
difference between the two start pixels in consecutive iso-latitude rings
|
||||
is \a stride_lat.
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
|
||||
int stride_lat, sharp_geom_info **geom_info);
|
||||
void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Creates a geometry information describing an ECP map with \a nrings
|
||||
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
|
||||
@ -68,11 +83,67 @@ void sharp_make_gauss_geom_info (int nrings, int nphi, int stride_lon,
|
||||
is \a stride_lat.
|
||||
\note The spacing of pixel centers is equidistant in colatitude and
|
||||
longitude.
|
||||
\note \a nrings must be an even number.
|
||||
\note The sphere is pixelized in a way that the colatitude of the first ring
|
||||
is \a 0.5*(pi/nrings). There are no pixel centers at the poles.
|
||||
is \a 0.5*(pi/nrings) and the colatitude of the last ring is
|
||||
\a pi-0.5*(pi/nrings). There are no pixel centers at the poles.
|
||||
\note This grid corresponds to Fejer's first rule.
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
||||
void sharp_make_fejer1_geom_info (int nrings, int nphi, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Old name for sharp_make_fejer1_geom_info()
|
||||
\ingroup geominfogroup */
|
||||
static inline void sharp_make_ecp_geom_info (int nrings, int nphi, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
{
|
||||
sharp_make_fejer1_geom_info (nrings, nphi, phi0, stride_lon, stride_lat,
|
||||
geom_info);
|
||||
}
|
||||
|
||||
/*! Creates a geometry information describing an ECP map with \a nrings
|
||||
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
|
||||
pixel in each ring is \a phi0 (in radians). The index difference between
|
||||
two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
|
||||
difference between the two start pixels in consecutive iso-latitude rings
|
||||
is \a stride_lat.
|
||||
\note The spacing of pixel centers is equidistant in colatitude and
|
||||
longitude.
|
||||
\note The sphere is pixelized in a way that the colatitude of the first ring
|
||||
is \a 0 and that of the last ring is \a pi.
|
||||
\note This grid corresponds to Clenshaw-Curtis integration.
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Creates a geometry information describing an ECP map with \a nrings
|
||||
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
|
||||
pixel in each ring is \a phi0 (in radians). The index difference between
|
||||
two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
|
||||
difference between the two start pixels in consecutive iso-latitude rings
|
||||
is \a stride_lat.
|
||||
\note The spacing of pixel centers is equidistant in colatitude and
|
||||
longitude.
|
||||
\note The sphere is pixelized in a way that the colatitude of the first ring
|
||||
is \a pi/(nrings+1) and that of the last ring is \a pi-pi/(nrings+1).
|
||||
\note This grid corresponds to Fejer's second rule.
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Creates a geometry information describing a map with \a nrings
|
||||
iso-latitude rings and \a nphi pixels per ring. The azimuth of the first
|
||||
pixel in each ring is \a phi0 (in radians). The index difference between
|
||||
two adjacent pixels in an iso-latitude ring is \a stride_lon, the index
|
||||
difference between the two start pixels in consecutive iso-latitude rings
|
||||
is \a stride_lat.
|
||||
\note The spacing of pixel centers is equidistant in colatitude and
|
||||
longitude.
|
||||
\note The sphere is pixelized in a way that the colatitude of the first ring
|
||||
is \a pi/(2*nrings-1) and that of the last ring is \a pi.
|
||||
\note This is the grid introduced by McEwen & Wiaux 2011.
|
||||
\note This function does \e not define any quadrature weights.
|
||||
\ingroup geominfogroup */
|
||||
void sharp_make_mw_geom_info (int nrings, int ppring, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
12
external/sharp/libsharp/sharp_internal.h
vendored
12
external/sharp/libsharp/sharp_internal.h
vendored
@ -25,8 +25,8 @@
|
||||
/*! \file sharp_internal.h
|
||||
* Internally used functionality for the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2006-2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* Copyright (C) 2006-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_INTERNAL_H
|
||||
@ -38,23 +38,22 @@
|
||||
|
||||
#include "sharp.h"
|
||||
|
||||
typedef enum { FLOAT, DOUBLE } sharp_fde;
|
||||
#define SHARP_MAXTRANS 100
|
||||
|
||||
typedef struct
|
||||
{
|
||||
sharp_jobtype type;
|
||||
int spin;
|
||||
int add_output;
|
||||
int nmaps, nalm;
|
||||
sharp_fde fde;
|
||||
int flags;
|
||||
void **map;
|
||||
void **alm;
|
||||
int s_m, s_th; // strides in m and theta direction
|
||||
complex double *phase;
|
||||
double *norm_l;
|
||||
complex double *almtmp;
|
||||
const sharp_geom_info *ginfo;
|
||||
const sharp_alm_info *ainfo;
|
||||
int nv;
|
||||
double time;
|
||||
int ntrans;
|
||||
unsigned long long opcnt;
|
||||
@ -62,5 +61,6 @@ typedef struct
|
||||
|
||||
int sharp_get_nv_max (void);
|
||||
int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans);
|
||||
int sharp_get_mlim (int lmax, int spin, double sth, double cth);
|
||||
|
||||
#endif
|
||||
|
1319
external/sharp/libsharp/sharp_legendre.c
vendored
Normal file
1319
external/sharp/libsharp/sharp_legendre.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
176
external/sharp/libsharp/sharp_legendre.c.in
vendored
Normal file
176
external/sharp/libsharp/sharp_legendre.c.in
vendored
Normal file
@ -0,0 +1,176 @@
|
||||
/*
|
||||
|
||||
NOTE NOTE NOTE
|
||||
|
||||
This file is edited in sharp_legendre.c.in which is then preprocessed.
|
||||
Do not make manual modifications to sharp_legendre.c.
|
||||
|
||||
NOTE NOTE NOTE
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre.c.in
|
||||
*
|
||||
* Copyright (C) 2015 University of Oslo
|
||||
* \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef NO_LEGENDRE
|
||||
#if (VLEN==8)
|
||||
#error This code is not tested with MIC; please compile with -DNO_LEGENDRE
|
||||
/* ...or test it (it probably works) and remove this check */
|
||||
#endif
|
||||
|
||||
#ifndef SHARP_LEGENDRE_CS
|
||||
#define SHARP_LEGENDRE_CS 4
|
||||
#endif
|
||||
|
||||
#define MAX_CS 6
|
||||
#if (SHARP_LEGENDRE_CS > MAX_CS)
|
||||
#error (SHARP_LEGENDRE_CS > MAX_CS)
|
||||
#endif
|
||||
|
||||
#include "sharp_legendre.h"
|
||||
#include "sharp_vecsupport.h"
|
||||
|
||||
#include <malloc.h>
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
/*{ for cs in range(1, 7) }*/
|
||||
static void legendre_transform_vec{{cs}}{{T}}({{scalar}} *recfacs, {{scalar}} *bl, ptrdiff_t lmax,
|
||||
{{scalar}} xarr[({{cs}}) * VLEN{{T}}],
|
||||
{{scalar}} out[({{cs}}) * VLEN{{T}}]) {
|
||||
/*{ for i in range(cs) }*/
|
||||
Tv{{T}} P_{{i}}, Pm1_{{i}}, Pm2_{{i}}, x{{i}}, y{{i}};
|
||||
/*{ endfor }*/
|
||||
Tv{{T}} W1, W2, b, R;
|
||||
ptrdiff_t l;
|
||||
|
||||
/*{ for i in range(cs) }*/
|
||||
x{{i}} = vloadu{{T}}(xarr + {{i}} * VLEN{{T}});
|
||||
Pm1_{{i}} = vload{{T}}(1.0);
|
||||
P_{{i}} = x{{i}};
|
||||
b = vload{{T}}(*bl);
|
||||
y{{i}} = vmul{{T}}(Pm1_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
b = vload{{T}}(*(bl + 1));
|
||||
/*{ for i in range(cs) }*/
|
||||
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
for (l = 2; l <= lmax; ++l) {
|
||||
b = vload{{T}}(*(bl + l));
|
||||
R = vload{{T}}(*(recfacs + l));
|
||||
|
||||
/*
|
||||
P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
|
||||
*/
|
||||
/*{ for i in range(cs) }*/
|
||||
Pm2_{{i}} = Pm1_{{i}}; Pm1_{{i}} = P_{{i}};
|
||||
W1 = vmul{{T}}(x{{i}}, Pm1_{{i}});
|
||||
W2 = W1;
|
||||
W2 = vsub{{T}}(W2, Pm2_{{i}});
|
||||
P_{{i}} = W1;
|
||||
vfmaeq{{T}}(P_{{i}}, W2, R);
|
||||
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
}
|
||||
/*{ for i in range(cs) }*/
|
||||
vstoreu{{T}}(out + {{i}} * VLEN{{T}}, y{{i}});
|
||||
/*{ endfor }*/
|
||||
}
|
||||
/*{ endfor }*/
|
||||
/*{ endfor }*/
|
||||
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
void sharp_legendre_transform_recfac{{T}}({{scalar}} *r, ptrdiff_t lmax) {
|
||||
/* (l - 1) / l, for l >= 2 */
|
||||
ptrdiff_t l;
|
||||
r[0] = 0;
|
||||
r[1] = 1;
|
||||
for (l = 2; l <= lmax; ++l) {
|
||||
r[l] = ({{scalar}})(l - 1) / ({{scalar}})l;
|
||||
}
|
||||
}
|
||||
/*{ endfor }*/
|
||||
|
||||
/*
|
||||
Compute sum_l b_l P_l(x_i) for all i.
|
||||
*/
|
||||
|
||||
#define LEN (SHARP_LEGENDRE_CS * VLEN)
|
||||
#define LEN_s (SHARP_LEGENDRE_CS * VLEN_s)
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
void sharp_legendre_transform{{T}}({{scalar}} *bl,
|
||||
{{scalar}} *recfac,
|
||||
ptrdiff_t lmax,
|
||||
{{scalar}} *x, {{scalar}} *out, ptrdiff_t nx) {
|
||||
{{scalar}} xchunk[MAX_CS * VLEN{{T}}], outchunk[MAX_CS * LEN{{T}}];
|
||||
int compute_recfac;
|
||||
ptrdiff_t i, j, len;
|
||||
|
||||
compute_recfac = (recfac == NULL);
|
||||
if (compute_recfac) {
|
||||
recfac = malloc(sizeof({{scalar}}) * (lmax + 1));
|
||||
sharp_legendre_transform_recfac{{T}}(recfac, lmax);
|
||||
}
|
||||
|
||||
for (j = 0; j != LEN{{T}}; ++j) xchunk[j] = 0;
|
||||
|
||||
for (i = 0; i < nx; i += LEN{{T}}) {
|
||||
len = (i + (LEN{{T}}) <= nx) ? (LEN{{T}}) : (nx - i);
|
||||
for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
|
||||
switch ((len + VLEN{{T}} - 1) / VLEN{{T}}) {
|
||||
case 6: legendre_transform_vec6{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 5: legendre_transform_vec5{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 4: legendre_transform_vec4{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 3: legendre_transform_vec3{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 2: legendre_transform_vec2{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 1:
|
||||
case 0:
|
||||
legendre_transform_vec1{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
}
|
||||
for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
|
||||
}
|
||||
if (compute_recfac) {
|
||||
free(recfac);
|
||||
}
|
||||
}
|
||||
/*{ endfor }*/
|
||||
|
||||
#endif
|
62
external/sharp/libsharp/sharp_legendre.h
vendored
Normal file
62
external/sharp/libsharp/sharp_legendre.h
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre.h
|
||||
* Interface for the Legendre transform parts of the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2015 University of Oslo
|
||||
* \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef SHARP_LEGENDRE_H
|
||||
#define SHARP_LEGENDRE_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef NO_LEGENDRE
|
||||
|
||||
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||
double *out, ptrdiff_t nx);
|
||||
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||
float *out, ptrdiff_t nx);
|
||||
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax);
|
||||
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
67
external/sharp/libsharp/sharp_legendre_roots.c
vendored
Normal file
67
external/sharp/libsharp/sharp_legendre_roots.c
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
/* Function adapted from GNU GSL file glfixed.c
|
||||
Original author: Pavel Holoborodko (http://www.holoborodko.com)
|
||||
|
||||
Adjustments by M. Reinecke
|
||||
- adjusted interface (keep epsilon internal, return full number of points)
|
||||
- removed precomputed tables
|
||||
- tweaked Newton iteration to obtain higher accuracy */
|
||||
|
||||
#include <math.h>
|
||||
#include "sharp_legendre_roots.h"
|
||||
#include "c_utils.h"
|
||||
|
||||
static inline double one_minus_x2 (double x)
|
||||
{ return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }
|
||||
|
||||
void sharp_legendre_roots(int n, double *x, double *w)
|
||||
{
|
||||
const double pi = 3.141592653589793238462643383279502884197;
|
||||
const double eps = 3e-14;
|
||||
int m = (n+1)>>1;
|
||||
|
||||
double t0 = 1 - (1-1./n) / (8.*n*n);
|
||||
double t1 = 1./(4.*n+2.);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int i;
|
||||
#pragma omp for schedule(dynamic,100)
|
||||
for (i=1; i<=m; ++i)
|
||||
{
|
||||
double x0 = cos(pi * ((i<<2)-1) * t1) * t0;
|
||||
|
||||
int dobreak=0;
|
||||
int j=0;
|
||||
double dpdx;
|
||||
while(1)
|
||||
{
|
||||
double P_1 = 1.0;
|
||||
double P0 = x0;
|
||||
double dx, x1;
|
||||
|
||||
for (int k=2; k<=n; k++)
|
||||
{
|
||||
double P_2 = P_1;
|
||||
P_1 = P0;
|
||||
// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
|
||||
P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
|
||||
}
|
||||
|
||||
dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0);
|
||||
|
||||
/* Newton step */
|
||||
x1 = x0 - P0/dpdx;
|
||||
dx = x0-x1;
|
||||
x0 = x1;
|
||||
if (dobreak) break;
|
||||
|
||||
if (fabs(dx)<=eps) dobreak=1;
|
||||
UTIL_ASSERT(++j<100,"convergence problem");
|
||||
}
|
||||
|
||||
x[i-1] = -x0;
|
||||
x[n-i] = x0;
|
||||
w[i-1] = w[n-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
|
||||
}
|
||||
} // end of parallel region
|
||||
}
|
50
external/sharp/libsharp/sharp_legendre_roots.h
vendored
Normal file
50
external/sharp/libsharp/sharp_legendre_roots.h
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre_roots.h
|
||||
*
|
||||
* Copyright (C) 2006-2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef SHARP_LEGENDRE_ROOTS_H
|
||||
#define SHARP_LEGENDRE_ROOTS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*! Computes roots and Gaussian quadrature weights for Legendre polynomial
|
||||
of degree \a n.
|
||||
\param n Order of Legendre polynomial
|
||||
\param x Array of length \a n for output (root position)
|
||||
\param w Array of length \a w for output (weight for Gaussian quadrature)
|
||||
*/
|
||||
void sharp_legendre_roots(int n, double *x, double *w);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
148
external/sharp/libsharp/sharp_lowlevel.h
vendored
148
external/sharp/libsharp/sharp_lowlevel.h
vendored
@ -25,8 +25,8 @@
|
||||
/*! \file sharp_lowlevel.h
|
||||
* Low-level, portable interface for the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_LOWLEVEL_H
|
||||
@ -60,7 +60,7 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
sharp_ringpair *pair;
|
||||
int npairs;
|
||||
int npairs, nphmax;
|
||||
} sharp_geom_info;
|
||||
|
||||
/*! \defgroup almgroup Helpers for dealing with a_lm */
|
||||
@ -76,6 +76,8 @@ typedef struct
|
||||
int nm;
|
||||
/*! Array with \a nm entries containing the individual m values */
|
||||
int *mval;
|
||||
/*! Combination of flags from sharp_almflags */
|
||||
int flags;
|
||||
/*! Array with \a nm entries containing the (hypothetical) indices of
|
||||
the coefficients with quantum numbers 0,\a mval[i] */
|
||||
ptrdiff_t *mvstart;
|
||||
@ -83,30 +85,59 @@ typedef struct
|
||||
ptrdiff_t stride;
|
||||
} sharp_alm_info;
|
||||
|
||||
/*! Creates an Alm data structure information from the following parameters:
|
||||
/*! alm_info flags */
|
||||
typedef enum { SHARP_PACKED = 1,
|
||||
/*!< m=0-coefficients are packed so that the (zero) imaginary part is
|
||||
not present. mvstart is in units of *real* float/double for all
|
||||
m; stride is in units of reals for m=0 and complex for m!=0 */
|
||||
SHARP_REAL_HARMONICS = 1<<6
|
||||
/*!< Use the real spherical harmonic convention. For
|
||||
m==0, the alm are treated exactly the same as in
|
||||
the complex case. For m!=0, alm[i] represent a
|
||||
pair (+abs(m), -abs(m)) instead of (real, imag),
|
||||
and the coefficients are scaled by a factor of
|
||||
sqrt(2) relative to the complex case. In other
|
||||
words, (sqrt(.5) * alm[i]) recovers the
|
||||
corresponding complex coefficient (when accessed
|
||||
as complex).
|
||||
*/
|
||||
} sharp_almflags;
|
||||
|
||||
|
||||
|
||||
/*! Creates an a_lm data structure from the following parameters:
|
||||
\param lmax maximum \a l quantum number (>=0)
|
||||
\param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax)
|
||||
\param stride the stride between consecutive a_lm entries
|
||||
\param stride the stride between entries with identical \a m, and \a l
|
||||
differing by 1.
|
||||
\param mstart the index of the (hypothetical) coefficient with the
|
||||
quantum numbers 0,\a m. Must have \a mmax+1 entries.
|
||||
\param alm_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_alm_info (int lmax, int mmax, int stride,
|
||||
const ptrdiff_t *mstart, sharp_alm_info **alm_info);
|
||||
/*! Creates an Alm data structure information from the following parameters:
|
||||
\param lmax maximum \a l quantum number (>=0)
|
||||
\param nm number of different \a m (<=\a lmax+1)
|
||||
\param stride the stride between consecutive a_lm entries
|
||||
/*! Creates an a_lm data structure which from the following parameters:
|
||||
\param lmax maximum \a l quantum number (\a >=0)
|
||||
\param nm number of different \a m (\a 0<=nm<=lmax+1)
|
||||
\param stride the stride between entries with identical \a m, and \a l
|
||||
differing by 1.
|
||||
\param mval array with \a nm entries containing the individual m values
|
||||
\param mvstart array with \a nm entries containing the (hypothetical)
|
||||
indices of the coefficients with the quantum numbers 0,\a mval[i]
|
||||
\param flags a combination of sharp_almflags (pass 0 unless you know you need this)
|
||||
\param alm_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
|
||||
const ptrdiff_t *mvstart, sharp_alm_info **alm_info);
|
||||
const ptrdiff_t *mvstart, int flags, sharp_alm_info **alm_info);
|
||||
/*! Returns the index of the coefficient with quantum numbers \a l,
|
||||
\a mval[mi]. */
|
||||
\a mval[mi].
|
||||
\note for a \a sharp_alm_info generated by sharp_make_alm_info() this is
|
||||
the index for the coefficient with the quantum numbers \a l, \a mi. */
|
||||
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi);
|
||||
/*! Returns the number of alm coefficients described by \a self. If the SHARP_PACKED
|
||||
flag is set, this is number of "real" coeffecients (for m < 0 and m >= 0),
|
||||
otherwise it is the number of complex coefficients (with m>=0). */
|
||||
ptrdiff_t sharp_alm_count(const sharp_alm_info *self);
|
||||
/*! Deallocates the a_lm info object. */
|
||||
void sharp_destroy_alm_info (sharp_alm_info *info);
|
||||
|
||||
@ -123,12 +154,19 @@ void sharp_destroy_alm_info (sharp_alm_info *info);
|
||||
\param stride the stride between consecutive pixels
|
||||
\param phi0 the azimuth (in radians) of the first pixel in each ring
|
||||
\param theta the colatitude (in radians) of each ring
|
||||
\param weight the pixel weight to be used for the ring
|
||||
\param wgt the pixel weight to be used for the ring in map2alm
|
||||
and adjoint map2alm transforms.
|
||||
Pass NULL to use 1.0 as weight for all rings.
|
||||
\param geom_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
const int *stride, const double *phi0, const double *theta,
|
||||
const double *weight, sharp_geom_info **geom_info);
|
||||
const double *wgt, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Counts the number of grid points needed for (the local part of) a map described
|
||||
by \a info.
|
||||
*/
|
||||
ptrdiff_t sharp_map_size(const sharp_geom_info *info);
|
||||
|
||||
/*! Deallocates the geometry information in \a info. */
|
||||
void sharp_destroy_geom_info (sharp_geom_info *info);
|
||||
@ -139,45 +177,91 @@ void sharp_destroy_geom_info (sharp_geom_info *info);
|
||||
/*! \{ */
|
||||
|
||||
/*! Enumeration of SHARP job types. */
|
||||
typedef enum { SHARP_MAP2ALM, /*!< analysis */
|
||||
SHARP_ALM2MAP, /*!< synthesis */
|
||||
SHARP_ALM2MAP_DERIV1 /*!< synthesis of first derivatives */
|
||||
typedef enum { SHARP_YtW=0, /*!< analysis */
|
||||
SHARP_MAP2ALM=SHARP_YtW, /*!< analysis */
|
||||
SHARP_Y=1, /*!< synthesis */
|
||||
SHARP_ALM2MAP=SHARP_Y, /*!< synthesis */
|
||||
SHARP_Yt=2, /*!< adjoint synthesis */
|
||||
SHARP_WY=3, /*!< adjoint analysis */
|
||||
SHARP_ALM2MAP_DERIV1=4 /*!< synthesis of first derivatives */
|
||||
} sharp_jobtype;
|
||||
|
||||
/*! Job flags */
|
||||
typedef enum { SHARP_DP = 1<<4,
|
||||
/*!< map and a_lm are in double precision */
|
||||
SHARP_ADD = 1<<5,
|
||||
/*!< results are added to the output arrays, instead of
|
||||
overwriting them */
|
||||
|
||||
/* NOTE: SHARP_REAL_HARMONICS, 1<<6, is also available in sharp_jobflags,
|
||||
but its use here is deprecated in favor of having it in the sharp_alm_info */
|
||||
|
||||
SHARP_NO_FFT = 1<<7,
|
||||
|
||||
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
|
||||
SHARP_NO_OPENMP = 1<<21, /* internal use only */
|
||||
SHARP_NVMAX = (1<<4)-1 /* internal use only */
|
||||
} sharp_jobflags;
|
||||
|
||||
/*! Performs a libsharp SHT job. The interface deliberately does not use
|
||||
the C99 "complex" data type, in order to be callable from C.
|
||||
the C99 "complex" data type, in order to be callable from C89 and C++.
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param add_output if 0, the output arrays will be overwritten,
|
||||
else the result will be added to the output arrays.
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||
alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
|
||||
depends on the \a dp parameter.
|
||||
depends on whether the SHARP_DP flag is set.
|
||||
\param map contains pointers to the maps. If \a spin==0,
|
||||
map[0] points to the map of the first SHT, map[1] to that of the second
|
||||
etc. If \a spin>0, map[0] and map[1] point to the maps of the first SHT,
|
||||
map[2] and map[3] to those of the second, etc. The exact data type of \a map
|
||||
depends on the \a dp parameter.
|
||||
etc. If \a spin>0, or \a type is SHARP_ALM2MAP_DERIV1, map[0] and map[1]
|
||||
point to the maps of the first SHT, map[2] and map[3] to those of the
|
||||
second, etc. The exact data type of \a map depends on whether the SHARP_DP
|
||||
flag is set.
|
||||
\param geom_info A \c sharp_geom_info object compatible with the provided
|
||||
\a map arrays.
|
||||
\param alm_info A \c sharp_alm_info object compatible with the provided
|
||||
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
||||
exactly once.
|
||||
\param ntrans the number of simultaneous SHTs
|
||||
\param dp if 0, the \a alm is expected to have the type "complex float **"
|
||||
and \a map is expected to have the type "float **"; otherwise the expected
|
||||
types are "complex double **" and "double **", respectively.
|
||||
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
|
||||
doing.
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
types are "complex float **" and "float **", respectively.
|
||||
\param time If not NULL, the wall clock time required for this SHT
|
||||
(in seconds)will be written here.
|
||||
(in seconds) will be written here.
|
||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||
operation count for this SHT will be written here. */
|
||||
void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm,
|
||||
void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
||||
int ntrans, int dp, int nv, double *time, unsigned long long *opcnt);
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||
int flags, double *time, unsigned long long *opcnt);
|
||||
|
||||
void sharp_set_chunksize_min(int new_chunksize_min);
|
||||
void sharp_set_nchunks_max(int new_nchunks_max);
|
||||
|
||||
|
||||
typedef enum { SHARP_ERROR_NO_MPI = 1,
|
||||
/*!< libsharp not compiled with MPI support */
|
||||
} sharp_errors;
|
||||
|
||||
/*! Works like sharp_execute_mpi, but is always present whether or not libsharp
|
||||
is compiled with USE_MPI. This is primarily useful for wrapper code etc.
|
||||
|
||||
Note that \a pcomm has the type MPI_Comm*, except we declare void* to avoid
|
||||
pulling in MPI headers. I.e., the comm argument of sharp_execute_mpi
|
||||
is *(MPI_Comm*)pcomm.
|
||||
|
||||
Other parameters are the same as sharp_execute_mpi.
|
||||
|
||||
Returns 0 if successful, or SHARP_ERROR_NO_MPI if MPI is not available
|
||||
(in which case nothing is done).
|
||||
*/
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
|
||||
|
||||
|
||||
/*! \} */
|
||||
|
||||
|
101
external/sharp/libsharp/sharp_mpi.c
vendored
101
external/sharp/libsharp/sharp_mpi.c
vendored
@ -25,8 +25,8 @@
|
||||
/*! \file sharp_mpi.c
|
||||
* Functionality only needed for MPI-parallel transforms
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifdef USE_MPI
|
||||
@ -185,36 +185,71 @@ static void alloc_phase_mpi (sharp_job *job, int nm, int ntheta,
|
||||
ptrdiff_t phase_size = (job->type==SHARP_MAP2ALM) ?
|
||||
(ptrdiff_t)(nmfull)*ntheta : (ptrdiff_t)(nm)*nthetafull;
|
||||
job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*phase_size);
|
||||
job->s_m=2*job->ntrans*job->nmaps;
|
||||
job->s_th = job->s_m * ((job->type==SHARP_MAP2ALM) ? nmfull : nm);
|
||||
}
|
||||
|
||||
static void alm2map_comm (sharp_job *job, const sharp_mpi_info *minfo)
|
||||
{
|
||||
if (job->type != SHARP_MAP2ALM)
|
||||
{
|
||||
sharp_communicate_alm2map (minfo,&job->phase);
|
||||
job->s_th=job->s_m*minfo->nmtotal;
|
||||
}
|
||||
}
|
||||
|
||||
static void map2alm_comm (sharp_job *job, const sharp_mpi_info *minfo)
|
||||
{
|
||||
if (job->type == SHARP_MAP2ALM)
|
||||
{
|
||||
sharp_communicate_map2alm (minfo,&job->phase);
|
||||
job->s_th=job->s_m*minfo->nm[minfo->mytask];
|
||||
}
|
||||
}
|
||||
|
||||
static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
||||
{
|
||||
double timer=wallTime();
|
||||
int ntasks;
|
||||
MPI_Comm_size(comm, &ntasks);
|
||||
if (ntasks==1) /* fall back to scalar implementation */
|
||||
{ sharp_execute_job (job); return; }
|
||||
|
||||
int lmax = job->ainfo->lmax;
|
||||
|
||||
job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
|
||||
|
||||
MPI_Barrier(comm);
|
||||
double timer=wallTime();
|
||||
job->opcnt=0;
|
||||
sharp_mpi_info minfo;
|
||||
sharp_make_mpi_info(comm, job, &minfo);
|
||||
|
||||
/* clear output arrays if requested */
|
||||
if (minfo.npairtotal>minfo.ntasks*300)
|
||||
{
|
||||
int nsub=(minfo.npairtotal+minfo.ntasks*200-1)/(minfo.ntasks*200);
|
||||
for (int isub=0; isub<nsub; ++isub)
|
||||
{
|
||||
sharp_job ljob=*job;
|
||||
// When creating a_lm, every sub-job produces a complete set of
|
||||
// coefficients; they need to be added up.
|
||||
if ((isub>0)&&(job->type==SHARP_MAP2ALM)) ljob.flags|=SHARP_ADD;
|
||||
sharp_geom_info lginfo;
|
||||
lginfo.pair=RALLOC(sharp_ringpair,(job->ginfo->npairs/nsub)+1);
|
||||
lginfo.npairs=0;
|
||||
lginfo.nphmax = job->ginfo->nphmax;
|
||||
while (lginfo.npairs*nsub+isub<job->ginfo->npairs)
|
||||
{
|
||||
lginfo.pair[lginfo.npairs]=job->ginfo->pair[lginfo.npairs*nsub+isub];
|
||||
++lginfo.npairs;
|
||||
}
|
||||
ljob.ginfo=&lginfo;
|
||||
sharp_execute_job_mpi (&ljob,comm);
|
||||
job->opcnt+=ljob.opcnt;
|
||||
DEALLOC(lginfo.pair);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int lmax = job->ainfo->lmax;
|
||||
job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
|
||||
|
||||
/* clear output arrays if requested */
|
||||
init_output (job);
|
||||
|
||||
alloc_phase_mpi (job,job->ainfo->nm,job->ginfo->npairs,minfo.mmax+1,
|
||||
@ -222,26 +257,20 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
||||
|
||||
double *cth = RALLOC(double,minfo.npairtotal),
|
||||
*sth = RALLOC(double,minfo.npairtotal);
|
||||
idxhelper *stmp = RALLOC(idxhelper,minfo.npairtotal);
|
||||
int *mlim = RALLOC(int,minfo.npairtotal);
|
||||
for (int i=0; i<minfo.npairtotal; ++i)
|
||||
{
|
||||
cth[i] = cos(minfo.theta[i]);
|
||||
sth[i] = sin(minfo.theta[i]);
|
||||
stmp[i].s=sth[i];
|
||||
stmp[i].i=i;
|
||||
mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i]);
|
||||
}
|
||||
qsort (stmp,minfo.npairtotal,sizeof(idxhelper),idx_compare);
|
||||
int *idx = RALLOC(int,minfo.npairtotal);
|
||||
for (int i=0; i<minfo.npairtotal; ++i)
|
||||
idx[i]=stmp[i].i;
|
||||
DEALLOC(stmp);
|
||||
|
||||
/* map->phase where necessary */
|
||||
/* map->phase where necessary */
|
||||
map2phase (job, minfo.mmax, 0, job->ginfo->npairs);
|
||||
|
||||
map2alm_comm (job, &minfo);
|
||||
|
||||
#pragma omp parallel
|
||||
#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
|
||||
{
|
||||
sharp_job ljob = *job;
|
||||
sharp_Ylmgen_C generator;
|
||||
@ -251,14 +280,14 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
||||
#pragma omp for schedule(dynamic,1)
|
||||
for (int mi=0; mi<job->ainfo->nm; ++mi)
|
||||
{
|
||||
/* alm->alm_tmp where necessary */
|
||||
/* alm->alm_tmp where necessary */
|
||||
alm2almtmp (&ljob, lmax, mi);
|
||||
|
||||
/* inner conversion loop */
|
||||
/* inner conversion loop */
|
||||
inner_loop (&ljob, minfo.ispair, cth, sth, 0, minfo.npairtotal,
|
||||
&generator, mi, idx);
|
||||
&generator, mi, mlim);
|
||||
|
||||
/* alm_tmp->alm where necessary */
|
||||
/* alm_tmp->alm where necessary */
|
||||
almtmp2alm (&ljob, lmax, mi);
|
||||
}
|
||||
|
||||
@ -271,30 +300,46 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
||||
|
||||
alm2map_comm (job, &minfo);
|
||||
|
||||
/* phase->map where necessary */
|
||||
/* phase->map where necessary */
|
||||
phase2map (job, minfo.mmax, 0, job->ginfo->npairs);
|
||||
|
||||
DEALLOC(mlim);
|
||||
DEALLOC(cth);
|
||||
DEALLOC(sth);
|
||||
DEALLOC(idx);
|
||||
DEALLOC(job->norm_l);
|
||||
dealloc_phase (job);
|
||||
}
|
||||
sharp_destroy_mpi_info(&minfo);
|
||||
job->time=wallTime()-timer;
|
||||
}
|
||||
|
||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
sharp_job job;
|
||||
sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info,
|
||||
alm_info, ntrans, dp, nv);
|
||||
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||
ntrans, flags);
|
||||
|
||||
sharp_execute_job_mpi (&job, comm);
|
||||
if (time!=NULL) *time = job.time;
|
||||
if (opcnt!=NULL) *opcnt = job.opcnt;
|
||||
}
|
||||
|
||||
/* We declare this only in C file to make symbol available for Fortran wrappers;
|
||||
without declaring it in C header as it should not be available to C code */
|
||||
void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
sharp_execute_mpi(MPI_Comm_f2c(comm), type, spin, alm, map, geom_info,
|
||||
alm_info, ntrans, flags, time, opcnt);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
30
external/sharp/libsharp/sharp_mpi.h
vendored
30
external/sharp/libsharp/sharp_mpi.h
vendored
@ -26,14 +26,14 @@
|
||||
* Interface for the spherical transform library with MPI support.
|
||||
*
|
||||
* Copyright (C) 2011,2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_MPI_H
|
||||
#define PLANCK_SHARP_MPI_H
|
||||
|
||||
#include <mpi.h>
|
||||
#include "sharp.h"
|
||||
#include "sharp_lowlevel.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -44,18 +44,17 @@ extern "C" {
|
||||
\param comm the MPI communicator to be used for this SHT
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param add_output if 0, the output arrays will be overwritten,
|
||||
else the result will be added to the output arrays.
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||
alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
|
||||
depends on the \a dp parameter.
|
||||
depends on whether the SHARP_DP flag is set.
|
||||
\param map contains pointers to the maps. If \a spin==0,
|
||||
map[0] points to the map of the first SHT, map[1] to that of the second
|
||||
etc. If \a spin>0, map[0] and map[1] point to the maps of the first SHT,
|
||||
map[2] and map[3] to those of the second, etc. The exact data type of \a map
|
||||
depends on the \a dp parameter.
|
||||
etc. If \a spin>0, or \a type is SHARP_ALM2MAP_DERIV1, map[0] and map[1]
|
||||
point to the maps of the first SHT, map[2] and map[3] to those of the
|
||||
second, etc. The exact data type of \a map depends on whether the SHARP_DP
|
||||
flag is set.
|
||||
\param geom_info A \c sharp_geom_info object compatible with the provided
|
||||
\a map arrays. The total map geometry is the union of all \a geom_info
|
||||
objects over the participating MPI tasks.
|
||||
@ -64,18 +63,17 @@ extern "C" {
|
||||
exactly once in the union of all \a alm_info objects over the participating
|
||||
MPI tasks.
|
||||
\param ntrans the number of simultaneous SHTs
|
||||
\param dp if 0, the \a alm is expected to have the type "complex float **"
|
||||
and \a map is expected to have the type "float **"; otherwise the expected
|
||||
types are "complex double **" and "double **", respectively.
|
||||
\param nv Internally used SHT parameter. Set to 0 unless you know what you are
|
||||
doing.
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
types are "complex float **" and "float **", respectively.
|
||||
\param time If not NULL, the wall clock time required for this SHT
|
||||
(in seconds)will be written here.
|
||||
(in seconds) will be written here.
|
||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||
operation count for this SHT will be written here. */
|
||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||
int add_output, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
249
external/sharp/libsharp/sharp_test.c
vendored
249
external/sharp/libsharp/sharp_test.c
vendored
@ -1,249 +0,0 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_test.c
|
||||
Accuracy test for libsharp's map analysis.
|
||||
|
||||
This program first generates a_lm coefficients up to
|
||||
a user-specified lmax (with mmax=lmax); where applicable, the
|
||||
real and imaginary parts of the coefficients are uniform
|
||||
random numbers of the interval [-1;1[.
|
||||
Afterwards, the random a_lm are converted to a map.
|
||||
This map is analyzed (optionally using an iterative scheme
|
||||
with a user-supplied number of steps).
|
||||
After every iteration, the code then outputs the RMS of the residual a_lm
|
||||
(i.e. the difference between the current and original a_lm), divided by
|
||||
the RMS of the original a_lm, as well as the maximum absolute change of any
|
||||
real or imaginary part between the current and original a_lm.
|
||||
|
||||
This operation can be performed for several different pixelisations:
|
||||
- a Gaussian with the minimal number of rings for exact analysis
|
||||
and a user-defined ring resolution
|
||||
- an ECP grid with the minimal number of rings for exact analysis
|
||||
and a user-defined ring resolution
|
||||
- a Healpix grid with a user-defined Nside parameter.
|
||||
|
||||
The user can specify the spin of the desired transform.
|
||||
|
||||
Copyright (C) 2006-2012 Max-Planck-Society
|
||||
\author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
#include "sharp.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_core.h"
|
||||
#include "memusage.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
static double drand (double min, double max)
|
||||
{ return min + (max-min)*rand()/(RAND_MAX+1.0); }
|
||||
|
||||
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin)
|
||||
{
|
||||
for (int mi=0;mi<helper->nm; ++mi)
|
||||
{
|
||||
int m=helper->mval[mi];
|
||||
for (int l=m;l<=helper->lmax; ++l)
|
||||
{
|
||||
if ((l<spin)&&(m<spin))
|
||||
alm[sharp_alm_index(helper,l,mi)] = 0.;
|
||||
else
|
||||
{
|
||||
double rv = drand(-1,1);
|
||||
double iv = (m==0) ? 0 : drand(-1,1);
|
||||
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_errors (dcmplx **alm, dcmplx **alm2,
|
||||
ptrdiff_t nalms, int ncomp)
|
||||
{
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
{
|
||||
double sum=0, sum2=0, maxdiff=0;
|
||||
for (ptrdiff_t m=0; m<nalms; ++m)
|
||||
{
|
||||
double x=creal(alm[i][m])-creal(alm2[i][m]),
|
||||
y=cimag(alm[i][m])-cimag(alm2[i][m]);
|
||||
sum+=x*x+y*y;
|
||||
sum2+=creal(alm[i][m])*creal(alm[i][m])+cimag(alm[i][m])*cimag(alm[i][m]);
|
||||
if (fabs(x)>maxdiff) maxdiff=fabs(x);
|
||||
if (fabs(y)>maxdiff) maxdiff=fabs(y);
|
||||
}
|
||||
sum=sqrt(sum/nalms);
|
||||
sum2=sqrt(sum2/nalms);
|
||||
printf("component %i: rms %e, maxerr %e\n",i, sum/sum2, maxdiff);
|
||||
}
|
||||
}
|
||||
|
||||
static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||
dcmplx **alm_orig, dcmplx **alm, int lmax, int mmax,
|
||||
ptrdiff_t npix, ptrdiff_t nalms, int spin, int ntrans, int niter)
|
||||
{
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
|
||||
double time;
|
||||
unsigned long long opcnt;
|
||||
sharp_execute(SHARP_MAP2ALM,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,
|
||||
&time,&opcnt);
|
||||
printf("wall time for map2alm: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
measure_errors(alm_orig,alm,nalms,ncomp);
|
||||
|
||||
for (int iter=0; iter<niter; ++iter)
|
||||
{
|
||||
double **map2;
|
||||
ALLOC2D(map2,double,ncomp,npix);
|
||||
printf ("\niteration %i:\n", iter+1);
|
||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],tinfo,alms,ntrans,1,0,
|
||||
&time,&opcnt);
|
||||
printf("wall time for alm2map: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
for (ptrdiff_t m=0; m<npix; ++m)
|
||||
map2[i][m] = map[i][m]-map2[i][m];
|
||||
|
||||
sharp_execute(SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],tinfo,alms,ntrans,1,0,
|
||||
&time,&opcnt);
|
||||
printf("wall time for map2alm: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
DEALLOC2D(map2);
|
||||
measure_errors(alm_orig,alm,nalms,ncomp);
|
||||
}
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
}
|
||||
|
||||
static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
||||
ptrdiff_t mmax, ptrdiff_t npix, int spin, int ntrans, int niter)
|
||||
{
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
|
||||
srand(4);
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
random_alm(alm[i],alms,spin);
|
||||
|
||||
dcmplx **alm2;
|
||||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||
|
||||
double time;
|
||||
unsigned long long opcnt;
|
||||
printf ("\niteration 0:\n");
|
||||
sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0,
|
||||
&time,&opcnt);
|
||||
printf("wall time for alm2map: %fs\n",time);
|
||||
printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time);
|
||||
|
||||
map2alm_iter(tinfo,map,alm,alm2,lmax,mmax,npix,nalms,spin,ntrans,niter);
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
DEALLOC2D(alm2);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
MPI_Init(NULL,NULL);
|
||||
#endif
|
||||
sharp_module_startup("sharp_test",argc,7,
|
||||
"<healpix|ecp|gauss> <lmax> <nside|nphi> <niter> <spin> <ntrans>",1);
|
||||
|
||||
int lmax=atoi(argv[2]);
|
||||
int niter=atoi(argv[4]);
|
||||
int spin=atoi(argv[5]);
|
||||
int ntrans=atoi(argv[6]);
|
||||
|
||||
printf("Testing map analysis accuracy.\n");
|
||||
printf("lmax=%d, %d iterations, spin=%d\n", lmax, niter, spin);
|
||||
|
||||
sharp_geom_info *tinfo;
|
||||
if (strcmp(argv[1],"gauss")==0)
|
||||
{
|
||||
int nrings=lmax+1;
|
||||
int ppring=atoi(argv[3]);
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
printf("\nTesting Gaussian grid (%d rings, %d pixels/ring, %ld pixels)\n",
|
||||
nrings,ppring,(long)npix);
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
else if (strcmp(argv[1],"ecp")==0)
|
||||
{
|
||||
int nrings=2*lmax+2;
|
||||
int ppring=atoi(argv[3]);
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
printf("\nTesting ECP grid (%d rings, %d pixels/ring, %ld pixels)\n",
|
||||
nrings,ppring,(long)npix);
|
||||
sharp_make_ecp_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
else if (strcmp(argv[1],"healpix")==0)
|
||||
{
|
||||
int nside=atoi(argv[3]);
|
||||
if (nside<1) nside=1;
|
||||
ptrdiff_t npix=12*(ptrdiff_t)nside*nside;
|
||||
printf("\nTesting Healpix grid (nside=%d, %ld pixels)\n",
|
||||
nside,(long)npix);
|
||||
sharp_make_healpix_geom_info (nside, 1, &tinfo);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
else
|
||||
UTIL_FAIL("unknown grid geometry");
|
||||
|
||||
printf("\nMemory high water mark: %.2f MB\n",VmHWM()/(1<<20));
|
||||
|
||||
#ifdef USE_MPI
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
359
external/sharp/libsharp/sharp_test_mpi.c
vendored
359
external/sharp/libsharp/sharp_test_mpi.c
vendored
@ -1,359 +0,0 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_test_mpi.c
|
||||
Accuracy test for libsharp's map analysis with MPI support.
|
||||
|
||||
This program first generates a_lm coefficients up to
|
||||
a user-specified lmax (with mmax=lmax); where applicable, the
|
||||
real and imaginary parts of the coefficients are uniform
|
||||
random numbers of the interval [-1;1[.
|
||||
Afterwards, the random a_lm are converted to a map.
|
||||
This map is analyzed (optionally using an iterative scheme
|
||||
with a user-supplied number of steps).
|
||||
After every iteration, the code then outputs the RMS of the residual a_lm
|
||||
(i.e. the difference between the current and original a_lm), divided by
|
||||
the RMS of the original a_lm, as well as the maximum absolute change of any
|
||||
real or imaginary part between the current and original a_lm.
|
||||
|
||||
This operation can be performed for several different pixelisations:
|
||||
- a Gaussian with the minimal number of rings for exact analysis
|
||||
and a user-defined ring resolution
|
||||
- an ECP grid with the minimal number of rings for exact analysis
|
||||
and a user-defined ring resolution
|
||||
- a Healpix grid with a user-defined Nside parameter.
|
||||
|
||||
The user can specify the spin of the desired transform.
|
||||
|
||||
Copyright (C) 2006-2012 Max-Planck-Society
|
||||
\author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifdef USE_MPI
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "sharp_mpi.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "walltime_c.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_core.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
int ntasks, mytask;
|
||||
|
||||
static unsigned long long totalops (unsigned long long val)
|
||||
{
|
||||
unsigned long long tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double maxTime (double val)
|
||||
{
|
||||
double tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double drand (double min, double max)
|
||||
{ return min + (max-min)*rand()/(RAND_MAX+1.0); }
|
||||
|
||||
static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
|
||||
{
|
||||
ptrdiff_t res=0;
|
||||
for (int i=0; i<ainfo->nm; ++i)
|
||||
res += ainfo->lmax-ainfo->mval[i]+1;
|
||||
return res;
|
||||
}
|
||||
|
||||
static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
|
||||
{
|
||||
ptrdiff_t res=0;
|
||||
for (int i=0; i<ginfo->npairs; ++i)
|
||||
{
|
||||
res += ginfo->pair[i].r1.nph;
|
||||
if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static void reduce_alm_info(sharp_alm_info *ainfo)
|
||||
{
|
||||
int nmnew=0;
|
||||
ptrdiff_t ofs = 0;
|
||||
for (int i=mytask; i<ainfo->nm; i+=ntasks,++nmnew)
|
||||
{
|
||||
ainfo->mval[nmnew]=ainfo->mval[i];
|
||||
ainfo->mvstart[nmnew]=ofs-ainfo->mval[nmnew];
|
||||
ofs+=ainfo->lmax-ainfo->mval[nmnew]+1;
|
||||
}
|
||||
ainfo->nm=nmnew;
|
||||
}
|
||||
|
||||
static void reduce_geom_info(sharp_geom_info *ginfo)
|
||||
{
|
||||
int npairsnew=0;
|
||||
ptrdiff_t ofs = 0;
|
||||
for (int i=mytask; i<ginfo->npairs; i+=ntasks,++npairsnew)
|
||||
{
|
||||
ginfo->pair[npairsnew]=ginfo->pair[i];
|
||||
ginfo->pair[npairsnew].r1.ofs=ofs;
|
||||
ofs+=ginfo->pair[npairsnew].r1.nph;
|
||||
ginfo->pair[npairsnew].r2.ofs=ofs;
|
||||
if (ginfo->pair[npairsnew].r2.nph>0) ofs+=ginfo->pair[npairsnew].r2.nph;
|
||||
}
|
||||
ginfo->npairs=npairsnew;
|
||||
}
|
||||
|
||||
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin)
|
||||
{
|
||||
static int cnt=0;
|
||||
++cnt;
|
||||
for (int mi=0;mi<helper->nm; ++mi)
|
||||
{
|
||||
int m=helper->mval[mi];
|
||||
srand(1234567*cnt+8912*m);
|
||||
for (int l=m;l<=helper->lmax; ++l)
|
||||
{
|
||||
if ((l<spin)&&(m<spin))
|
||||
alm[sharp_alm_index(helper,l,mi)] = 0.;
|
||||
else
|
||||
{
|
||||
double rv = drand(-1,1);
|
||||
double iv = (m==0) ? 0 : drand(-1,1);
|
||||
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_errors (dcmplx **alm, dcmplx **alm2,
|
||||
const sharp_alm_info *ainfo, int ncomp)
|
||||
{
|
||||
long nalms=get_nalms(ainfo), nalms_tot;
|
||||
MPI_Allreduce(&nalms,&nalms_tot,1,MPI_LONG,MPI_SUM,MPI_COMM_WORLD);
|
||||
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
{
|
||||
double sum=0, sum2=0, maxdiff=0, sumtot, sum2tot, maxdifftot;
|
||||
for (int mi=0; mi<ainfo->nm; ++mi)
|
||||
{
|
||||
int m=ainfo->mval[mi];
|
||||
for (int l=m; l<=ainfo->lmax; ++l)
|
||||
{
|
||||
ptrdiff_t idx=sharp_alm_index(ainfo,l,mi);
|
||||
double x=creal(alm[i][idx])-creal(alm2[i][idx]),
|
||||
y=cimag(alm[i][idx])-cimag(alm2[i][idx]);
|
||||
sum+=x*x+y*y;
|
||||
sum2+=creal(alm[i][idx])*creal(alm[i][idx])
|
||||
+cimag(alm[i][idx])*cimag(alm[i][idx]);
|
||||
if (fabs(x)>maxdiff) maxdiff=fabs(x);
|
||||
if (fabs(y)>maxdiff) maxdiff=fabs(y);
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Allreduce(&sum,&sumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
|
||||
MPI_Allreduce(&sum2,&sum2tot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
|
||||
MPI_Allreduce(&maxdiff,&maxdifftot,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
|
||||
sumtot=sqrt(sumtot/nalms_tot);
|
||||
sum2tot=sqrt(sum2tot/nalms_tot);
|
||||
if (mytask==0)
|
||||
printf("component %i: rms %e, maxerr %e\n",i, sumtot/sum2tot, maxdifftot);
|
||||
}
|
||||
}
|
||||
|
||||
static void map2alm_iter (sharp_geom_info *tinfo, double **map,
|
||||
dcmplx **alm_orig, dcmplx **alm, int lmax, int mmax,
|
||||
ptrdiff_t npix, int spin, int ntrans, int niter)
|
||||
{
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
reduce_alm_info(alms);
|
||||
|
||||
double jtime;
|
||||
unsigned long long jopcnt;
|
||||
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,0,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
|
||||
unsigned long long opcnt=totalops(jopcnt);
|
||||
double timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for map2alm: %fs\n",timer);
|
||||
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
|
||||
measure_errors(alm_orig,alm,alms,ncomp);
|
||||
|
||||
for (int iter=0; iter<niter; ++iter)
|
||||
{
|
||||
double **map2;
|
||||
ALLOC2D(map2,double,ncomp,npix);
|
||||
if (mytask==0) printf ("\niteration %i:\n", iter+1);
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],
|
||||
tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
|
||||
opcnt=totalops(jopcnt);
|
||||
timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
||||
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
for (ptrdiff_t m=0; m<npix; ++m)
|
||||
map2[i][m] = map[i][m]-map2[i][m];
|
||||
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,1,&alm[0],&map2[0],
|
||||
tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
|
||||
opcnt=totalops(jopcnt);
|
||||
timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for map2alm: %fs\n",wallTime()-timer);
|
||||
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
|
||||
DEALLOC2D(map2);
|
||||
measure_errors(alm_orig,alm,alms,ncomp);
|
||||
}
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
}
|
||||
|
||||
static void check_accuracy (sharp_geom_info *tinfo, ptrdiff_t lmax,
|
||||
ptrdiff_t mmax, ptrdiff_t npix, int spin, int ntrans, int niter)
|
||||
{
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
|
||||
double jtime;
|
||||
unsigned long long jopcnt;
|
||||
|
||||
sharp_alm_info *alms;
|
||||
ptrdiff_t nalms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
reduce_alm_info(alms);
|
||||
nalms=get_nalms(alms);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
srand(4);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
random_alm(alm[i],alms,spin);
|
||||
|
||||
dcmplx **alm2;
|
||||
ALLOC2D(alm2,dcmplx,ncomp,nalms);
|
||||
|
||||
if (mytask==0) printf ("\niteration 0:\n");
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,0,&alm[0],&map[0],
|
||||
tinfo,alms,ntrans,1,0,&jtime,&jopcnt);
|
||||
unsigned long long opcnt=totalops(jopcnt);
|
||||
double timer=maxTime(jtime);
|
||||
if (mytask==0) printf("wall time for alm2map: %fs\n",timer);
|
||||
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/timer);
|
||||
|
||||
map2alm_iter(tinfo, map, alm, alm2, lmax, mmax, npix, spin, ntrans, niter);
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
DEALLOC2D(alm2);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
MPI_Init(NULL,NULL);
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
|
||||
|
||||
sharp_module_startup("sharp_test_mpi",argc,7,
|
||||
"<healpix|ecp|gauss> <lmax> <nside|nphi> <niter> <spin> <ntrans>",
|
||||
mytask==0);
|
||||
int lmax=atoi(argv[2]);
|
||||
int niter=atoi(argv[4]);
|
||||
int spin=atoi(argv[5]);
|
||||
int ntrans=atoi(argv[6]);
|
||||
|
||||
if (mytask==0)
|
||||
{
|
||||
printf("Testing map analysis accuracy.\n");
|
||||
printf("lmax=%d, %d iterations, spin=%d\n", lmax, niter, spin);
|
||||
}
|
||||
|
||||
sharp_geom_info *tinfo;
|
||||
if (strcmp(argv[1],"gauss")==0)
|
||||
{
|
||||
int nrings=lmax+1;
|
||||
int ppring=atoi(argv[3]);
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
if (mytask==0)
|
||||
printf("\nTesting Gaussian grid (%d rings, %d pixels/ring, %ld pixels)\n",
|
||||
nrings,ppring,(long)npix);
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
|
||||
reduce_geom_info(tinfo);
|
||||
npix=get_npix(tinfo);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
else if (strcmp(argv[1],"ecp")==0)
|
||||
{
|
||||
int nrings=2*lmax+2;
|
||||
int ppring=atoi(argv[3]);
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
if (mytask==0)
|
||||
printf("\nTesting ECP grid (%d rings, %d pixels/ring, %ld pixels)\n",
|
||||
nrings,ppring,(long)npix);
|
||||
sharp_make_ecp_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
|
||||
reduce_geom_info(tinfo);
|
||||
npix=get_npix(tinfo);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
else if (strcmp(argv[1],"healpix")==0)
|
||||
{
|
||||
int nside=atoi(argv[3]);
|
||||
if (nside<1) nside=1;
|
||||
ptrdiff_t npix=12*(ptrdiff_t)nside*nside;
|
||||
if (mytask==0)
|
||||
printf("\nTesting Healpix grid (nside=%d, %ld pixels)\n",
|
||||
nside,(long)npix);
|
||||
sharp_make_healpix_geom_info (nside, 1, &tinfo);
|
||||
reduce_geom_info(tinfo);
|
||||
npix=get_npix(tinfo);
|
||||
check_accuracy(tinfo,lmax,lmax,npix,spin,ntrans,niter);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
else
|
||||
UTIL_FAIL("unknown grid geometry");
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include "c_utils.h"
|
||||
|
||||
int main(void)
|
||||
{ UTIL_FAIL("MPI support not enabled."); return 1; }
|
||||
|
||||
#endif
|
708
external/sharp/libsharp/sharp_testsuite.c
vendored
Normal file
708
external/sharp/libsharp/sharp_testsuite.c
vendored
Normal file
@ -0,0 +1,708 @@
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/* \file sharp_testsuite.c
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#include "sharp_mpi.h"
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#include "sharp.h"
|
||||
#include "sharp_internal.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "memusage.h"
|
||||
#include "sharp_vecsupport.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
int ntasks, mytask;
|
||||
|
||||
static double drand (double min, double max, int *state)
|
||||
{
|
||||
*state = (((*state) * 1103515245) + 12345) & 0x7fffffff;
|
||||
return min + (max-min)*(*state)/(0x7fffffff+1.0);
|
||||
}
|
||||
|
||||
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
int mi;
|
||||
#pragma omp for schedule (dynamic,100)
|
||||
for (mi=0;mi<helper->nm; ++mi)
|
||||
{
|
||||
int m=helper->mval[mi];
|
||||
int state=1234567*cnt+8912*m; // random seed
|
||||
for (int l=m;l<=helper->lmax; ++l)
|
||||
{
|
||||
if ((l<spin)&&(m<spin))
|
||||
alm[sharp_alm_index(helper,l,mi)] = 0.;
|
||||
else
|
||||
{
|
||||
double rv = drand(-1,1,&state);
|
||||
double iv = (m==0) ? 0 : drand(-1,1,&state);
|
||||
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end of parallel region
|
||||
}
|
||||
|
||||
static unsigned long long totalops (unsigned long long val)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
unsigned long long tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
#else
|
||||
return val;
|
||||
#endif
|
||||
}
|
||||
|
||||
static double maxTime (double val)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
double tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
#else
|
||||
return val;
|
||||
#endif
|
||||
}
|
||||
|
||||
static double allreduceSumDouble (double val)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
double tmp;
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
#else
|
||||
return val;
|
||||
#endif
|
||||
}
|
||||
|
||||
static double totalMem()
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
double tmp, val=VmHWM();
|
||||
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
return tmp;
|
||||
#else
|
||||
return VmHWM();
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_MPI
|
||||
static void reduce_alm_info(sharp_alm_info *ainfo)
|
||||
{
|
||||
int nmnew=0;
|
||||
ptrdiff_t ofs = 0;
|
||||
for (int i=mytask; i<ainfo->nm; i+=ntasks,++nmnew)
|
||||
{
|
||||
ainfo->mval[nmnew]=ainfo->mval[i];
|
||||
ainfo->mvstart[nmnew]=ofs-ainfo->mval[nmnew];
|
||||
ofs+=ainfo->lmax-ainfo->mval[nmnew]+1;
|
||||
}
|
||||
ainfo->nm=nmnew;
|
||||
}
|
||||
|
||||
static void reduce_geom_info(sharp_geom_info *ginfo)
|
||||
{
|
||||
int npairsnew=0;
|
||||
ptrdiff_t ofs = 0;
|
||||
for (int i=mytask; i<ginfo->npairs; i+=ntasks,++npairsnew)
|
||||
{
|
||||
ginfo->pair[npairsnew]=ginfo->pair[i];
|
||||
ginfo->pair[npairsnew].r1.ofs=ofs;
|
||||
ofs+=ginfo->pair[npairsnew].r1.nph;
|
||||
ginfo->pair[npairsnew].r2.ofs=ofs;
|
||||
if (ginfo->pair[npairsnew].r2.nph>0) ofs+=ginfo->pair[npairsnew].r2.nph;
|
||||
}
|
||||
ginfo->npairs=npairsnew;
|
||||
}
|
||||
#endif
|
||||
|
||||
static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
|
||||
{
|
||||
ptrdiff_t res=0;
|
||||
for (int i=0; i<ainfo->nm; ++i)
|
||||
res += ainfo->lmax-ainfo->mval[i]+1;
|
||||
return res;
|
||||
}
|
||||
|
||||
static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
|
||||
{
|
||||
ptrdiff_t res=0;
|
||||
for (int i=0; i<ginfo->npairs; ++i)
|
||||
{
|
||||
res += ginfo->pair[i].r1.nph;
|
||||
if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static double *get_sqsum_and_invert (dcmplx **alm, ptrdiff_t nalms, int ncomp)
|
||||
{
|
||||
double *sqsum=RALLOC(double,ncomp);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
{
|
||||
sqsum[i]=0;
|
||||
for (ptrdiff_t j=0; j<nalms; ++j)
|
||||
{
|
||||
sqsum[i]+=creal(alm[i][j])*creal(alm[i][j])
|
||||
+cimag(alm[i][j])*cimag(alm[i][j]);
|
||||
alm[i][j]=-alm[i][j];
|
||||
}
|
||||
}
|
||||
return sqsum;
|
||||
}
|
||||
|
||||
static void get_errors (dcmplx **alm, ptrdiff_t nalms, int ncomp, double *sqsum,
|
||||
double **err_abs, double **err_rel)
|
||||
{
|
||||
long nalms_tot=nalms;
|
||||
#ifdef USE_MPI
|
||||
MPI_Allreduce(&nalms,&nalms_tot,1,MPI_LONG,MPI_SUM,MPI_COMM_WORLD);
|
||||
#endif
|
||||
|
||||
*err_abs=RALLOC(double,ncomp);
|
||||
*err_rel=RALLOC(double,ncomp);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
{
|
||||
double sum=0, maxdiff=0, sumtot, sqsumtot, maxdifftot;
|
||||
for (ptrdiff_t j=0; j<nalms; ++j)
|
||||
{
|
||||
double sqr=creal(alm[i][j])*creal(alm[i][j])
|
||||
+cimag(alm[i][j])*cimag(alm[i][j]);
|
||||
sum+=sqr;
|
||||
if (sqr>maxdiff) maxdiff=sqr;
|
||||
}
|
||||
maxdiff=sqrt(maxdiff);
|
||||
|
||||
#ifdef USE_MPI
|
||||
MPI_Allreduce(&sum,&sumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
|
||||
MPI_Allreduce(&sqsum[i],&sqsumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
|
||||
MPI_Allreduce(&maxdiff,&maxdifftot,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
|
||||
#else
|
||||
sumtot=sum;
|
||||
sqsumtot=sqsum[i];
|
||||
maxdifftot=maxdiff;
|
||||
#endif
|
||||
sumtot=sqrt(sumtot/nalms_tot);
|
||||
sqsumtot=sqrt(sqsumtot/nalms_tot);
|
||||
(*err_abs)[i]=maxdifftot;
|
||||
(*err_rel)[i]=sumtot/sqsumtot;
|
||||
}
|
||||
}
|
||||
|
||||
static int good_fft_size(int n)
|
||||
{
|
||||
if (n<=6) return n;
|
||||
int bestfac=2*n;
|
||||
|
||||
for (int f2=1; f2<bestfac; f2*=2)
|
||||
for (int f23=f2; f23<bestfac; f23*=3)
|
||||
for (int f235=f23; f235<bestfac; f235*=5)
|
||||
if (f235>=n) bestfac=f235;
|
||||
|
||||
return bestfac;
|
||||
}
|
||||
|
||||
static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
|
||||
int *gpar2, sharp_geom_info **ginfo, sharp_alm_info **ainfo)
|
||||
{
|
||||
UTIL_ASSERT(lmax>=0,"lmax must not be negative");
|
||||
if (*mmax<0) *mmax=lmax;
|
||||
UTIL_ASSERT(*mmax<=lmax,"mmax larger than lmax");
|
||||
|
||||
if (mytask==0) printf ("lmax: %d, mmax: %d\n",lmax,*mmax);
|
||||
|
||||
sharp_make_triangular_alm_info(lmax,*mmax,1,ainfo);
|
||||
#ifdef USE_MPI
|
||||
reduce_alm_info(*ainfo);
|
||||
#endif
|
||||
|
||||
if (strcmp(gname,"healpix")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=lmax/2;
|
||||
if (*gpar1==0) ++(*gpar1);
|
||||
sharp_make_healpix_geom_info (*gpar1, 1, ginfo);
|
||||
if (mytask==0) printf ("HEALPix grid, nside=%d\n",*gpar1);
|
||||
}
|
||||
else if (strcmp(gname,"gauss")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_gauss_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0)
|
||||
printf ("Gauss-Legendre grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"fejer1")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=2*lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_fejer1_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0) printf ("Fejer1 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"fejer2")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=2*lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_fejer2_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0) printf ("Fejer2 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"cc")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=2*lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_cc_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0)
|
||||
printf("Clenshaw-Curtis grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"smallgauss")==0)
|
||||
{
|
||||
int nlat=*gpar1, nlon=*gpar2;
|
||||
if (nlat<1) nlat=lmax+1;
|
||||
if (nlon<1) nlon=2*(*mmax)+1;
|
||||
*gpar1=nlat; *gpar2=nlon;
|
||||
sharp_make_gauss_geom_info (nlat, nlon, 0., 1, nlon, ginfo);
|
||||
ptrdiff_t npix_o=get_npix(*ginfo);
|
||||
size_t ofs=0;
|
||||
for (int i=0; i<(*ginfo)->npairs; ++i)
|
||||
{
|
||||
sharp_ringpair *pair=&((*ginfo)->pair[i]);
|
||||
int pring=1+2*sharp_get_mlim(lmax,0,pair->r1.sth,pair->r1.cth);
|
||||
if (pring>nlon) pring=nlon;
|
||||
pring=good_fft_size(pring);
|
||||
pair->r1.nph=pring;
|
||||
pair->r1.weight*=nlon*1./pring;
|
||||
pair->r1.ofs=ofs;
|
||||
ofs+=pring;
|
||||
if (pair->r2.nph>0)
|
||||
{
|
||||
pair->r2.nph=pring;
|
||||
pair->r2.weight*=nlon*1./pring;
|
||||
pair->r2.ofs=ofs;
|
||||
ofs+=pring;
|
||||
}
|
||||
}
|
||||
if (mytask==0)
|
||||
{
|
||||
ptrdiff_t npix=get_npix(*ginfo);
|
||||
printf("Small Gauss grid, nlat=%d, npix=%ld, savings=%.2f%%\n",
|
||||
nlat,(long)npix,(npix_o-npix)*100./npix_o);
|
||||
}
|
||||
}
|
||||
else
|
||||
UTIL_FAIL("unknown grid geometry");
|
||||
|
||||
#ifdef USE_MPI
|
||||
reduce_geom_info(*ginfo);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void check_sign_scale(void)
|
||||
{
|
||||
int lmax=50;
|
||||
int mmax=lmax;
|
||||
sharp_geom_info *tinfo;
|
||||
int nrings=lmax+1;
|
||||
int ppring=2*lmax+2;
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
|
||||
|
||||
/* flip theta to emulate the "old" Gaussian grid geometry */
|
||||
for (int i=0; i<tinfo->npairs; ++i)
|
||||
{
|
||||
const double pi=3.141592653589793238462643383279502884197;
|
||||
tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
|
||||
tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
|
||||
tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
|
||||
tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
|
||||
}
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
|
||||
for (int ntrans=1; ntrans<10; ++ntrans)
|
||||
{
|
||||
double **map;
|
||||
ALLOC2D(map,double,2*ntrans,npix);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,2*ntrans,nalms);
|
||||
for (int i=0; i<2*ntrans; ++i)
|
||||
for (int j=0; j<nalms; ++j)
|
||||
alm[i][j]=1.+_Complex_I;
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[it][npix/2], 4.042209792157496651e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2], 3.137704477368562905e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-8.405730859837063917e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-2.398026536095463346e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-4.961140548331700728e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.412765834230440021e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.456676000884031197e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.516249174408820863e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-3.173406200299964119e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-5.831327404513146462e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.863257892248353897e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.103947835973212364e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.092463246472086439e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-1.411433220713928165e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-1.146122859381925082e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1], 7.821618677689795049e+02,1e-12),
|
||||
"error");
|
||||
}
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
}
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
|
||||
static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
||||
int spin, int ntrans, int nv, double **err_abs, double **err_rel,
|
||||
double *t_a2m, double *t_m2a, unsigned long long *op_a2m,
|
||||
unsigned long long *op_m2a)
|
||||
{
|
||||
ptrdiff_t nalms = get_nalms(ainfo);
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
size_t npix = get_npix(ginfo);
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
SET_ARRAY(map[i],0,(int)npix,0);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
random_alm(alm[i],ainfo,spin,i+1);
|
||||
|
||||
#ifdef USE_MPI
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,
|
||||
ainfo,ntrans, SHARP_DP|SHARP_ADD|nv,t_a2m,op_a2m);
|
||||
#else
|
||||
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,ainfo,ntrans,
|
||||
SHARP_DP|nv,t_a2m,op_a2m);
|
||||
#endif
|
||||
if (t_a2m!=NULL) *t_a2m=maxTime(*t_a2m);
|
||||
if (op_a2m!=NULL) *op_a2m=totalops(*op_a2m);
|
||||
double *sqsum=get_sqsum_and_invert(alm,nalms,ncomp);
|
||||
#ifdef USE_MPI
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,
|
||||
ainfo,ntrans,SHARP_DP|SHARP_ADD|nv,t_m2a,op_m2a);
|
||||
#else
|
||||
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,ainfo,ntrans,
|
||||
SHARP_DP|SHARP_ADD|nv,t_m2a,op_m2a);
|
||||
#endif
|
||||
if (t_m2a!=NULL) *t_m2a=maxTime(*t_m2a);
|
||||
if (op_m2a!=NULL) *op_m2a=totalops(*op_m2a);
|
||||
get_errors(alm, nalms, ncomp, sqsum, err_abs, err_rel);
|
||||
|
||||
DEALLOC(sqsum);
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
}
|
||||
|
||||
static void check_accuracy (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
||||
int spin, int ntrans, int nv)
|
||||
{
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
double *err_abs, *err_rel;
|
||||
do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel, NULL, NULL,
|
||||
NULL, NULL);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
UTIL_ASSERT((err_rel[i]<1e-10) && (err_abs[i]<1e-10),"error");
|
||||
DEALLOC(err_rel);
|
||||
DEALLOC(err_abs);
|
||||
}
|
||||
|
||||
static void sharp_acctest(void)
|
||||
{
|
||||
if (mytask==0) sharp_module_startup("sharp_acctest",1,1,"",1);
|
||||
|
||||
if (mytask==0) printf("Checking signs and scales.\n");
|
||||
check_sign_scale();
|
||||
if (mytask==0) printf("Passed.\n\n");
|
||||
|
||||
if (mytask==0) printf("Testing map analysis accuracy.\n");
|
||||
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
int lmax=127, mmax=127, nlat=128, nlon=256;
|
||||
get_infos ("gauss", lmax, &mmax, &nlat, &nlon, &ginfo, &ainfo);
|
||||
for (int nv=1; nv<=6; ++nv)
|
||||
for (int ntrans=1; ntrans<=6; ++ntrans)
|
||||
{
|
||||
check_accuracy(ginfo,ainfo,0,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,1,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,2,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,3,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,30,ntrans,nv);
|
||||
}
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
if (mytask==0) printf("Passed.\n\n");
|
||||
}
|
||||
|
||||
static void sharp_test (int argc, const char **argv)
|
||||
{
|
||||
if (mytask==0) sharp_announce("sharp_test");
|
||||
UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans");
|
||||
int lmax=atoi(argv[3]);
|
||||
int mmax=atoi(argv[4]);
|
||||
int gpar1=atoi(argv[5]);
|
||||
int gpar2=atoi(argv[6]);
|
||||
int spin=atoi(argv[7]);
|
||||
int ntrans=atoi(argv[8]);
|
||||
|
||||
if (mytask==0) printf("Testing map analysis accuracy.\n");
|
||||
if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans);
|
||||
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo);
|
||||
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
double t_a2m=1e30, t_m2a=1e30;
|
||||
unsigned long long op_a2m, op_m2a;
|
||||
double *err_abs,*err_rel;
|
||||
|
||||
double t_acc=0;
|
||||
int nrpt=0;
|
||||
while(1)
|
||||
{
|
||||
++nrpt;
|
||||
double ta2m2, tm2a2;
|
||||
do_sht (ginfo, ainfo, spin, ntrans, 0, &err_abs, &err_rel, &ta2m2, &tm2a2,
|
||||
&op_a2m, &op_m2a);
|
||||
if (ta2m2<t_a2m) t_a2m=ta2m2;
|
||||
if (tm2a2<t_m2a) t_m2a=tm2a2;
|
||||
t_acc+=t_a2m+t_m2a;
|
||||
if (t_acc>2.)
|
||||
{
|
||||
if (mytask==0) printf("Best of %d runs\n",nrpt);
|
||||
break;
|
||||
}
|
||||
DEALLOC(err_abs);
|
||||
DEALLOC(err_rel);
|
||||
}
|
||||
|
||||
if (mytask==0) printf("wall time for alm2map: %fs\n",t_a2m);
|
||||
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_a2m/t_a2m);
|
||||
if (mytask==0) printf("wall time for map2alm: %fs\n",t_m2a);
|
||||
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_m2a/t_m2a);
|
||||
|
||||
if (mytask==0)
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
printf("component %i: rms %e, maxerr %e\n",i,err_rel[i], err_abs[i]);
|
||||
|
||||
double iosize = ncomp*(16.*get_nalms(ainfo) + 8.*get_npix(ginfo));
|
||||
iosize = allreduceSumDouble(iosize);
|
||||
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
|
||||
double tmem=totalMem();
|
||||
if (mytask==0)
|
||||
printf("\nMemory high water mark: %.2f MB\n",tmem/(1<<20));
|
||||
if (mytask==0)
|
||||
printf("Memory overhead: %.2f MB (%.2f%% of working set)\n",
|
||||
(tmem-iosize)/(1<<20),100.*(1.-iosize/tmem));
|
||||
|
||||
#ifdef _OPENMP
|
||||
int nomp=omp_get_max_threads();
|
||||
#else
|
||||
int nomp=1;
|
||||
#endif
|
||||
|
||||
double maxerel=0., maxeabs=0.;
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
{
|
||||
if (maxerel<err_rel[i]) maxerel=err_rel[i];
|
||||
if (maxeabs<err_abs[i]) maxeabs=err_abs[i];
|
||||
}
|
||||
|
||||
if (mytask==0)
|
||||
printf("%-12s %-10s %2d %d %2d %3d %6d %6d %6d %6d %2d %.2e %7.2f %.2e %7.2f"
|
||||
" %9.2f %6.2f %.2e %.2e\n",
|
||||
getenv("HOST"),argv[2],spin,VLEN,nomp,ntasks,lmax,mmax,gpar1,gpar2,
|
||||
ntrans,t_a2m,1e-9*op_a2m/t_a2m,t_m2a,1e-9*op_m2a/t_m2a,tmem/(1<<20),
|
||||
100.*(1.-iosize/tmem),maxerel,maxeabs);
|
||||
|
||||
DEALLOC(err_abs);
|
||||
DEALLOC(err_rel);
|
||||
}
|
||||
|
||||
static void sharp_bench (int argc, const char **argv)
|
||||
{
|
||||
if (mytask==0) sharp_announce("sharp_bench");
|
||||
UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans");
|
||||
int lmax=atoi(argv[3]);
|
||||
int mmax=atoi(argv[4]);
|
||||
int gpar1=atoi(argv[5]);
|
||||
int gpar2=atoi(argv[6]);
|
||||
int spin=atoi(argv[7]);
|
||||
int ntrans=atoi(argv[8]);
|
||||
|
||||
if (mytask==0) printf("Testing map analysis accuracy.\n");
|
||||
if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans);
|
||||
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo);
|
||||
|
||||
double ta2m_auto=1e30, tm2a_auto=1e30, ta2m_min=1e30, tm2a_min=1e30;
|
||||
unsigned long long opa2m_min=0, opm2a_min=0;
|
||||
int nvmin_a2m=-1, nvmin_m2a=-1;
|
||||
for (int nv=0; nv<=6; ++nv)
|
||||
{
|
||||
int ntries=0;
|
||||
double tacc=0;
|
||||
do
|
||||
{
|
||||
double t_a2m, t_m2a;
|
||||
unsigned long long op_a2m, op_m2a;
|
||||
double *err_abs,*err_rel;
|
||||
do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel,
|
||||
&t_a2m, &t_m2a, &op_a2m, &op_m2a);
|
||||
|
||||
DEALLOC(err_abs);
|
||||
DEALLOC(err_rel);
|
||||
tacc+=t_a2m+t_m2a;
|
||||
++ntries;
|
||||
if (nv==0)
|
||||
{
|
||||
if (t_a2m<ta2m_auto) ta2m_auto=t_a2m;
|
||||
if (t_m2a<tm2a_auto) tm2a_auto=t_m2a;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (t_a2m<ta2m_min) { nvmin_a2m=nv; ta2m_min=t_a2m; opa2m_min=op_a2m; }
|
||||
if (t_m2a<tm2a_min) { nvmin_m2a=nv; tm2a_min=t_m2a; opm2a_min=op_m2a; }
|
||||
}
|
||||
} while((ntries<2)||(tacc<3.));
|
||||
}
|
||||
if (mytask==0)
|
||||
{
|
||||
printf("a2m: nvmin=%d tmin=%fs speedup=%.2f%% perf=%.2fGFlops/s\n",
|
||||
nvmin_a2m,ta2m_min,100.*(ta2m_auto-ta2m_min)/ta2m_auto,
|
||||
1e-9*opa2m_min/ta2m_min);
|
||||
printf("m2a: nvmin=%d tmin=%fs speedup=%.2f%% perf=%.2fGFlops/s\n",
|
||||
nvmin_m2a,tm2a_min,100.*(tm2a_auto-tm2a_min)/tm2a_auto,
|
||||
1e-9*opm2a_min/tm2a_min);
|
||||
}
|
||||
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
MPI_Init(NULL,NULL);
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
|
||||
#else
|
||||
mytask=0; ntasks=1;
|
||||
#endif
|
||||
|
||||
UTIL_ASSERT(argc>=2,"need at least one command line argument");
|
||||
|
||||
if (strcmp(argv[1],"acctest")==0)
|
||||
sharp_acctest();
|
||||
else if (strcmp(argv[1],"test")==0)
|
||||
sharp_test(argc,argv);
|
||||
else if (strcmp(argv[1],"bench")==0)
|
||||
sharp_bench(argc,argv);
|
||||
else
|
||||
UTIL_FAIL("unknown command");
|
||||
|
||||
#ifdef USE_MPI
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
135
external/sharp/libsharp/sharp_vecsupport.h
vendored
135
external/sharp/libsharp/sharp_vecsupport.h
vendored
@ -25,7 +25,7 @@
|
||||
/* \file sharp_vecsupport.h
|
||||
* Convenience functions for vector arithmetics
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012,2013 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -40,34 +40,46 @@ typedef double Ts;
|
||||
#if (VLEN==1)
|
||||
|
||||
typedef double Tv;
|
||||
typedef float Tv_s;
|
||||
typedef int Tm;
|
||||
|
||||
#define vadd(a,b) ((a)+(b))
|
||||
#define vadd_s(a,b) ((a)+(b))
|
||||
#define vaddeq(a,b) ((a)+=(b))
|
||||
#define vaddeq_mask(mask,a,b) if (mask) (a)+=(b);
|
||||
#define vsub(a,b) ((a)-(b))
|
||||
#define vsub_s(a,b) ((a)-(b))
|
||||
#define vsubeq(a,b) ((a)-=(b))
|
||||
#define vsubeq_mask(mask,a,b) if (mask) (a)-=(b);
|
||||
#define vmul(a,b) ((a)*(b))
|
||||
#define vmul_s(a,b) ((a)*(b))
|
||||
#define vmuleq(a,b) ((a)*=(b))
|
||||
#define vmuleq_mask(mask,a,b) if (mask) (a)*=(b);
|
||||
#define vfmaeq(a,b,c) ((a)+=(b)*(c))
|
||||
#define vfmaeq_s(a,b,c) ((a)+=(b)*(c))
|
||||
#define vfmseq(a,b,c) ((a)-=(b)*(c))
|
||||
#define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
|
||||
#define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
|
||||
#define vneg(a) (-(a))
|
||||
#define vload(a) (a)
|
||||
#define vload_s(a) (a)
|
||||
#define vloadu(p) (*(p))
|
||||
#define vloadu_s(p) (*(p))
|
||||
#define vabs(a) fabs(a)
|
||||
#define vsqrt(a) sqrt(a)
|
||||
#define vlt(a,b) (((a)<(b))?1.:0.)
|
||||
#define vgt(a,b) (((a)>(b))?1.:0.)
|
||||
#define vge(a,b) (((a)>=(b))?1.:0.)
|
||||
#define vne(a,b) (((a)!=(b))?1.:0.)
|
||||
#define vand(a,b) ((((a)*(b))!=0.)?1.:0.)
|
||||
#define vor(a,b) ((((a)+(b))!=0.)?1.:0.)
|
||||
#define vlt(a,b) ((a)<(b))
|
||||
#define vgt(a,b) ((a)>(b))
|
||||
#define vge(a,b) ((a)>=(b))
|
||||
#define vne(a,b) ((a)!=(b))
|
||||
#define vand_mask(a,b) ((a)&&(b))
|
||||
#define vstoreu(p, a) (*(p)=a)
|
||||
#define vstoreu_s(p, a) (*(p)=a)
|
||||
|
||||
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
|
||||
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
||||
|
||||
#define vanyTrue(a) ((a)!=0.)
|
||||
#define vallTrue(a) ((a)!=0.)
|
||||
#define vblend(m,a,b) (((m)!=0.) ? (a) : (b))
|
||||
#define vanyTrue(a) (a)
|
||||
#define vallTrue(a) (a)
|
||||
#define vzero 0.
|
||||
#define vone 1.
|
||||
|
||||
@ -85,14 +97,32 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
||||
#endif
|
||||
|
||||
typedef __m128d Tv;
|
||||
typedef __m128 Tv_s;
|
||||
typedef __m128d Tm;
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
#define vblend__(m,a,b) _mm_blendv_pd(b,a,m)
|
||||
#else
|
||||
static inline Tv vblend__(Tv m, Tv a, Tv b)
|
||||
{ return _mm_or_pd(_mm_and_pd(a,m),_mm_andnot_pd(m,b)); }
|
||||
#endif
|
||||
#define vzero _mm_setzero_pd()
|
||||
#define vone _mm_set1_pd(1.)
|
||||
|
||||
#define vadd(a,b) _mm_add_pd(a,b)
|
||||
#define vadd_s(a,b) _mm_add_ps(a,b)
|
||||
#define vaddeq(a,b) a=_mm_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm_add_pd(a,vblend__(mask,b,vzero))
|
||||
#define vsub(a,b) _mm_sub_pd(a,b)
|
||||
#define vsub_s(a,b) _mm_sub_ps(a,b)
|
||||
#define vsubeq(a,b) a=_mm_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm_sub_pd(a,vblend__(mask,b,vzero))
|
||||
#define vmul(a,b) _mm_mul_pd(a,b)
|
||||
#define vmul_s(a,b) _mm_mul_ps(a,b)
|
||||
#define vmuleq(a,b) a=_mm_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm_mul_pd(a,vblend__(mask,b,vone))
|
||||
#define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
|
||||
#define vfmaeq_s(a,b,c) a=_mm_add_ps(a,_mm_mul_ps(b,c))
|
||||
#define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
|
||||
#define vfmaaeq(a,b,c,d,e) \
|
||||
a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||
@ -100,51 +130,61 @@ typedef __m128d Tv;
|
||||
a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||
#define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
|
||||
#define vload(a) _mm_set1_pd(a)
|
||||
#define vload_s(a) _mm_set1_ps(a)
|
||||
#define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
|
||||
#define vsqrt(a) _mm_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm_cmplt_pd(a,b)
|
||||
#define vgt(a,b) _mm_cmpgt_pd(a,b)
|
||||
#define vge(a,b) _mm_cmpge_pd(a,b)
|
||||
#define vne(a,b) _mm_cmpneq_pd(a,b)
|
||||
#define vand(a,b) _mm_and_pd(a,b)
|
||||
#define vor(a,b) _mm_or_pd(a,b)
|
||||
#define vand_mask(a,b) _mm_and_pd(a,b)
|
||||
#define vmin(a,b) _mm_min_pd(a,b)
|
||||
#define vmax(a,b) _mm_max_pd(a,b);
|
||||
#define vanyTrue(a) (_mm_movemask_pd(a)!=0)
|
||||
#define vallTrue(a) (_mm_movemask_pd(a)==3)
|
||||
#if defined(__SSE4_1__)
|
||||
#define vblend(m,a,b) _mm_blendv_pd(b,a,m)
|
||||
#else
|
||||
static inline Tv vblend(Tv m, Tv a, Tv b)
|
||||
{ return _mm_or_pd(_mm_and_pd(a,m),_mm_andnot_pd(m,b)); }
|
||||
#endif
|
||||
#define vzero _mm_setzero_pd()
|
||||
#define vone _mm_set1_pd(1.)
|
||||
#define vloadu(p) _mm_loadu_pd(p)
|
||||
#define vloadu_s(p) _mm_loadu_ps(p)
|
||||
#define vstoreu(p, v) _mm_storeu_pd(p, v)
|
||||
#define vstoreu_s(p, v) _mm_storeu_ps(p, v)
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==4)
|
||||
|
||||
#include <immintrin.h>
|
||||
#ifdef __FMA4__
|
||||
#if (USE_FMA4)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
typedef __m256d Tv;
|
||||
typedef __m256 Tv_s;
|
||||
typedef __m256d Tm;
|
||||
|
||||
#define vblend__(m,a,b) _mm256_blendv_pd(b,a,m)
|
||||
#define vzero _mm256_setzero_pd()
|
||||
#define vone _mm256_set1_pd(1.)
|
||||
|
||||
#define vadd(a,b) _mm256_add_pd(a,b)
|
||||
#define vadd_s(a,b) _mm256_add_ps(a,b)
|
||||
#define vaddeq(a,b) a=_mm256_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm256_add_pd(a,vblend__(mask,b,vzero))
|
||||
#define vsub(a,b) _mm256_sub_pd(a,b)
|
||||
#define vsub_s(a,b) _mm256_sub_ps(a,b)
|
||||
#define vsubeq(a,b) a=_mm256_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm256_sub_pd(a,vblend__(mask,b,vzero))
|
||||
#define vmul(a,b) _mm256_mul_pd(a,b)
|
||||
#define vmul_s(a,b) _mm256_mul_ps(a,b)
|
||||
#define vmuleq(a,b) a=_mm256_mul_pd(a,b)
|
||||
#ifdef __FMA4__
|
||||
#define vmuleq_mask(mask,a,b) a=_mm256_mul_pd(a,vblend__(mask,b,vone))
|
||||
#if (USE_FMA4)
|
||||
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
|
||||
#define vfmaeq_s(a,b,c) a=_mm256_macc_ps(b,c,a)
|
||||
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
|
||||
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#else
|
||||
#define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
|
||||
#define vfmaeq_s(a,b,c) a=_mm256_add_ps(a,_mm256_mul_ps(b,c))
|
||||
#define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
|
||||
#define vfmaaeq(a,b,c,d,e) \
|
||||
a=_mm256_add_pd(a,_mm256_add_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
|
||||
@ -153,21 +193,62 @@ typedef __m256d Tv;
|
||||
#endif
|
||||
#define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
|
||||
#define vload(a) _mm256_set1_pd(a)
|
||||
#define vload_s(a) _mm256_set1_ps(a)
|
||||
#define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
|
||||
#define vsqrt(a) _mm256_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
|
||||
#define vgt(a,b) _mm256_cmp_pd(a,b,_CMP_GT_OQ)
|
||||
#define vge(a,b) _mm256_cmp_pd(a,b,_CMP_GE_OQ)
|
||||
#define vne(a,b) _mm256_cmp_pd(a,b,_CMP_NEQ_OQ)
|
||||
#define vand(a,b) _mm256_and_pd(a,b)
|
||||
#define vor(a,b) _mm256_or_pd(a,b)
|
||||
#define vand_mask(a,b) _mm256_and_pd(a,b)
|
||||
#define vmin(a,b) _mm256_min_pd(a,b)
|
||||
#define vmax(a,b) _mm256_max_pd(a,b)
|
||||
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
|
||||
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
|
||||
#define vblend(m,a,b) _mm256_blendv_pd(b,a,m)
|
||||
#define vzero _mm256_setzero_pd()
|
||||
#define vone _mm256_set1_pd(1.)
|
||||
|
||||
#define vloadu(p) _mm256_loadu_pd(p)
|
||||
#define vloadu_s(p) _mm256_loadu_ps(p)
|
||||
#define vstoreu(p, v) _mm256_storeu_pd(p, v)
|
||||
#define vstoreu_s(p, v) _mm256_storeu_ps(p, v)
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==8)
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
typedef __m512d Tv;
|
||||
typedef __mmask8 Tm;
|
||||
|
||||
#define vadd(a,b) _mm512_add_pd(a,b)
|
||||
#define vaddeq(a,b) a=_mm512_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm512_mask_add_pd(a,mask,a,b);
|
||||
#define vsub(a,b) _mm512_sub_pd(a,b)
|
||||
#define vsubeq(a,b) a=_mm512_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm512_mask_sub_pd(a,mask,a,b);
|
||||
#define vmul(a,b) _mm512_mul_pd(a,b)
|
||||
#define vmuleq(a,b) a=_mm512_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm512_mask_mul_pd(a,mask,a,b);
|
||||
#define vfmaeq(a,b,c) a=_mm512_fmadd_pd(b,c,a)
|
||||
#define vfmseq(a,b,c) a=_mm512_fnmadd_pd(b,c,a)
|
||||
#define vfmaaeq(a,b,c,d,e) a=_mm512_fmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) a=_mm512_fnmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
|
||||
#define vneg(a) _mm512_mul_pd(a,_mm512_set1_pd(-1.))
|
||||
#define vload(a) _mm512_set1_pd(a)
|
||||
#define vabs(a) (__m512d)_mm512_andnot_epi64((__m512i)_mm512_set1_pd(-0.),(__m512i)a)
|
||||
#define vsqrt(a) _mm512_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm512_cmplt_pd_mask(a,b)
|
||||
#define vgt(a,b) _mm512_cmpnle_pd_mask(a,b)
|
||||
#define vge(a,b) _mm512_cmpnlt_pd_mask(a,b)
|
||||
#define vne(a,b) _mm512_cmpneq_pd_mask(a,b)
|
||||
#define vand_mask(a,b) ((a)&(b))
|
||||
#define vmin(a,b) _mm512_min_pd(a,b)
|
||||
#define vmax(a,b) _mm512_max_pd(a,b)
|
||||
#define vanyTrue(a) (a!=0)
|
||||
#define vallTrue(a) (a==255)
|
||||
|
||||
#define vzero _mm512_setzero_pd()
|
||||
#define vone _mm512_set1_pd(1.)
|
||||
|
||||
#endif
|
||||
|
||||
|
24
external/sharp/libsharp/sharp_vecutil.h
vendored
24
external/sharp/libsharp/sharp_vecutil.h
vendored
@ -25,14 +25,18 @@
|
||||
/*! \file sharp_vecutil.h
|
||||
* Functionality related to vector instruction support
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* Copyright (C) 2012,2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef SHARP_VECUTIL_H
|
||||
#define SHARP_VECUTIL_H
|
||||
|
||||
#if (defined (__AVX__))
|
||||
#ifndef VLEN
|
||||
|
||||
#if (defined (__MIC__))
|
||||
#define VLEN 8
|
||||
#elif (defined (__AVX__))
|
||||
#define VLEN 4
|
||||
#elif (defined (__SSE2__))
|
||||
#define VLEN 2
|
||||
@ -41,3 +45,19 @@
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==1)
|
||||
#define VLEN_s 1
|
||||
#else
|
||||
#define VLEN_s (2*VLEN)
|
||||
#endif
|
||||
|
||||
#ifndef USE_FMA4
|
||||
#ifdef __FMA4__
|
||||
#define USE_FMA4 1
|
||||
#else
|
||||
#define USE_FMA4 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
6
external/sharp/libsharp/sharp_ylmgen_c.c
vendored
6
external/sharp/libsharp/sharp_ylmgen_c.c
vendored
@ -25,7 +25,7 @@
|
||||
/*
|
||||
* Helper code for efficient calculation of Y_lm(theta,phi=0)
|
||||
*
|
||||
* Copyright (C) 2005-2012 Max-Planck-Society
|
||||
* Copyright (C) 2005-2014 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
@ -47,7 +47,9 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
|
||||
|
||||
gen->lmax = l_max;
|
||||
gen->mmax = m_max;
|
||||
UTIL_ASSERT(spin>=0,"incorrect spin");
|
||||
UTIL_ASSERT(spin>=0,"incorrect spin: must be nonnegative");
|
||||
UTIL_ASSERT(l_max>=spin,"incorrect l_max: must be >= spin");
|
||||
UTIL_ASSERT(l_max>=m_max,"incorrect l_max: must be >= m_max");
|
||||
gen->s = spin;
|
||||
UTIL_ASSERT((sharp_minscale<=0)&&(sharp_maxscale>0),
|
||||
"bad value for min/maxscale");
|
||||
|
1
external/sharp/python/fake_pyrex/Pyrex/Distutils/__init__.py
vendored
Normal file
1
external/sharp/python/fake_pyrex/Pyrex/Distutils/__init__.py
vendored
Normal file
@ -0,0 +1 @@
|
||||
# work around broken setuptools monkey patching
|
1
external/sharp/python/fake_pyrex/Pyrex/Distutils/build_ext.py
vendored
Normal file
1
external/sharp/python/fake_pyrex/Pyrex/Distutils/build_ext.py
vendored
Normal file
@ -0,0 +1 @@
|
||||
build_ext = "yes, it's there!"
|
1
external/sharp/python/fake_pyrex/Pyrex/__init__.py
vendored
Normal file
1
external/sharp/python/fake_pyrex/Pyrex/__init__.py
vendored
Normal file
@ -0,0 +1 @@
|
||||
# work around broken setuptools monkey patching
|
2
external/sharp/python/fake_pyrex/README
vendored
Normal file
2
external/sharp/python/fake_pyrex/README
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
This directory is here to fool setuptools into building .pyx files
|
||||
even if Pyrex is not installed. See ../setup.py.
|
1
external/sharp/python/libsharp/__init__.py
vendored
Normal file
1
external/sharp/python/libsharp/__init__.py
vendored
Normal file
@ -0,0 +1 @@
|
||||
from .libsharp import *
|
79
external/sharp/python/libsharp/libsharp.pxd
vendored
Normal file
79
external/sharp/python/libsharp/libsharp.pxd
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
cdef extern from "sharp.h":
|
||||
ctypedef long ptrdiff_t
|
||||
|
||||
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||
float *out, ptrdiff_t nx)
|
||||
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||
double *out, ptrdiff_t nx)
|
||||
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax)
|
||||
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax)
|
||||
void sharp_legendre_roots(int n, double *x, double *w)
|
||||
|
||||
# sharp_lowlevel.h
|
||||
ctypedef struct sharp_alm_info:
|
||||
pass
|
||||
|
||||
ctypedef struct sharp_geom_info:
|
||||
pass
|
||||
|
||||
void sharp_make_alm_info (int lmax, int mmax, int stride,
|
||||
ptrdiff_t *mvstart, sharp_alm_info **alm_info)
|
||||
|
||||
void sharp_make_geom_info (int nrings, int *nph, ptrdiff_t *ofs,
|
||||
int *stride, double *phi0, double *theta,
|
||||
double *wgt, sharp_geom_info **geom_info)
|
||||
|
||||
void sharp_destroy_alm_info(sharp_alm_info *info)
|
||||
void sharp_destroy_geom_info(sharp_geom_info *info)
|
||||
|
||||
ptrdiff_t sharp_map_size(sharp_geom_info *info)
|
||||
ptrdiff_t sharp_alm_count(sharp_alm_info *self)
|
||||
|
||||
|
||||
ctypedef enum sharp_jobtype:
|
||||
SHARP_YtW
|
||||
SHARP_Yt
|
||||
SHARP_WY
|
||||
SHARP_Y
|
||||
|
||||
ctypedef enum:
|
||||
SHARP_DP
|
||||
SHARP_ADD
|
||||
|
||||
void sharp_execute(sharp_jobtype type_,
|
||||
int spin,
|
||||
void *alm,
|
||||
void *map,
|
||||
sharp_geom_info *geom_info,
|
||||
sharp_alm_info *alm_info,
|
||||
int ntrans,
|
||||
int flags,
|
||||
double *time,
|
||||
unsigned long long *opcnt) nogil
|
||||
|
||||
ctypedef enum:
|
||||
SHARP_ERROR_NO_MPI
|
||||
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, sharp_geom_info *geom_info,
|
||||
sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt) nogil
|
||||
|
||||
|
||||
cdef extern from "sharp_geomhelpers.h":
|
||||
void sharp_make_subset_healpix_geom_info(
|
||||
int nside, int stride, int nrings,
|
||||
int *rings, double *weight, sharp_geom_info **geom_info)
|
||||
void sharp_make_gauss_geom_info(
|
||||
int nrings, int nphi, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
|
||||
cdef extern from "sharp_almhelpers.h":
|
||||
void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
|
||||
sharp_alm_info **alm_info)
|
||||
void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
|
||||
sharp_alm_info **alm_info)
|
||||
void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
|
||||
int nm, const int *ms, sharp_alm_info **alm_info)
|
||||
|
||||
|
256
external/sharp/python/libsharp/libsharp.pyx
vendored
Normal file
256
external/sharp/python/libsharp/libsharp.pyx
vendored
Normal file
@ -0,0 +1,256 @@
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['legendre_transform', 'legendre_roots', 'sht', 'synthesis', 'adjoint_synthesis',
|
||||
'analysis', 'adjoint_analysis', 'healpix_grid', 'triangular_order', 'rectangular_order',
|
||||
'packed_real_order']
|
||||
|
||||
|
||||
def legendre_transform(x, bl, out=None):
|
||||
if out is None:
|
||||
out = np.empty_like(x)
|
||||
if out.shape[0] == 0:
|
||||
return out
|
||||
elif x.dtype == np.float64:
|
||||
if bl.dtype != np.float64:
|
||||
bl = bl.astype(np.float64)
|
||||
return _legendre_transform(x, bl, out=out)
|
||||
elif x.dtype == np.float32:
|
||||
if bl.dtype != np.float32:
|
||||
bl = bl.astype(np.float32)
|
||||
return _legendre_transform_s(x, bl, out=out)
|
||||
else:
|
||||
raise ValueError("unsupported dtype")
|
||||
|
||||
|
||||
def _legendre_transform(double[::1] x, double[::1] bl, double[::1] out):
|
||||
if out.shape[0] != x.shape[0]:
|
||||
raise ValueError('x and out must have same shape')
|
||||
sharp_legendre_transform(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
|
||||
return np.asarray(out)
|
||||
|
||||
|
||||
def _legendre_transform_s(float[::1] x, float[::1] bl, float[::1] out):
|
||||
if out.shape[0] != x.shape[0]:
|
||||
raise ValueError('x and out must have same shape')
|
||||
sharp_legendre_transform_s(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
|
||||
return np.asarray(out)
|
||||
|
||||
|
||||
def legendre_roots(n):
|
||||
x = np.empty(n, np.double)
|
||||
w = np.empty(n, np.double)
|
||||
cdef double[::1] x_buf = x, w_buf = w
|
||||
if not (x_buf.shape[0] == w_buf.shape[0] == n):
|
||||
raise AssertionError()
|
||||
if n > 0:
|
||||
sharp_legendre_roots(n, &x_buf[0], &w_buf[0])
|
||||
return x, w
|
||||
|
||||
|
||||
JOBTYPE_TO_CONST = {
|
||||
'Y': SHARP_Y,
|
||||
'Yt': SHARP_Yt,
|
||||
'WY': SHARP_WY,
|
||||
'YtW': SHARP_YtW
|
||||
}
|
||||
|
||||
def sht(jobtype, geom_info ginfo, alm_info ainfo, double[:, :, ::1] input,
|
||||
int spin=0, comm=None, add=False):
|
||||
cdef void *comm_ptr
|
||||
cdef int flags = SHARP_DP | (SHARP_ADD if add else 0)
|
||||
cdef int r
|
||||
cdef sharp_jobtype jobtype_i
|
||||
cdef double[:, :, ::1] output_buf
|
||||
cdef int ntrans = input.shape[0] * input.shape[1]
|
||||
cdef int i, j
|
||||
|
||||
if spin == 0 and input.shape[1] != 1:
|
||||
raise ValueError('For spin == 0, we need input.shape[1] == 1')
|
||||
elif spin != 0 and input.shape[1] != 2:
|
||||
raise ValueError('For spin != 0, we need input.shape[1] == 2')
|
||||
|
||||
|
||||
cdef size_t[::1] ptrbuf = np.empty(2 * ntrans, dtype=np.uintp)
|
||||
cdef double **alm_ptrs = <double**>&ptrbuf[0]
|
||||
cdef double **map_ptrs = <double**>&ptrbuf[ntrans]
|
||||
|
||||
try:
|
||||
jobtype_i = JOBTYPE_TO_CONST[jobtype]
|
||||
except KeyError:
|
||||
raise ValueError('jobtype must be one of: %s' % ', '.join(sorted(JOBTYPE_TO_CONST.keys())))
|
||||
|
||||
if jobtype_i == SHARP_Y or jobtype_i == SHARP_WY:
|
||||
output = np.empty((input.shape[0], input.shape[1], ginfo.local_size()), dtype=np.float64)
|
||||
output_buf = output
|
||||
for i in range(input.shape[0]):
|
||||
for j in range(input.shape[1]):
|
||||
alm_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
|
||||
map_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
|
||||
else:
|
||||
output = np.empty((input.shape[0], input.shape[1], ainfo.local_size()), dtype=np.float64)
|
||||
output_buf = output
|
||||
for i in range(input.shape[0]):
|
||||
for j in range(input.shape[1]):
|
||||
alm_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
|
||||
map_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
|
||||
|
||||
if comm is None:
|
||||
with nogil:
|
||||
sharp_execute (
|
||||
jobtype_i,
|
||||
geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
|
||||
spin=spin, alm=alm_ptrs, map=map_ptrs,
|
||||
ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
|
||||
else:
|
||||
from mpi4py import MPI
|
||||
if not isinstance(comm, MPI.Comm):
|
||||
raise TypeError('comm must be an mpi4py communicator')
|
||||
from .libsharp_mpi import _addressof
|
||||
comm_ptr = <void*><size_t>_addressof(comm)
|
||||
with nogil:
|
||||
r = sharp_execute_mpi_maybe (
|
||||
comm_ptr, jobtype_i,
|
||||
geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
|
||||
spin=spin, alm=alm_ptrs, map=map_ptrs,
|
||||
ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
|
||||
if r == SHARP_ERROR_NO_MPI:
|
||||
raise Exception('MPI requested, but not available')
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def synthesis(*args, **kw):
|
||||
return sht('Y', *args, **kw)
|
||||
|
||||
def adjoint_synthesis(*args, **kw):
|
||||
return sht('Yt', *args, **kw)
|
||||
|
||||
def analysis(*args, **kw):
|
||||
return sht('YtW', *args, **kw)
|
||||
|
||||
def adjoint_analysis(*args, **kw):
|
||||
return sht('WY', *args, **kw)
|
||||
|
||||
|
||||
#
|
||||
# geom_info
|
||||
#
|
||||
class NotInitializedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
cdef class geom_info:
|
||||
cdef sharp_geom_info *ginfo
|
||||
|
||||
def __cinit__(self, *args, **kw):
|
||||
self.ginfo = NULL
|
||||
|
||||
def local_size(self):
|
||||
if self.ginfo == NULL:
|
||||
raise NotInitializedError()
|
||||
return sharp_map_size(self.ginfo)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.ginfo != NULL:
|
||||
sharp_destroy_geom_info(self.ginfo)
|
||||
self.ginfo = NULL
|
||||
|
||||
|
||||
cdef class healpix_grid(geom_info):
|
||||
|
||||
_weight_cache = {} # { (nside, 'T'/'Q'/'U') -> numpy array of ring weights cached from file }
|
||||
|
||||
def __init__(self, int nside, stride=1, int[::1] rings=None, double[::1] weights=None):
|
||||
if weights is not None and weights.shape[0] != 2 * nside:
|
||||
raise ValueError('weights must have length 2 * nside')
|
||||
sharp_make_subset_healpix_geom_info(nside, stride,
|
||||
nrings=4 * nside - 1 if rings is None else rings.shape[0],
|
||||
rings=NULL if rings is None else &rings[0],
|
||||
weight=NULL if weights is None else &weights[0],
|
||||
geom_info=&self.ginfo)
|
||||
|
||||
@classmethod
|
||||
def load_ring_weights(cls, nside, fields):
|
||||
"""
|
||||
Loads HEALPix ring weights from file. The environment variable
|
||||
HEALPIX should be set, and this routine will look in the `data`
|
||||
subdirectory.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
nside: int
|
||||
HEALPix nside parameter
|
||||
|
||||
fields: tuple of str
|
||||
Which weights to extract; pass ('T',) to only get scalar
|
||||
weights back, or ('T', 'Q', 'U') to get all the weights
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
List of NumPy arrays, according to fields parameter.
|
||||
|
||||
"""
|
||||
import os
|
||||
from astropy.io import fits
|
||||
data_path = os.path.join(os.environ['HEALPIX'], 'data')
|
||||
fits_field_names = {
|
||||
'T': 'TEMPERATURE WEIGHTS',
|
||||
'Q': 'Q-POLARISATION WEIGHTS',
|
||||
'U': 'U-POLARISATION WEIGHTS'}
|
||||
|
||||
must_load = [field for field in fields if (nside, field) not in cls._weight_cache]
|
||||
|
||||
if must_load:
|
||||
hdulist = fits.open(os.path.join(data_path, 'weight_ring_n%05d.fits' % nside))
|
||||
try:
|
||||
for field in must_load:
|
||||
w = hdulist[1].data.field(fits_field_names[field]).ravel().astype(np.double)
|
||||
w += 1
|
||||
cls._weight_cache[nside, field] = w
|
||||
finally:
|
||||
hdulist.close()
|
||||
return [cls._weight_cache[(nside, field)].copy() for field in fields]
|
||||
|
||||
#
|
||||
# alm_info
|
||||
#
|
||||
|
||||
|
||||
cdef class alm_info:
|
||||
cdef sharp_alm_info *ainfo
|
||||
|
||||
def __cinit__(self, *args, **kw):
|
||||
self.ainfo = NULL
|
||||
|
||||
def local_size(self):
|
||||
if self.ainfo == NULL:
|
||||
raise NotInitializedError()
|
||||
return sharp_alm_count(self.ainfo)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.ainfo != NULL:
|
||||
sharp_destroy_alm_info(self.ainfo)
|
||||
self.ainfo = NULL
|
||||
|
||||
|
||||
cdef class triangular_order(alm_info):
|
||||
def __init__(self, int lmax, mmax=None, stride=1):
|
||||
mmax = mmax if mmax is not None else lmax
|
||||
sharp_make_triangular_alm_info(lmax, mmax, stride, &self.ainfo)
|
||||
|
||||
|
||||
cdef class rectangular_order(alm_info):
|
||||
def __init__(self, int lmax, mmax=None, stride=1):
|
||||
mmax = mmax if mmax is not None else lmax
|
||||
sharp_make_rectangular_alm_info(lmax, mmax, stride, &self.ainfo)
|
||||
|
||||
|
||||
cdef class packed_real_order(alm_info):
|
||||
def __init__(self, int lmax, stride=1, int[::1] ms=None):
|
||||
sharp_make_mmajor_real_packed_alm_info(lmax=lmax, stride=stride,
|
||||
nm=lmax + 1 if ms is None else ms.shape[0],
|
||||
ms=NULL if ms is None else &ms[0],
|
||||
alm_info=&self.ainfo)
|
||||
|
17
external/sharp/python/libsharp/libsharp_mpi.pyx
vendored
Normal file
17
external/sharp/python/libsharp/libsharp_mpi.pyx
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
cdef extern from "mpi.h":
|
||||
ctypedef void *MPI_Comm
|
||||
|
||||
cdef extern from "Python.h":
|
||||
object PyLong_FromVoidPtr(void*)
|
||||
|
||||
cdef extern:
|
||||
ctypedef class mpi4py.MPI.Comm [object PyMPICommObject]:
|
||||
cdef MPI_Comm ob_mpi
|
||||
cdef unsigned flags
|
||||
|
||||
# For compatibility with mpi4py <= 1.3.1
|
||||
# Newer versions could use the MPI._addressof function
|
||||
def _addressof(Comm comm):
|
||||
cdef void *ptr = NULL
|
||||
ptr = <void*>&comm.ob_mpi
|
||||
return PyLong_FromVoidPtr(ptr)
|
1
external/sharp/python/libsharp/tests/__init__.py
vendored
Normal file
1
external/sharp/python/libsharp/tests/__init__.py
vendored
Normal file
@ -0,0 +1 @@
|
||||
# empty
|
59
external/sharp/python/libsharp/tests/test_legendre.py
vendored
Normal file
59
external/sharp/python/libsharp/tests/test_legendre.py
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
import numpy as np
|
||||
from scipy.special import legendre
|
||||
from scipy.special import p_roots
|
||||
import libsharp
|
||||
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
|
||||
def check_legendre_transform(lmax, ntheta):
|
||||
l = np.arange(lmax + 1)
|
||||
if lmax >= 1:
|
||||
sigma = -np.log(1e-3) / lmax / (lmax + 1)
|
||||
bl = np.exp(-sigma*l*(l+1))
|
||||
bl *= (2 * l + 1)
|
||||
else:
|
||||
bl = np.asarray([1], dtype=np.double)
|
||||
|
||||
theta = np.linspace(0, np.pi, ntheta, endpoint=True)
|
||||
x = np.cos(theta)
|
||||
|
||||
# Compute truth using scipy.special.legendre
|
||||
P = np.zeros((ntheta, lmax + 1))
|
||||
for l in range(lmax + 1):
|
||||
P[:, l] = legendre(l)(x)
|
||||
y0 = np.dot(P, bl)
|
||||
|
||||
|
||||
# double-precision
|
||||
y = libsharp.legendre_transform(x, bl)
|
||||
|
||||
assert_allclose(y, y0, rtol=1e-12, atol=1e-12)
|
||||
|
||||
# single-precision
|
||||
y32 = libsharp.legendre_transform(x.astype(np.float32), bl)
|
||||
assert_allclose(y, y0, rtol=1e-5, atol=1e-5)
|
||||
|
||||
|
||||
def test_legendre_transform():
|
||||
nthetas_to_try = [0, 9, 17, 19] + list(np.random.randint(500, size=20))
|
||||
for ntheta in nthetas_to_try:
|
||||
for lmax in [0, 1, 2, 3, 20] + list(np.random.randint(50, size=4)):
|
||||
yield check_legendre_transform, lmax, ntheta
|
||||
|
||||
def check_legendre_roots(n):
|
||||
xs, ws = ([], []) if n == 0 else p_roots(n) # from SciPy
|
||||
xl, wl = libsharp.legendre_roots(n)
|
||||
assert_allclose(xs, xl, rtol=1e-14, atol=1e-14)
|
||||
assert_allclose(ws, wl, rtol=1e-14, atol=1e-14)
|
||||
|
||||
def test_legendre_roots():
|
||||
"""
|
||||
Test the Legendre root-finding algorithm from libsharp by comparing it with
|
||||
the SciPy version.
|
||||
"""
|
||||
yield check_legendre_roots, 0
|
||||
yield check_legendre_roots, 1
|
||||
yield check_legendre_roots, 32
|
||||
yield check_legendre_roots, 33
|
||||
yield check_legendre_roots, 128
|
34
external/sharp/python/libsharp/tests/test_sht.py
vendored
Normal file
34
external/sharp/python/libsharp/tests/test_sht.py
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
import numpy as np
|
||||
import healpy
|
||||
from scipy.special import legendre
|
||||
from scipy.special import p_roots
|
||||
from numpy.testing import assert_allclose
|
||||
import libsharp
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def test_basic():
|
||||
lmax = 10
|
||||
nside = 8
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
ms = np.arange(rank, lmax + 1, MPI.COMM_WORLD.Get_size(), dtype=np.int32)
|
||||
|
||||
order = libsharp.packed_real_order(lmax, ms=ms)
|
||||
grid = libsharp.healpix_grid(nside)
|
||||
|
||||
|
||||
alm = np.zeros(order.local_size())
|
||||
if rank == 0:
|
||||
alm[0] = 1
|
||||
elif rank == 1:
|
||||
alm[0] = 1
|
||||
|
||||
|
||||
map = libsharp.synthesis(grid, order, np.repeat(alm[None, None, :], 3, 0), comm=MPI.COMM_WORLD)
|
||||
assert np.all(map[2, :] == map[1, :]) and np.all(map[1, :] == map[0, :])
|
||||
map = map[0, 0, :]
|
||||
if rank == 0:
|
||||
healpy.mollzoom(map)
|
||||
from matplotlib.pyplot import show
|
||||
show()
|
77
external/sharp/python/setup.py
vendored
Normal file
77
external/sharp/python/setup.py
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
#! /usr/bin/env python
|
||||
|
||||
descr = """Spherical Harmionic transforms package
|
||||
|
||||
Python API for the libsharp spherical harmonic transforms library
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
DISTNAME = 'libsharp'
|
||||
DESCRIPTION = 'libsharp library for fast Spherical Harmonic Transforms'
|
||||
LONG_DESCRIPTION = descr
|
||||
MAINTAINER = 'Dag Sverre Seljebotn',
|
||||
MAINTAINER_EMAIL = 'd.s.seljebotn@astro.uio.no',
|
||||
URL = 'http://sourceforge.net/projects/libsharp/'
|
||||
LICENSE = 'GPL'
|
||||
DOWNLOAD_URL = "http://sourceforge.net/projects/libsharp/"
|
||||
VERSION = '0.1'
|
||||
|
||||
# Add our fake Pyrex at the end of the Python search path
|
||||
# in order to fool setuptools into allowing compilation of
|
||||
# pyx files to C files. Importing Cython.Distutils then
|
||||
# makes Cython the tool of choice for this rather than
|
||||
# (the possibly nonexisting) Pyrex.
|
||||
project_path = os.path.split(__file__)[0]
|
||||
sys.path.append(os.path.join(project_path, 'fake_pyrex'))
|
||||
|
||||
from setuptools import setup, find_packages, Extension
|
||||
from Cython.Distutils import build_ext
|
||||
import numpy as np
|
||||
|
||||
libsharp = os.environ.get('LIBSHARP', None)
|
||||
libsharp_include = os.environ.get('LIBSHARP_INCLUDE', libsharp and os.path.join(libsharp, 'include'))
|
||||
libsharp_lib = os.environ.get('LIBSHARP_LIB', libsharp and os.path.join(libsharp, 'lib'))
|
||||
|
||||
if libsharp_include is None or libsharp_lib is None:
|
||||
sys.stderr.write('Please set LIBSHARP environment variable to the install directly of libsharp, '
|
||||
'this script will refer to the lib and include sub-directories. Alternatively '
|
||||
'set LIBSHARP_INCLUDE and LIBSHARP_LIB\n')
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup(install_requires = ['numpy'],
|
||||
packages = find_packages(),
|
||||
test_suite="nose.collector",
|
||||
# Well, technically zipping the package will work, but since it's
|
||||
# all compiled code it'll just get unzipped again at runtime, which
|
||||
# is pointless:
|
||||
zip_safe = False,
|
||||
name = DISTNAME,
|
||||
version = VERSION,
|
||||
maintainer = MAINTAINER,
|
||||
maintainer_email = MAINTAINER_EMAIL,
|
||||
description = DESCRIPTION,
|
||||
license = LICENSE,
|
||||
url = URL,
|
||||
download_url = DOWNLOAD_URL,
|
||||
long_description = LONG_DESCRIPTION,
|
||||
classifiers =
|
||||
[ 'Development Status :: 3 - Alpha',
|
||||
'Environment :: Console',
|
||||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: Science/Research',
|
||||
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||
'Topic :: Scientific/Engineering'],
|
||||
cmdclass = {"build_ext": build_ext},
|
||||
ext_modules = [
|
||||
Extension("libsharp.libsharp",
|
||||
["libsharp/libsharp.pyx"],
|
||||
libraries=["sharp", "fftpack", "c_utils"],
|
||||
include_dirs=[libsharp_include],
|
||||
library_dirs=[libsharp_lib],
|
||||
extra_link_args=["-fopenmp"],
|
||||
)
|
||||
],
|
||||
)
|
19
external/sharp/runjinja.py
vendored
Executable file
19
external/sharp/runjinja.py
vendored
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Preprocesses foo.c.in to foo.c. Reads STDIN and writes STDOUT.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import hashlib
|
||||
from jinja2 import Template, Environment
|
||||
|
||||
env = Environment(block_start_string='/*{',
|
||||
block_end_string='}*/',
|
||||
variable_start_string='{{',
|
||||
variable_end_string='}}')
|
||||
|
||||
extra_vars = dict(len=len)
|
||||
input = sys.stdin.read()
|
||||
sys.stdout.write('/* DO NOT EDIT. md5sum of source: %s */' % hashlib.md5(input).hexdigest())
|
||||
sys.stdout.write(env.from_string(input).render(**extra_vars))
|
Loading…
Reference in New Issue
Block a user