Merge branch 'pol_ispack' into 'master'
Pol ispack See merge request mtr/libsharp!16
This commit is contained in:
commit
ea75d4f65b
85 changed files with 4561 additions and 10046 deletions
46
.gitignore
vendored
46
.gitignore
vendored
|
@ -1,17 +1,35 @@
|
|||
*.o
|
||||
*.so
|
||||
#*
|
||||
*~
|
||||
*.pyc
|
||||
*.pyo
|
||||
|
||||
**.o
|
||||
**.lo
|
||||
**.la
|
||||
**.so
|
||||
**/#*
|
||||
**~
|
||||
**.pyc
|
||||
**.pyo
|
||||
.libs
|
||||
**/.deps
|
||||
**/.dirstamp
|
||||
libsharp-uninstalled.pc
|
||||
libsharp-uninstalled.sh
|
||||
libsharp.pc
|
||||
libsharp.pc.in
|
||||
perf.data*
|
||||
/auto
|
||||
/autom4te.cache
|
||||
/config.log
|
||||
/config.status
|
||||
/config/config.auto
|
||||
/m4
|
||||
config.log
|
||||
config.guess
|
||||
config.status
|
||||
config.sub
|
||||
ltmain.sh
|
||||
compile
|
||||
libtool
|
||||
missing
|
||||
/comp
|
||||
/configure
|
||||
/sharp_oracle.inc
|
||||
|
||||
/python/libsharp/libsharp.c
|
||||
/python/libsharp/libsharp_mpi.c
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/aclocal.m4
|
||||
/ar-lib
|
||||
/depcomp
|
||||
/install-sh
|
||||
|
|
85
COMPILE
Normal file
85
COMPILE
Normal file
|
@ -0,0 +1,85 @@
|
|||
Libsharp is configured, compiled and installed using GNU autotools.
|
||||
|
||||
If you have cloned the libsharp repository, you have to run
|
||||
"autoreconf -i" before starting the configuration, which requires several
|
||||
GNU developer tools to be available on your system.
|
||||
|
||||
When using a release tarball, configuration is done via
|
||||
|
||||
[CC=...] [CFLAGS=...] ./configure
|
||||
|
||||
The following sections briefly describe possible choices for compilers and
|
||||
flags.
|
||||
|
||||
|
||||
Fast math
|
||||
---------
|
||||
|
||||
Specifying "-ffast-math" is important for all compilers, since it allows the
|
||||
compiler to fuse multiplications and additions into FMA instructions, which is
|
||||
forbidden by the C99 standard. Since FMAs are a central aspect of the algorithm,
|
||||
they are needed for optimum performance.
|
||||
|
||||
If you are calling libsharp from other code which requires strict adherence
|
||||
to the C99 standard, you should still be able to compile libsharp with
|
||||
"-ffast-math" without any problems.
|
||||
|
||||
|
||||
Runtime CPU selection with gcc
|
||||
------------------------------
|
||||
|
||||
When using a recent gcc (6.0 and newer) on an x86_64 platform, the build
|
||||
machinery will compile the time-critical functions for several different
|
||||
architectures (SSE2, AVX, AVX2, FMA3, FMA4, AVX512F), and the appropriate
|
||||
implementation will be selected at runtime.
|
||||
This only happens if you do _not_ explicitly specify a target architecture via
|
||||
the compiler flags. I.e., please do _not_ specify "-march=native" or
|
||||
"-mtarget=avx" or similar if you want a portable binary that will run
|
||||
efficiently on different x86_64 CPUs.
|
||||
If you are compiling libsharp for a particular target CPU only, or if you are
|
||||
using a different compiler, however, "-march-native" should be used. The
|
||||
resulting binary will most likely not run on other computers, though.
|
||||
|
||||
|
||||
OpenMP
|
||||
------
|
||||
|
||||
OpenMP should be switched on for maximum performance, and at runtime
|
||||
OMP_NUM_THREADS should be set to the number of hardware threads (not physical
|
||||
cores) of the system.
|
||||
(Usually this is already the default setting when OMP_NUM_THREADS is not
|
||||
specified.)
|
||||
|
||||
|
||||
MPI
|
||||
---
|
||||
|
||||
MPI support is enabled by using the MPI compiler (typically "mpicc") _and_
|
||||
adding the flag "-DUSE_MPI".
|
||||
When using MPI and OpenMP simultaneously, the product of MPI tasks per node
|
||||
and OMP_NUM_THREADS should be equal to the number of hardware threads available
|
||||
on the node. One MPI task per node should result in the best performance.
|
||||
|
||||
|
||||
Example configure invocations
|
||||
=============================
|
||||
|
||||
GCC, OpenMP, portable binary:
|
||||
CFLAGS="-std=c99 -O3 -ffast-math -fopenmp" ./configure
|
||||
|
||||
GCC, no OpenMP, portable binary:
|
||||
CFLAGS="-std=c99 -O3 -ffast-math" ./configure
|
||||
|
||||
Clang, OpenMP, nonportable binary:
|
||||
CC=clang CFLAGS="-std=c99 -O3 -march=native -ffast-math -fopenmp" ./configure
|
||||
|
||||
Intel C compiler, OpenMP, nonportable binary:
|
||||
CC=icc CFLAGS="-std=c99 -O3 -march=native -ffast-math -fopenmp" ./configure
|
||||
|
||||
MPI support, nonportable binary:
|
||||
CC=mpicc CFLAGS="-DUSE_MPI -std=c99 -O3 -march=native -ffast-math" ./configure
|
||||
|
||||
Additional GCC flags for pedantic warning and debugging:
|
||||
|
||||
-Wall -Wextra -Wshadow -Wmissing-prototypes -Wfatal-errors -pedantic -g
|
||||
-fsanitize=address
|
80
Makefile
80
Makefile
|
@ -1,80 +0,0 @@
|
|||
SHARP_TARGET?=auto
|
||||
ifndef SHARP_TARGET
|
||||
SHARP_TARGET:=$(error SHARP_TARGET undefined. Please see README.compilation for help)UNDEFINED
|
||||
endif
|
||||
|
||||
default: compile_all
|
||||
SRCROOT:=$(shell pwd)
|
||||
include $(SRCROOT)/config/config.$(SHARP_TARGET)
|
||||
include $(SRCROOT)/config/rules.common
|
||||
|
||||
all_hdr:=
|
||||
all_lib:=
|
||||
all_cbin:=
|
||||
|
||||
FULL_INCLUDE:=
|
||||
|
||||
include c_utils/planck.make
|
||||
include libfftpack/planck.make
|
||||
include libsharp/planck.make
|
||||
include docsrc/planck.make
|
||||
|
||||
CYTHON_MODULES=python/libsharp/libsharp.so $(if $(MPI_CFLAGS), python/libsharp/libsharp_mpi.so)
|
||||
|
||||
$(all_lib): %: | $(LIBDIR)_mkdir
|
||||
@echo "# creating library $*"
|
||||
$(ARCREATE) $@ $^
|
||||
|
||||
$(all_cbin): %: | $(BINDIR)_mkdir
|
||||
@echo "# linking C binary $*"
|
||||
$(CL) -o $@ $^ $(CLFLAGS)
|
||||
|
||||
compile_all: $(all_cbin) hdrcopy
|
||||
|
||||
hdrclean:
|
||||
@if [ -d $(INCDIR) ]; then rm -rf $(INCDIR)/* ; fi
|
||||
|
||||
hdrcopy: | $(INCDIR)_mkdir
|
||||
@if [ "$(all_hdr)" ]; then cp -p $(all_hdr) $(INCDIR); fi
|
||||
|
||||
$(notdir $(all_cbin)) : % : $(BINDIR)/%
|
||||
|
||||
test: compile_all
|
||||
$(BINDIR)/sharp_testsuite acctest && \
|
||||
$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test fejer1 2047 -1 -1 4096 2 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 2
|
||||
|
||||
perftest: compile_all
|
||||
$(BINDIR)/sharp_testsuite test healpix 2048 -1 1024 -1 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 63 -1 -1 128 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 127 -1 -1 256 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 255 -1 -1 512 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 511 -1 -1 1024 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 1023 -1 -1 2048 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
|
||||
|
||||
%.c: %.c.in
|
||||
# Only do this if the md5sum changed, in order to avoid Python and Jinja
|
||||
# dependency when not modifying the c.in file
|
||||
grep `md5sum $< | cut -d ' ' -f 1` $@ || ./runjinja.py < $< > $@
|
||||
|
||||
genclean:
|
||||
rm libsharp/sharp_legendre.c || exit 0
|
||||
|
||||
$(CYTHON_MODULES): %.so: %.pyx
|
||||
ifndef PIC_CFLAGS
|
||||
$(error Python extension must be built using the --enable-pic configure option.)
|
||||
endif
|
||||
cython $<
|
||||
$(CC) $(DEBUG_CFLAGS) $(OPENMP_CFLAGS) $(PIC_CFLAGS) `python-config --cflags` -I$(INCDIR) -o $(<:.pyx=.o) -c $(<:.pyx=.c)
|
||||
$(CL) -shared $(<:.pyx=.o) $(OPENMP_CFLAGS) $(CYTHON_OBJ) -L$(LIBDIR) -lsharp -lfftpack -lc_utils -L`python-config --prefix`/lib `python-config --ldflags` -o $@
|
||||
|
||||
python: $(all_lib) hdrcopy $(CYTHON_MODULES)
|
||||
|
||||
# the following test files are automatic; the sht wrapper test
|
||||
# must be run manually and requires MPI at the moment..
|
||||
pytest: python
|
||||
cd python && nosetests --nocapture libsharp/tests/test_legendre_table.py libsharp/tests/test_legendre.py
|
49
Makefile.am
Normal file
49
Makefile.am
Normal file
|
@ -0,0 +1,49 @@
|
|||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
lib_LTLIBRARIES = libsharp.la
|
||||
|
||||
src_sharp = \
|
||||
c_utils/c_utils.c \
|
||||
c_utils/c_utils.h \
|
||||
pocketfft/pocketfft.c \
|
||||
pocketfft/pocketfft.h \
|
||||
libsharp/sharp.c \
|
||||
libsharp/sharp_almhelpers.c \
|
||||
libsharp/sharp_core.c \
|
||||
libsharp/sharp_core_avx.c \
|
||||
libsharp/sharp_core_avx2.c \
|
||||
libsharp/sharp_core_fma.c \
|
||||
libsharp/sharp_core_fma4.c \
|
||||
libsharp/sharp_core_avx512f.c \
|
||||
libsharp/sharp_geomhelpers.c \
|
||||
libsharp/sharp_legendre_roots.c \
|
||||
libsharp/sharp_ylmgen_c.c \
|
||||
libsharp/sharp_internal.h \
|
||||
libsharp/sharp_legendre_roots.h \
|
||||
libsharp/sharp_vecsupport.h \
|
||||
libsharp/sharp_ylmgen_c.h
|
||||
|
||||
include_HEADERS = \
|
||||
libsharp/sharp.h \
|
||||
libsharp/sharp_geomhelpers.h \
|
||||
libsharp/sharp_almhelpers.h \
|
||||
libsharp/sharp_cxx.h
|
||||
|
||||
EXTRA_DIST = \
|
||||
libsharp/sharp_core_inc.c \
|
||||
runtest.sh
|
||||
|
||||
libsharp_la_SOURCES = $(src_sharp)
|
||||
|
||||
check_PROGRAMS = sharp_testsuite
|
||||
sharp_testsuite_SOURCES = libsharp/sharp_testsuite.c c_utils/memusage.c c_utils/memusage.h c_utils/walltime_c.c c_utils/walltime_c.h
|
||||
sharp_testsuite_LDADD = libsharp.la
|
||||
|
||||
TESTS = runtest.sh
|
||||
|
||||
AM_CFLAGS = -I$(top_srcdir)/c_utils -I$(top_srcdir)/libsharp @AM_CFLAGS@
|
||||
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
nodist_pkgconfig_DATA = @PACKAGE_NAME@.pc
|
||||
|
||||
DISTCLEANFILES=@PACKAGE_NAME@.pc @PACKAGE_NAME@.pc.in @PACKAGE_NAME@-uninstalled.pc @PACKAGE_NAME@-uninstalled.sh
|
60
README.md
60
README.md
|
@ -1,43 +1,35 @@
|
|||
# Libsharp
|
||||
|
||||
*IMPORTANT NOTE*: It appears that the default branch upon cloning from
|
||||
github.com/dagss/libsharp was an outdated 'dagss' branch instead of
|
||||
the 'master' branch. To get the latest copy,
|
||||
please do `git checkout master; git pull`. New clones are no longer affected.
|
||||
Library for efficient spherical harmonic transforms at arbitrary spins,
|
||||
supporting CPU vectorization, OpenMP and MPI.
|
||||
|
||||
## Paper
|
||||
|
||||
https://arxiv.org/abs/1303.4945
|
||||
|
||||
## News
|
||||
|
||||
### January 2019
|
||||
|
||||
This update features significant speedups thanks to important algorithmic
|
||||
discoveries by Keiichi Ishioka
|
||||
(https://www.jstage.jst.go.jp/article/jmsj/96/2/96_2018-019/_article and
|
||||
personal communication).
|
||||
|
||||
These improvements reduce the fraction of CPU time spent on evaluating the
|
||||
recurrences for Y_lm coefficients, which means that computing multiple
|
||||
simultaneous SHTs no longer have a big performance advantage compared to SHTs
|
||||
done one after the other.
|
||||
As a consequence, libsharp support for simultaneous SHTs was dropped, making
|
||||
its interface much simpler.
|
||||
|
||||
With the proper compilers and flags (see the file COMPILE for details) libsharp
|
||||
is now built with support for SSE2, AVX, AVX2, FMA3, FMA4 and AVX512f and the
|
||||
appropriate implementation is selected dynamically at runtime. This should
|
||||
provide a very significant performance boost for everyone using pre-compiled
|
||||
portable binaries.
|
||||
|
||||
## Compilation
|
||||
|
||||
GNU make is required for compilation.
|
||||
|
||||
Libsharp compilation has been successfully tested with GNU and Intel compilers.
|
||||
When using gcc, version 4.x is required [1].
|
||||
Since libsharp was written in standard C99, other compilers should work fine,
|
||||
but SSE2/AVX support will most likely be deactivated.
|
||||
|
||||
If you obtained libsharp directly from the git repository, you will also
|
||||
need a copy of the GNU autotools. In this case, run "autoconf" in libsharp's
|
||||
main directory before any other steps.
|
||||
For libsharp releases distributed as a .tar.gz file, this step is not necessary.
|
||||
|
||||
Afterwards, simply run "./configure"; if this fails, please refer to the output
|
||||
of "./configure --help" for additional hints and, if necessary, provide
|
||||
additional flags to the configure script.
|
||||
Once the script finishes successfully, run "make"
|
||||
(or "gmake"). This should install the compilation products in the
|
||||
subdirectory "auto/".
|
||||
|
||||
Documentation can be created by the command "(g)make doc".
|
||||
However this requires the doxygen application to be installed
|
||||
on your system.
|
||||
The documentation will be created in the subdirectory doc/.
|
||||
|
||||
|
||||
[1] Some versions of the gcc 4.4.x release series contain a bug which causes
|
||||
the compiler to crash during libsharp compilation. This appears to be fixed
|
||||
in the gcc 4.4.7 release. It is possible to work around this problem by adding
|
||||
the compiler flag "-fno-tree-fre" after the other optimization flags - the
|
||||
configure script should do this automatically.
|
||||
The library uses the standard `autotools` mechanism for configuration,
|
||||
compilation and installation. See the file `COMPILE` for configuration hints.
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/*
|
||||
* Convenience functions
|
||||
*
|
||||
* Copyright (C) 2008, 2009, 2010, 2011, 2012 Max-Planck-Society
|
||||
* Copyright (C) 2008-2017 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
|
@ -44,7 +44,7 @@ void util_warn_ (const char *file, int line, const char *func, const char *msg)
|
|||
|
||||
/* This function tries to avoid allocations with a total size close to a high
|
||||
power of two (called the "critical stride" here), by adding a few more bytes
|
||||
if necssary. This lowers the probability that two arrays differ by a multiple
|
||||
if necessary. This lowers the probability that two arrays differ by a multiple
|
||||
of the critical stride in their starting address, which in turn lowers the
|
||||
risk of cache line contention. */
|
||||
static size_t manipsize(size_t sz)
|
||||
|
@ -61,7 +61,7 @@ void *util_malloc_ (size_t sz)
|
|||
{
|
||||
void *res;
|
||||
if (sz==0) return NULL;
|
||||
res = _mm_malloc(manipsize(sz),16);
|
||||
res = _mm_malloc(manipsize(sz),32);
|
||||
UTIL_ASSERT(res,"_mm_malloc() failed");
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/*! \file c_utils.h
|
||||
* Convenience functions
|
||||
*
|
||||
* Copyright (C) 2008, 2009, 2010, 2011 Max-Planck-Society
|
||||
* Copyright (C) 2008-2017 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* \note This file should only be included from .c files, NOT from .h files.
|
||||
*/
|
||||
|
@ -144,4 +144,10 @@ void util_free_ (void *ptr);
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
PKG:=c_utils
|
||||
|
||||
SD:=$(SRCROOT)/$(PKG)
|
||||
OD:=$(BLDROOT)/$(PKG)
|
||||
|
||||
FULL_INCLUDE+= -I$(SD)
|
||||
|
||||
HDR_$(PKG):=$(SD)/*.h
|
||||
LIB_$(PKG):=$(LIBDIR)/libc_utils.a
|
||||
|
||||
OBJ:=c_utils.o walltime_c.o memusage.o
|
||||
OBJ:=$(OBJ:%=$(OD)/%)
|
||||
|
||||
$(OBJ): $(HDR_$(PKG)) | $(OD)_mkdir
|
||||
$(LIB_$(PKG)): $(OBJ)
|
||||
|
||||
all_hdr+=$(HDR_$(PKG))
|
||||
all_lib+=$(LIB_$(PKG))
|
|
@ -25,7 +25,7 @@
|
|||
/*
|
||||
* Functionality for reading wall clock time
|
||||
*
|
||||
* Copyright (C) 2010, 2011 Max-Planck-Society
|
||||
* Copyright (C) 2010-2016 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
|
@ -33,6 +33,8 @@
|
|||
#include <omp.h>
|
||||
#elif defined (USE_MPI)
|
||||
#include "mpi.h"
|
||||
#elif defined (_WIN32)
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -46,6 +48,17 @@ double wallTime(void)
|
|||
return omp_get_wtime();
|
||||
#elif defined (USE_MPI)
|
||||
return MPI_Wtime();
|
||||
#elif defined (_WIN32)
|
||||
static double inv_freq = -1.;
|
||||
if (inv_freq<0)
|
||||
{
|
||||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
inv_freq = 1. / double(freq.QuadPart);
|
||||
}
|
||||
LARGE_INTEGER count;
|
||||
QueryPerformanceCounter(&count);
|
||||
return count.QuadPart*inv_freq;
|
||||
#else
|
||||
struct timeval t;
|
||||
gettimeofday(&t, NULL);
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
@SILENT_RULE@
|
||||
|
||||
CC=@CC@
|
||||
CL=@CC@
|
||||
CCFLAGS_NO_C=@CCFLAGS_NO_C@
|
||||
CCFLAGS=$(CCFLAGS_NO_C) -c
|
||||
CLFLAGS=-L. -L$(LIBDIR) @LDCCFLAGS@ -lm
|
||||
DEBUG_CFLAGS=@DEBUG_CFLAGS@
|
||||
MPI_CFLAGS=@MPI_CFLAGS@
|
||||
OPENMP_CFLAGS=@OPENMP_CFLAGS@
|
||||
PIC_CFLAGS=@PIC_CFLAGS@
|
||||
ARCREATE=@ARCREATE@
|
|
@ -1,33 +0,0 @@
|
|||
BLDROOT = $(SRCROOT)/build.$(SHARP_TARGET)
|
||||
PREFIX = $(SRCROOT)/$(SHARP_TARGET)
|
||||
BINDIR = $(PREFIX)/bin
|
||||
INCDIR = $(PREFIX)/include
|
||||
LIBDIR = $(PREFIX)/lib
|
||||
DOCDIR = $(SRCROOT)/doc
|
||||
PYTHONDIR = $(SRCROOT)/python/libsharp
|
||||
|
||||
# do not use any suffix rules
|
||||
.SUFFIXES:
|
||||
# do not use any default rules
|
||||
.DEFAULT:
|
||||
|
||||
echo_config:
|
||||
@echo using configuration \'$(SHARP_TARGET)\'
|
||||
|
||||
$(BLDROOT)/%.o : $(SRCROOT)/%.c | echo_config
|
||||
@echo "# compiling $*.c"
|
||||
cd $(@D) && $(CC) $(FULL_INCLUDE) -I$(BLDROOT) $(CCFLAGS) $<
|
||||
|
||||
$(BLDROOT)/%.o : $(SRCROOT)/%.cc | echo_config
|
||||
@echo "# compiling $*.cc"
|
||||
cd $(@D) && $(CXX) $(FULL_INCLUDE) -I$(BLDROOT) $(CXXCFLAGS) $<
|
||||
|
||||
%_mkdir:
|
||||
@if [ ! -d $* ]; then mkdir -p $* ; fi
|
||||
|
||||
clean:
|
||||
rm -rf $(BLDROOT) $(PREFIX) $(DOCDIR) autom4te.cache/ config.log config.status
|
||||
rm -rf $(PYTHONDIR)/*.c $(PYTHONDIR)/*.o $(PYTHONDIR)/*.so
|
||||
|
||||
distclean: clean
|
||||
rm -f config/config.auto
|
136
configure.ac
136
configure.ac
|
@ -1,113 +1,45 @@
|
|||
AC_INIT(config/config.auto.in)
|
||||
AC_INIT([libsharp], [1.0.0])
|
||||
AM_INIT_AUTOMAKE([foreign subdir-objects -Wall -Werror])
|
||||
AM_MAINTAINER_MODE([enable])
|
||||
|
||||
AC_CHECK_PROG([uname_found],[uname],[1],[0])
|
||||
if test $uname_found -eq 0 ; then
|
||||
echo "No uname found; setting system type to unknown."
|
||||
system="unknown"
|
||||
else
|
||||
system=`uname -s`-`uname -r`
|
||||
fi
|
||||
AC_LANG([C])
|
||||
|
||||
AC_TRY_COMPILE([], [@%:@ifndef __INTEL_COMPILER
|
||||
choke me
|
||||
@%:@endif], [ICC=[yes]], [ICC=[no]])
|
||||
dnl
|
||||
dnl Needed for linking on Windows.
|
||||
dnl Protect with m4_ifdef because AM_PROG_AR is required in
|
||||
dnl autoconf >= 1.12 when using -Wall, but the macro is
|
||||
dnl absent in old versions of autoconf.
|
||||
dnl
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
if test $ICC = yes; then GCC=no; fi
|
||||
CCTYPE=unknown
|
||||
if test $GCC = yes; then CCTYPE=gcc; fi
|
||||
if test $ICC = yes; then CCTYPE=icc; fi
|
||||
AC_OPENMP
|
||||
LT_INIT
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
SILENT_RULE=".SILENT:"
|
||||
AC_ARG_ENABLE(noisy-make,
|
||||
[ --enable-noisy-make enable detailed make output],
|
||||
[if test "$enableval" = yes; then
|
||||
SILENT_RULE=""
|
||||
fi])
|
||||
dnl
|
||||
dnl By default, install the headers into a subdirectory of
|
||||
dnl ${prefix}/include to avoid possible header filename collisions.
|
||||
dnl
|
||||
includedir="${includedir}/${PACKAGE_NAME}"
|
||||
|
||||
ENABLE_MPI=no
|
||||
AC_ARG_ENABLE(mpi,
|
||||
[ --enable-mpi enable generation of MPI-parallel code],
|
||||
[if test "$enableval" = yes; then
|
||||
ENABLE_MPI=yes
|
||||
fi])
|
||||
dnl
|
||||
dnl Enable silent build rules if this version of Automake supports them
|
||||
dnl
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
|
||||
ENABLE_DEBUG=no
|
||||
AC_ARG_ENABLE(debug,
|
||||
[ --enable-debug enable generation of debugging symbols],
|
||||
[if test "$enableval" = yes; then
|
||||
ENABLE_DEBUG=yes
|
||||
fi])
|
||||
|
||||
ENABLE_PIC=no
|
||||
AC_ARG_ENABLE(pic,
|
||||
[ --enable-pic enable generation of position independent code],
|
||||
[if test "$enableval" = yes; then
|
||||
ENABLE_PIC=yes
|
||||
fi])
|
||||
AC_PROG_CC_C99
|
||||
|
||||
case $CCTYPE in
|
||||
gcc)
|
||||
CCFLAGS="-O3 -fno-tree-vectorize -ffast-math -fomit-frame-pointer -std=c99 -pedantic -Wextra -Wall -Wno-unknown-pragmas -Wshadow -Wmissing-prototypes -Wfatal-errors"
|
||||
GCCVERSION="`$CC -dumpversion 2>&1`"
|
||||
echo "Using gcc version $GCCVERSION"
|
||||
AC_SUBST(GCCVERSION)
|
||||
changequote(,)
|
||||
gcc43=`echo $GCCVERSION | grep -c '^4\.[3456789]'`
|
||||
gcc44=`echo $GCCVERSION | grep -c '^4\.4'`
|
||||
changequote([,])
|
||||
if test $gcc43 -gt 0; then
|
||||
CCFLAGS="$CCFLAGS -march=native"
|
||||
fi
|
||||
if test $gcc44 -gt 0; then
|
||||
CCFLAGS="$CCFLAGS -fno-tree-fre"
|
||||
fi
|
||||
;;
|
||||
icc)
|
||||
CCFLAGS="-O3 -xHOST -std=c99 -ip -Wbrief -Wall -vec-report0 -openmp-report0 -wd383,981,1419,1572"
|
||||
;;
|
||||
*)
|
||||
CCFLAGS="-O2"
|
||||
# Don't do anything now
|
||||
;;
|
||||
esac
|
||||
# adding the lib to the files to link
|
||||
LIBS="-lm"
|
||||
|
||||
case $system in
|
||||
Darwin-*)
|
||||
ARCREATE="libtool -static -o"
|
||||
;;
|
||||
*)
|
||||
ARCREATE="ar cr"
|
||||
;;
|
||||
esac
|
||||
AC_PROG_LIBTOOL
|
||||
|
||||
if test $ENABLE_DEBUG = yes; then
|
||||
DEBUG_CFLAGS="-g"
|
||||
fi
|
||||
dnl
|
||||
dnl Create pkgconfig .pc file.
|
||||
dnl
|
||||
AX_CREATE_PKGCONFIG_INFO(,,,,[])
|
||||
AC_SUBST([LIBS])
|
||||
AC_SUBST([AM_CFLAGS])
|
||||
AC_SUBST([AM_LDFLAGS])
|
||||
|
||||
if test $ENABLE_PIC = yes; then
|
||||
PIC_CFLAGS="-fPIC"
|
||||
fi
|
||||
|
||||
if test $ENABLE_MPI = yes; then
|
||||
MPI_CFLAGS="-DUSE_MPI"
|
||||
fi
|
||||
|
||||
CCFLAGS="$CCFLAGS $DEBUG_CFLAGS $OPENMP_CFLAGS $PIC_CFLAGS $MPI_CFLAGS"
|
||||
|
||||
CCFLAGS_NO_C="$CCFLAGS $CPPFLAGS"
|
||||
|
||||
LDCCFLAGS="$LDFLAGS $CCFLAGS"
|
||||
|
||||
AC_SUBST(SILENT_RULE)
|
||||
AC_SUBST(CC)
|
||||
AC_SUBST(CCFLAGS_NO_C)
|
||||
AC_SUBST(LDCCFLAGS)
|
||||
AC_SUBST(DEBUG_CFLAGS)
|
||||
AC_SUBST(MPI_CFLAGS)
|
||||
AC_SUBST(OPENMP_CFLAGS)
|
||||
AC_SUBST(PIC_CFLAGS)
|
||||
AC_SUBST(ARCREATE)
|
||||
|
||||
AC_OUTPUT(config/config.auto)
|
||||
AC_CONFIG_FILES([Makefile])
|
||||
AC_OUTPUT
|
||||
|
|
|
@ -1,290 +0,0 @@
|
|||
# Doxyfile 1.8.1
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
DOXYFILE_ENCODING = UTF-8
|
||||
PROJECT_NAME = "LevelS C support library"
|
||||
PROJECT_NUMBER = 0.1
|
||||
PROJECT_BRIEF =
|
||||
PROJECT_LOGO =
|
||||
OUTPUT_DIRECTORY = .
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
BRIEF_MEMBER_DESC = NO
|
||||
REPEAT_BRIEF = YES
|
||||
ABBREVIATE_BRIEF =
|
||||
ALWAYS_DETAILED_SEC = NO
|
||||
INLINE_INHERITED_MEMB = NO
|
||||
FULL_PATH_NAMES = NO
|
||||
STRIP_FROM_PATH =
|
||||
STRIP_FROM_INC_PATH =
|
||||
SHORT_NAMES = NO
|
||||
JAVADOC_AUTOBRIEF = NO
|
||||
QT_AUTOBRIEF = NO
|
||||
MULTILINE_CPP_IS_BRIEF = NO
|
||||
INHERIT_DOCS = YES
|
||||
SEPARATE_MEMBER_PAGES = NO
|
||||
TAB_SIZE = 8
|
||||
ALIASES =
|
||||
TCL_SUBST =
|
||||
OPTIMIZE_OUTPUT_FOR_C = YES
|
||||
OPTIMIZE_OUTPUT_JAVA = NO
|
||||
OPTIMIZE_FOR_FORTRAN = NO
|
||||
OPTIMIZE_OUTPUT_VHDL = NO
|
||||
EXTENSION_MAPPING =
|
||||
MARKDOWN_SUPPORT = YES
|
||||
BUILTIN_STL_SUPPORT = NO
|
||||
CPP_CLI_SUPPORT = NO
|
||||
SIP_SUPPORT = NO
|
||||
IDL_PROPERTY_SUPPORT = YES
|
||||
DISTRIBUTE_GROUP_DOC = NO
|
||||
SUBGROUPING = YES
|
||||
INLINE_GROUPED_CLASSES = NO
|
||||
INLINE_SIMPLE_STRUCTS = NO
|
||||
TYPEDEF_HIDES_STRUCT = NO
|
||||
SYMBOL_CACHE_SIZE = 0
|
||||
LOOKUP_CACHE_SIZE = 0
|
||||
#---------------------------------------------------------------------------
|
||||
# Build related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
EXTRACT_ALL = NO
|
||||
EXTRACT_PRIVATE = NO
|
||||
EXTRACT_PACKAGE = NO
|
||||
EXTRACT_STATIC = NO
|
||||
EXTRACT_LOCAL_CLASSES = YES
|
||||
EXTRACT_LOCAL_METHODS = NO
|
||||
EXTRACT_ANON_NSPACES = NO
|
||||
HIDE_UNDOC_MEMBERS = YES
|
||||
HIDE_UNDOC_CLASSES = YES
|
||||
HIDE_FRIEND_COMPOUNDS = YES
|
||||
HIDE_IN_BODY_DOCS = NO
|
||||
INTERNAL_DOCS = NO
|
||||
CASE_SENSE_NAMES = YES
|
||||
HIDE_SCOPE_NAMES = NO
|
||||
SHOW_INCLUDE_FILES = YES
|
||||
FORCE_LOCAL_INCLUDES = NO
|
||||
INLINE_INFO = YES
|
||||
SORT_MEMBER_DOCS = NO
|
||||
SORT_BRIEF_DOCS = NO
|
||||
SORT_MEMBERS_CTORS_1ST = NO
|
||||
SORT_GROUP_NAMES = NO
|
||||
SORT_BY_SCOPE_NAME = NO
|
||||
STRICT_PROTO_MATCHING = NO
|
||||
GENERATE_TODOLIST = YES
|
||||
GENERATE_TESTLIST = YES
|
||||
GENERATE_BUGLIST = YES
|
||||
GENERATE_DEPRECATEDLIST= YES
|
||||
ENABLED_SECTIONS =
|
||||
MAX_INITIALIZER_LINES = 30
|
||||
SHOW_USED_FILES = YES
|
||||
SHOW_FILES = YES
|
||||
SHOW_NAMESPACES = YES
|
||||
FILE_VERSION_FILTER =
|
||||
LAYOUT_FILE =
|
||||
CITE_BIB_FILES =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to warning and progress messages
|
||||
#---------------------------------------------------------------------------
|
||||
QUIET = YES
|
||||
WARNINGS = YES
|
||||
WARN_IF_UNDOCUMENTED = YES
|
||||
WARN_IF_DOC_ERROR = YES
|
||||
WARN_NO_PARAMDOC = NO
|
||||
WARN_FORMAT = "$file:$line: $text"
|
||||
WARN_LOGFILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the input files
|
||||
#---------------------------------------------------------------------------
|
||||
INPUT = ../c_utils
|
||||
INPUT_ENCODING = UTF-8
|
||||
FILE_PATTERNS = *.h \
|
||||
*.c \
|
||||
*.dox
|
||||
RECURSIVE = YES
|
||||
EXCLUDE =
|
||||
EXCLUDE_SYMLINKS = NO
|
||||
EXCLUDE_PATTERNS =
|
||||
EXCLUDE_SYMBOLS =
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATTERNS =
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
IMAGE_PATH =
|
||||
INPUT_FILTER =
|
||||
FILTER_PATTERNS =
|
||||
FILTER_SOURCE_FILES = NO
|
||||
FILTER_SOURCE_PATTERNS =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to source browsing
|
||||
#---------------------------------------------------------------------------
|
||||
SOURCE_BROWSER = YES
|
||||
INLINE_SOURCES = NO
|
||||
STRIP_CODE_COMMENTS = NO
|
||||
REFERENCED_BY_RELATION = NO
|
||||
REFERENCES_RELATION = NO
|
||||
REFERENCES_LINK_SOURCE = YES
|
||||
USE_HTAGS = NO
|
||||
VERBATIM_HEADERS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the alphabetical class index
|
||||
#---------------------------------------------------------------------------
|
||||
ALPHABETICAL_INDEX = YES
|
||||
COLS_IN_ALPHA_INDEX = 5
|
||||
IGNORE_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the HTML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_HTML = YES
|
||||
HTML_OUTPUT = htmldoc
|
||||
HTML_FILE_EXTENSION = .html
|
||||
HTML_HEADER =
|
||||
HTML_FOOTER = footer.html
|
||||
HTML_STYLESHEET =
|
||||
HTML_EXTRA_FILES =
|
||||
HTML_COLORSTYLE_HUE = 220
|
||||
HTML_COLORSTYLE_SAT = 100
|
||||
HTML_COLORSTYLE_GAMMA = 80
|
||||
HTML_TIMESTAMP = YES
|
||||
HTML_DYNAMIC_SECTIONS = NO
|
||||
HTML_INDEX_NUM_ENTRIES = 100
|
||||
GENERATE_DOCSET = NO
|
||||
DOCSET_FEEDNAME = "Doxygen generated docs"
|
||||
DOCSET_BUNDLE_ID = org.doxygen.Project
|
||||
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
|
||||
DOCSET_PUBLISHER_NAME = Publisher
|
||||
GENERATE_HTMLHELP = NO
|
||||
CHM_FILE =
|
||||
HHC_LOCATION =
|
||||
GENERATE_CHI = NO
|
||||
CHM_INDEX_ENCODING =
|
||||
BINARY_TOC = NO
|
||||
TOC_EXPAND = NO
|
||||
GENERATE_QHP = NO
|
||||
QCH_FILE =
|
||||
QHP_NAMESPACE = org.doxygen.Project
|
||||
QHP_VIRTUAL_FOLDER = doc
|
||||
QHP_CUST_FILTER_NAME =
|
||||
QHP_CUST_FILTER_ATTRS =
|
||||
QHP_SECT_FILTER_ATTRS =
|
||||
QHG_LOCATION =
|
||||
GENERATE_ECLIPSEHELP = NO
|
||||
ECLIPSE_DOC_ID = org.doxygen.Project
|
||||
DISABLE_INDEX = NO
|
||||
GENERATE_TREEVIEW = NO
|
||||
ENUM_VALUES_PER_LINE = 4
|
||||
TREEVIEW_WIDTH = 250
|
||||
EXT_LINKS_IN_WINDOW = NO
|
||||
FORMULA_FONTSIZE = 10
|
||||
FORMULA_TRANSPARENT = YES
|
||||
USE_MATHJAX = NO
|
||||
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
|
||||
MATHJAX_EXTENSIONS =
|
||||
SEARCHENGINE = NO
|
||||
SERVER_BASED_SEARCH = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the LaTeX output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_LATEX = NO
|
||||
LATEX_OUTPUT = latex
|
||||
LATEX_CMD_NAME = latex
|
||||
MAKEINDEX_CMD_NAME = makeindex
|
||||
COMPACT_LATEX = YES
|
||||
PAPER_TYPE = a4wide
|
||||
EXTRA_PACKAGES =
|
||||
LATEX_HEADER =
|
||||
LATEX_FOOTER =
|
||||
PDF_HYPERLINKS = YES
|
||||
USE_PDFLATEX = YES
|
||||
LATEX_BATCHMODE = NO
|
||||
LATEX_HIDE_INDICES = NO
|
||||
LATEX_SOURCE_CODE = NO
|
||||
LATEX_BIB_STYLE = plain
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the RTF output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_RTF = NO
|
||||
RTF_OUTPUT = rtf
|
||||
COMPACT_RTF = NO
|
||||
RTF_HYPERLINKS = NO
|
||||
RTF_STYLESHEET_FILE =
|
||||
RTF_EXTENSIONS_FILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the man page output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_MAN = NO
|
||||
MAN_OUTPUT = man
|
||||
MAN_EXTENSION = .3
|
||||
MAN_LINKS = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the XML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_XML = NO
|
||||
XML_OUTPUT = xml
|
||||
XML_SCHEMA =
|
||||
XML_DTD =
|
||||
XML_PROGRAMLISTING = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options for the AutoGen Definitions output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_AUTOGEN_DEF = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the Perl module output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_PERLMOD = NO
|
||||
PERLMOD_LATEX = NO
|
||||
PERLMOD_PRETTY = YES
|
||||
PERLMOD_MAKEVAR_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the preprocessor
|
||||
#---------------------------------------------------------------------------
|
||||
ENABLE_PREPROCESSING = YES
|
||||
MACRO_EXPANSION = NO
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
SEARCH_INCLUDES = YES
|
||||
INCLUDE_PATH =
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
PREDEFINED =
|
||||
EXPAND_AS_DEFINED =
|
||||
SKIP_FUNCTION_MACROS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration::additions related to external references
|
||||
#---------------------------------------------------------------------------
|
||||
TAGFILES =
|
||||
GENERATE_TAGFILE = c_utils.tag
|
||||
ALLEXTERNALS = NO
|
||||
EXTERNAL_GROUPS = YES
|
||||
PERL_PATH = /usr/bin/perl
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the dot tool
|
||||
#---------------------------------------------------------------------------
|
||||
CLASS_DIAGRAMS = YES
|
||||
MSCGEN_PATH =
|
||||
HIDE_UNDOC_RELATIONS = YES
|
||||
HAVE_DOT = NO
|
||||
DOT_NUM_THREADS = 0
|
||||
DOT_FONTNAME = FreeSans
|
||||
DOT_FONTSIZE = 10
|
||||
DOT_FONTPATH =
|
||||
CLASS_GRAPH = YES
|
||||
COLLABORATION_GRAPH = YES
|
||||
GROUP_GRAPHS = YES
|
||||
UML_LOOK = NO
|
||||
UML_LIMIT_NUM_FIELDS = 10
|
||||
TEMPLATE_RELATIONS = YES
|
||||
INCLUDE_GRAPH = NO
|
||||
INCLUDED_BY_GRAPH = NO
|
||||
CALL_GRAPH = NO
|
||||
CALLER_GRAPH = NO
|
||||
GRAPHICAL_HIERARCHY = NO
|
||||
DIRECTORY_GRAPH = YES
|
||||
DOT_IMAGE_FORMAT = png
|
||||
INTERACTIVE_SVG = NO
|
||||
DOT_PATH =
|
||||
DOTFILE_DIRS =
|
||||
MSCFILE_DIRS =
|
||||
DOT_GRAPH_MAX_NODES = 50
|
||||
MAX_DOT_GRAPH_DEPTH = 0
|
||||
DOT_TRANSPARENT = NO
|
||||
DOT_MULTI_TARGETS = NO
|
||||
GENERATE_LEGEND = YES
|
||||
DOT_CLEANUP = YES
|
|
@ -1,5 +0,0 @@
|
|||
<hr><address style="align: right;"><small>
|
||||
Generated on $datetime for $projectname
|
||||
</a> </small></address>
|
||||
</body>
|
||||
</html>
|
|
@ -1,15 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
|
||||
<title>Libsharp source code documentation</title>
|
||||
</head><body>
|
||||
<H1>Libsharp source code documentation</H1>
|
||||
|
||||
<H2>C interfaces</H2>
|
||||
|
||||
<ul>
|
||||
<li><a href="c_utils/index.html">C support library</a>
|
||||
<li><a href="libfftpack/index.html">FFT interface</a>
|
||||
<li><a href="libsharp/index.html">Library for spherical harmonic transforms</a>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
|
@ -1,290 +0,0 @@
|
|||
# Doxyfile 1.8.1
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
DOXYFILE_ENCODING = UTF-8
|
||||
PROJECT_NAME = "LevelS FFT library"
|
||||
PROJECT_NUMBER = 0.1
|
||||
PROJECT_BRIEF =
|
||||
PROJECT_LOGO =
|
||||
OUTPUT_DIRECTORY = .
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
BRIEF_MEMBER_DESC = NO
|
||||
REPEAT_BRIEF = YES
|
||||
ABBREVIATE_BRIEF =
|
||||
ALWAYS_DETAILED_SEC = NO
|
||||
INLINE_INHERITED_MEMB = NO
|
||||
FULL_PATH_NAMES = NO
|
||||
STRIP_FROM_PATH =
|
||||
STRIP_FROM_INC_PATH =
|
||||
SHORT_NAMES = NO
|
||||
JAVADOC_AUTOBRIEF = NO
|
||||
QT_AUTOBRIEF = NO
|
||||
MULTILINE_CPP_IS_BRIEF = NO
|
||||
INHERIT_DOCS = YES
|
||||
SEPARATE_MEMBER_PAGES = NO
|
||||
TAB_SIZE = 8
|
||||
ALIASES =
|
||||
TCL_SUBST =
|
||||
OPTIMIZE_OUTPUT_FOR_C = YES
|
||||
OPTIMIZE_OUTPUT_JAVA = NO
|
||||
OPTIMIZE_FOR_FORTRAN = NO
|
||||
OPTIMIZE_OUTPUT_VHDL = NO
|
||||
EXTENSION_MAPPING =
|
||||
MARKDOWN_SUPPORT = YES
|
||||
BUILTIN_STL_SUPPORT = NO
|
||||
CPP_CLI_SUPPORT = NO
|
||||
SIP_SUPPORT = NO
|
||||
IDL_PROPERTY_SUPPORT = YES
|
||||
DISTRIBUTE_GROUP_DOC = NO
|
||||
SUBGROUPING = YES
|
||||
INLINE_GROUPED_CLASSES = NO
|
||||
INLINE_SIMPLE_STRUCTS = NO
|
||||
TYPEDEF_HIDES_STRUCT = NO
|
||||
SYMBOL_CACHE_SIZE = 0
|
||||
LOOKUP_CACHE_SIZE = 0
|
||||
#---------------------------------------------------------------------------
|
||||
# Build related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
EXTRACT_ALL = NO
|
||||
EXTRACT_PRIVATE = NO
|
||||
EXTRACT_PACKAGE = NO
|
||||
EXTRACT_STATIC = NO
|
||||
EXTRACT_LOCAL_CLASSES = YES
|
||||
EXTRACT_LOCAL_METHODS = NO
|
||||
EXTRACT_ANON_NSPACES = NO
|
||||
HIDE_UNDOC_MEMBERS = YES
|
||||
HIDE_UNDOC_CLASSES = YES
|
||||
HIDE_FRIEND_COMPOUNDS = YES
|
||||
HIDE_IN_BODY_DOCS = NO
|
||||
INTERNAL_DOCS = NO
|
||||
CASE_SENSE_NAMES = YES
|
||||
HIDE_SCOPE_NAMES = NO
|
||||
SHOW_INCLUDE_FILES = YES
|
||||
FORCE_LOCAL_INCLUDES = NO
|
||||
INLINE_INFO = YES
|
||||
SORT_MEMBER_DOCS = NO
|
||||
SORT_BRIEF_DOCS = NO
|
||||
SORT_MEMBERS_CTORS_1ST = NO
|
||||
SORT_GROUP_NAMES = NO
|
||||
SORT_BY_SCOPE_NAME = NO
|
||||
STRICT_PROTO_MATCHING = NO
|
||||
GENERATE_TODOLIST = YES
|
||||
GENERATE_TESTLIST = YES
|
||||
GENERATE_BUGLIST = YES
|
||||
GENERATE_DEPRECATEDLIST= YES
|
||||
ENABLED_SECTIONS =
|
||||
MAX_INITIALIZER_LINES = 30
|
||||
SHOW_USED_FILES = YES
|
||||
SHOW_FILES = YES
|
||||
SHOW_NAMESPACES = YES
|
||||
FILE_VERSION_FILTER =
|
||||
LAYOUT_FILE =
|
||||
CITE_BIB_FILES =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to warning and progress messages
|
||||
#---------------------------------------------------------------------------
|
||||
QUIET = YES
|
||||
WARNINGS = YES
|
||||
WARN_IF_UNDOCUMENTED = YES
|
||||
WARN_IF_DOC_ERROR = YES
|
||||
WARN_NO_PARAMDOC = NO
|
||||
WARN_FORMAT = "$file:$line: $text"
|
||||
WARN_LOGFILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the input files
|
||||
#---------------------------------------------------------------------------
|
||||
INPUT = ../libfftpack
|
||||
INPUT_ENCODING = UTF-8
|
||||
FILE_PATTERNS = *.h \
|
||||
*.c \
|
||||
*.dox
|
||||
RECURSIVE = YES
|
||||
EXCLUDE =
|
||||
EXCLUDE_SYMLINKS = NO
|
||||
EXCLUDE_PATTERNS =
|
||||
EXCLUDE_SYMBOLS =
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATTERNS =
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
IMAGE_PATH =
|
||||
INPUT_FILTER =
|
||||
FILTER_PATTERNS =
|
||||
FILTER_SOURCE_FILES = NO
|
||||
FILTER_SOURCE_PATTERNS =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to source browsing
|
||||
#---------------------------------------------------------------------------
|
||||
SOURCE_BROWSER = YES
|
||||
INLINE_SOURCES = NO
|
||||
STRIP_CODE_COMMENTS = NO
|
||||
REFERENCED_BY_RELATION = NO
|
||||
REFERENCES_RELATION = NO
|
||||
REFERENCES_LINK_SOURCE = YES
|
||||
USE_HTAGS = NO
|
||||
VERBATIM_HEADERS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the alphabetical class index
|
||||
#---------------------------------------------------------------------------
|
||||
ALPHABETICAL_INDEX = YES
|
||||
COLS_IN_ALPHA_INDEX = 5
|
||||
IGNORE_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the HTML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_HTML = YES
|
||||
HTML_OUTPUT = htmldoc
|
||||
HTML_FILE_EXTENSION = .html
|
||||
HTML_HEADER =
|
||||
HTML_FOOTER = footer.html
|
||||
HTML_STYLESHEET =
|
||||
HTML_EXTRA_FILES =
|
||||
HTML_COLORSTYLE_HUE = 220
|
||||
HTML_COLORSTYLE_SAT = 100
|
||||
HTML_COLORSTYLE_GAMMA = 80
|
||||
HTML_TIMESTAMP = YES
|
||||
HTML_DYNAMIC_SECTIONS = NO
|
||||
HTML_INDEX_NUM_ENTRIES = 100
|
||||
GENERATE_DOCSET = NO
|
||||
DOCSET_FEEDNAME = "Doxygen generated docs"
|
||||
DOCSET_BUNDLE_ID = org.doxygen.Project
|
||||
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
|
||||
DOCSET_PUBLISHER_NAME = Publisher
|
||||
GENERATE_HTMLHELP = NO
|
||||
CHM_FILE =
|
||||
HHC_LOCATION =
|
||||
GENERATE_CHI = NO
|
||||
CHM_INDEX_ENCODING =
|
||||
BINARY_TOC = NO
|
||||
TOC_EXPAND = NO
|
||||
GENERATE_QHP = NO
|
||||
QCH_FILE =
|
||||
QHP_NAMESPACE = org.doxygen.Project
|
||||
QHP_VIRTUAL_FOLDER = doc
|
||||
QHP_CUST_FILTER_NAME =
|
||||
QHP_CUST_FILTER_ATTRS =
|
||||
QHP_SECT_FILTER_ATTRS =
|
||||
QHG_LOCATION =
|
||||
GENERATE_ECLIPSEHELP = NO
|
||||
ECLIPSE_DOC_ID = org.doxygen.Project
|
||||
DISABLE_INDEX = NO
|
||||
GENERATE_TREEVIEW = NO
|
||||
ENUM_VALUES_PER_LINE = 4
|
||||
TREEVIEW_WIDTH = 250
|
||||
EXT_LINKS_IN_WINDOW = NO
|
||||
FORMULA_FONTSIZE = 10
|
||||
FORMULA_TRANSPARENT = YES
|
||||
USE_MATHJAX = NO
|
||||
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
|
||||
MATHJAX_EXTENSIONS =
|
||||
SEARCHENGINE = NO
|
||||
SERVER_BASED_SEARCH = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the LaTeX output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_LATEX = NO
|
||||
LATEX_OUTPUT = latex
|
||||
LATEX_CMD_NAME = latex
|
||||
MAKEINDEX_CMD_NAME = makeindex
|
||||
COMPACT_LATEX = YES
|
||||
PAPER_TYPE = a4wide
|
||||
EXTRA_PACKAGES =
|
||||
LATEX_HEADER =
|
||||
LATEX_FOOTER =
|
||||
PDF_HYPERLINKS = YES
|
||||
USE_PDFLATEX = YES
|
||||
LATEX_BATCHMODE = NO
|
||||
LATEX_HIDE_INDICES = NO
|
||||
LATEX_SOURCE_CODE = NO
|
||||
LATEX_BIB_STYLE = plain
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the RTF output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_RTF = NO
|
||||
RTF_OUTPUT = rtf
|
||||
COMPACT_RTF = NO
|
||||
RTF_HYPERLINKS = NO
|
||||
RTF_STYLESHEET_FILE =
|
||||
RTF_EXTENSIONS_FILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the man page output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_MAN = NO
|
||||
MAN_OUTPUT = man
|
||||
MAN_EXTENSION = .3
|
||||
MAN_LINKS = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the XML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_XML = NO
|
||||
XML_OUTPUT = xml
|
||||
XML_SCHEMA =
|
||||
XML_DTD =
|
||||
XML_PROGRAMLISTING = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options for the AutoGen Definitions output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_AUTOGEN_DEF = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the Perl module output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_PERLMOD = NO
|
||||
PERLMOD_LATEX = NO
|
||||
PERLMOD_PRETTY = YES
|
||||
PERLMOD_MAKEVAR_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the preprocessor
|
||||
#---------------------------------------------------------------------------
|
||||
ENABLE_PREPROCESSING = YES
|
||||
MACRO_EXPANSION = NO
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
SEARCH_INCLUDES = YES
|
||||
INCLUDE_PATH =
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
PREDEFINED =
|
||||
EXPAND_AS_DEFINED =
|
||||
SKIP_FUNCTION_MACROS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration::additions related to external references
|
||||
#---------------------------------------------------------------------------
|
||||
TAGFILES = c_utils.tag=../c_utils
|
||||
GENERATE_TAGFILE = libfftpack.tag
|
||||
ALLEXTERNALS = NO
|
||||
EXTERNAL_GROUPS = YES
|
||||
PERL_PATH = /usr/bin/perl
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the dot tool
|
||||
#---------------------------------------------------------------------------
|
||||
CLASS_DIAGRAMS = YES
|
||||
MSCGEN_PATH =
|
||||
HIDE_UNDOC_RELATIONS = YES
|
||||
HAVE_DOT = NO
|
||||
DOT_NUM_THREADS = 0
|
||||
DOT_FONTNAME = FreeSans
|
||||
DOT_FONTSIZE = 10
|
||||
DOT_FONTPATH =
|
||||
CLASS_GRAPH = YES
|
||||
COLLABORATION_GRAPH = YES
|
||||
GROUP_GRAPHS = YES
|
||||
UML_LOOK = NO
|
||||
UML_LIMIT_NUM_FIELDS = 10
|
||||
TEMPLATE_RELATIONS = YES
|
||||
INCLUDE_GRAPH = NO
|
||||
INCLUDED_BY_GRAPH = NO
|
||||
CALL_GRAPH = NO
|
||||
CALLER_GRAPH = NO
|
||||
GRAPHICAL_HIERARCHY = NO
|
||||
DIRECTORY_GRAPH = YES
|
||||
DOT_IMAGE_FORMAT = png
|
||||
INTERACTIVE_SVG = NO
|
||||
DOT_PATH =
|
||||
DOTFILE_DIRS =
|
||||
MSCFILE_DIRS =
|
||||
DOT_GRAPH_MAX_NODES = 50
|
||||
MAX_DOT_GRAPH_DEPTH = 0
|
||||
DOT_TRANSPARENT = NO
|
||||
DOT_MULTI_TARGETS = NO
|
||||
GENERATE_LEGEND = YES
|
||||
DOT_CLEANUP = YES
|
|
@ -1,291 +0,0 @@
|
|||
# Doxyfile 1.8.1
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
DOXYFILE_ENCODING = UTF-8
|
||||
PROJECT_NAME = "LevelS SHT library"
|
||||
PROJECT_NUMBER = 0.1
|
||||
PROJECT_BRIEF =
|
||||
PROJECT_LOGO =
|
||||
OUTPUT_DIRECTORY = .
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
BRIEF_MEMBER_DESC = NO
|
||||
REPEAT_BRIEF = YES
|
||||
ABBREVIATE_BRIEF =
|
||||
ALWAYS_DETAILED_SEC = NO
|
||||
INLINE_INHERITED_MEMB = NO
|
||||
FULL_PATH_NAMES = NO
|
||||
STRIP_FROM_PATH =
|
||||
STRIP_FROM_INC_PATH =
|
||||
SHORT_NAMES = NO
|
||||
JAVADOC_AUTOBRIEF = NO
|
||||
QT_AUTOBRIEF = NO
|
||||
MULTILINE_CPP_IS_BRIEF = NO
|
||||
INHERIT_DOCS = YES
|
||||
SEPARATE_MEMBER_PAGES = NO
|
||||
TAB_SIZE = 8
|
||||
ALIASES =
|
||||
TCL_SUBST =
|
||||
OPTIMIZE_OUTPUT_FOR_C = YES
|
||||
OPTIMIZE_OUTPUT_JAVA = NO
|
||||
OPTIMIZE_FOR_FORTRAN = NO
|
||||
OPTIMIZE_OUTPUT_VHDL = NO
|
||||
EXTENSION_MAPPING =
|
||||
MARKDOWN_SUPPORT = YES
|
||||
BUILTIN_STL_SUPPORT = NO
|
||||
CPP_CLI_SUPPORT = NO
|
||||
SIP_SUPPORT = NO
|
||||
IDL_PROPERTY_SUPPORT = YES
|
||||
DISTRIBUTE_GROUP_DOC = NO
|
||||
SUBGROUPING = YES
|
||||
INLINE_GROUPED_CLASSES = NO
|
||||
INLINE_SIMPLE_STRUCTS = NO
|
||||
TYPEDEF_HIDES_STRUCT = NO
|
||||
SYMBOL_CACHE_SIZE = 0
|
||||
LOOKUP_CACHE_SIZE = 0
|
||||
#---------------------------------------------------------------------------
|
||||
# Build related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
EXTRACT_ALL = NO
|
||||
EXTRACT_PRIVATE = NO
|
||||
EXTRACT_PACKAGE = NO
|
||||
EXTRACT_STATIC = NO
|
||||
EXTRACT_LOCAL_CLASSES = YES
|
||||
EXTRACT_LOCAL_METHODS = NO
|
||||
EXTRACT_ANON_NSPACES = NO
|
||||
HIDE_UNDOC_MEMBERS = YES
|
||||
HIDE_UNDOC_CLASSES = YES
|
||||
HIDE_FRIEND_COMPOUNDS = YES
|
||||
HIDE_IN_BODY_DOCS = NO
|
||||
INTERNAL_DOCS = NO
|
||||
CASE_SENSE_NAMES = YES
|
||||
HIDE_SCOPE_NAMES = NO
|
||||
SHOW_INCLUDE_FILES = YES
|
||||
FORCE_LOCAL_INCLUDES = NO
|
||||
INLINE_INFO = YES
|
||||
SORT_MEMBER_DOCS = NO
|
||||
SORT_BRIEF_DOCS = NO
|
||||
SORT_MEMBERS_CTORS_1ST = NO
|
||||
SORT_GROUP_NAMES = NO
|
||||
SORT_BY_SCOPE_NAME = NO
|
||||
STRICT_PROTO_MATCHING = NO
|
||||
GENERATE_TODOLIST = YES
|
||||
GENERATE_TESTLIST = YES
|
||||
GENERATE_BUGLIST = YES
|
||||
GENERATE_DEPRECATEDLIST= YES
|
||||
ENABLED_SECTIONS =
|
||||
MAX_INITIALIZER_LINES = 30
|
||||
SHOW_USED_FILES = YES
|
||||
SHOW_FILES = YES
|
||||
SHOW_NAMESPACES = YES
|
||||
FILE_VERSION_FILTER =
|
||||
LAYOUT_FILE =
|
||||
CITE_BIB_FILES =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to warning and progress messages
|
||||
#---------------------------------------------------------------------------
|
||||
QUIET = YES
|
||||
WARNINGS = YES
|
||||
WARN_IF_UNDOCUMENTED = YES
|
||||
WARN_IF_DOC_ERROR = YES
|
||||
WARN_NO_PARAMDOC = NO
|
||||
WARN_FORMAT = "$file:$line: $text"
|
||||
WARN_LOGFILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the input files
|
||||
#---------------------------------------------------------------------------
|
||||
INPUT = ../libsharp
|
||||
INPUT_ENCODING = UTF-8
|
||||
FILE_PATTERNS = *.h \
|
||||
*.c \
|
||||
*.dox
|
||||
RECURSIVE = YES
|
||||
EXCLUDE =
|
||||
EXCLUDE_SYMLINKS = NO
|
||||
EXCLUDE_PATTERNS =
|
||||
EXCLUDE_SYMBOLS =
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATTERNS =
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
IMAGE_PATH =
|
||||
INPUT_FILTER =
|
||||
FILTER_PATTERNS =
|
||||
FILTER_SOURCE_FILES = NO
|
||||
FILTER_SOURCE_PATTERNS =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to source browsing
|
||||
#---------------------------------------------------------------------------
|
||||
SOURCE_BROWSER = YES
|
||||
INLINE_SOURCES = NO
|
||||
STRIP_CODE_COMMENTS = NO
|
||||
REFERENCED_BY_RELATION = NO
|
||||
REFERENCES_RELATION = NO
|
||||
REFERENCES_LINK_SOURCE = YES
|
||||
USE_HTAGS = NO
|
||||
VERBATIM_HEADERS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the alphabetical class index
|
||||
#---------------------------------------------------------------------------
|
||||
ALPHABETICAL_INDEX = YES
|
||||
COLS_IN_ALPHA_INDEX = 5
|
||||
IGNORE_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the HTML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_HTML = YES
|
||||
HTML_OUTPUT = htmldoc
|
||||
HTML_FILE_EXTENSION = .html
|
||||
HTML_HEADER =
|
||||
HTML_FOOTER = footer.html
|
||||
HTML_STYLESHEET =
|
||||
HTML_EXTRA_FILES =
|
||||
HTML_COLORSTYLE_HUE = 220
|
||||
HTML_COLORSTYLE_SAT = 100
|
||||
HTML_COLORSTYLE_GAMMA = 80
|
||||
HTML_TIMESTAMP = YES
|
||||
HTML_DYNAMIC_SECTIONS = NO
|
||||
HTML_INDEX_NUM_ENTRIES = 100
|
||||
GENERATE_DOCSET = NO
|
||||
DOCSET_FEEDNAME = "Doxygen generated docs"
|
||||
DOCSET_BUNDLE_ID = org.doxygen.Project
|
||||
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
|
||||
DOCSET_PUBLISHER_NAME = Publisher
|
||||
GENERATE_HTMLHELP = NO
|
||||
CHM_FILE =
|
||||
HHC_LOCATION =
|
||||
GENERATE_CHI = NO
|
||||
CHM_INDEX_ENCODING =
|
||||
BINARY_TOC = NO
|
||||
TOC_EXPAND = NO
|
||||
GENERATE_QHP = NO
|
||||
QCH_FILE =
|
||||
QHP_NAMESPACE = org.doxygen.Project
|
||||
QHP_VIRTUAL_FOLDER = doc
|
||||
QHP_CUST_FILTER_NAME =
|
||||
QHP_CUST_FILTER_ATTRS =
|
||||
QHP_SECT_FILTER_ATTRS =
|
||||
QHG_LOCATION =
|
||||
GENERATE_ECLIPSEHELP = NO
|
||||
ECLIPSE_DOC_ID = org.doxygen.Project
|
||||
DISABLE_INDEX = NO
|
||||
GENERATE_TREEVIEW = NO
|
||||
ENUM_VALUES_PER_LINE = 4
|
||||
TREEVIEW_WIDTH = 250
|
||||
EXT_LINKS_IN_WINDOW = NO
|
||||
FORMULA_FONTSIZE = 10
|
||||
FORMULA_TRANSPARENT = YES
|
||||
USE_MATHJAX = NO
|
||||
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
|
||||
MATHJAX_EXTENSIONS =
|
||||
SEARCHENGINE = NO
|
||||
SERVER_BASED_SEARCH = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the LaTeX output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_LATEX = NO
|
||||
LATEX_OUTPUT = latex
|
||||
LATEX_CMD_NAME = latex
|
||||
MAKEINDEX_CMD_NAME = makeindex
|
||||
COMPACT_LATEX = YES
|
||||
PAPER_TYPE = a4wide
|
||||
EXTRA_PACKAGES =
|
||||
LATEX_HEADER =
|
||||
LATEX_FOOTER =
|
||||
PDF_HYPERLINKS = YES
|
||||
USE_PDFLATEX = YES
|
||||
LATEX_BATCHMODE = NO
|
||||
LATEX_HIDE_INDICES = NO
|
||||
LATEX_SOURCE_CODE = NO
|
||||
LATEX_BIB_STYLE = plain
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the RTF output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_RTF = NO
|
||||
RTF_OUTPUT = rtf
|
||||
COMPACT_RTF = NO
|
||||
RTF_HYPERLINKS = NO
|
||||
RTF_STYLESHEET_FILE =
|
||||
RTF_EXTENSIONS_FILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the man page output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_MAN = NO
|
||||
MAN_OUTPUT = man
|
||||
MAN_EXTENSION = .3
|
||||
MAN_LINKS = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the XML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_XML = NO
|
||||
XML_OUTPUT = xml
|
||||
XML_SCHEMA =
|
||||
XML_DTD =
|
||||
XML_PROGRAMLISTING = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options for the AutoGen Definitions output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_AUTOGEN_DEF = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the Perl module output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_PERLMOD = NO
|
||||
PERLMOD_LATEX = NO
|
||||
PERLMOD_PRETTY = YES
|
||||
PERLMOD_MAKEVAR_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the preprocessor
|
||||
#---------------------------------------------------------------------------
|
||||
ENABLE_PREPROCESSING = YES
|
||||
MACRO_EXPANSION = NO
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
SEARCH_INCLUDES = YES
|
||||
INCLUDE_PATH =
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
PREDEFINED =
|
||||
EXPAND_AS_DEFINED =
|
||||
SKIP_FUNCTION_MACROS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration::additions related to external references
|
||||
#---------------------------------------------------------------------------
|
||||
TAGFILES = libfftpack.tag=../libfftpack \
|
||||
c_utils.tag=../c_utils
|
||||
GENERATE_TAGFILE = libsharp.tag
|
||||
ALLEXTERNALS = NO
|
||||
EXTERNAL_GROUPS = YES
|
||||
PERL_PATH = /usr/bin/perl
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the dot tool
|
||||
#---------------------------------------------------------------------------
|
||||
CLASS_DIAGRAMS = YES
|
||||
MSCGEN_PATH =
|
||||
HIDE_UNDOC_RELATIONS = YES
|
||||
HAVE_DOT = NO
|
||||
DOT_NUM_THREADS = 0
|
||||
DOT_FONTNAME = FreeSans
|
||||
DOT_FONTSIZE = 10
|
||||
DOT_FONTPATH =
|
||||
CLASS_GRAPH = YES
|
||||
COLLABORATION_GRAPH = YES
|
||||
GROUP_GRAPHS = YES
|
||||
UML_LOOK = NO
|
||||
UML_LIMIT_NUM_FIELDS = 10
|
||||
TEMPLATE_RELATIONS = YES
|
||||
INCLUDE_GRAPH = NO
|
||||
INCLUDED_BY_GRAPH = NO
|
||||
CALL_GRAPH = NO
|
||||
CALLER_GRAPH = NO
|
||||
GRAPHICAL_HIERARCHY = NO
|
||||
DIRECTORY_GRAPH = YES
|
||||
DOT_IMAGE_FORMAT = png
|
||||
INTERACTIVE_SVG = NO
|
||||
DOT_PATH =
|
||||
DOTFILE_DIRS =
|
||||
MSCFILE_DIRS =
|
||||
DOT_GRAPH_MAX_NODES = 50
|
||||
MAX_DOT_GRAPH_DEPTH = 0
|
||||
DOT_TRANSPARENT = NO
|
||||
DOT_MULTI_TARGETS = NO
|
||||
GENERATE_LEGEND = YES
|
||||
DOT_CLEANUP = YES
|
|
@ -1,20 +0,0 @@
|
|||
PKG:=docsrc
|
||||
|
||||
docsrc_idx: $(DOCDIR)_mkdir
|
||||
cp $(SRCROOT)/docsrc/index_code.html $(DOCDIR)/index.html
|
||||
|
||||
docsrc_code_doc: $(DOCDIR)_mkdir docsrc_idx
|
||||
cd $(SRCROOT)/docsrc; \
|
||||
for i in c_utils libfftpack libsharp; do \
|
||||
doxygen $${i}.dox; \
|
||||
rm -rf $(DOCDIR)/$${i}; mv htmldoc $(DOCDIR)/$${i}; \
|
||||
done; \
|
||||
rm *.tag;
|
||||
|
||||
docsrc_clean:
|
||||
cd $(SRCROOT)/docsrc; \
|
||||
rm -f *.tag
|
||||
cd $(SRCROOT)/docsrc; \
|
||||
rm -rf htmldoc
|
||||
|
||||
doc: docsrc_code_doc
|
|
@ -1,286 +0,0 @@
|
|||
module sharp
|
||||
use iso_c_binding
|
||||
implicit none
|
||||
! alm_info flags
|
||||
integer, parameter :: SHARP_PACKED = 1
|
||||
|
||||
! sharp job types
|
||||
enum, bind(c)
|
||||
enumerator :: SHARP_YtW = 0
|
||||
enumerator :: SHARP_Y = 1
|
||||
enumerator :: SHARP_Yt = 2
|
||||
enumerator :: SHARP_WY = 3
|
||||
enumerator :: SHARP_ALM2MAP_DERIV1 = 4
|
||||
end enum
|
||||
|
||||
! sharp job flags
|
||||
integer, parameter :: SHARP_DP = ISHFT(1, 4)
|
||||
integer, parameter :: SHARP_ADD = ISHFT(1, 5)
|
||||
integer, parameter :: SHARP_REAL_HARMONICS = ISHFT(1, 6)
|
||||
integer, parameter :: SHARP_NO_FFT = ISHFT(1, 7)
|
||||
|
||||
type sharp_geom_info
|
||||
type(c_ptr) :: handle
|
||||
integer(c_intptr_t) :: n_local
|
||||
end type sharp_geom_info
|
||||
|
||||
type sharp_alm_info
|
||||
type(c_ptr) :: handle
|
||||
integer(c_intptr_t) :: n_local
|
||||
end type sharp_alm_info
|
||||
|
||||
interface
|
||||
|
||||
! alm_info
|
||||
subroutine sharp_make_general_alm_info( &
|
||||
lmax, nm, stride, mval, mvstart, flags, alm_info) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: lmax, nm, stride, flags
|
||||
integer(c_int), intent(in) :: mval(nm)
|
||||
integer(c_intptr_t), intent(in) :: mvstart(nm)
|
||||
type(c_ptr), intent(out) :: alm_info
|
||||
end subroutine sharp_make_general_alm_info
|
||||
|
||||
subroutine c_sharp_make_mmajor_real_packed_alm_info( &
|
||||
lmax, stride, nm, ms, alm_info) bind(c, name='sharp_make_mmajor_real_packed_alm_info')
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: lmax, nm, stride
|
||||
integer(c_int), intent(in), optional :: ms(nm)
|
||||
type(c_ptr), intent(out) :: alm_info
|
||||
end subroutine c_sharp_make_mmajor_real_packed_alm_info
|
||||
|
||||
function c_sharp_alm_count(alm_info) bind(c, name='sharp_alm_count')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t) :: c_sharp_alm_count
|
||||
type(c_ptr), value, intent(in) :: alm_info
|
||||
end function c_sharp_alm_count
|
||||
|
||||
subroutine c_sharp_destroy_alm_info(alm_info) bind(c, name='sharp_destroy_alm_info')
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: alm_info
|
||||
end subroutine c_sharp_destroy_alm_info
|
||||
|
||||
! geom_info
|
||||
subroutine sharp_make_subset_healpix_geom_info ( &
|
||||
nside, stride, nrings, rings, weight, geom_info) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: nside, stride, nrings
|
||||
integer(c_int), intent(in), optional :: rings(nrings)
|
||||
real(c_double), intent(in), optional :: weight(2 * nside)
|
||||
type(c_ptr), intent(out) :: geom_info
|
||||
end subroutine sharp_make_subset_healpix_geom_info
|
||||
|
||||
subroutine c_sharp_destroy_geom_info(geom_info) bind(c, name='sharp_destroy_geom_info')
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: geom_info
|
||||
end subroutine c_sharp_destroy_geom_info
|
||||
|
||||
function c_sharp_map_size(info) bind(c, name='sharp_map_size')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t) :: c_sharp_map_size
|
||||
type(c_ptr), value :: info
|
||||
end function c_sharp_map_size
|
||||
|
||||
|
||||
! execute
|
||||
subroutine c_sharp_execute(type, spin, alm, map, geom_info, alm_info, ntrans, &
|
||||
flags, time, opcnt) bind(c, name='sharp_execute')
|
||||
use iso_c_binding
|
||||
integer(c_int), value :: type, spin, ntrans, flags
|
||||
type(c_ptr), value :: alm_info, geom_info
|
||||
real(c_double), intent(out), optional :: time
|
||||
integer(c_long_long), intent(out), optional :: opcnt
|
||||
type(c_ptr), intent(in) :: alm(*), map(*)
|
||||
end subroutine c_sharp_execute
|
||||
|
||||
subroutine c_sharp_execute_mpi(comm, type, spin, alm, map, geom_info, alm_info, ntrans, &
|
||||
flags, time, opcnt) bind(c, name='sharp_execute_mpi_fortran')
|
||||
use iso_c_binding
|
||||
integer(c_int), value :: comm, type, spin, ntrans, flags
|
||||
type(c_ptr), value :: alm_info, geom_info
|
||||
real(c_double), intent(out), optional :: time
|
||||
integer(c_long_long), intent(out), optional :: opcnt
|
||||
type(c_ptr), intent(in) :: alm(*), map(*)
|
||||
end subroutine c_sharp_execute_mpi
|
||||
|
||||
! Legendre transforms
|
||||
subroutine c_sharp_legendre_transform(bl, recfac, lmax, x, out, nx) &
|
||||
bind(c, name='sharp_legendre_transform')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t), value :: lmax, nx
|
||||
real(c_double) :: bl(lmax + 1), x(nx), out(nx)
|
||||
real(c_double), optional :: recfac(lmax + 1)
|
||||
end subroutine c_sharp_legendre_transform
|
||||
|
||||
subroutine c_sharp_legendre_transform_s(bl, recfac, lmax, x, out, nx) &
|
||||
bind(c, name='sharp_legendre_transform_s')
|
||||
use iso_c_binding
|
||||
integer(c_intptr_t), value :: lmax, nx
|
||||
real(c_float) :: bl(lmax + 1), x(nx), out(nx)
|
||||
real(c_float), optional :: recfac(lmax + 1)
|
||||
end subroutine c_sharp_legendre_transform_s
|
||||
end interface
|
||||
|
||||
interface sharp_execute
|
||||
module procedure sharp_execute_d
|
||||
end interface
|
||||
|
||||
interface sharp_legendre_transform
|
||||
module procedure sharp_legendre_transform_d, sharp_legendre_transform_s
|
||||
end interface sharp_legendre_transform
|
||||
|
||||
contains
|
||||
! alm info
|
||||
|
||||
! if ms is not passed, we default to using m=0..lmax.
|
||||
subroutine sharp_make_mmajor_real_packed_alm_info(lmax, ms, alm_info)
|
||||
use iso_c_binding
|
||||
integer(c_int), value, intent(in) :: lmax
|
||||
integer(c_int), intent(in), optional :: ms(:)
|
||||
type(sharp_alm_info), intent(out) :: alm_info
|
||||
!--
|
||||
integer(c_int), allocatable :: ms_copy(:)
|
||||
integer(c_int) :: nm
|
||||
|
||||
if (present(ms)) then
|
||||
nm = size(ms)
|
||||
allocate(ms_copy(nm))
|
||||
ms_copy = ms
|
||||
call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, nm, ms_copy, alm_info=alm_info%handle)
|
||||
deallocate(ms_copy)
|
||||
else
|
||||
call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, lmax + 1, alm_info=alm_info%handle)
|
||||
end if
|
||||
alm_info%n_local = c_sharp_alm_count(alm_info%handle)
|
||||
end subroutine sharp_make_mmajor_real_packed_alm_info
|
||||
|
||||
subroutine sharp_destroy_alm_info(alm_info)
|
||||
use iso_c_binding
|
||||
type(sharp_alm_info), intent(inout) :: alm_info
|
||||
call c_sharp_destroy_alm_info(alm_info%handle)
|
||||
alm_info%handle = c_null_ptr
|
||||
end subroutine sharp_destroy_alm_info
|
||||
|
||||
|
||||
! geom info
|
||||
subroutine sharp_make_healpix_geom_info(nside, rings, weight, geom_info)
|
||||
integer(c_int), value :: nside
|
||||
integer(c_int), optional :: rings(:)
|
||||
real(c_double), intent(in), optional :: weight(2 * nside)
|
||||
type(sharp_geom_info), intent(out) :: geom_info
|
||||
!--
|
||||
integer(c_int) :: nrings
|
||||
integer(c_int), allocatable :: rings_copy(:)
|
||||
|
||||
if (present(rings)) then
|
||||
nrings = size(rings)
|
||||
allocate(rings_copy(nrings))
|
||||
rings_copy = rings
|
||||
call sharp_make_subset_healpix_geom_info(nside, 1, nrings, rings_copy, &
|
||||
weight, geom_info%handle)
|
||||
deallocate(rings_copy)
|
||||
else
|
||||
call sharp_make_subset_healpix_geom_info(nside, 1, nrings=4 * nside - 1, &
|
||||
weight=weight, geom_info=geom_info%handle)
|
||||
end if
|
||||
geom_info%n_local = c_sharp_map_size(geom_info%handle)
|
||||
end subroutine sharp_make_healpix_geom_info
|
||||
|
||||
subroutine sharp_destroy_geom_info(geom_info)
|
||||
use iso_c_binding
|
||||
type(sharp_geom_info), intent(inout) :: geom_info
|
||||
call c_sharp_destroy_geom_info(geom_info%handle)
|
||||
geom_info%handle = c_null_ptr
|
||||
end subroutine sharp_destroy_geom_info
|
||||
|
||||
|
||||
! Currently the only mode supported is stacked (not interleaved) maps.
|
||||
!
|
||||
! Note that passing the exact dimension of alm/map is necesarry, it
|
||||
! prevents the caller from doing too crazy slicing prior to pass array
|
||||
! in...
|
||||
!
|
||||
! Usage:
|
||||
!
|
||||
! The alm array must have shape exactly alm(alm_info%n_local, nmaps)
|
||||
! The maps array must have shape exactly map(map_info%n_local, nmaps).
|
||||
subroutine sharp_execute_d(type, spin, nmaps, alm, alm_info, map, geom_info, &
|
||||
add, time, opcnt, comm)
|
||||
use iso_c_binding
|
||||
use mpi
|
||||
implicit none
|
||||
integer(c_int), value :: type, spin, nmaps
|
||||
integer(c_int), optional :: comm
|
||||
logical, value, optional :: add ! should add instead of replace out
|
||||
|
||||
type(sharp_alm_info) :: alm_info
|
||||
type(sharp_geom_info) :: geom_info
|
||||
real(c_double), intent(out), optional :: time
|
||||
integer(c_long_long), intent(out), optional :: opcnt
|
||||
real(c_double), target, intent(inout) :: alm(0:alm_info%n_local - 1, 1:nmaps)
|
||||
real(c_double), target, intent(inout) :: map(0:geom_info%n_local - 1, 1:nmaps)
|
||||
!--
|
||||
integer(c_int) :: mod_flags, ntrans, k
|
||||
type(c_ptr), target :: alm_ptr(nmaps)
|
||||
type(c_ptr), target :: map_ptr(nmaps)
|
||||
|
||||
mod_flags = SHARP_DP
|
||||
if (present(add) .and. add) then
|
||||
mod_flags = or(mod_flags, SHARP_ADD)
|
||||
end if
|
||||
|
||||
if (spin == 0) then
|
||||
ntrans = nmaps
|
||||
else
|
||||
ntrans = nmaps / 2
|
||||
end if
|
||||
|
||||
! Set up pointer table to access maps
|
||||
alm_ptr(:) = c_null_ptr
|
||||
map_ptr(:) = c_null_ptr
|
||||
do k = 1, nmaps
|
||||
if (alm_info%n_local > 0) alm_ptr(k) = c_loc(alm(0, k))
|
||||
if (geom_info%n_local > 0) map_ptr(k) = c_loc(map(0, k))
|
||||
end do
|
||||
|
||||
if (present(comm)) then
|
||||
call c_sharp_execute_mpi(comm, type, spin, alm_ptr, map_ptr, &
|
||||
geom_info=geom_info%handle, &
|
||||
alm_info=alm_info%handle, &
|
||||
ntrans=ntrans, &
|
||||
flags=mod_flags, &
|
||||
time=time, &
|
||||
opcnt=opcnt)
|
||||
else
|
||||
call c_sharp_execute(type, spin, alm_ptr, map_ptr, &
|
||||
geom_info=geom_info%handle, &
|
||||
alm_info=alm_info%handle, &
|
||||
ntrans=ntrans, &
|
||||
flags=mod_flags, &
|
||||
time=time, &
|
||||
opcnt=opcnt)
|
||||
end if
|
||||
end subroutine sharp_execute_d
|
||||
|
||||
subroutine sharp_legendre_transform_d(bl, x, out)
|
||||
use iso_c_binding
|
||||
real(c_double) :: bl(:)
|
||||
real(c_double) :: x(:), out(size(x))
|
||||
!--
|
||||
integer(c_intptr_t) :: lmax, nx
|
||||
call c_sharp_legendre_transform(bl, lmax=int(size(bl) - 1, c_intptr_t), &
|
||||
x=x, out=out, nx=int(size(x), c_intptr_t))
|
||||
end subroutine sharp_legendre_transform_d
|
||||
|
||||
subroutine sharp_legendre_transform_s(bl, x, out)
|
||||
use iso_c_binding
|
||||
real(c_float) :: bl(:)
|
||||
real(c_float) :: x(:), out(size(x))
|
||||
!--
|
||||
integer(c_intptr_t) :: lmax, nx
|
||||
call c_sharp_legendre_transform_s(bl, lmax=int(size(bl) - 1, c_intptr_t), &
|
||||
x=x, out=out, nx=int(size(x), c_intptr_t))
|
||||
end subroutine sharp_legendre_transform_s
|
||||
|
||||
|
||||
end module
|
|
@ -1,84 +0,0 @@
|
|||
program test_sharp
|
||||
use mpi
|
||||
use sharp
|
||||
use iso_c_binding, only : c_ptr, c_double
|
||||
implicit none
|
||||
|
||||
integer, parameter :: lmax = 2, nside = 2
|
||||
type(sharp_alm_info) :: alm_info
|
||||
type(sharp_geom_info) :: geom_info
|
||||
|
||||
real(c_double), dimension(0:(lmax + 1)**2 - 1, 1:1) :: alm
|
||||
real(c_double), dimension(0:12*nside**2 - 1, 1:1) :: map
|
||||
|
||||
integer(c_int), dimension(1:lmax + 1) :: ms
|
||||
integer(c_int), dimension(1:4 * nside - 1) :: rings
|
||||
integer(c_int) :: nm, m, nrings, iring
|
||||
integer :: nodecount, rank, ierr
|
||||
|
||||
call MPI_Init(ierr)
|
||||
call MPI_Comm_size(MPI_COMM_WORLD, nodecount, ierr)
|
||||
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
|
||||
|
||||
nm = 0
|
||||
do m = rank, lmax, nodecount
|
||||
nm = nm + 1
|
||||
ms(nm) = m
|
||||
end do
|
||||
|
||||
nrings = 0
|
||||
do iring = rank + 1, 4 * nside - 1, nodecount
|
||||
nrings = nrings + 1
|
||||
rings(nrings) = iring
|
||||
end do
|
||||
|
||||
alm = 0
|
||||
map = 0
|
||||
if (rank == 0) then
|
||||
alm(0, 1) = 1
|
||||
end if
|
||||
|
||||
print *, ms(1:nm)
|
||||
call sharp_make_mmajor_real_packed_alm_info(lmax, ms=ms(1:nm), alm_info=alm_info)
|
||||
print *, 'alm_info%n_local', alm_info%n_local
|
||||
call sharp_make_healpix_geom_info(nside, rings=rings(1:nrings), geom_info=geom_info)
|
||||
print *, 'geom_info%n_local', geom_info%n_local
|
||||
print *, 'execute'
|
||||
call sharp_execute(SHARP_Y, 0, 1, alm, alm_info, map, geom_info, comm=MPI_COMM_WORLD)
|
||||
|
||||
print *, alm
|
||||
print *, map
|
||||
|
||||
call sharp_destroy_alm_info(alm_info)
|
||||
call sharp_destroy_geom_info(geom_info)
|
||||
print *, 'DONE'
|
||||
call MPI_Finalize(ierr)
|
||||
|
||||
print *, 'LEGENDRE TRANSFORMS'
|
||||
|
||||
call test_legendre_transforms()
|
||||
|
||||
contains
|
||||
subroutine test_legendre_transforms()
|
||||
integer, parameter :: lmax = 20, nx=10
|
||||
real(c_double) :: bl(0:lmax)
|
||||
real(c_double) :: x(nx), out(nx)
|
||||
real(c_float) :: out_s(nx)
|
||||
!--
|
||||
integer :: l, i
|
||||
|
||||
do l = 0, lmax
|
||||
bl(l) = 1.0 / real(l + 1, c_double)
|
||||
end do
|
||||
do i = 1, nx
|
||||
x(i) = 1 / real(i, c_double)
|
||||
end do
|
||||
out = 0
|
||||
call sharp_legendre_transform(bl, x, out)
|
||||
print *, out
|
||||
call sharp_legendre_transform(real(bl, c_float), real(x, c_float), out_s)
|
||||
print *, out_s
|
||||
end subroutine test_legendre_transforms
|
||||
|
||||
|
||||
end program test_sharp
|
|
@ -1,34 +0,0 @@
|
|||
ls_fft description:
|
||||
|
||||
This package is intended to calculate one-dimensional real or complex FFTs
|
||||
with high accuracy and good efficiency even for lengths containing large
|
||||
prime factors.
|
||||
The code is written in C, but a Fortran wrapper exists as well.
|
||||
|
||||
Before any FFT is executed, a plan must be generated for it. Plan creation
|
||||
is designed to be fast, so that there is no significant overhead if the
|
||||
plan is only used once or a few times.
|
||||
|
||||
The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
|
||||
double precision incarnation by Hugh C. Pumphrey
|
||||
(http://www.netlib.org/fftpack/dp.tgz).
|
||||
|
||||
I replaced the iterative sine and cosine calculations in radfg() and radbg()
|
||||
by an exact calculation, which slightly improves the transform accuracy for
|
||||
real FFTs with lengths containing large prime factors.
|
||||
|
||||
Since FFTPACK becomes quite slow for FFT lengths with large prime factors
|
||||
(in the worst case of prime lengths it reaches O(n*n) complexity), I
|
||||
implemented Bluestein's algorithm, which computes a FFT of length n by
|
||||
several FFTs of length n2>=2*n-1 and a convolution. Since n2 can be chosen
|
||||
to be highly composite, this algorithm is more efficient if n has large
|
||||
prime factors. The longer FFTs themselves are then computed using the FFTPACK
|
||||
routines.
|
||||
Bluestein's algorithm was implemented according to the description at
|
||||
http://en.wikipedia.org/wiki/Bluestein's_FFT_algorithm.
|
||||
|
||||
Thread-safety:
|
||||
All routines can be called concurrently; all information needed by ls_fft
|
||||
is stored in the plan variable. However, using the same plan variable on
|
||||
multiple threads simultaneously is not supported and will lead to data
|
||||
corruption.
|
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2005, 2006, 2007, 2008 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "fftpack.h"
|
||||
#include "bluestein.h"
|
||||
|
||||
/* returns the sum of all prime factors of n */
|
||||
size_t prime_factor_sum (size_t n)
|
||||
{
|
||||
size_t result=0,x,limit,tmp;
|
||||
while (((tmp=(n>>1))<<1)==n)
|
||||
{ result+=2; n=tmp; }
|
||||
|
||||
limit=(size_t)sqrt(n+0.01);
|
||||
for (x=3; x<=limit; x+=2)
|
||||
while ((tmp=(n/x))*x==n)
|
||||
{
|
||||
result+=x;
|
||||
n=tmp;
|
||||
limit=(size_t)sqrt(n+0.01);
|
||||
}
|
||||
if (n>1) result+=n;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* returns the smallest composite of 2, 3 and 5 which is >= n */
|
||||
static size_t good_size(size_t n)
|
||||
{
|
||||
size_t f2, f23, f235, bestfac=2*n;
|
||||
if (n<=6) return n;
|
||||
|
||||
for (f2=1; f2<bestfac; f2*=2)
|
||||
for (f23=f2; f23<bestfac; f23*=3)
|
||||
for (f235=f23; f235<bestfac; f235*=5)
|
||||
if (f235>=n) bestfac=f235;
|
||||
return bestfac;
|
||||
}
|
||||
|
||||
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
|
||||
{
|
||||
static const double pi=3.14159265358979323846;
|
||||
size_t n2=good_size(n*2-1);
|
||||
size_t m, coeff;
|
||||
double angle, xn2;
|
||||
double *bk, *bkf, *work;
|
||||
double pibyn=pi/n;
|
||||
*worksize=2+2*n+8*n2+16;
|
||||
*tstorage = RALLOC(double,2+2*n+8*n2+16);
|
||||
((size_t *)(*tstorage))[0]=n2;
|
||||
bk = *tstorage+2;
|
||||
bkf = *tstorage+2+2*n;
|
||||
work= *tstorage+2+2*(n+n2);
|
||||
|
||||
/* initialize b_k */
|
||||
bk[0] = 1;
|
||||
bk[1] = 0;
|
||||
|
||||
coeff=0;
|
||||
for (m=1; m<n; ++m)
|
||||
{
|
||||
coeff+=2*m-1;
|
||||
if (coeff>=2*n) coeff-=2*n;
|
||||
angle = pibyn*coeff;
|
||||
bk[2*m] = cos(angle);
|
||||
bk[2*m+1] = sin(angle);
|
||||
}
|
||||
|
||||
/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
|
||||
xn2 = 1./n2;
|
||||
bkf[0] = bk[0]*xn2;
|
||||
bkf[1] = bk[1]*xn2;
|
||||
for (m=2; m<2*n; m+=2)
|
||||
{
|
||||
bkf[m] = bkf[2*n2-m] = bk[m] *xn2;
|
||||
bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2;
|
||||
}
|
||||
for (m=2*n;m<=(2*n2-2*n+1);++m)
|
||||
bkf[m]=0.;
|
||||
cffti (n2,work);
|
||||
cfftf (n2,bkf,work);
|
||||
}
|
||||
|
||||
void bluestein (size_t n, double *data, double *tstorage, int isign)
|
||||
{
|
||||
size_t n2=*((size_t *)tstorage);
|
||||
size_t m;
|
||||
double *bk, *bkf, *akf, *work;
|
||||
bk = tstorage+2;
|
||||
bkf = tstorage+2+2*n;
|
||||
work= tstorage+2+2*(n+n2);
|
||||
akf = tstorage+2+2*n+6*n2+16;
|
||||
|
||||
/* initialize a_k and FFT it */
|
||||
if (isign>0)
|
||||
for (m=0; m<2*n; m+=2)
|
||||
{
|
||||
akf[m] = data[m]*bk[m] - data[m+1]*bk[m+1];
|
||||
akf[m+1] = data[m]*bk[m+1] + data[m+1]*bk[m];
|
||||
}
|
||||
else
|
||||
for (m=0; m<2*n; m+=2)
|
||||
{
|
||||
akf[m] = data[m]*bk[m] + data[m+1]*bk[m+1];
|
||||
akf[m+1] =-data[m]*bk[m+1] + data[m+1]*bk[m];
|
||||
}
|
||||
for (m=2*n; m<2*n2; ++m)
|
||||
akf[m]=0;
|
||||
|
||||
cfftf (n2,akf,work);
|
||||
|
||||
/* do the convolution */
|
||||
if (isign>0)
|
||||
for (m=0; m<2*n2; m+=2)
|
||||
{
|
||||
double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
|
||||
akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1];
|
||||
akf[m+1] = im;
|
||||
}
|
||||
else
|
||||
for (m=0; m<2*n2; m+=2)
|
||||
{
|
||||
double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
|
||||
akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1];
|
||||
akf[m+1] = im;
|
||||
}
|
||||
|
||||
|
||||
/* inverse FFT */
|
||||
cfftb (n2,akf,work);
|
||||
|
||||
/* multiply by b_k* */
|
||||
if (isign>0)
|
||||
for (m=0; m<2*n; m+=2)
|
||||
{
|
||||
data[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1];
|
||||
data[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1];
|
||||
}
|
||||
else
|
||||
for (m=0; m<2*n; m+=2)
|
||||
{
|
||||
data[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1];
|
||||
data[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1];
|
||||
}
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2005 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_BLUESTEIN_H
|
||||
#define PLANCK_BLUESTEIN_H
|
||||
|
||||
#include "c_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
size_t prime_factor_sum (size_t n);
|
||||
|
||||
void bluestein_i (size_t n, double **tstorage, size_t *worksize);
|
||||
void bluestein (size_t n, double *data, double *tstorage, int isign);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,833 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*
|
||||
fftpack.c : A set of FFT routines in C.
|
||||
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
|
||||
(Version 4, 1985).
|
||||
|
||||
C port by Martin Reinecke (2010)
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "fftpack.h"
|
||||
|
||||
#define WA(x,i) wa[(i)+(x)*ido]
|
||||
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
|
||||
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
|
||||
#define PM(a,b,c,d) { a=c+d; b=c-d; }
|
||||
#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
|
||||
#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
|
||||
#define SCALEC(a,b) { a.r*=b; a.i*=b; }
|
||||
#define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
|
||||
/* (a+ib) = conj(c+id) * (e+if) */
|
||||
#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
|
||||
|
||||
typedef struct {
|
||||
double r,i;
|
||||
} cmplx;
|
||||
|
||||
#define CONCAT(a,b) a ## b
|
||||
|
||||
#define X(arg) CONCAT(passb,arg)
|
||||
#define BACKWARD
|
||||
#include "fftpack_inc.c"
|
||||
#undef BACKWARD
|
||||
#undef X
|
||||
|
||||
#define X(arg) CONCAT(passf,arg)
|
||||
#include "fftpack_inc.c"
|
||||
#undef X
|
||||
|
||||
#undef CC
|
||||
#undef CH
|
||||
#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
|
||||
#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
|
||||
|
||||
static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=2;
|
||||
size_t i, k, ic;
|
||||
double ti2, tr2;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
|
||||
if ((ido&1)==0)
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
CH( 0,1,k) = -CC(ido-1,k,1);
|
||||
CH(ido-1,0,k) = CC(ido-1,k,0);
|
||||
}
|
||||
if (ido<=2) return;
|
||||
for (k=0; k<l1; k++)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
||||
PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
|
||||
PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0))
|
||||
}
|
||||
}
|
||||
|
||||
static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=3;
|
||||
static const double taur=-0.5, taui=0.86602540378443864676;
|
||||
size_t i, k, ic;
|
||||
double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
cr2=CC(0,k,1)+CC(0,k,2);
|
||||
CH(0,0,k) = CC(0,k,0)+cr2;
|
||||
CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
|
||||
CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
|
||||
}
|
||||
if (ido==1) return;
|
||||
for (k=0; k<l1; k++)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
||||
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
|
||||
cr2=dr2+dr3;
|
||||
ci2=di2+di3;
|
||||
CH(i-1,0,k) = CC(i-1,k,0)+cr2;
|
||||
CH(i ,0,k) = CC(i ,k,0)+ci2;
|
||||
tr2 = CC(i-1,k,0)+taur*cr2;
|
||||
ti2 = CC(i ,k,0)+taur*ci2;
|
||||
tr3 = taui*(di2-di3);
|
||||
ti3 = taui*(dr3-dr2);
|
||||
PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
|
||||
PM(CH(i ,2,k),CH(ic ,1,k),ti3,ti2)
|
||||
}
|
||||
}
|
||||
|
||||
static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=4;
|
||||
static const double hsqt2=0.70710678118654752440;
|
||||
size_t i, k, ic;
|
||||
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
|
||||
PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
|
||||
PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
|
||||
}
|
||||
if ((ido&1)==0)
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
|
||||
tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
|
||||
PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
|
||||
PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2))
|
||||
}
|
||||
if (ido<=2) return;
|
||||
for (k=0; k<l1; k++)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
||||
MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
|
||||
MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
|
||||
PM(tr1,tr4,cr4,cr2)
|
||||
PM(ti1,ti4,ci2,ci4)
|
||||
PM(tr2,tr3,CC(i-1,k,0),cr3)
|
||||
PM(ti2,ti3,CC(i ,k,0),ci3)
|
||||
PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
|
||||
PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2)
|
||||
PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
|
||||
PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3)
|
||||
}
|
||||
}
|
||||
|
||||
static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=5;
|
||||
static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
|
||||
tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
|
||||
size_t i, k, ic;
|
||||
double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
|
||||
dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
|
||||
PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
|
||||
CH(0,0,k)=CC(0,k,0)+cr2+cr3;
|
||||
CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
|
||||
CH(0,2,k)=ti11*ci5+ti12*ci4;
|
||||
CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
|
||||
CH(0,4,k)=ti12*ci5-ti11*ci4;
|
||||
}
|
||||
if (ido==1) return;
|
||||
for (k=0; k<l1;++k)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
||||
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
|
||||
MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
|
||||
MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
|
||||
PM(cr2,ci5,dr5,dr2)
|
||||
PM(ci2,cr5,di2,di5)
|
||||
PM(cr3,ci4,dr4,dr3)
|
||||
PM(ci3,cr4,di3,di4)
|
||||
CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
|
||||
CH(i ,0,k)=CC(i ,k,0)+ci2+ci3;
|
||||
tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
|
||||
ti2=CC(i ,k,0)+tr11*ci2+tr12*ci3;
|
||||
tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
|
||||
ti3=CC(i ,k,0)+tr12*ci2+tr11*ci3;
|
||||
MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
|
||||
MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
|
||||
PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
|
||||
PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2)
|
||||
PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
|
||||
PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3)
|
||||
}
|
||||
}
|
||||
|
||||
#undef CH
|
||||
#undef CC
|
||||
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
|
||||
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
|
||||
#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
|
||||
#define C2(a,b) cc[(a)+idl1*(b)]
|
||||
#define CH2(a,b) ch[(a)+idl1*(b)]
|
||||
static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
|
||||
double *cc, double *ch, const double *wa)
|
||||
{
|
||||
const size_t cdim=ip;
|
||||
static const double twopi=6.28318530717958647692;
|
||||
size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
|
||||
double ai1, ai2, ar1, ar2, arg;
|
||||
double *csarr;
|
||||
size_t aidx;
|
||||
|
||||
ipph=(ip+1)/ 2;
|
||||
if(ido!=1)
|
||||
{
|
||||
memcpy(ch,cc,idl1*sizeof(double));
|
||||
|
||||
for(j=1; j<ip; j++)
|
||||
for(k=0; k<l1; k++)
|
||||
{
|
||||
CH(0,k,j)=C1(0,k,j);
|
||||
idij=(j-1)*ido+1;
|
||||
for(i=2; i<ido; i+=2,idij+=2)
|
||||
MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
|
||||
}
|
||||
|
||||
for(j=1,jc=ip-1; j<ipph; j++,jc--)
|
||||
for(k=0; k<l1; k++)
|
||||
for(i=2; i<ido; i+=2)
|
||||
{
|
||||
PM(C1(i-1,k,j),C1(i ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
|
||||
PM(C1(i ,k,j),C1(i-1,k,jc),CH(i ,k,j ),CH(i ,k,jc))
|
||||
}
|
||||
}
|
||||
else
|
||||
memcpy(cc,ch,idl1*sizeof(double));
|
||||
|
||||
for(j=1,jc=ip-1; j<ipph; j++,jc--)
|
||||
for(k=0; k<l1; k++)
|
||||
PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
|
||||
|
||||
csarr=RALLOC(double,2*ip);
|
||||
arg=twopi / ip;
|
||||
csarr[0]=1.;
|
||||
csarr[1]=0.;
|
||||
csarr[2]=csarr[2*ip-2]=cos(arg);
|
||||
csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
|
||||
for (i=2; i<=ip/2; ++i)
|
||||
{
|
||||
csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
|
||||
csarr[2*i+1]=sin(i*arg);
|
||||
csarr[2*ip-2*i+1]=-csarr[2*i+1];
|
||||
}
|
||||
for(l=1,lc=ip-1; l<ipph; l++,lc--)
|
||||
{
|
||||
ar1=csarr[2*l];
|
||||
ai1=csarr[2*l+1];
|
||||
for(ik=0; ik<idl1; ik++)
|
||||
{
|
||||
CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
|
||||
CH2(ik,lc)=ai1*C2(ik,ip-1);
|
||||
}
|
||||
aidx=2*l;
|
||||
for(j=2,jc=ip-2; j<ipph; j++,jc--)
|
||||
{
|
||||
aidx+=2*l;
|
||||
if (aidx>=2*ip) aidx-=2*ip;
|
||||
ar2=csarr[aidx];
|
||||
ai2=csarr[aidx+1];
|
||||
for(ik=0; ik<idl1; ik++)
|
||||
{
|
||||
CH2(ik,l )+=ar2*C2(ik,j );
|
||||
CH2(ik,lc)+=ai2*C2(ik,jc);
|
||||
}
|
||||
}
|
||||
}
|
||||
DEALLOC(csarr);
|
||||
|
||||
for(j=1; j<ipph; j++)
|
||||
for(ik=0; ik<idl1; ik++)
|
||||
CH2(ik,0)+=C2(ik,j);
|
||||
|
||||
for(k=0; k<l1; k++)
|
||||
memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
|
||||
for(j=1; j<ipph; j++)
|
||||
{
|
||||
jc=ip-j;
|
||||
j2=2*j;
|
||||
for(k=0; k<l1; k++)
|
||||
{
|
||||
CC(ido-1,j2-1,k) = CH(0,k,j );
|
||||
CC(0 ,j2 ,k) = CH(0,k,jc);
|
||||
}
|
||||
}
|
||||
if(ido==1) return;
|
||||
|
||||
for(j=1; j<ipph; j++)
|
||||
{
|
||||
jc=ip-j;
|
||||
j2=2*j;
|
||||
for(k=0; k<l1; k++)
|
||||
for(i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
|
||||
PM (CC(i ,j2,k),CC(ic ,j2-1,k),CH(i ,k,jc),CH(i ,k,j ))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef CC
|
||||
#undef CH
|
||||
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
|
||||
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
|
||||
|
||||
static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=2;
|
||||
size_t i, k, ic;
|
||||
double ti2, tr2;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
|
||||
if ((ido&1)==0)
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
CH(ido-1,k,0) = 2*CC(ido-1,0,k);
|
||||
CH(ido-1,k,1) = -2*CC(0 ,1,k);
|
||||
}
|
||||
if (ido<=2) return;
|
||||
for (k=0; k<l1;++k)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
|
||||
PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k))
|
||||
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
|
||||
}
|
||||
}
|
||||
|
||||
static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=3;
|
||||
static const double taur=-0.5, taui=0.86602540378443864676;
|
||||
size_t i, k, ic;
|
||||
double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
tr2=2*CC(ido-1,1,k);
|
||||
cr2=CC(0,0,k)+taur*tr2;
|
||||
CH(0,k,0)=CC(0,0,k)+tr2;
|
||||
ci3=2*taui*CC(0,2,k);
|
||||
PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
|
||||
}
|
||||
if (ido==1) return;
|
||||
for (k=0; k<l1; k++)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
tr2=CC(i-1,2,k)+CC(ic-1,1,k);
|
||||
ti2=CC(i ,2,k)-CC(ic ,1,k);
|
||||
cr2=CC(i-1,0,k)+taur*tr2;
|
||||
ci2=CC(i ,0,k)+taur*ti2;
|
||||
CH(i-1,k,0)=CC(i-1,0,k)+tr2;
|
||||
CH(i ,k,0)=CC(i ,0,k)+ti2;
|
||||
cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
|
||||
ci3=taui*(CC(i ,2,k)+CC(ic ,1,k));
|
||||
PM(dr3,dr2,cr2,ci3)
|
||||
PM(di2,di3,ci2,cr3)
|
||||
MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
|
||||
MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
|
||||
}
|
||||
}
|
||||
|
||||
static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=4;
|
||||
static const double sqrt2=1.41421356237309504880;
|
||||
size_t i, k, ic;
|
||||
double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
|
||||
tr3=2*CC(ido-1,1,k);
|
||||
tr4=2*CC(0,2,k);
|
||||
PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
|
||||
PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
|
||||
}
|
||||
if ((ido&1)==0)
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k))
|
||||
PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
|
||||
CH(ido-1,k,0)=tr2+tr2;
|
||||
CH(ido-1,k,1)=sqrt2*(tr1-ti1);
|
||||
CH(ido-1,k,2)=ti2+ti2;
|
||||
CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
|
||||
}
|
||||
if (ido<=2) return;
|
||||
for (k=0; k<l1;++k)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
|
||||
PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k))
|
||||
PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k))
|
||||
PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
|
||||
PM (CH(i-1,k,0),cr3,tr2,tr3)
|
||||
PM (CH(i ,k,0),ci3,ti2,ti3)
|
||||
PM (cr4,cr2,tr1,tr4)
|
||||
PM (ci2,ci4,ti1,ti4)
|
||||
MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
|
||||
MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
|
||||
MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
|
||||
}
|
||||
}
|
||||
|
||||
static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
|
||||
const double *wa)
|
||||
{
|
||||
const size_t cdim=5;
|
||||
static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
|
||||
tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
|
||||
size_t i, k, ic;
|
||||
double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
|
||||
ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
|
||||
|
||||
for (k=0; k<l1; k++)
|
||||
{
|
||||
ti5=2*CC(0,2,k);
|
||||
ti4=2*CC(0,4,k);
|
||||
tr2=2*CC(ido-1,1,k);
|
||||
tr3=2*CC(ido-1,3,k);
|
||||
CH(0,k,0)=CC(0,0,k)+tr2+tr3;
|
||||
cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
|
||||
cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
|
||||
MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
|
||||
PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
|
||||
PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
|
||||
}
|
||||
if (ido==1) return;
|
||||
for (k=0; k<l1;++k)
|
||||
for (i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
|
||||
PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k))
|
||||
PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
|
||||
PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k))
|
||||
CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
|
||||
CH(i ,k,0)=CC(i ,0,k)+ti2+ti3;
|
||||
cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
|
||||
ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3;
|
||||
cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
|
||||
ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3;
|
||||
MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
|
||||
MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
|
||||
PM(dr4,dr3,cr3,ci4)
|
||||
PM(di3,di4,ci3,cr4)
|
||||
PM(dr5,dr2,cr2,ci5)
|
||||
PM(di2,di5,ci2,cr5)
|
||||
MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
|
||||
MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
|
||||
MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
|
||||
MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
|
||||
}
|
||||
}
|
||||
|
||||
static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
|
||||
double *cc, double *ch, const double *wa)
|
||||
{
|
||||
const size_t cdim=ip;
|
||||
static const double twopi=6.28318530717958647692;
|
||||
size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
|
||||
double ai1, ai2, ar1, ar2, arg;
|
||||
double *csarr;
|
||||
size_t aidx;
|
||||
|
||||
ipph=(ip+1)/ 2;
|
||||
for(k=0; k<l1; k++)
|
||||
memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
|
||||
for(j=1; j<ipph; j++)
|
||||
{
|
||||
jc=ip-j;
|
||||
j2=2*j;
|
||||
for(k=0; k<l1; k++)
|
||||
{
|
||||
CH(0,k,j )=2*CC(ido-1,j2-1,k);
|
||||
CH(0,k,jc)=2*CC(0 ,j2 ,k);
|
||||
}
|
||||
}
|
||||
|
||||
if(ido!=1)
|
||||
for(j=1,jc=ip-1; j<ipph; j++,jc--)
|
||||
for(k=0; k<l1; k++)
|
||||
for(i=2; i<ido; i+=2)
|
||||
{
|
||||
ic=ido-i;
|
||||
PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
|
||||
PM (CH(i ,k,jc),CH(i ,k,j ),CC(i ,2*j,k),CC(ic ,2*j-1,k))
|
||||
}
|
||||
|
||||
csarr=RALLOC(double,2*ip);
|
||||
arg=twopi/ip;
|
||||
csarr[0]=1.;
|
||||
csarr[1]=0.;
|
||||
csarr[2]=csarr[2*ip-2]=cos(arg);
|
||||
csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
|
||||
for (i=2; i<=ip/2; ++i)
|
||||
{
|
||||
csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
|
||||
csarr[2*i+1]=sin(i*arg);
|
||||
csarr[2*ip-2*i+1]=-csarr[2*i+1];
|
||||
}
|
||||
for(l=1; l<ipph; l++)
|
||||
{
|
||||
lc=ip-l;
|
||||
ar1=csarr[2*l];
|
||||
ai1=csarr[2*l+1];
|
||||
for(ik=0; ik<idl1; ik++)
|
||||
{
|
||||
C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
|
||||
C2(ik,lc)=ai1*CH2(ik,ip-1);
|
||||
}
|
||||
aidx=2*l;
|
||||
for(j=2; j<ipph; j++)
|
||||
{
|
||||
jc=ip-j;
|
||||
aidx+=2*l;
|
||||
if (aidx>=2*ip) aidx-=2*ip;
|
||||
ar2=csarr[aidx];
|
||||
ai2=csarr[aidx+1];
|
||||
for(ik=0; ik<idl1; ik++)
|
||||
{
|
||||
C2(ik,l )+=ar2*CH2(ik,j );
|
||||
C2(ik,lc)+=ai2*CH2(ik,jc);
|
||||
}
|
||||
}
|
||||
}
|
||||
DEALLOC(csarr);
|
||||
|
||||
for(j=1; j<ipph; j++)
|
||||
for(ik=0; ik<idl1; ik++)
|
||||
CH2(ik,0)+=CH2(ik,j);
|
||||
|
||||
for(j=1,jc=ip-1; j<ipph; j++,jc--)
|
||||
for(k=0; k<l1; k++)
|
||||
PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
|
||||
|
||||
if(ido==1)
|
||||
return;
|
||||
for(j=1,jc=ip-1; j<ipph; j++,jc--)
|
||||
for(k=0; k<l1; k++)
|
||||
for(i=2; i<ido; i+=2)
|
||||
{
|
||||
PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i ,k,jc))
|
||||
PM (CH(i ,k,j ),CH(i ,k,jc),C1(i ,k,j),C1(i-1,k,jc))
|
||||
}
|
||||
memcpy(cc,ch,idl1*sizeof(double));
|
||||
|
||||
for(j=1; j<ip; j++)
|
||||
for(k=0; k<l1; k++)
|
||||
{
|
||||
C1(0,k,j)=CH(0,k,j);
|
||||
idij=(j-1)*ido+1;
|
||||
for(i=2; i<ido; i+=2,idij+=2)
|
||||
MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
|
||||
}
|
||||
}
|
||||
|
||||
#undef CC
|
||||
#undef CH
|
||||
#undef PM
|
||||
#undef MULPM
|
||||
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
|
||||
----------------------------------------------------------------------*/
|
||||
|
||||
static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
|
||||
const size_t ifac[], int isign)
|
||||
{
|
||||
size_t k1, l1=1, nf=ifac[1], iw=0;
|
||||
cmplx *p1=c, *p2=ch;
|
||||
|
||||
for(k1=0; k1<nf; k1++)
|
||||
{
|
||||
size_t ip=ifac[k1+2];
|
||||
size_t l2=ip*l1;
|
||||
size_t ido = n/l2;
|
||||
if(ip==4)
|
||||
(isign>0) ? passb4(ido, l1, p1, p2, wa+iw)
|
||||
: passf4(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==2)
|
||||
(isign>0) ? passb2(ido, l1, p1, p2, wa+iw)
|
||||
: passf2(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==3)
|
||||
(isign>0) ? passb3(ido, l1, p1, p2, wa+iw)
|
||||
: passf3(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==5)
|
||||
(isign>0) ? passb5(ido, l1, p1, p2, wa+iw)
|
||||
: passf5(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==6)
|
||||
(isign>0) ? passb6(ido, l1, p1, p2, wa+iw)
|
||||
: passf6(ido, l1, p1, p2, wa+iw);
|
||||
else
|
||||
(isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw)
|
||||
: passfg(ido, ip, l1, p1, p2, wa+iw);
|
||||
SWAP(p1,p2,cmplx *);
|
||||
l1=l2;
|
||||
iw+=(ip-1)*ido;
|
||||
}
|
||||
if (p1!=c)
|
||||
memcpy (c,p1,n*sizeof(cmplx));
|
||||
}
|
||||
|
||||
void cfftf(size_t n, double c[], double wsave[])
|
||||
{
|
||||
if (n!=1)
|
||||
cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
|
||||
(size_t*)(wsave+4*n),-1);
|
||||
}
|
||||
|
||||
void cfftb(size_t n, double c[], double wsave[])
|
||||
{
|
||||
if (n!=1)
|
||||
cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
|
||||
(size_t*)(wsave+4*n),+1);
|
||||
}
|
||||
|
||||
static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
|
||||
{
|
||||
size_t nl=n, nf=0, ntry=0, j=0, i;
|
||||
|
||||
startloop:
|
||||
j++;
|
||||
ntry = (j<=npf) ? pf[j-1] : ntry+2;
|
||||
do
|
||||
{
|
||||
size_t nq=nl / ntry;
|
||||
size_t nr=nl-ntry*nq;
|
||||
if (nr!=0)
|
||||
goto startloop;
|
||||
nf++;
|
||||
ifac[nf+1]=ntry;
|
||||
nl=nq;
|
||||
if ((ntry==2) && (nf!=1))
|
||||
{
|
||||
for (i=nf+1; i>2; --i)
|
||||
ifac[i]=ifac[i-1];
|
||||
ifac[2]=2;
|
||||
}
|
||||
}
|
||||
while(nl!=1);
|
||||
ifac[0]=n;
|
||||
ifac[1]=nf;
|
||||
}
|
||||
|
||||
static void cffti1(size_t n, double wa[], size_t ifac[])
|
||||
{
|
||||
static const size_t ntryh[5]={4,6,3,2,5};
|
||||
static const double twopi=6.28318530717958647692;
|
||||
size_t j, k, fi;
|
||||
|
||||
double argh=twopi/n;
|
||||
size_t i=0, l1=1;
|
||||
factorize (n,ntryh,5,ifac);
|
||||
for(k=1; k<=ifac[1]; k++)
|
||||
{
|
||||
size_t ip=ifac[k+1];
|
||||
size_t ido=n/(l1*ip);
|
||||
for(j=1; j<ip; j++)
|
||||
{
|
||||
size_t is = i;
|
||||
double argld=j*l1*argh;
|
||||
wa[i ]=1;
|
||||
wa[i+1]=0;
|
||||
for(fi=1; fi<=ido; fi++)
|
||||
{
|
||||
double arg=fi*argld;
|
||||
i+=2;
|
||||
wa[i ]=cos(arg);
|
||||
wa[i+1]=sin(arg);
|
||||
}
|
||||
if(ip>6)
|
||||
{
|
||||
wa[is ]=wa[i ];
|
||||
wa[is+1]=wa[i+1];
|
||||
}
|
||||
}
|
||||
l1*=ip;
|
||||
}
|
||||
}
|
||||
|
||||
void cffti(size_t n, double wsave[])
|
||||
{ if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); }
|
||||
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
|
||||
----------------------------------------------------------------------*/
|
||||
|
||||
static void rfftf1(size_t n, double c[], double ch[], const double wa[],
|
||||
const size_t ifac[])
|
||||
{
|
||||
size_t k1, l1=n, nf=ifac[1], iw=n-1;
|
||||
double *p1=ch, *p2=c;
|
||||
|
||||
for(k1=1; k1<=nf;++k1)
|
||||
{
|
||||
size_t ip=ifac[nf-k1+2];
|
||||
size_t ido=n / l1;
|
||||
l1 /= ip;
|
||||
iw-=(ip-1)*ido;
|
||||
SWAP (p1,p2,double *);
|
||||
if(ip==4)
|
||||
radf4(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==2)
|
||||
radf2(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==3)
|
||||
radf3(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==5)
|
||||
radf5(ido, l1, p1, p2, wa+iw);
|
||||
else
|
||||
{
|
||||
if (ido==1)
|
||||
SWAP (p1,p2,double *);
|
||||
radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
|
||||
SWAP (p1,p2,double *);
|
||||
}
|
||||
}
|
||||
if (p1==c)
|
||||
memcpy (c,ch,n*sizeof(double));
|
||||
}
|
||||
|
||||
static void rfftb1(size_t n, double c[], double ch[], const double wa[],
|
||||
const size_t ifac[])
|
||||
{
|
||||
size_t k1, l1=1, nf=ifac[1], iw=0;
|
||||
double *p1=c, *p2=ch;
|
||||
|
||||
for(k1=1; k1<=nf; k1++)
|
||||
{
|
||||
size_t ip = ifac[k1+1],
|
||||
ido= n/(ip*l1);
|
||||
if(ip==4)
|
||||
radb4(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==2)
|
||||
radb2(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==3)
|
||||
radb3(ido, l1, p1, p2, wa+iw);
|
||||
else if(ip==5)
|
||||
radb5(ido, l1, p1, p2, wa+iw);
|
||||
else
|
||||
{
|
||||
radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
|
||||
if (ido!=1)
|
||||
SWAP (p1,p2,double *);
|
||||
}
|
||||
SWAP (p1,p2,double *);
|
||||
l1*=ip;
|
||||
iw+=(ip-1)*ido;
|
||||
}
|
||||
if (p1!=c)
|
||||
memcpy (c,ch,n*sizeof(double));
|
||||
}
|
||||
|
||||
void rfftf(size_t n, double r[], double wsave[])
|
||||
{ if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
|
||||
|
||||
void rfftb(size_t n, double r[], double wsave[])
|
||||
{ if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
|
||||
|
||||
static void rffti1(size_t n, double wa[], size_t ifac[])
|
||||
{
|
||||
static const size_t ntryh[4]={4,2,3,5};
|
||||
static const double twopi=6.28318530717958647692;
|
||||
size_t i, j, k, fi;
|
||||
|
||||
double argh=twopi/n;
|
||||
size_t is=0, l1=1;
|
||||
factorize (n,ntryh,4,ifac);
|
||||
for (k=1; k<ifac[1]; k++)
|
||||
{
|
||||
size_t ip=ifac[k+1],
|
||||
ido=n/(l1*ip);
|
||||
for (j=1; j<ip; ++j)
|
||||
{
|
||||
double argld=j*l1*argh;
|
||||
for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
|
||||
{
|
||||
double arg=fi*argld;
|
||||
wa[i ]=cos(arg);
|
||||
wa[i+1]=sin(arg);
|
||||
}
|
||||
is+=ido;
|
||||
}
|
||||
l1*=ip;
|
||||
}
|
||||
}
|
||||
|
||||
void rffti(size_t n, double wsave[])
|
||||
{ if (n!=1) rffti1(n, wsave+n,(size_t*)(wsave+2*n)); }
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*
|
||||
fftpack.h : function declarations for fftpack.c
|
||||
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
|
||||
(Version 4, 1985).
|
||||
|
||||
Pekka Janhunen 23.2.1995
|
||||
|
||||
(reformatted by joerg arndt)
|
||||
|
||||
reformatted and slightly enhanced by Martin Reinecke (2004)
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_FFTPACK_H
|
||||
#define PLANCK_FFTPACK_H
|
||||
|
||||
#include "c_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*! forward complex transform */
|
||||
void cfftf(size_t N, double complex_data[], double wrk[]);
|
||||
/*! backward complex transform */
|
||||
void cfftb(size_t N, double complex_data[], double wrk[]);
|
||||
/*! initializer for complex transforms */
|
||||
void cffti(size_t N, double wrk[]);
|
||||
|
||||
/*! forward real transform */
|
||||
void rfftf(size_t N, double data[], double wrk[]);
|
||||
/*! backward real transform */
|
||||
void rfftb(size_t N, double data[], double wrk[]);
|
||||
/*! initializer for real transforms */
|
||||
void rffti(size_t N, double wrk[]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,306 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*
|
||||
fftpack.c : A set of FFT routines in C.
|
||||
Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
|
||||
(Version 4, 1985).
|
||||
|
||||
C port by Martin Reinecke (2010)
|
||||
*/
|
||||
|
||||
#ifdef BACKWARD
|
||||
#define PSIGN +
|
||||
#define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
|
||||
/* a = b*c */
|
||||
#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
|
||||
#else
|
||||
#define PSIGN -
|
||||
#define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
|
||||
/* a = conj(b)*c */
|
||||
#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
|
||||
#endif
|
||||
|
||||
static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
|
||||
const cmplx *wa)
|
||||
{
|
||||
const size_t cdim=2;
|
||||
size_t k,i;
|
||||
cmplx t;
|
||||
if (ido==1)
|
||||
for (k=0;k<l1;++k)
|
||||
PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
|
||||
else
|
||||
for (k=0;k<l1;++k)
|
||||
for (i=0;i<ido;++i)
|
||||
{
|
||||
PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
|
||||
MULPMSIGNC (CH(i,k,1),WA(0,i),t)
|
||||
}
|
||||
}
|
||||
|
||||
static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
|
||||
const cmplx *wa)
|
||||
{
|
||||
const size_t cdim=3;
|
||||
static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
|
||||
size_t i, k;
|
||||
cmplx c2, c3, d2, d3, t2;
|
||||
|
||||
if (ido==1)
|
||||
for (k=0; k<l1; ++k)
|
||||
{
|
||||
PMC (t2,c3,CC(0,1,k),CC(0,2,k))
|
||||
ADDC (CH(0,k,0),t2,CC(0,0,k))
|
||||
SCALEC(t2,taur)
|
||||
ADDC(c2,CC(0,0,k),t2)
|
||||
SCALEC(c3,taui)
|
||||
CONJFLIPC(c3)
|
||||
PMC(CH(0,k,1),CH(0,k,2),c2,c3)
|
||||
}
|
||||
else
|
||||
for (k=0; k<l1; ++k)
|
||||
for (i=0; i<ido; ++i)
|
||||
{
|
||||
PMC (t2,c3,CC(i,1,k),CC(i,2,k))
|
||||
ADDC (CH(i,k,0),t2,CC(i,0,k))
|
||||
SCALEC(t2,taur)
|
||||
ADDC(c2,CC(i,0,k),t2)
|
||||
SCALEC(c3,taui)
|
||||
CONJFLIPC(c3)
|
||||
PMC(d2,d3,c2,c3)
|
||||
MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
|
||||
MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
|
||||
}
|
||||
}
|
||||
|
||||
static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
|
||||
const cmplx *wa)
|
||||
{
|
||||
const size_t cdim=4;
|
||||
size_t i, k;
|
||||
cmplx c2, c3, c4, t1, t2, t3, t4;
|
||||
|
||||
if (ido==1)
|
||||
for (k=0; k<l1; ++k)
|
||||
{
|
||||
PMC(t2,t1,CC(0,0,k),CC(0,2,k))
|
||||
PMC(t3,t4,CC(0,1,k),CC(0,3,k))
|
||||
CONJFLIPC(t4)
|
||||
PMC(CH(0,k,0),CH(0,k,2),t2,t3)
|
||||
PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
|
||||
}
|
||||
else
|
||||
for (k=0; k<l1; ++k)
|
||||
for (i=0; i<ido; ++i)
|
||||
{
|
||||
PMC(t2,t1,CC(i,0,k),CC(i,2,k))
|
||||
PMC(t3,t4,CC(i,1,k),CC(i,3,k))
|
||||
CONJFLIPC(t4)
|
||||
PMC(CH(i,k,0),c3,t2,t3)
|
||||
PMSIGNC (c2,c4,t1,t4)
|
||||
MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
|
||||
MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
|
||||
MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
|
||||
}
|
||||
}
|
||||
|
||||
static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
|
||||
const cmplx *wa)
|
||||
{
|
||||
const size_t cdim=5;
|
||||
static const double tr11= 0.3090169943749474241,
|
||||
ti11= PSIGN 0.95105651629515357212,
|
||||
tr12=-0.8090169943749474241,
|
||||
ti12= PSIGN 0.58778525229247312917;
|
||||
size_t i, k;
|
||||
cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
|
||||
|
||||
if (ido==1)
|
||||
for (k=0; k<l1; ++k)
|
||||
{
|
||||
PMC (t2,t5,CC(0,1,k),CC(0,4,k))
|
||||
PMC (t3,t4,CC(0,2,k),CC(0,3,k))
|
||||
CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
|
||||
CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
|
||||
c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
|
||||
c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
|
||||
c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
|
||||
c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
|
||||
c5.r=ti11*t5.r+ti12*t4.r;
|
||||
c5.i=ti11*t5.i+ti12*t4.i;
|
||||
c4.r=ti12*t5.r-ti11*t4.r;
|
||||
c4.i=ti12*t5.i-ti11*t4.i;
|
||||
CONJFLIPC(c5)
|
||||
PMC(CH(0,k,1),CH(0,k,4),c2,c5)
|
||||
CONJFLIPC(c4)
|
||||
PMC(CH(0,k,2),CH(0,k,3),c3,c4)
|
||||
}
|
||||
else
|
||||
for (k=0; k<l1; ++k)
|
||||
for (i=0; i<ido; ++i)
|
||||
{
|
||||
PMC (t2,t5,CC(i,1,k),CC(i,4,k))
|
||||
PMC (t3,t4,CC(i,2,k),CC(i,3,k))
|
||||
CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
|
||||
CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
|
||||
c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
|
||||
c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
|
||||
c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
|
||||
c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
|
||||
c5.r=ti11*t5.r+ti12*t4.r;
|
||||
c5.i=ti11*t5.i+ti12*t4.i;
|
||||
c4.r=ti12*t5.r-ti11*t4.r;
|
||||
c4.i=ti12*t5.i-ti11*t4.i;
|
||||
CONJFLIPC(c5)
|
||||
PMC(d2,d5,c2,c5)
|
||||
CONJFLIPC(c4)
|
||||
PMC(d3,d4,c3,c4)
|
||||
MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
|
||||
MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
|
||||
MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
|
||||
MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
|
||||
}
|
||||
}
|
||||
|
||||
static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
|
||||
const cmplx *wa)
|
||||
{
|
||||
const size_t cdim=6;
|
||||
static const double taui= PSIGN 0.86602540378443864676;
|
||||
cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
|
||||
size_t i, k;
|
||||
|
||||
if (ido==1)
|
||||
for (k=0; k<l1; ++k)
|
||||
{
|
||||
PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
|
||||
ta2.r = CC(0,0,k).r - .5*ta1.r;
|
||||
ta2.i = CC(0,0,k).i - .5*ta1.i;
|
||||
SCALEC(ta3,taui)
|
||||
ADDC(a0,CC(0,0,k),ta1)
|
||||
CONJFLIPC(ta3)
|
||||
PMC(a1,a2,ta2,ta3)
|
||||
PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
|
||||
tb2.r = CC(0,3,k).r - .5*tb1.r;
|
||||
tb2.i = CC(0,3,k).i - .5*tb1.i;
|
||||
SCALEC(tb3,taui)
|
||||
ADDC(b0,CC(0,3,k),tb1)
|
||||
CONJFLIPC(tb3)
|
||||
PMC(b1,b2,tb2,tb3)
|
||||
PMC(CH(0,k,0),CH(0,k,3),a0,b0)
|
||||
PMC(CH(0,k,4),CH(0,k,1),a1,b1)
|
||||
PMC(CH(0,k,2),CH(0,k,5),a2,b2)
|
||||
}
|
||||
else
|
||||
for (k=0; k<l1; ++k)
|
||||
for (i=0; i<ido; ++i)
|
||||
{
|
||||
PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
|
||||
ta2.r = CC(i,0,k).r - .5*ta1.r;
|
||||
ta2.i = CC(i,0,k).i - .5*ta1.i;
|
||||
SCALEC(ta3,taui)
|
||||
ADDC(a0,CC(i,0,k),ta1)
|
||||
CONJFLIPC(ta3)
|
||||
PMC(a1,a2,ta2,ta3)
|
||||
PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
|
||||
tb2.r = CC(i,3,k).r - .5*tb1.r;
|
||||
tb2.i = CC(i,3,k).i - .5*tb1.i;
|
||||
SCALEC(tb3,taui)
|
||||
ADDC(b0,CC(i,3,k),tb1)
|
||||
CONJFLIPC(tb3)
|
||||
PMC(b1,b2,tb2,tb3)
|
||||
PMC(CH(i,k,0),d3,a0,b0)
|
||||
PMC(d4,d1,a1,b1)
|
||||
PMC(d2,d5,a2,b2)
|
||||
MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
|
||||
MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
|
||||
MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
|
||||
MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
|
||||
MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
|
||||
}
|
||||
}
|
||||
|
||||
static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
|
||||
const cmplx *wa)
|
||||
{
|
||||
const size_t cdim=ip;
|
||||
cmplx *tarr=RALLOC(cmplx,2*ip);
|
||||
cmplx *ccl=tarr, *wal=tarr+ip;
|
||||
size_t i,j,k,l,jc,lc;
|
||||
size_t ipph = (ip+1)/2;
|
||||
|
||||
for (i=1; i<ip; ++i)
|
||||
wal[i]=wa[ido*(i-1)];
|
||||
for (k=0; k<l1; ++k)
|
||||
for (i=0; i<ido; ++i)
|
||||
{
|
||||
cmplx s=CC(i,0,k);
|
||||
ccl[0] = CC(i,0,k);
|
||||
for(j=1,jc=ip-1; j<ipph; ++j,--jc)
|
||||
{
|
||||
PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
|
||||
ADDC (s,s,ccl[j])
|
||||
}
|
||||
CH(i,k,0) = s;
|
||||
for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
|
||||
{
|
||||
cmplx abr=ccl[0], abi={0.,0.};
|
||||
size_t iang=0;
|
||||
for (l=1,lc=ip-1; l<ipph; ++l,--lc)
|
||||
{
|
||||
iang+=j;
|
||||
if (iang>ip) iang-=ip;
|
||||
abr.r += ccl[l ].r*wal[iang].r;
|
||||
abr.i += ccl[l ].i*wal[iang].r;
|
||||
abi.r += ccl[lc].r*wal[iang].i;
|
||||
abi.i += ccl[lc].i*wal[iang].i;
|
||||
}
|
||||
#ifndef BACKWARD
|
||||
{ abi.i=-abi.i; abi.r=-abi.r; }
|
||||
#endif
|
||||
CONJFLIPC(abi)
|
||||
PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
|
||||
}
|
||||
}
|
||||
|
||||
DEALLOC(tarr);
|
||||
|
||||
if (ido==1) return;
|
||||
|
||||
for (j=1; j<ip; ++j)
|
||||
for (k=0; k<l1; ++k)
|
||||
{
|
||||
size_t idij=(j-1)*ido+1;
|
||||
for(i=1; i<ido; ++i, ++idij)
|
||||
{
|
||||
cmplx t=CH(i,k,j);
|
||||
MULPMSIGNC (CH(i,k,j),wa[idij],t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef PSIGN
|
||||
#undef PMSIGNC
|
||||
#undef MULPMSIGNC
|
|
@ -1,5 +0,0 @@
|
|||
/*! \mainpage Libfftpack documentation
|
||||
<ul>
|
||||
<li>\ref fftgroup "Programming interface"
|
||||
</ul>
|
||||
*/
|
|
@ -1,291 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2005 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "bluestein.h"
|
||||
#include "fftpack.h"
|
||||
#include "ls_fft.h"
|
||||
|
||||
complex_plan make_complex_plan (size_t length)
|
||||
{
|
||||
complex_plan plan = RALLOC(complex_plan_i,1);
|
||||
size_t pfsum = prime_factor_sum(length);
|
||||
double comp1 = (double)(length*pfsum);
|
||||
double comp2 = 2*3*length*log(3.*length);
|
||||
comp2*=3.; /* fudge factor that appears to give good overall performance */
|
||||
plan->length=length;
|
||||
plan->bluestein = (comp2<comp1);
|
||||
if (plan->bluestein)
|
||||
bluestein_i (length,&(plan->work),&(plan->worksize));
|
||||
else
|
||||
{
|
||||
plan->worksize=4*length+15;
|
||||
plan->work=RALLOC(double,4*length+15);
|
||||
cffti(length, plan->work);
|
||||
}
|
||||
return plan;
|
||||
}
|
||||
|
||||
complex_plan copy_complex_plan (complex_plan plan)
|
||||
{
|
||||
if (!plan) return NULL;
|
||||
{
|
||||
complex_plan newplan = RALLOC(complex_plan_i,1);
|
||||
*newplan = *plan;
|
||||
newplan->work=RALLOC(double,newplan->worksize);
|
||||
memcpy(newplan->work,plan->work,sizeof(double)*newplan->worksize);
|
||||
return newplan;
|
||||
}
|
||||
}
|
||||
|
||||
void kill_complex_plan (complex_plan plan)
|
||||
{
|
||||
DEALLOC(plan->work);
|
||||
DEALLOC(plan);
|
||||
}
|
||||
|
||||
void complex_plan_forward (complex_plan plan, double *data)
|
||||
{
|
||||
if (plan->bluestein)
|
||||
bluestein (plan->length, data, plan->work, -1);
|
||||
else
|
||||
cfftf (plan->length, data, plan->work);
|
||||
}
|
||||
|
||||
void complex_plan_backward (complex_plan plan, double *data)
|
||||
{
|
||||
if (plan->bluestein)
|
||||
bluestein (plan->length, data, plan->work, 1);
|
||||
else
|
||||
cfftb (plan->length, data, plan->work);
|
||||
}
|
||||
|
||||
|
||||
real_plan make_real_plan (size_t length)
|
||||
{
|
||||
real_plan plan = RALLOC(real_plan_i,1);
|
||||
size_t pfsum = prime_factor_sum(length);
|
||||
double comp1 = .5*length*pfsum;
|
||||
double comp2 = 2*3*length*log(3.*length);
|
||||
comp2*=3; /* fudge factor that appears to give good overall performance */
|
||||
plan->length=length;
|
||||
plan->bluestein = (comp2<comp1);
|
||||
if (plan->bluestein)
|
||||
bluestein_i (length,&(plan->work),&(plan->worksize));
|
||||
else
|
||||
{
|
||||
plan->worksize=2*length+15;
|
||||
plan->work=RALLOC(double,2*length+15);
|
||||
rffti(length, plan->work);
|
||||
}
|
||||
return plan;
|
||||
}
|
||||
|
||||
real_plan copy_real_plan (real_plan plan)
|
||||
{
|
||||
if (!plan) return NULL;
|
||||
{
|
||||
real_plan newplan = RALLOC(real_plan_i,1);
|
||||
*newplan = *plan;
|
||||
newplan->work=RALLOC(double,newplan->worksize);
|
||||
memcpy(newplan->work,plan->work,sizeof(double)*newplan->worksize);
|
||||
return newplan;
|
||||
}
|
||||
}
|
||||
|
||||
void kill_real_plan (real_plan plan)
|
||||
{
|
||||
DEALLOC(plan->work);
|
||||
DEALLOC(plan);
|
||||
}
|
||||
|
||||
void real_plan_forward_fftpack (real_plan plan, double *data)
|
||||
{
|
||||
if (plan->bluestein)
|
||||
{
|
||||
size_t m;
|
||||
size_t n=plan->length;
|
||||
double *tmp = RALLOC(double,2*n);
|
||||
for (m=0; m<n; ++m)
|
||||
{
|
||||
tmp[2*m] = data[m];
|
||||
tmp[2*m+1] = 0.;
|
||||
}
|
||||
bluestein(n,tmp,plan->work,-1);
|
||||
data[0] = tmp[0];
|
||||
memcpy (data+1, tmp+2, (n-1)*sizeof(double));
|
||||
DEALLOC(tmp);
|
||||
}
|
||||
else
|
||||
rfftf (plan->length, data, plan->work);
|
||||
}
|
||||
|
||||
static void fftpack2halfcomplex (double *data, size_t n)
|
||||
{
|
||||
size_t m;
|
||||
double *tmp = RALLOC(double,n);
|
||||
tmp[0]=data[0];
|
||||
for (m=1; m<(n+1)/2; ++m)
|
||||
{
|
||||
tmp[m]=data[2*m-1];
|
||||
tmp[n-m]=data[2*m];
|
||||
}
|
||||
if (!(n&1))
|
||||
tmp[n/2]=data[n-1];
|
||||
memcpy (data,tmp,n*sizeof(double));
|
||||
DEALLOC(tmp);
|
||||
}
|
||||
|
||||
static void halfcomplex2fftpack (double *data, size_t n)
|
||||
{
|
||||
size_t m;
|
||||
double *tmp = RALLOC(double,n);
|
||||
tmp[0]=data[0];
|
||||
for (m=1; m<(n+1)/2; ++m)
|
||||
{
|
||||
tmp[2*m-1]=data[m];
|
||||
tmp[2*m]=data[n-m];
|
||||
}
|
||||
if (!(n&1))
|
||||
tmp[n-1]=data[n/2];
|
||||
memcpy (data,tmp,n*sizeof(double));
|
||||
DEALLOC(tmp);
|
||||
}
|
||||
|
||||
void real_plan_forward_fftw (real_plan plan, double *data)
|
||||
{
|
||||
real_plan_forward_fftpack (plan, data);
|
||||
fftpack2halfcomplex (data,plan->length);
|
||||
}
|
||||
|
||||
void real_plan_backward_fftpack (real_plan plan, double *data)
|
||||
{
|
||||
if (plan->bluestein)
|
||||
{
|
||||
size_t m;
|
||||
size_t n=plan->length;
|
||||
double *tmp = RALLOC(double,2*n);
|
||||
tmp[0]=data[0];
|
||||
tmp[1]=0.;
|
||||
memcpy (tmp+2,data+1, (n-1)*sizeof(double));
|
||||
if ((n&1)==0) tmp[n+1]=0.;
|
||||
for (m=2; m<n; m+=2)
|
||||
{
|
||||
tmp[2*n-m]=tmp[m];
|
||||
tmp[2*n-m+1]=-tmp[m+1];
|
||||
}
|
||||
bluestein (n, tmp, plan->work, 1);
|
||||
for (m=0; m<n; ++m)
|
||||
data[m] = tmp[2*m];
|
||||
DEALLOC(tmp);
|
||||
}
|
||||
else
|
||||
rfftb (plan->length, data, plan->work);
|
||||
}
|
||||
|
||||
void real_plan_backward_fftw (real_plan plan, double *data)
|
||||
{
|
||||
halfcomplex2fftpack (data,plan->length);
|
||||
real_plan_backward_fftpack (plan, data);
|
||||
}
|
||||
|
||||
void real_plan_forward_c (real_plan plan, double *data)
|
||||
{
|
||||
size_t m;
|
||||
size_t n=plan->length;
|
||||
|
||||
if (plan->bluestein)
|
||||
{
|
||||
for (m=1; m<2*n; m+=2)
|
||||
data[m]=0;
|
||||
bluestein (plan->length, data, plan->work, -1);
|
||||
data[1]=0;
|
||||
for (m=2; m<n; m+=2)
|
||||
{
|
||||
double avg;
|
||||
avg = 0.5*(data[2*n-m]+data[m]);
|
||||
data[2*n-m] = data[m] = avg;
|
||||
avg = 0.5*(data[2*n-m+1]-data[m+1]);
|
||||
data[2*n-m+1] = avg;
|
||||
data[m+1] = -avg;
|
||||
}
|
||||
if ((n&1)==0) data[n+1] = 0.;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* using "m+m" instead of "2*m" to avoid a nasty bug in Intel's compiler */
|
||||
for (m=0; m<n; ++m) data[m+1] = data[m+m];
|
||||
rfftf (n, data+1, plan->work);
|
||||
data[0] = data[1];
|
||||
data[1] = 0;
|
||||
for (m=2; m<n; m+=2)
|
||||
{
|
||||
data[2*n-m] = data[m];
|
||||
data[2*n-m+1] = -data[m+1];
|
||||
}
|
||||
if ((n&1)==0) data[n+1] = 0.;
|
||||
}
|
||||
}
|
||||
|
||||
void real_plan_backward_c (real_plan plan, double *data)
|
||||
{
|
||||
size_t n=plan->length;
|
||||
|
||||
if (plan->bluestein)
|
||||
{
|
||||
size_t m;
|
||||
data[1]=0;
|
||||
for (m=2; m<n; m+=2)
|
||||
{
|
||||
double avg;
|
||||
avg = 0.5*(data[2*n-m]+data[m]);
|
||||
data[2*n-m] = data[m] = avg;
|
||||
avg = 0.5*(data[2*n-m+1]-data[m+1]);
|
||||
data[2*n-m+1] = avg;
|
||||
data[m+1] = -avg;
|
||||
}
|
||||
if ((n&1)==0) data[n+1] = 0.;
|
||||
bluestein (plan->length, data, plan->work, 1);
|
||||
for (m=1; m<2*n; m+=2)
|
||||
data[m]=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptrdiff_t m;
|
||||
data[1] = data[0];
|
||||
rfftb (n, data+1, plan->work);
|
||||
for (m=n-1; m>=0; --m)
|
||||
{
|
||||
data[2*m] = data[m+1];
|
||||
data[2*m+1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,161 +0,0 @@
|
|||
/*
|
||||
* This file is part of libfftpack.
|
||||
*
|
||||
* libfftpack is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libfftpack is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libfftpack; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file ls_fft.h
|
||||
* Interface for the LevelS FFT package.
|
||||
*
|
||||
* Copyright (C) 2004 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_LS_FFT_H
|
||||
#define PLANCK_LS_FFT_H
|
||||
|
||||
#include "c_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!\defgroup fftgroup FFT interface
|
||||
This package is intended to calculate one-dimensional real or complex FFTs
|
||||
with high accuracy and good efficiency even for lengths containing large
|
||||
prime factors.
|
||||
The code is written in C, but a Fortran wrapper exists as well.
|
||||
|
||||
Before any FFT is executed, a plan must be generated for it. Plan creation
|
||||
is designed to be fast, so that there is no significant overhead if the
|
||||
plan is only used once or a few times.
|
||||
|
||||
The main component of the code is based on Paul N. Swarztrauber's FFTPACK in the
|
||||
double precision incarnation by Hugh C. Pumphrey
|
||||
(http://www.netlib.org/fftpack/dp.tgz).
|
||||
|
||||
I replaced the iterative sine and cosine calculations in radfg() and radbg()
|
||||
by an exact calculation, which slightly improves the transform accuracy for
|
||||
real FFTs with lengths containing large prime factors.
|
||||
|
||||
Since FFTPACK becomes quite slow for FFT lengths with large prime factors
|
||||
(in the worst case of prime lengths it reaches \f$\mathcal{O}(n^2)\f$
|
||||
complexity), I implemented Bluestein's algorithm, which computes a FFT of length
|
||||
\f$n\f$ by several FFTs of length \f$n_2\ge 2n-1\f$ and a convolution. Since
|
||||
\f$n_2\f$ can be chosen to be highly composite, this algorithm is more efficient
|
||||
if \f$n\f$ has large prime factors. The longer FFTs themselves are then computed
|
||||
using the FFTPACK routines.
|
||||
Bluestein's algorithm was implemented according to the description on Wikipedia
|
||||
(<a href="http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm">
|
||||
http://en.wikipedia.org/wiki/Bluestein%27s_FFT_algorithm</a>).
|
||||
|
||||
\b Thread-safety:
|
||||
All routines can be called concurrently; all information needed by
|
||||
<tt>ls_fft</tt> is stored in the plan variable. However, using the same plan
|
||||
variable on multiple threads simultaneously is not supported and will lead to
|
||||
data corruption.
|
||||
*/
|
||||
/*! \{ */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
double *work;
|
||||
size_t length, worksize;
|
||||
int bluestein;
|
||||
} complex_plan_i;
|
||||
|
||||
/*! The opaque handle type for complex-FFT plans. */
|
||||
typedef complex_plan_i * complex_plan;
|
||||
|
||||
/*! Returns a plan for a complex FFT with \a length elements. */
|
||||
complex_plan make_complex_plan (size_t length);
|
||||
/*! Constructs a copy of \a plan. */
|
||||
complex_plan copy_complex_plan (complex_plan plan);
|
||||
/*! Destroys a plan for a complex FFT. */
|
||||
void kill_complex_plan (complex_plan plan);
|
||||
/*! Computes a complex forward FFT on \a data, using \a plan.
|
||||
\a Data has the form <tt>r0, i0, r1, i1, ...,
|
||||
r[length-1], i[length-1]</tt>. */
|
||||
void complex_plan_forward (complex_plan plan, double *data);
|
||||
/*! Computes a complex backward FFT on \a data, using \a plan.
|
||||
\a Data has the form <tt>r0, i0, r1, i1, ...,
|
||||
r[length-1], i[length-1]</tt>. */
|
||||
void complex_plan_backward (complex_plan plan, double *data);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
double *work;
|
||||
size_t length, worksize;
|
||||
int bluestein;
|
||||
} real_plan_i;
|
||||
|
||||
/*! The opaque handle type for real-FFT plans. */
|
||||
typedef real_plan_i * real_plan;
|
||||
|
||||
/*! Returns a plan for a real FFT with \a length elements. */
|
||||
real_plan make_real_plan (size_t length);
|
||||
/*! Constructs a copy of \a plan. */
|
||||
real_plan copy_real_plan (real_plan plan);
|
||||
/*! Destroys a plan for a real FFT. */
|
||||
void kill_real_plan (real_plan plan);
|
||||
/*! Computes a real forward FFT on \a data, using \a plan
|
||||
and assuming the FFTPACK storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
|
||||
- on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt>
|
||||
(a total of \a length values). */
|
||||
void real_plan_forward_fftpack (real_plan plan, double *data);
|
||||
/*! Computes a real backward FFT on \a data, using \a plan
|
||||
and assuming the FFTPACK storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, r1, i1, r2, i2, ...</tt>
|
||||
(a total of \a length values);
|
||||
- on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
|
||||
void real_plan_backward_fftpack (real_plan plan, double *data);
|
||||
/*! Computes a real forward FFT on \a data, using \a plan
|
||||
and assuming the FFTW halfcomplex storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, r1, ..., r[length-1]</tt>;
|
||||
- on exit, it has the form <tt>r0, r1, r2, ..., i2, i1</tt>. */
|
||||
void real_plan_forward_fftw (real_plan plan, double *data);
|
||||
/*! Computes a real backward FFT on \a data, using \a plan
|
||||
and assuming the FFTW halfcomplex storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, r1, r2, ..., i2, i1</tt>.
|
||||
- on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
|
||||
void real_plan_backward_fftw (real_plan plan, double *data);
|
||||
/*! Computes a real forward FFT on \a data, using \a plan
|
||||
and assuming a full-complex storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, [ignored], r1, [ignored], ...,
|
||||
r[length-1], [ignored]</tt>;
|
||||
- on exit, it has the form <tt>r0, i0, r1, i1, ...,
|
||||
r[length-1], i[length-1]</tt>. */
|
||||
void real_plan_forward_c (real_plan plan, double *data);
|
||||
/*! Computes a real backward FFT on \a data, using \a plan
|
||||
and assuming a full-complex storage scheme:
|
||||
- on entry, \a data has the form <tt>r0, i0, r1, i1, ...,
|
||||
r[length-1], i[length-1]</tt>;
|
||||
- on exit, it has the form <tt>r0, 0, r1, 0, ..., r[length-1], 0</tt>. */
|
||||
void real_plan_backward_c (real_plan plan, double *data);
|
||||
|
||||
/*! \} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,21 +0,0 @@
|
|||
PKG:=libfftpack
|
||||
|
||||
SD:=$(SRCROOT)/$(PKG)
|
||||
OD:=$(BLDROOT)/$(PKG)
|
||||
|
||||
FULL_INCLUDE+= -I$(SD)
|
||||
|
||||
HDR_$(PKG):=$(SD)/*.h
|
||||
LIB_$(PKG):=$(LIBDIR)/libfftpack.a
|
||||
OBJ:=fftpack.o bluestein.o ls_fft.o
|
||||
OBJ:=$(OBJ:%=$(OD)/%)
|
||||
|
||||
ODEP:=$(HDR_$(PKG)) $(HDR_c_utils)
|
||||
|
||||
$(OD)/fftpack.o: $(SD)/fftpack_inc.c
|
||||
|
||||
$(OBJ): $(ODEP) | $(OD)_mkdir
|
||||
$(LIB_$(PKG)): $(OBJ)
|
||||
|
||||
all_hdr+=$(HDR_$(PKG))
|
||||
all_lib+=$(LIB_$(PKG))
|
|
@ -70,8 +70,8 @@
|
|||
libsharp supports shared-memory parallelisation via OpenMP; this feature will
|
||||
be automatically enabled if the compiler supports it.
|
||||
|
||||
Libsharp will also make use of SSE2 and AVX instructions when compiled for a
|
||||
platform known to support them.
|
||||
Libsharp will also make use of SSE2/AVX/AVX512 instructions when compiled
|
||||
for a platform known to support them.
|
||||
|
||||
Support for MPI-parallel transforms is also available; in this mode,
|
||||
every MPI task must provide a unique subset of the map and a_lm coefficients.
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
PKG:=libsharp
|
||||
|
||||
SD:=$(SRCROOT)/$(PKG)
|
||||
OD:=$(BLDROOT)/$(PKG)
|
||||
|
||||
FULL_INCLUDE+= -I$(SD)
|
||||
|
||||
HDR_$(PKG):=$(SD)/*.h
|
||||
LIB_$(PKG):=$(LIBDIR)/libsharp.a
|
||||
BIN:=sharp_testsuite
|
||||
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o sharp_legendre.o sharp_legendre_roots.o sharp_legendre_table.o
|
||||
ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
|
||||
LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
|
||||
ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
|
||||
|
||||
ODEP:=$(HDR_$(PKG)) $(HDR_libfftpack) $(HDR_c_utils)
|
||||
$(OD)/sharp_core.o: $(SD)/sharp_core_inchelper.c $(SD)/sharp_core_inc.c $(SD)/sharp_core_inc2.c
|
||||
$(OD)/sharp.o: $(SD)/sharp_mpi.c
|
||||
BDEP:=$(LIB_$(PKG)) $(LIB_libfftpack) $(LIB_c_utils)
|
||||
|
||||
$(LIB_$(PKG)): $(LIBOBJ)
|
||||
|
||||
$(ALLOBJ): $(ODEP) | $(OD)_mkdir
|
||||
BIN:=$(BIN:%=$(BINDIR)/%)
|
||||
$(BIN): $(BINDIR)/% : $(OD)/%.o $(BDEP)
|
||||
|
||||
all_hdr+=$(HDR_$(PKG))
|
||||
all_lib+=$(LIB_$(PKG))
|
||||
all_cbin+=$(BIN)
|
355
libsharp/sharp.c
355
libsharp/sharp.c
|
@ -25,17 +25,16 @@
|
|||
/*! \file sharp.c
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2006-2013 Max-Planck-Society
|
||||
* Copyright (C) 2006-2016 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "ls_fft.h"
|
||||
#include <string.h>
|
||||
#include "pocketfft/pocketfft.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
#include "sharp_internal.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_core.h"
|
||||
#include "sharp_vecutil.h"
|
||||
#include "walltime_c.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
|
@ -63,7 +62,7 @@ static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
|
|||
*nchunks = (ndata+(*chunksize)-1)/(*chunksize);
|
||||
}
|
||||
|
||||
int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||
NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||
{
|
||||
double ofs=lmax*0.01;
|
||||
if (ofs<100.) ofs=100.;
|
||||
|
@ -82,24 +81,25 @@ typedef struct
|
|||
double phi0_;
|
||||
dcmplx *shiftarr;
|
||||
int s_shift;
|
||||
real_plan plan;
|
||||
rfft_plan plan;
|
||||
int length;
|
||||
int norot;
|
||||
} ringhelper;
|
||||
|
||||
static void ringhelper_init (ringhelper *self)
|
||||
{
|
||||
static ringhelper rh_null = { 0, NULL, 0, NULL, 0 };
|
||||
static ringhelper rh_null = { 0, NULL, 0, NULL, 0, 0 };
|
||||
*self = rh_null;
|
||||
}
|
||||
|
||||
static void ringhelper_destroy (ringhelper *self)
|
||||
{
|
||||
if (self->plan) kill_real_plan(self->plan);
|
||||
if (self->plan) destroy_rfft_plan(self->plan);
|
||||
DEALLOC(self->shiftarr);
|
||||
ringhelper_init(self);
|
||||
}
|
||||
|
||||
static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||
NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||
{
|
||||
self->norot = (fabs(phi0)<1e-14);
|
||||
if (!(self->norot))
|
||||
|
@ -108,14 +108,18 @@ static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
|||
RESIZE (self->shiftarr,dcmplx,mmax+1);
|
||||
self->s_shift = mmax+1;
|
||||
self->phi0_ = phi0;
|
||||
// FIXME: improve this by using sincos2pibyn(nph) etc.
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
|
||||
// double *tmp=(double *) self->shiftarr;
|
||||
// sincos_multi (mmax+1, phi0, &tmp[1], &tmp[0], 2);
|
||||
}
|
||||
if (!self->plan) self->plan=make_real_plan(nph);
|
||||
if (nph!=(int)self->plan->length)
|
||||
// if (!self->plan) self->plan=make_rfft_plan(nph);
|
||||
if (nph!=(int)self->length)
|
||||
{
|
||||
kill_real_plan(self->plan);
|
||||
self->plan=make_real_plan(nph);
|
||||
if (self->plan) destroy_rfft_plan(self->plan);
|
||||
self->plan=make_rfft_plan(nph);
|
||||
self->length=nph;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,6 +131,7 @@ static int ringinfo_compare (const void *xa, const void *xb)
|
|||
static int ringpair_compare (const void *xa, const void *xb)
|
||||
{
|
||||
const sharp_ringpair *a=xa, *b=xb;
|
||||
// return (a->r1.sth < b->r1.sth) ? -1 : (a->r1.sth > b->r1.sth) ? 1 : 0;
|
||||
if (a->r1.nph==b->r1.nph)
|
||||
return (a->r1.phi0 < b->r1.phi0) ? -1 :
|
||||
((a->r1.phi0 > b->r1.phi0) ? 1 :
|
||||
|
@ -261,6 +266,7 @@ void sharp_destroy_geom_info (sharp_geom_info *geom_info)
|
|||
distribution are permissible. */
|
||||
static int sharp_get_mmax (int *mval, int nm)
|
||||
{
|
||||
//FIXME: if gaps are allowed, we have to search the maximum m in the array
|
||||
int *mcheck=RALLOC(int,nm);
|
||||
SET_ARRAY(mcheck,0,nm,0);
|
||||
for (int i=0; i<nm; ++i)
|
||||
|
@ -274,7 +280,7 @@ static int sharp_get_mmax (int *mval, int nm)
|
|||
return nm-1;
|
||||
}
|
||||
|
||||
static void ringhelper_phase2ring (ringhelper *self,
|
||||
NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
||||
const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
|
||||
int pstride, int flags)
|
||||
{
|
||||
|
@ -288,13 +294,19 @@ static void ringhelper_phase2ring (ringhelper *self,
|
|||
|
||||
if (nph>=2*mmax+1)
|
||||
{
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
dcmplx tmp = phase[m*pstride]*wgt;
|
||||
if(!self->norot) tmp*=self->shiftarr[m];
|
||||
data[2*m]=creal(tmp);
|
||||
data[2*m+1]=cimag(tmp);
|
||||
}
|
||||
if (self->norot)
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
data[2*m]=creal(phase[m*pstride])*wgt;
|
||||
data[2*m+1]=cimag(phase[m*pstride])*wgt;
|
||||
}
|
||||
else
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
|
||||
data[2*m]=creal(tmp)*wgt;
|
||||
data[2*m+1]=cimag(tmp)*wgt;
|
||||
}
|
||||
for (int m=2*(mmax+1); m<nph+2; ++m)
|
||||
data[m]=0.;
|
||||
}
|
||||
|
@ -323,10 +335,10 @@ static void ringhelper_phase2ring (ringhelper *self,
|
|||
}
|
||||
}
|
||||
data[1]=data[0];
|
||||
real_plan_backward_fftpack (self->plan, &(data[1]));
|
||||
rfft_backward (self->plan, &(data[1]), 1.);
|
||||
}
|
||||
|
||||
static void ringhelper_ring2phase (ringhelper *self,
|
||||
NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
||||
const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
|
||||
int pstride, int flags)
|
||||
{
|
||||
|
@ -342,7 +354,7 @@ static void ringhelper_ring2phase (ringhelper *self,
|
|||
if (flags&SHARP_REAL_HARMONICS)
|
||||
wgt *= sqrt_two;
|
||||
|
||||
real_plan_forward_fftpack (self->plan, &(data[1]));
|
||||
rfft_forward (self->plan, &(data[1]), 1.);
|
||||
data[0]=data[1];
|
||||
data[1]=data[nph+1]=0.;
|
||||
|
||||
|
@ -376,7 +388,7 @@ static void ringhelper_ring2phase (ringhelper *self,
|
|||
phase[m*pstride]=0.;
|
||||
}
|
||||
|
||||
static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
||||
NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
||||
int flags)
|
||||
{
|
||||
if (flags & SHARP_NO_FFT)
|
||||
|
@ -386,50 +398,55 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
|||
if (flags&SHARP_DP)
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=value;
|
||||
((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=value;
|
||||
((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=(float)value;
|
||||
((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=(float)value;
|
||||
((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
if (flags&SHARP_DP)
|
||||
{
|
||||
if (flags&SHARP_DP)
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=value;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=value;
|
||||
double *dmap=(double *)map;
|
||||
if (ginfo->pair[j].r1.stride==1)
|
||||
memset(&dmap[ginfo->pair[j].r1.ofs],0,
|
||||
ginfo->pair[j].r1.nph*sizeof(double));
|
||||
else
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
dmap[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
if ((ginfo->pair[j].r2.nph>0)&&(ginfo->pair[j].r2.stride==1))
|
||||
memset(&dmap[ginfo->pair[j].r2.ofs],0,
|
||||
ginfo->pair[j].r2.nph*sizeof(double));
|
||||
else
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
dmap[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j=0;j<ginfo->npairs;++j)
|
||||
{
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||
=(float)value;
|
||||
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
|
||||
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||
=(float)value;
|
||||
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
|
||||
NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
||||
int flags)
|
||||
{
|
||||
#define CLEARLOOP(real_t,body) \
|
||||
{ \
|
||||
|
@ -465,59 +482,67 @@ static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
|
|||
}
|
||||
}
|
||||
|
||||
static void init_output (sharp_job *job)
|
||||
NOINLINE static void init_output (sharp_job *job)
|
||||
{
|
||||
if (job->flags&SHARP_ADD) return;
|
||||
if (job->type == SHARP_MAP2ALM)
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i)
|
||||
for (int i=0; i<job->nalm; ++i)
|
||||
clear_alm (job->ainfo,job->alm[i],job->flags);
|
||||
else
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
fill_map (job->ginfo,job->map[i],0.,job->flags);
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
clear_map (job->ginfo,job->map[i],job->flags);
|
||||
}
|
||||
|
||||
static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
{
|
||||
if (job->type==SHARP_MAP2ALM)
|
||||
{
|
||||
if ((nm&1023)==0) nm+=3; // hack to avoid critical strides
|
||||
job->s_m=2*job->ntrans*job->nmaps;
|
||||
job->s_m=2*job->nmaps;
|
||||
if (((job->s_m*16*nm)&1023)==0) nm+=3; // hack to avoid critical strides
|
||||
job->s_th=job->s_m*nm;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((ntheta&1023)==0) ntheta+=3; // hack to avoid critical strides
|
||||
job->s_th=2*job->ntrans*job->nmaps;
|
||||
job->s_th=2*job->nmaps;
|
||||
if (((job->s_th*16*ntheta)&1023)==0) ntheta+=3; // hack to avoid critical strides
|
||||
job->s_m=job->s_th*ntheta;
|
||||
}
|
||||
job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta);
|
||||
job->phase=RALLOC(dcmplx,2*job->nmaps*nm*ntheta);
|
||||
}
|
||||
|
||||
static void dealloc_phase (sharp_job *job)
|
||||
{ DEALLOC(job->phase); }
|
||||
|
||||
static void alloc_almtmp (sharp_job *job, int lmax)
|
||||
{ job->almtmp=RALLOC(dcmplx,job->ntrans*job->nalm*(lmax+1)); }
|
||||
{ job->almtmp=RALLOC(dcmplx,job->nalm*(lmax+2)); }
|
||||
|
||||
static void dealloc_almtmp (sharp_job *job)
|
||||
{ DEALLOC(job->almtmp); }
|
||||
|
||||
static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||
NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||
{
|
||||
|
||||
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i) \
|
||||
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
||||
{ \
|
||||
for (int l=m; l<lmin; ++l) \
|
||||
for (int i=0; i<job->nalm; ++i) \
|
||||
job->almtmp[job->nalm*l+i] = 0; \
|
||||
for (int l=lmin; l<=lmax; ++l) \
|
||||
for (int i=0; i<job->nalm; ++i) \
|
||||
{ \
|
||||
source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
|
||||
job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \
|
||||
}
|
||||
source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
|
||||
job->almtmp[job->nalm*l+i] = expr_of_x; \
|
||||
} \
|
||||
for (int i=0; i<job->nalm; ++i) \
|
||||
job->almtmp[job->nalm*(lmax+1)+i] = 0; \
|
||||
}
|
||||
|
||||
if (job->type!=SHARP_MAP2ALM)
|
||||
{
|
||||
ptrdiff_t ofs=job->ainfo->mvstart[mi];
|
||||
int stride=job->ainfo->stride;
|
||||
int m=job->ainfo->mval[mi];
|
||||
int lmin=(m<job->spin) ? job->spin : m;
|
||||
/* in the case of SHARP_REAL_HARMONICS, phase2ring scales all the
|
||||
coefficients by sqrt_one_half; here we must compensate to avoid scaling
|
||||
m=0 */
|
||||
|
@ -562,20 +587,20 @@ static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
|||
}
|
||||
}
|
||||
else
|
||||
SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi],
|
||||
job->ntrans*job->nalm*(lmax+1),0.);
|
||||
memset (job->almtmp+job->nalm*job->ainfo->mval[mi], 0,
|
||||
job->nalm*(lmax+2-job->ainfo->mval[mi])*sizeof(dcmplx));
|
||||
|
||||
#undef COPY_LOOP
|
||||
}
|
||||
|
||||
static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||
NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||
{
|
||||
|
||||
#define COPY_LOOP(real_t, target_t, expr_of_x) \
|
||||
for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
|
||||
for (int i=0; i<job->ntrans*job->nalm; ++i) \
|
||||
for (int l=lmin; l<=lmax; ++l) \
|
||||
for (int i=0; i<job->nalm; ++i) \
|
||||
{ \
|
||||
dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \
|
||||
dcmplx x = job->almtmp[job->nalm*l+i]; \
|
||||
*(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \
|
||||
}
|
||||
|
||||
|
@ -583,6 +608,7 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
ptrdiff_t ofs=job->ainfo->mvstart[mi];
|
||||
int stride=job->ainfo->stride;
|
||||
int m=job->ainfo->mval[mi];
|
||||
int lmin=(m<job->spin) ? job->spin : m;
|
||||
/* in the case of SHARP_REAL_HARMONICS, ring2phase scales all the
|
||||
coefficients by sqrt_two; here we must compensate to avoid scaling
|
||||
m=0 */
|
||||
|
@ -629,27 +655,56 @@ static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
#undef COPY_LOOP
|
||||
}
|
||||
|
||||
static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri, double *ringtmp,
|
||||
int rstride)
|
||||
NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||
const double *ringtmp, int rstride)
|
||||
{
|
||||
double **dmap = (double **)job->map;
|
||||
float **fmap = (float **)job->map;
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
if (job->flags & SHARP_DP)
|
||||
dmap[i][ri->ofs+m*ri->stride] += ringtmp[i*rstride+m+1];
|
||||
if (job->flags & SHARP_DP)
|
||||
{
|
||||
double **dmap = (double **)job->map;
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
{
|
||||
double *restrict p1=&dmap[i][ri->ofs];
|
||||
const double *restrict p2=&ringtmp[i*rstride+1];
|
||||
if (ri->stride==1)
|
||||
{
|
||||
if (job->flags&SHARP_ADD)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
p1[m] += p2[m];
|
||||
else
|
||||
memcpy(p1,p2,ri->nph*sizeof(double));
|
||||
}
|
||||
else
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
p1[m*ri->stride] += p2[m];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float **fmap = (float **)job->map;
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
fmap[i][ri->ofs+m*ri->stride] += (float)ringtmp[i*rstride+m+1];
|
||||
}
|
||||
}
|
||||
|
||||
static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri, double *ringtmp,
|
||||
int rstride)
|
||||
NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
|
||||
double *ringtmp, int rstride)
|
||||
{
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
ringtmp[i*rstride+m+1] = (job->flags & SHARP_DP) ?
|
||||
((double *)(job->map[i]))[ri->ofs+m*ri->stride] :
|
||||
((float *)(job->map[i]))[ri->ofs+m*ri->stride];
|
||||
if (job->flags & SHARP_DP)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
{
|
||||
double *restrict p1=&ringtmp[i*rstride+1],
|
||||
*restrict p2=&(((double *)(job->map[i]))[ri->ofs]);
|
||||
if (ri->stride==1)
|
||||
memcpy(p1,p2,ri->nph*sizeof(double));
|
||||
else
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
p1[m] = p2[m*ri->stride];
|
||||
}
|
||||
else
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
for (int m=0; m<ri->nph; ++m)
|
||||
ringtmp[i*rstride+m+1] = ((float *)(job->map[i]))[ri->ofs+m*ri->stride];
|
||||
}
|
||||
|
||||
static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
||||
|
@ -657,7 +712,7 @@ static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
{
|
||||
if (ri->nph<0)
|
||||
{
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
phase[2*i+job->s_m*m]=0.;
|
||||
}
|
||||
|
@ -667,7 +722,7 @@ static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.;
|
||||
if (job->flags&SHARP_REAL_HARMONICS)
|
||||
wgt *= sqrt_two;
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
phase[2*i+job->s_m*m]= (job->flags & SHARP_DP) ?
|
||||
((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt :
|
||||
|
@ -684,7 +739,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.;
|
||||
if (job->flags&SHARP_REAL_HARMONICS)
|
||||
wgt *= sqrt_one_half;
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
if (job->flags & SHARP_DP)
|
||||
dmap[i][ri->ofs+m*ri->stride] += wgt*phase[2*i+job->s_m*m];
|
||||
|
@ -693,7 +748,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
}
|
||||
|
||||
//FIXME: set phase to zero if not SHARP_MAP2ALM?
|
||||
static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||
NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||
{
|
||||
if (job->type != SHARP_MAP2ALM) return;
|
||||
int pstride = job->s_m;
|
||||
|
@ -715,19 +770,19 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
|||
ringhelper helper;
|
||||
ringhelper_init(&helper);
|
||||
int rstride=job->ginfo->nphmax+2;
|
||||
double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride);
|
||||
double *ringtmp=RALLOC(double,job->nmaps*rstride);
|
||||
#pragma omp for schedule(dynamic,1)
|
||||
for (int ith=llim; ith<ulim; ++ith)
|
||||
{
|
||||
int dim2 = job->s_th*(ith-llim);
|
||||
ring2ringtmp(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride);
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r1),
|
||||
&ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags);
|
||||
if (job->ginfo->pair[ith].r2.nph>0)
|
||||
{
|
||||
ring2ringtmp(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride);
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r2),
|
||||
&ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags);
|
||||
}
|
||||
|
@ -738,7 +793,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
|||
}
|
||||
}
|
||||
|
||||
static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||
NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||
{
|
||||
if (job->type == SHARP_MAP2ALM) return;
|
||||
int pstride = job->s_m;
|
||||
|
@ -760,18 +815,18 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
|||
ringhelper helper;
|
||||
ringhelper_init(&helper);
|
||||
int rstride=job->ginfo->nphmax+2;
|
||||
double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride);
|
||||
double *ringtmp=RALLOC(double,job->nmaps*rstride);
|
||||
#pragma omp for schedule(dynamic,1)
|
||||
for (int ith=llim; ith<ulim; ++ith)
|
||||
{
|
||||
int dim2 = job->s_th*(ith-llim);
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r1),
|
||||
&ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags);
|
||||
ringtmp2ring(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride);
|
||||
if (job->ginfo->pair[ith].r2.nph>0)
|
||||
{
|
||||
for (int i=0; i<job->ntrans*job->nmaps; ++i)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r2),
|
||||
&ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags);
|
||||
ringtmp2ring(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride);
|
||||
|
@ -783,7 +838,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
|||
}
|
||||
}
|
||||
|
||||
static void sharp_execute_job (sharp_job *job)
|
||||
NOINLINE static void sharp_execute_job (sharp_job *job)
|
||||
{
|
||||
double timer=wallTime();
|
||||
job->opcnt=0;
|
||||
|
@ -798,8 +853,9 @@ static void sharp_execute_job (sharp_job *job)
|
|||
init_output (job);
|
||||
|
||||
int nchunks, chunksize;
|
||||
get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
|
||||
&chunksize);
|
||||
get_chunk_info(job->ginfo->npairs,sharp_veclen()*sharp_max_nvec(job->spin),
|
||||
&nchunks,&chunksize);
|
||||
//FIXME: needs to be changed to "nm"
|
||||
alloc_phase (job,mmax+1,chunksize);
|
||||
|
||||
/* chunk loop */
|
||||
|
@ -863,10 +919,8 @@ static void sharp_execute_job (sharp_job *job)
|
|||
|
||||
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
||||
int spin, void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags)
|
||||
const sharp_alm_info *alm_info, int flags)
|
||||
{
|
||||
UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
|
||||
"bad number of simultaneous transforms");
|
||||
if (type==SHARP_ALM2MAP_DERIV1) spin=1;
|
||||
if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
|
||||
if (type==SHARP_Yt) type=SHARP_MAP2ALM;
|
||||
|
@ -881,24 +935,21 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
|||
job->ginfo = geom_info;
|
||||
job->ainfo = alm_info;
|
||||
job->flags = flags;
|
||||
if ((job->flags&SHARP_NVMAX)==0)
|
||||
job->flags|=sharp_nv_oracle (type, spin, ntrans);
|
||||
if (alm_info->flags&SHARP_REAL_HARMONICS)
|
||||
job->flags|=SHARP_REAL_HARMONICS;
|
||||
job->time = 0.;
|
||||
job->opcnt = 0;
|
||||
job->ntrans = ntrans;
|
||||
job->alm=alm;
|
||||
job->map=map;
|
||||
}
|
||||
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
||||
int flags, double *time, unsigned long long *opcnt)
|
||||
{
|
||||
sharp_job job;
|
||||
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||
ntrans, flags);
|
||||
flags);
|
||||
|
||||
sharp_execute_job (&job);
|
||||
if (time!=NULL) *time = job.time;
|
||||
|
@ -910,96 +961,16 @@ void sharp_set_chunksize_min(int new_chunksize_min)
|
|||
void sharp_set_nchunks_max(int new_nchunks_max)
|
||||
{ nchunks_max=new_nchunks_max; }
|
||||
|
||||
int sharp_get_nv_max (void)
|
||||
{ return 6; }
|
||||
|
||||
static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
|
||||
{
|
||||
int lmax=511;
|
||||
int mmax=(lmax+1)/2;
|
||||
int nrings=(lmax+1)/4;
|
||||
int ppring=1;
|
||||
|
||||
spin = (spin!=0) ? 2 : 0;
|
||||
|
||||
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
|
||||
sharp_geom_info *tinfo;
|
||||
sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
|
||||
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
|
||||
double **map;
|
||||
ALLOC2D(map,double,ncomp,npix);
|
||||
SET_ARRAY(map[0],0,npix*ncomp,0.);
|
||||
|
||||
sharp_alm_info *alms;
|
||||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,ncomp,nalms);
|
||||
SET_ARRAY(alm[0],0,nalms*ncomp,0.);
|
||||
|
||||
double time=1e30;
|
||||
int nvbest=-1;
|
||||
|
||||
for (int nv=1; nv<=sharp_get_nv_max(); ++nv)
|
||||
{
|
||||
double time_acc=0.;
|
||||
double jtime;
|
||||
int ntries=0;
|
||||
do
|
||||
{
|
||||
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
nv|SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL);
|
||||
|
||||
if (jtime<time) { time=jtime; nvbest=nv; }
|
||||
time_acc+=jtime;
|
||||
++ntries;
|
||||
}
|
||||
while ((time_acc<0.02)&&(ntries<2));
|
||||
}
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
return nvbest;
|
||||
}
|
||||
|
||||
int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
|
||||
{
|
||||
static const int maxtr = 6;
|
||||
static int nv_opt[6][2][5] = {
|
||||
{{0,0,0,0,0},{0,0,0,0,0}},
|
||||
{{0,0,0,0,0},{0,0,0,0,0}},
|
||||
{{0,0,0,0,0},{0,0,0,0,0}},
|
||||
{{0,0,0,0,0},{0,0,0,0,0}},
|
||||
{{0,0,0,0,0},{0,0,0,0,0}},
|
||||
{{0,0,0,0,0},{0,0,0,0,0}} };
|
||||
|
||||
if (type==SHARP_ALM2MAP_DERIV1) spin=1;
|
||||
UTIL_ASSERT(type<5,"bad type");
|
||||
UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
|
||||
UTIL_ASSERT(spin>=0, "bad spin");
|
||||
ntrans=IMIN(ntrans,maxtr);
|
||||
|
||||
if (nv_opt[ntrans-1][spin!=0][type]==0)
|
||||
nv_opt[ntrans-1][spin!=0][type]=sharp_oracle(type,spin,ntrans);
|
||||
return nv_opt[ntrans-1][spin!=0][type];
|
||||
}
|
||||
|
||||
#ifdef USE_MPI
|
||||
#include "sharp_mpi.c"
|
||||
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
MPI_Comm comm = *(MPI_Comm*)pcomm;
|
||||
sharp_execute_mpi((MPI_Comm)comm, type, spin, alm, map, geom_info, alm_info, ntrans,
|
||||
sharp_execute_mpi((MPI_Comm)comm, type, spin, alm, map, geom_info, alm_info,
|
||||
flags, time, opcnt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1008,12 +979,12 @@ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
|||
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
/* Suppress unused warning: */
|
||||
(void)pcomm; (void)type; (void)spin; (void)alm; (void)map; (void)geom_info;
|
||||
(void)alm_info; (void)ntrans; (void)flags; (void)time; (void)opcnt;
|
||||
(void)alm_info; (void)flags; (void)time; (void)opcnt;
|
||||
return SHARP_ERROR_NO_MPI;
|
||||
}
|
||||
|
||||
|
|
237
libsharp/sharp.h
237
libsharp/sharp.h
|
@ -23,24 +23,243 @@
|
|||
*/
|
||||
|
||||
/*! \file sharp.h
|
||||
* Interface for the spherical transform library.
|
||||
* Portable interface for the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2006-2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
* Copyright (C) 2012-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_H
|
||||
#define PLANCK_SHARP_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
#error This header file cannot be included from C++, only from C
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <complex.h>
|
||||
/*! \internal
|
||||
Helper type containing information about a single ring. */
|
||||
typedef struct
|
||||
{
|
||||
double theta, phi0, weight, cth, sth;
|
||||
ptrdiff_t ofs;
|
||||
int nph, stride;
|
||||
} sharp_ringinfo;
|
||||
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp_legendre.h"
|
||||
#include "sharp_legendre_roots.h"
|
||||
#include "sharp_legendre_table.h"
|
||||
/*! \internal
|
||||
Helper type containing information about a pair of rings with colatitudes
|
||||
symmetric around the equator. */
|
||||
typedef struct
|
||||
{
|
||||
sharp_ringinfo r1,r2;
|
||||
} sharp_ringpair;
|
||||
|
||||
/*! \internal
|
||||
Type holding all required information about a map geometry. */
|
||||
typedef struct
|
||||
{
|
||||
sharp_ringpair *pair;
|
||||
int npairs, nphmax;
|
||||
} sharp_geom_info;
|
||||
|
||||
/*! \defgroup almgroup Helpers for dealing with a_lm */
|
||||
/*! \{ */
|
||||
|
||||
/*! \internal
|
||||
Helper type for index calculation in a_lm arrays. */
|
||||
typedef struct
|
||||
{
|
||||
/*! Maximum \a l index of the array */
|
||||
int lmax;
|
||||
/*! Number of different \a m values in this object */
|
||||
int nm;
|
||||
/*! Array with \a nm entries containing the individual m values */
|
||||
int *mval;
|
||||
/*! Combination of flags from sharp_almflags */
|
||||
int flags;
|
||||
/*! Array with \a nm entries containing the (hypothetical) indices of
|
||||
the coefficients with quantum numbers 0,\a mval[i] */
|
||||
ptrdiff_t *mvstart;
|
||||
/*! Stride between a_lm and a_(l+1),m */
|
||||
ptrdiff_t stride;
|
||||
} sharp_alm_info;
|
||||
|
||||
/*! alm_info flags */
|
||||
typedef enum { SHARP_PACKED = 1,
|
||||
/*!< m=0-coefficients are packed so that the (zero) imaginary part is
|
||||
not present. mvstart is in units of *real* float/double for all
|
||||
m; stride is in units of reals for m=0 and complex for m!=0 */
|
||||
SHARP_REAL_HARMONICS = 1<<6
|
||||
/*!< Use the real spherical harmonic convention. For
|
||||
m==0, the alm are treated exactly the same as in
|
||||
the complex case. For m!=0, alm[i] represent a
|
||||
pair (+abs(m), -abs(m)) instead of (real, imag),
|
||||
and the coefficients are scaled by a factor of
|
||||
sqrt(2) relative to the complex case. In other
|
||||
words, (sqrt(.5) * alm[i]) recovers the
|
||||
corresponding complex coefficient (when accessed
|
||||
as complex).
|
||||
*/
|
||||
} sharp_almflags;
|
||||
|
||||
|
||||
|
||||
/*! Creates an a_lm data structure from the following parameters:
|
||||
\param lmax maximum \a l quantum number (>=0)
|
||||
\param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax)
|
||||
\param stride the stride between entries with identical \a m, and \a l
|
||||
differing by 1.
|
||||
\param mstart the index of the (hypothetical) coefficient with the
|
||||
quantum numbers 0,\a m. Must have \a mmax+1 entries.
|
||||
\param alm_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_alm_info (int lmax, int mmax, int stride,
|
||||
const ptrdiff_t *mstart, sharp_alm_info **alm_info);
|
||||
/*! Creates an a_lm data structure which from the following parameters:
|
||||
\param lmax maximum \a l quantum number (\a >=0)
|
||||
\param nm number of different \a m (\a 0<=nm<=lmax+1)
|
||||
\param stride the stride between entries with identical \a m, and \a l
|
||||
differing by 1.
|
||||
\param mval array with \a nm entries containing the individual m values
|
||||
\param mvstart array with \a nm entries containing the (hypothetical)
|
||||
indices of the coefficients with the quantum numbers 0,\a mval[i]
|
||||
\param flags a combination of sharp_almflags (pass 0 unless you know you need this)
|
||||
\param alm_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
|
||||
const ptrdiff_t *mvstart, int flags, sharp_alm_info **alm_info);
|
||||
/*! Returns the index of the coefficient with quantum numbers \a l,
|
||||
\a mval[mi].
|
||||
\note for a \a sharp_alm_info generated by sharp_make_alm_info() this is
|
||||
the index for the coefficient with the quantum numbers \a l, \a mi. */
|
||||
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi);
|
||||
/*! Returns the number of alm coefficients described by \a self. If the SHARP_PACKED
|
||||
flag is set, this is number of "real" coeffecients (for m < 0 and m >= 0),
|
||||
otherwise it is the number of complex coefficients (with m>=0). */
|
||||
ptrdiff_t sharp_alm_count(const sharp_alm_info *self);
|
||||
/*! Deallocates the a_lm info object. */
|
||||
void sharp_destroy_alm_info (sharp_alm_info *info);
|
||||
|
||||
/*! \} */
|
||||
|
||||
/*! \defgroup geominfogroup Functions for dealing with geometry information */
|
||||
/*! \{ */
|
||||
|
||||
/*! Creates a geometry information from a set of ring descriptions.
|
||||
All arrays passed to this function must have \a nrings elements.
|
||||
\param nrings the number of rings in the map
|
||||
\param nph the number of pixels in each ring
|
||||
\param ofs the index of the first pixel in each ring in the map array
|
||||
\param stride the stride between consecutive pixels
|
||||
\param phi0 the azimuth (in radians) of the first pixel in each ring
|
||||
\param theta the colatitude (in radians) of each ring
|
||||
\param wgt the pixel weight to be used for the ring in map2alm
|
||||
and adjoint map2alm transforms.
|
||||
Pass NULL to use 1.0 as weight for all rings.
|
||||
\param geom_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
const int *stride, const double *phi0, const double *theta,
|
||||
const double *wgt, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Counts the number of grid points needed for (the local part of) a map described
|
||||
by \a info.
|
||||
*/
|
||||
ptrdiff_t sharp_map_size(const sharp_geom_info *info);
|
||||
|
||||
/*! Deallocates the geometry information in \a info. */
|
||||
void sharp_destroy_geom_info (sharp_geom_info *info);
|
||||
|
||||
/*! \} */
|
||||
|
||||
/*! \defgroup lowlevelgroup Low-level libsharp SHT interface */
|
||||
/*! \{ */
|
||||
|
||||
/*! Enumeration of SHARP job types. */
|
||||
typedef enum { SHARP_YtW=0, /*!< analysis */
|
||||
SHARP_MAP2ALM=SHARP_YtW, /*!< analysis */
|
||||
SHARP_Y=1, /*!< synthesis */
|
||||
SHARP_ALM2MAP=SHARP_Y, /*!< synthesis */
|
||||
SHARP_Yt=2, /*!< adjoint synthesis */
|
||||
SHARP_WY=3, /*!< adjoint analysis */
|
||||
SHARP_ALM2MAP_DERIV1=4 /*!< synthesis of first derivatives */
|
||||
} sharp_jobtype;
|
||||
|
||||
/*! Job flags */
|
||||
typedef enum { SHARP_DP = 1<<4,
|
||||
/*!< map and a_lm are in double precision */
|
||||
SHARP_ADD = 1<<5,
|
||||
/*!< results are added to the output arrays, instead of
|
||||
overwriting them */
|
||||
|
||||
/* NOTE: SHARP_REAL_HARMONICS, 1<<6, is also available in sharp_jobflags,
|
||||
but its use here is deprecated in favor of having it in the sharp_alm_info */
|
||||
|
||||
SHARP_NO_FFT = 1<<7,
|
||||
|
||||
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
|
||||
SHARP_NO_OPENMP = 1<<21, /* internal use only */
|
||||
} sharp_jobflags;
|
||||
|
||||
/*! Performs a libsharp SHT job. The interface deliberately does not use
|
||||
the C99 "complex" data type, in order to be callable from C89 and C++.
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the SHT. If \a spin>0, alm[0] and alm[1]
|
||||
point to the two a_lm sets of the SHT. The exact data type of \a alm
|
||||
depends on whether the SHARP_DP flag is set.
|
||||
\param map contains pointers to the maps. If \a spin==0,
|
||||
map[0] points to the map of the SHT. If \a spin>0, or \a type is
|
||||
SHARP_ALM2MAP_DERIV1, map[0] and map[1] point to the two maps of the SHT.
|
||||
The exact data type of \a map depends on whether the SHARP_DP flag is set.
|
||||
\param geom_info A \c sharp_geom_info object compatible with the provided
|
||||
\a map arrays.
|
||||
\param alm_info A \c sharp_alm_info object compatible with the provided
|
||||
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
||||
exactly once.
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
types are "complex float **" and "float **", respectively.
|
||||
\param time If not NULL, the wall clock time required for this SHT
|
||||
(in seconds) will be written here.
|
||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||
operation count for this SHT will be written here. */
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info,
|
||||
int flags, double *time, unsigned long long *opcnt);
|
||||
|
||||
void sharp_set_chunksize_min(int new_chunksize_min);
|
||||
void sharp_set_nchunks_max(int new_nchunks_max);
|
||||
|
||||
|
||||
typedef enum { SHARP_ERROR_NO_MPI = 1,
|
||||
/*!< libsharp not compiled with MPI support */
|
||||
} sharp_errors;
|
||||
|
||||
/*! Works like sharp_execute_mpi, but is always present whether or not libsharp
|
||||
is compiled with USE_MPI. This is primarily useful for wrapper code etc.
|
||||
|
||||
Note that \a pcomm has the type MPI_Comm*, except we declare void* to avoid
|
||||
pulling in MPI headers. I.e., the comm argument of sharp_execute_mpi
|
||||
is *(MPI_Comm*)pcomm.
|
||||
|
||||
Other parameters are the same as sharp_execute_mpi.
|
||||
|
||||
Returns 0 if successful, or SHARP_ERROR_NO_MPI if MPI is not available
|
||||
(in which case nothing is done).
|
||||
*/
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
|
||||
/*! \} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/*! \file sharp_almhelpers.c
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2008-2013 Max-Planck-Society
|
||||
* Copyright (C) 2008-2016 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
|
|
|
@ -25,14 +25,14 @@
|
|||
/*! \file sharp_almhelpers.h
|
||||
* SHARP helper function for the creation of a_lm data structures
|
||||
*
|
||||
* Copyright (C) 2008-2011 Max-Planck-Society
|
||||
* Copyright (C) 2008-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_ALMHELPERS_H
|
||||
#define PLANCK_SHARP_ALMHELPERS_H
|
||||
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
|
@ -1,98 +0,0 @@
|
|||
/*
|
||||
* This file is part of libc_utils.
|
||||
*
|
||||
* libc_utils is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libc_utils is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libc_utils; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_announce.c
|
||||
* Banner for module startup
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#ifdef USE_MPI
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_vecutil.h"
|
||||
|
||||
static void OpenMP_status(void)
|
||||
{
|
||||
#ifndef _OPENMP
|
||||
printf("OpenMP: not supported by this binary\n");
|
||||
#else
|
||||
int threads = omp_get_max_threads();
|
||||
if (threads>1)
|
||||
printf("OpenMP active: max. %d threads.\n",threads);
|
||||
else
|
||||
printf("OpenMP active, but running with 1 thread only.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void MPI_status(void)
|
||||
{
|
||||
#ifndef USE_MPI
|
||||
printf("MPI: not supported by this binary\n");
|
||||
#else
|
||||
int tasks;
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&tasks);
|
||||
if (tasks>1)
|
||||
printf("MPI active with %d tasks.\n",tasks);
|
||||
else
|
||||
printf("MPI active, but running with 1 task only.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void vecmath_status(void)
|
||||
{ printf("Supported vector length: %d\n",VLEN); }
|
||||
|
||||
void sharp_announce (const char *name)
|
||||
{
|
||||
size_t m, nlen=strlen(name);
|
||||
printf("\n+-");
|
||||
for (m=0; m<nlen; ++m) printf("-");
|
||||
printf("-+\n");
|
||||
printf("| %s |\n", name);
|
||||
printf("+-");
|
||||
for (m=0; m<nlen; ++m) printf("-");
|
||||
printf("-+\n\n");
|
||||
vecmath_status();
|
||||
OpenMP_status();
|
||||
MPI_status();
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void sharp_module_startup (const char *name, int argc, int argc_expected,
|
||||
const char *argv_expected, int verbose)
|
||||
{
|
||||
if (verbose) sharp_announce (name);
|
||||
if (argc==argc_expected) return;
|
||||
if (verbose) fprintf(stderr, "Usage: %s %s\n", name, argv_expected);
|
||||
exit(1);
|
||||
}
|
|
@ -1,39 +0,0 @@
|
|||
/*
|
||||
* This file is part of libc_utils.
|
||||
*
|
||||
* libc_utils is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libc_utils is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libc_utils; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_announce.h
|
||||
* Banner for module startup
|
||||
*
|
||||
* Copyright (C) 2012 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef SHARP_ANNOUNCE_H
|
||||
#define SHARP_ANNOUNCE_H
|
||||
|
||||
void sharp_announce (const char *name);
|
||||
void sharp_module_startup (const char *name, int argc, int argc_expected,
|
||||
const char *argv_expected, int verbose);
|
||||
|
||||
#endif
|
|
@ -1,149 +0,0 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/* \file sharp_complex_hacks.h
|
||||
* support for converting vector types and complex numbers
|
||||
*
|
||||
* Copyright (C) 2012,2013 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef SHARP_COMPLEX_HACKS_H
|
||||
#define SHARP_COMPLEX_HACKS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
#error This header file cannot be included from C++, only from C
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <complex.h>
|
||||
#include "sharp_vecsupport.h"
|
||||
|
||||
#define UNSAFE_CODE
|
||||
|
||||
#if (VLEN==1)
|
||||
|
||||
static inline complex double vhsum_cmplx(Tv a, Tv b)
|
||||
{ return a+_Complex_I*b; }
|
||||
|
||||
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
|
||||
complex double * restrict c1, complex double * restrict c2)
|
||||
{ *c1 += a+_Complex_I*b; *c2 += c+_Complex_I*d; }
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==2)
|
||||
|
||||
static inline complex double vhsum_cmplx (Tv a, Tv b)
|
||||
{
|
||||
#if defined(__SSE3__)
|
||||
Tv tmp = _mm_hadd_pd(a,b);
|
||||
#else
|
||||
Tv tmp = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
|
||||
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
|
||||
#endif
|
||||
union {Tv v; complex double c; } u;
|
||||
u.v=tmp; return u.c;
|
||||
}
|
||||
|
||||
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c,
|
||||
Tv d, complex double * restrict c1, complex double * restrict c2)
|
||||
{
|
||||
#ifdef UNSAFE_CODE
|
||||
#if defined(__SSE3__)
|
||||
vaddeq(*((__m128d *)c1),_mm_hadd_pd(a,b));
|
||||
vaddeq(*((__m128d *)c2),_mm_hadd_pd(c,d));
|
||||
#else
|
||||
vaddeq(*((__m128d *)c1),vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
|
||||
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0))));
|
||||
vaddeq(*((__m128d *)c2),vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
|
||||
_mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0))));
|
||||
#endif
|
||||
#else
|
||||
union {Tv v; complex double c; } u1, u2;
|
||||
#if defined(__SSE3__)
|
||||
u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
|
||||
#else
|
||||
u1.v = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
|
||||
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
|
||||
u2.v = vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
|
||||
_mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)));
|
||||
#endif
|
||||
*c1+=u1.c; *c2+=u2.c;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==4)
|
||||
|
||||
static inline complex double vhsum_cmplx (Tv a, Tv b)
|
||||
{
|
||||
Tv tmp=_mm256_hadd_pd(a,b);
|
||||
Tv tmp2=_mm256_permute2f128_pd(tmp,tmp,1);
|
||||
tmp=_mm256_add_pd(tmp,tmp2);
|
||||
#ifdef UNSAFE_CODE
|
||||
complex double ret;
|
||||
*((__m128d *)&ret)=_mm256_extractf128_pd(tmp, 0);
|
||||
return ret;
|
||||
#else
|
||||
union {Tv v; complex double c[2]; } u;
|
||||
u.v=tmp; return u.c[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
|
||||
complex double * restrict c1, complex double * restrict c2)
|
||||
{
|
||||
Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
|
||||
Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
|
||||
tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
|
||||
tmp1=vadd(tmp3,tmp4);
|
||||
#ifdef UNSAFE_CODE
|
||||
*((__m128d *)c1)=_mm_add_pd(*((__m128d *)c1),_mm256_extractf128_pd(tmp1, 0));
|
||||
*((__m128d *)c2)=_mm_add_pd(*((__m128d *)c2),_mm256_extractf128_pd(tmp1, 1));
|
||||
#else
|
||||
union {Tv v; complex double c[2]; } u;
|
||||
u.v=tmp1;
|
||||
*c1+=u.c[0]; *c2+=u.c[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==8)
|
||||
|
||||
static inline complex double vhsum_cmplx(Tv a, Tv b)
|
||||
{ return _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b); }
|
||||
|
||||
static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
|
||||
complex double * restrict c1, complex double * restrict c2)
|
||||
{
|
||||
*c1 += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b);
|
||||
*c2 += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,240 +1,116 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#define XCONCATX(a,b) a##_##b
|
||||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
#define ARCH default
|
||||
#include "sharp_core_inc.c"
|
||||
#undef ARCH
|
||||
|
||||
/*! \file sharp_core.c
|
||||
* Computational core
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
typedef void (*t_inner_loop) (sharp_job *job, const int *ispair,
|
||||
const double *cth_, const double *sth_, int llim, int ulim,
|
||||
sharp_Ylmgen_C *gen, int mi, const int *mlim);
|
||||
typedef int (*t_veclen) (void);
|
||||
typedef int (*t_max_nvec) (int spin);
|
||||
typedef const char *(*t_architecture) (void);
|
||||
|
||||
#include <complex.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "sharp_vecsupport.h"
|
||||
#include "sharp_complex_hacks.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
#include "sharp.h"
|
||||
#include "sharp_core.h"
|
||||
#include "c_utils.h"
|
||||
static t_inner_loop inner_loop_ = NULL;
|
||||
static t_veclen veclen_ = NULL;
|
||||
static t_max_nvec max_nvec_ = NULL;
|
||||
static t_architecture architecture_ = NULL;
|
||||
|
||||
typedef complex double dcmplx;
|
||||
#if defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
|
||||
// must be in the range [0;6]
|
||||
#define MAXJOB_SPECIAL 2
|
||||
#define DECL(arch) \
|
||||
static int XCONCATX2(have,arch)(void) \
|
||||
{ \
|
||||
static int res=-1; \
|
||||
if (res<0) \
|
||||
{ \
|
||||
__builtin_cpu_init(); \
|
||||
res = __builtin_cpu_supports(#arch); \
|
||||
} \
|
||||
return res; \
|
||||
} \
|
||||
\
|
||||
void XCONCATX2(inner_loop,arch) (sharp_job *job, const int *ispair, \
|
||||
const double *cth_, const double *sth_, int llim, int ulim, \
|
||||
sharp_Ylmgen_C *gen, int mi, const int *mlim); \
|
||||
int XCONCATX2(sharp_veclen,arch) (void); \
|
||||
int XCONCATX2(sharp_max_nvec,arch) (int spin); \
|
||||
const char *XCONCATX2(sharp_architecture,arch) (void);
|
||||
|
||||
#define XCONCAT2(a,b) a##_##b
|
||||
#define CONCAT2(a,b) XCONCAT2(a,b)
|
||||
#define XCONCAT3(a,b,c) a##_##b##_##c
|
||||
#define CONCAT3(a,b,c) XCONCAT3(a,b,c)
|
||||
#if (!defined(__AVX512F__))
|
||||
DECL(avx512f)
|
||||
#endif
|
||||
#if (!defined(__FMA4__))
|
||||
DECL(fma4)
|
||||
#endif
|
||||
#if (!defined(__FMA__))
|
||||
DECL(fma)
|
||||
#endif
|
||||
#if (!defined(__AVX2__))
|
||||
DECL(avx2)
|
||||
#endif
|
||||
#if (!defined(__AVX__))
|
||||
DECL(avx)
|
||||
#endif
|
||||
|
||||
#define nvec 1
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
#endif
|
||||
|
||||
#define nvec 2
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
static void assign_funcs(void)
|
||||
{
|
||||
#if defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
#define DECL2(arch) \
|
||||
if (XCONCATX2(have,arch)()) \
|
||||
{ \
|
||||
inner_loop_ = XCONCATX2(inner_loop,arch); \
|
||||
veclen_ = XCONCATX2(sharp_veclen,arch); \
|
||||
max_nvec_ = XCONCATX2(sharp_max_nvec,arch); \
|
||||
architecture_ = XCONCATX2(sharp_architecture,arch); \
|
||||
return; \
|
||||
}
|
||||
#if (!defined(__AVX512F__))
|
||||
DECL2(avx512f)
|
||||
#endif
|
||||
#if (!defined(__FMA4__))
|
||||
DECL2(fma4)
|
||||
#endif
|
||||
#if (!defined(__FMA__))
|
||||
DECL2(fma)
|
||||
#endif
|
||||
#if (!defined(__AVX2__))
|
||||
DECL2(avx2)
|
||||
#endif
|
||||
#if (!defined(__AVX__))
|
||||
DECL2(avx)
|
||||
#endif
|
||||
#endif
|
||||
inner_loop_ = inner_loop_default;
|
||||
veclen_ = sharp_veclen_default;
|
||||
max_nvec_ = sharp_max_nvec_default;
|
||||
architecture_ = sharp_architecture_default;
|
||||
}
|
||||
|
||||
#define nvec 3
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 4
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 5
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 6
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim)
|
||||
{
|
||||
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||
if (njobs<=MAXJOB_SPECIAL)
|
||||
{
|
||||
switch (njobs*16+nv)
|
||||
{
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
case 0x11:
|
||||
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x12:
|
||||
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x13:
|
||||
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x14:
|
||||
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x15:
|
||||
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x16:
|
||||
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||
case 0x21:
|
||||
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x22:
|
||||
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x23:
|
||||
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x24:
|
||||
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x25:
|
||||
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x26:
|
||||
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||
case 0x31:
|
||||
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x32:
|
||||
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x33:
|
||||
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x34:
|
||||
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x35:
|
||||
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x36:
|
||||
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||
case 0x41:
|
||||
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x42:
|
||||
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x43:
|
||||
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x44:
|
||||
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x45:
|
||||
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x46:
|
||||
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||
case 0x51:
|
||||
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x52:
|
||||
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x53:
|
||||
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x54:
|
||||
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x55:
|
||||
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x56:
|
||||
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||
case 0x61:
|
||||
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x62:
|
||||
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x63:
|
||||
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x64:
|
||||
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x65:
|
||||
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x66:
|
||||
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
else
|
||||
{
|
||||
switch (nv)
|
||||
{
|
||||
case 1:
|
||||
CONCAT2(inner_loop,1)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 2:
|
||||
CONCAT2(inner_loop,2)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 3:
|
||||
CONCAT2(inner_loop,3)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 4:
|
||||
CONCAT2(inner_loop,4)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 5:
|
||||
CONCAT2(inner_loop,5)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 6:
|
||||
CONCAT2(inner_loop,6)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
UTIL_FAIL("Incorrect vector parameters");
|
||||
if (!inner_loop_) assign_funcs();
|
||||
inner_loop_(job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
|
||||
}
|
||||
int sharp_veclen(void)
|
||||
{
|
||||
if (!veclen_) assign_funcs();
|
||||
return veclen_();
|
||||
}
|
||||
int sharp_max_nvec(int spin)
|
||||
{
|
||||
if (!max_nvec_) assign_funcs();
|
||||
return max_nvec_(spin);
|
||||
}
|
||||
const char *sharp_architecture(void)
|
||||
{
|
||||
if (!architecture_) assign_funcs();
|
||||
return architecture_();
|
||||
}
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_core.h
|
||||
* Interface for the computational core
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_CORE_H
|
||||
#define PLANCK_SHARP_CORE_H
|
||||
|
||||
#include "sharp_internal.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
11
libsharp/sharp_core_avx.c
Normal file
11
libsharp/sharp_core_avx.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
|
||||
#define XCONCATX(a,b) a##_##b
|
||||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
#define ARCH avx
|
||||
#pragma GCC target("avx")
|
||||
#include "sharp_core_inc.c"
|
||||
|
||||
#endif
|
11
libsharp/sharp_core_avx2.c
Normal file
11
libsharp/sharp_core_avx2.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
#if (!defined(__AVX2__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
|
||||
#define XCONCATX(a,b) a##_##b
|
||||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
#define ARCH avx2
|
||||
#pragma GCC target("avx2")
|
||||
#include "sharp_core_inc.c"
|
||||
|
||||
#endif
|
11
libsharp/sharp_core_avx512f.c
Normal file
11
libsharp/sharp_core_avx512f.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
#if (!defined(__AVX512F__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
|
||||
#define XCONCATX(a,b) a##_##b
|
||||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
#define ARCH avx512f
|
||||
#pragma GCC target("avx512f")
|
||||
#include "sharp_core_inc.c"
|
||||
|
||||
#endif
|
11
libsharp/sharp_core_fma.c
Normal file
11
libsharp/sharp_core_fma.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
#if (!defined(__FMA__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
|
||||
#define XCONCATX(a,b) a##_##b
|
||||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
#define ARCH fma
|
||||
#pragma GCC target("fma")
|
||||
#include "sharp_core_inc.c"
|
||||
|
||||
#endif
|
11
libsharp/sharp_core_fma4.c
Normal file
11
libsharp/sharp_core_fma4.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
#if (!defined(__FMA4__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
|
||||
#define XCONCATX(a,b) a##_##b
|
||||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
#define ARCH fma4
|
||||
#pragma GCC target("fma4")
|
||||
#include "sharp_core_inc.c"
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load diff
|
@ -1,803 +0,0 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_core_inc2.c
|
||||
* Type-dependent code for the computational core
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
static void Z(alm2map_kernel) (const Tb cth, Y(Tbri) * restrict p1,
|
||||
Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, const dcmplx * restrict alm,
|
||||
int l, int lmax NJ1)
|
||||
{
|
||||
if (njobs>1)
|
||||
{
|
||||
while (l<lmax-2)
|
||||
{
|
||||
Tb lam_3, lam_4;
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_3.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_4.v[i] = vsub(vmul(vmul(cth.v[i],lam_3.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
r0=vload(rf[l+2].f[0]);r1=vload(rf[l+2].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_4.v[i]),r0),vmul(lam_3.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar2=vload(creal(alm[njobs*l+j])),
|
||||
ai2=vload(cimag(alm[njobs*l+j])),
|
||||
ar4=vload(creal(alm[njobs*(l+2)+j])),
|
||||
ai4=vload(cimag(alm[njobs*(l+2)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaaeq(p1[j].r.v[i],lam_2.v[i],ar2,lam_4.v[i],ar4);
|
||||
vfmaaeq(p1[j].i.v[i],lam_2.v[i],ai2,lam_4.v[i],ai4);
|
||||
}
|
||||
Tv ar3=vload(creal(alm[njobs*(l+1)+j])),
|
||||
ai3=vload(cimag(alm[njobs*(l+1)+j])),
|
||||
ar1=vload(creal(alm[njobs*(l+3)+j])),
|
||||
ai1=vload(cimag(alm[njobs*(l+3)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaaeq(p2[j].r.v[i],lam_3.v[i],ar3,lam_1.v[i],ar1);
|
||||
vfmaaeq(p2[j].i.v[i],lam_3.v[i],ai3,lam_1.v[i],ai1);
|
||||
}
|
||||
}
|
||||
r0=vload(rf[l+3].f[0]);r1=vload(rf[l+3].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_4.v[i],r1));
|
||||
l+=4;
|
||||
}
|
||||
}
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),
|
||||
ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
|
||||
}
|
||||
ar=vload(creal(alm[njobs*(l+1)+j]));
|
||||
ai=vload(cimag(alm[njobs*(l+1)+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p2[j].r.v[i],lam_1.v[i],ar);
|
||||
vfmaeq(p2[j].i.v[i],lam_1.v[i],ai);
|
||||
}
|
||||
}
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(p1[j].r.v[i],lam_2.v[i],ar);
|
||||
vfmaeq(p1[j].i.v[i],lam_2.v[i],ai);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(map2alm_kernel) (const Tb cth, const Y(Tbri) * restrict p1,
|
||||
const Y(Tbri) * restrict p2, Tb lam_1, Tb lam_2,
|
||||
const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax
|
||||
NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv r0=vload(rf[l].f[0]),r1=vload(rf[l].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tr1=vzero, ti1=vzero, tr2=vzero, ti2=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tr1,lam_2.v[i],p1[j].r.v[i]);
|
||||
vfmaeq(ti1,lam_2.v[i],p1[j].i.v[i]);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tr2,lam_1.v[i],p2[j].r.v[i]);
|
||||
vfmaeq(ti2,lam_1.v[i],p2[j].i.v[i]);
|
||||
}
|
||||
vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]);
|
||||
}
|
||||
r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
vfmaeq(tre,lam_2.v[i],p1[j].r.v[i]);
|
||||
vfmaeq(tim,lam_2.v[i],p1[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(calc_alm2map) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbri) * restrict p1,
|
||||
Y(Tbri) * restrict p2 NJ1)
|
||||
{
|
||||
int l,lmax=gen->lmax;
|
||||
Tb lam_1,lam_2,scale;
|
||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||
|
||||
Tb corfac;
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
|
||||
vfmaeq(p1[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p1[j].i.v[i],tmp,ai);
|
||||
}
|
||||
}
|
||||
if (++l>lmax) break;
|
||||
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[njobs*l+j])),ai=vload(cimag(alm[njobs*l+j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
|
||||
vfmaeq(p2[j].r.v[i],tmp,ar);
|
||||
vfmaeq(p2[j].i.v[i],tmp,ai);
|
||||
}
|
||||
}
|
||||
if (++l>lmax) break;
|
||||
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
if (Y(rescale)(&lam_1,&lam_2,&scale))
|
||||
{
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
}
|
||||
}
|
||||
if (l>lmax) return;
|
||||
|
||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||
Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_map2alm) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, const Y(Tbri) * restrict p1,
|
||||
const Y(Tbri) * restrict p2 NJ1)
|
||||
{
|
||||
int lmax=gen->lmax;
|
||||
Tb lam_1,lam_2,scale;
|
||||
int l=gen->m;
|
||||
Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen);
|
||||
job->opcnt += (l-gen->m) * 4*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
|
||||
Tb corfac;
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_2.v[i],corfac.v[i]);
|
||||
vfmaeq(tre,tmp,p1[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p1[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
if (++l>lmax) return;
|
||||
Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_1.v[i] = vsub(vmul(vmul(cth.v[i],lam_2.v[i]),r0),vmul(lam_1.v[i],r1));
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv tre=vzero, tim=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv tmp=vmul(lam_1.v[i],corfac.v[i]);
|
||||
vfmaeq(tre,tmp,p2[j].r.v[i]);
|
||||
vfmaeq(tim,tmp,p2[j].i.v[i]);
|
||||
}
|
||||
alm[l*njobs+j]+=vhsum_cmplx(tre,tim);
|
||||
}
|
||||
if (++l>lmax) return;
|
||||
r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
lam_2.v[i] = vsub(vmul(vmul(cth.v[i],lam_1.v[i]),r0),vmul(lam_2.v[i],r1));
|
||||
if (Y(rescale)(&lam_1,&lam_2,&scale))
|
||||
{
|
||||
Y(getCorfac)(scale,&corfac,gen->cf);
|
||||
full_ieee = Y(TballGe)(scale,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac);
|
||||
Z(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax NJ2);
|
||||
}
|
||||
|
||||
static inline void Z(saddstep) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv agr=vload(creal(alm[2*j])), agi=vload(cimag(alm[2*j])),
|
||||
acr=vload(creal(alm[2*j+1])), aci=vload(cimag(alm[2*j+1]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px[j].qr.v[i],agr,lw);
|
||||
vfmaeq(px[j].qi.v[i],agi,lw);
|
||||
vfmaeq(px[j].ur.v[i],acr,lw);
|
||||
vfmaeq(px[j].ui.v[i],aci,lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmseq(py[j].qr.v[i],aci,lx);
|
||||
vfmaeq(py[j].qi.v[i],acr,lx);
|
||||
vfmaeq(py[j].ur.v[i],agi,lx);
|
||||
vfmseq(py[j].ui.v[i],agr,lx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Z(saddstepb) (Y(Tbqu) * restrict p1, Y(Tbqu) * restrict p2,
|
||||
const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
|
||||
const dcmplx * restrict alm1, const dcmplx * restrict alm2 NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv agr1=vload(creal(alm1[2*j])), agi1=vload(cimag(alm1[2*j])),
|
||||
acr1=vload(creal(alm1[2*j+1])), aci1=vload(cimag(alm1[2*j+1]));
|
||||
Tv agr2=vload(creal(alm2[2*j])), agi2=vload(cimag(alm2[2*j])),
|
||||
acr2=vload(creal(alm2[2*j+1])), aci2=vload(cimag(alm2[2*j+1]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw1=vadd(r2p.v[i],r2m.v[i]);
|
||||
Tv lx2=vsub(r1m.v[i],r1p.v[i]);
|
||||
vfmaseq(p1[j].qr.v[i],agr1,lw1,aci2,lx2);
|
||||
vfmaaeq(p1[j].qi.v[i],agi1,lw1,acr2,lx2);
|
||||
vfmaaeq(p1[j].ur.v[i],acr1,lw1,agi2,lx2);
|
||||
vfmaseq(p1[j].ui.v[i],aci1,lw1,agr2,lx2);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx1=vsub(r2m.v[i],r2p.v[i]);
|
||||
Tv lw2=vadd(r1p.v[i],r1m.v[i]);
|
||||
vfmaseq(p2[j].qr.v[i],agr2,lw2,aci1,lx1);
|
||||
vfmaaeq(p2[j].qi.v[i],agi2,lw2,acr1,lx1);
|
||||
vfmaaeq(p2[j].ur.v[i],acr2,lw2,agi1,lx1);
|
||||
vfmaseq(p2[j].ui.v[i],aci2,lw2,agr1,lx1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Z(saddstep2) (const Y(Tbqu) * restrict px,
|
||||
const Y(Tbqu) * restrict py, const Tb * restrict rxp,
|
||||
const Tb * restrict rxm, dcmplx * restrict alm NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv agr=vzero, agi=vzero, acr=vzero, aci=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp->v[i],rxm->v[i]);
|
||||
vfmaeq(agr,px[j].qr.v[i],lw);
|
||||
vfmaeq(agi,px[j].qi.v[i],lw);
|
||||
vfmaeq(acr,px[j].ur.v[i],lw);
|
||||
vfmaeq(aci,px[j].ui.v[i],lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm->v[i],rxp->v[i]);
|
||||
vfmseq(agr,py[j].ui.v[i],lx);
|
||||
vfmaeq(agi,py[j].ur.v[i],lx);
|
||||
vfmaeq(acr,py[j].qi.v[i],lx);
|
||||
vfmseq(aci,py[j].qr.v[i],lx);
|
||||
}
|
||||
vhsum_cmplx2(agr,agi,acr,aci,&alm[2*j],&alm[2*j+1]);
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(alm2map_spin_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l],
|
||||
&alm[2*njobs*(l+1)] NJ2);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l] NJ2);
|
||||
}
|
||||
|
||||
static void Z(map2alm_spin_kernel) (Tb cth, const Y(Tbqu) * restrict p1,
|
||||
const Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax
|
||||
NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
|
||||
Z(saddstep2)(p2, p1, &rec1p, &rec1m, &alm[2*njobs*(l+1)] NJ2);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Z(saddstep2)(p1, p2, &rec2p, &rec2m, &alm[2*njobs*l] NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_alm2map_spin) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2 NJ1)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin)
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Z(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
|
||||
&alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
Z(saddstep)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
|
||||
&alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax) return;
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Z(alm2map_spin_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_map2alm_spin) (Tb cth, Tb sth,
|
||||
const sharp_Ylmgen_C * restrict gen, sharp_job *job,
|
||||
const Y(Tbqu) * restrict p1, const Y(Tbqu) * restrict p2 NJ1)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin)
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
|
||||
Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) return;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm);
|
||||
Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l] NJ2);
|
||||
if (++l>lmax) return;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Z(map2alm_spin_kernel)(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax NJ2);
|
||||
}
|
||||
|
||||
static inline void Z(saddstep_d) (Y(Tbqu) * restrict px, Y(Tbqu) * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm NJ1)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
Tv ar=vload(creal(alm[j])), ai=vload(cimag(alm[j]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px[j].qr.v[i],ar,lw);
|
||||
vfmaeq(px[j].qi.v[i],ai,lw);
|
||||
}
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmaeq(py[j].ur.v[i],ai,lx);
|
||||
vfmseq(py[j].ui.v[i],ar,lx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void Z(alm2map_deriv1_kernel) (Tb cth, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax NJ1)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
Z(saddstep_d)(p1,p2,rec2p,rec2m,&alm[njobs*l] NJ2);
|
||||
Z(saddstep_d)(p2,p1,rec1p,rec1m,&alm[njobs*(l+1)] NJ2);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
Z(saddstep_d)(p1, p2, rec2p, rec2m, &alm[njobs*l] NJ2);
|
||||
}
|
||||
|
||||
static void Z(calc_alm2map_deriv1) (const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Y(Tbqu) * restrict p1,
|
||||
Y(Tbqu) * restrict p2 NJ1)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
Y(iter_to_ieee_spin)
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
|
||||
&alm[njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
Z(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm),
|
||||
&alm[njobs*l] NJ2);
|
||||
if (++l>lmax) break;
|
||||
Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
Y(getCorfac)(scalep,&corfacp,gen->cf);
|
||||
Y(getCorfac)(scalem,&corfacm,gen->cf);
|
||||
full_ieee = Y(TballGe)(scalep,sharp_minscale)
|
||||
&& Y(TballGe)(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax) return;
|
||||
|
||||
Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp);
|
||||
Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm);
|
||||
Z(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax NJ2);
|
||||
}
|
||||
|
||||
|
||||
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
||||
|
||||
static void Z(inner_loop) (sharp_job *job, const int *ispair,
|
||||
const double *cth_, const double *sth_, int llim, int ulim,
|
||||
sharp_Ylmgen_C *gen, int mi, const int *mlim NJ1)
|
||||
{
|
||||
const int nval=nvec*VLEN;
|
||||
const int m = job->ainfo->mval[mi];
|
||||
sharp_Ylmgen_prepare (gen, m);
|
||||
|
||||
switch (job->type)
|
||||
{
|
||||
case SHARP_ALM2MAP:
|
||||
case SHARP_ALM2MAP_DERIV1:
|
||||
{
|
||||
if (job->spin==0)
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tburi) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
|
||||
int skip=1;
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
}
|
||||
if (!skip)
|
||||
Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 2*j;
|
||||
complex double r1 = p1[j].s.r[i] + p1[j].s.i[i]*_Complex_I,
|
||||
r2 = p2[j].s.r[i] + p2[j].s.i[i]*_Complex_I;
|
||||
job->phase[phas_idx] = r1+r2;
|
||||
if (ispair[itot])
|
||||
job->phase[phas_idx+1] = r1-r2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tbuqu) p1[njobs],p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
}
|
||||
if (!skip)
|
||||
(job->type==SHARP_ALM2MAP) ?
|
||||
Z(calc_alm2map_spin )
|
||||
(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2) :
|
||||
Z(calc_alm2map_deriv1)
|
||||
(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 4*j;
|
||||
complex double q1 = p1[j].s.qr[i] + p1[j].s.qi[i]*_Complex_I,
|
||||
q2 = p2[j].s.qr[i] + p2[j].s.qi[i]*_Complex_I,
|
||||
u1 = p1[j].s.ur[i] + p1[j].s.ui[i]*_Complex_I,
|
||||
u2 = p2[j].s.ur[i] + p2[j].s.ui[i]*_Complex_I;
|
||||
job->phase[phas_idx] = q1+q2;
|
||||
job->phase[phas_idx+2] = u1+u2;
|
||||
if (ispair[itot])
|
||||
{
|
||||
dcmplx *phQ = &(job->phase[phas_idx+1]),
|
||||
*phU = &(job->phase[phas_idx+3]);
|
||||
*phQ = q1-q2;
|
||||
*phU = u1-u2;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ *phQ=-(*phQ); *phU=-(*phU); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SHARP_MAP2ALM:
|
||||
{
|
||||
if (job->spin==0)
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tburi) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
if ((i+ith<ulim-llim)&&(mlim[itot]>=m))
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 2*j;
|
||||
dcmplx ph1=job->phase[phas_idx];
|
||||
dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.;
|
||||
p1[j].s.r[i]=creal(ph1+ph2); p1[j].s.i[i]=cimag(ph1+ph2);
|
||||
p2[j].s.r[i]=creal(ph1-ph2); p2[j].s.i[i]=cimag(ph1-ph2);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!skip)
|
||||
Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Y(Tbuqu) p1[njobs], p2[njobs]; VZERO(p1); VZERO(p2);
|
||||
Y(Tbu) cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
if (i+ith<ulim-llim)
|
||||
{
|
||||
for (int j=0; j<njobs; ++j)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m + 4*j;
|
||||
dcmplx p1Q=job->phase[phas_idx],
|
||||
p1U=job->phase[phas_idx+2],
|
||||
p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
|
||||
p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ p2Q=-p2Q; p2U=-p2U; }
|
||||
p1[j].s.qr[i]=creal(p1Q+p2Q); p1[j].s.qi[i]=cimag(p1Q+p2Q);
|
||||
p1[j].s.ur[i]=creal(p1U+p2U); p1[j].s.ui[i]=cimag(p1U+p2U);
|
||||
p2[j].s.qr[i]=creal(p1Q-p2Q); p2[j].s.qi[i]=cimag(p1Q-p2Q);
|
||||
p2[j].s.ur[i]=creal(p1U-p2U); p2[j].s.ui[i]=cimag(p1U-p2U);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!skip)
|
||||
Z(calc_map2alm_spin) (cth.b,sth.b,gen,job,&p1[0].b,&p2[0].b NJ2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
UTIL_FAIL("must not happen");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef VZERO
|
|
@ -1,70 +0,0 @@
|
|||
#define Tb CONCAT2(Tb,nvec)
|
||||
#define Y(arg) CONCAT2(arg,nvec)
|
||||
#include "sharp_core_inc.c"
|
||||
|
||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
#define NJ1 , int njobs
|
||||
#define NJ2 , njobs
|
||||
#define Z(arg) CONCAT2(arg,nvec)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef NJ1
|
||||
#undef NJ2
|
||||
#endif
|
||||
|
||||
#define NJ1
|
||||
#define NJ2
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
#define njobs 1
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||
#define njobs 2
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||
#define njobs 3
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||
#define njobs 4
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||
#define njobs 5
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||
#define njobs 6
|
||||
#define Z(arg) CONCAT3(arg,nvec,njobs)
|
||||
#include "sharp_core_inc2.c"
|
||||
#undef Z
|
||||
#undef njobs
|
||||
#endif
|
||||
|
||||
#undef NJ1
|
||||
#undef NJ2
|
||||
|
||||
#undef Y
|
||||
#undef Tb
|
|
@ -25,14 +25,15 @@
|
|||
/*! \file sharp_cxx.h
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2012-2015 Max-Planck-Society
|
||||
* Copyright (C) 2012-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_CXX_H
|
||||
#define PLANCK_SHARP_CXX_H
|
||||
|
||||
#include "sharp_lowlevel.h"
|
||||
#include <complex>
|
||||
#include "sharp.h"
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
|
||||
|
@ -47,8 +48,8 @@ class sharp_base
|
|||
: ainfo(0), ginfo(0) {}
|
||||
~sharp_base()
|
||||
{
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
if (ginfo) sharp_destroy_geom_info(ginfo);
|
||||
if (ainfo) sharp_destroy_alm_info(ainfo);
|
||||
}
|
||||
|
||||
void set_general_geometry (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
|
@ -107,47 +108,115 @@ template<typename T> class sharp_cxxjob: public sharp_base
|
|||
private:
|
||||
static void *conv (T *ptr)
|
||||
{ return reinterpret_cast<void *>(ptr); }
|
||||
static void *conv (std::complex<T> *ptr)
|
||||
{ return reinterpret_cast<void *>(ptr); }
|
||||
static void *conv (const T *ptr)
|
||||
{ return const_cast<void *>(reinterpret_cast<const void *>(ptr)); }
|
||||
static void *conv (const std::complex<T> *ptr)
|
||||
{ return const_cast<void *>(reinterpret_cast<const void *>(ptr)); }
|
||||
|
||||
public:
|
||||
void alm2map (const T *alm, T *map, bool add)
|
||||
void alm2map (const T *alm, T *map, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP, 0, &aptr, &mptr, ginfo, ainfo, 1,
|
||||
flags,0,0);
|
||||
sharp_execute (SHARP_ALM2MAP, 0, &aptr, &mptr, ginfo, ainfo, flags, 0, 0);
|
||||
}
|
||||
void alm2map_spin (const T *alm1, const T *alm2, T *map1, T *map2,
|
||||
int spin, bool add)
|
||||
void alm2map (const std::complex<T> *alm, T *map, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP, 0, &aptr, &mptr, ginfo, ainfo, flags, 0, 0);
|
||||
}
|
||||
void alm2map_spin (const T *alm1, const T *alm2,
|
||||
T *map1, T *map2, int spin, bool add) const
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP,spin,aptr,mptr,ginfo,ainfo,1,flags,0,0);
|
||||
sharp_execute (SHARP_ALM2MAP,spin,aptr,mptr,ginfo,ainfo,flags, 0, 0);
|
||||
}
|
||||
void alm2map_der1 (const T *alm, T *map1, T *map2, bool add)
|
||||
void alm2map_spin (const std::complex<T> *alm1, const std::complex<T> *alm2,
|
||||
T *map1, T *map2, int spin, bool add) const
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP, spin, aptr, mptr, ginfo, ainfo, flags,0,0);
|
||||
}
|
||||
void alm2map_der1 (const T *alm, T *map1, T *map2, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr[2];
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP_DERIV1,1,&aptr,mptr,ginfo,ainfo,1,flags,0,0);
|
||||
sharp_execute (SHARP_ALM2MAP_DERIV1,1,&aptr,mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void map2alm (const T *map, T *alm, bool add)
|
||||
void alm2map_der1 (const std::complex<T> *alm, T *map1, T *map2, bool add)
|
||||
const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr[2];
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_ALM2MAP_DERIV1,1,&aptr,mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void alm2map_adjoint (const T *map, T *alm, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,0,&aptr,&mptr,ginfo,ainfo,1,flags,0,0);
|
||||
sharp_execute (SHARP_Yt,0,&aptr,&mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void map2alm_spin (const T *map1, const T *map2, T *alm1, T *alm2,
|
||||
int spin, bool add)
|
||||
void alm2map_adjoint (const T *map, std::complex<T> *alm, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_Yt,0,&aptr,&mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void alm2map_spin_adjoint (const T *map1, const T *map2, T *alm1, T *alm2,
|
||||
int spin, bool add) const
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,spin,aptr,mptr,ginfo,ainfo,1,flags,0,0);
|
||||
sharp_execute (SHARP_Yt,spin,aptr,mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void alm2map_spin_adjoint (const T *map1, const T *map2,
|
||||
std::complex<T> *alm1, std::complex<T> *alm2, int spin, bool add) const
|
||||
{
|
||||
alm2map_spin_adjoint (map1, map2, reinterpret_cast<T *>(alm1),
|
||||
reinterpret_cast<T *>(alm2), spin, add);
|
||||
}
|
||||
void map2alm (const T *map, T *alm, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,0,&aptr,&mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void map2alm (const T *map, std::complex<T> *alm, bool add) const
|
||||
{
|
||||
void *aptr=conv(alm), *mptr=conv(map);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,0,&aptr,&mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void map2alm_spin (const T *map1, const T *map2, T *alm1, T *alm2,
|
||||
int spin, bool add) const
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,spin,aptr,mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
void map2alm_spin (const T *map1, const T *map2, std::complex<T> *alm1,
|
||||
std::complex<T> *alm2, int spin, bool add) const
|
||||
{
|
||||
void *aptr[2], *mptr[2];
|
||||
aptr[0]=conv(alm1); aptr[1]=conv(alm2);
|
||||
mptr[0]=conv(map1); mptr[1]=conv(map2);
|
||||
int flags=cxxjobhelper__<T>::val | (add ? SHARP_ADD : 0);
|
||||
sharp_execute (SHARP_MAP2ALM,spin,aptr,mptr,ginfo,ainfo,flags,0,0);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -25,17 +25,15 @@
|
|||
/*! \file sharp_geomhelpers.c
|
||||
* Spherical transform library
|
||||
*
|
||||
* Copyright (C) 2006-2012 Max-Planck-Society<br>
|
||||
* Copyright (C) 2007-2008 Pavel Holoborodko (for gauss_legendre_tbl)
|
||||
* \author Martin Reinecke \author Pavel Holoborodko
|
||||
* Copyright (C) 2006-2018 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_legendre_roots.h"
|
||||
#include "c_utils.h"
|
||||
#include "ls_fft.h"
|
||||
#include <stdio.h>
|
||||
#include "pocketfft/pocketfft.h"
|
||||
|
||||
void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
|
||||
const int *rings, const double *weight, sharp_geom_info **geom_info)
|
||||
|
@ -161,9 +159,9 @@ void sharp_make_fejer1_geom_info (int nrings, int ppring, double phi0,
|
|||
weight[2*k ]=2./(1.-4.*k*k)*sin((k*pi)/nrings);
|
||||
}
|
||||
if ((nrings&1)==0) weight[nrings-1]=0.;
|
||||
real_plan plan = make_real_plan(nrings);
|
||||
real_plan_backward_fftpack(plan,weight);
|
||||
kill_real_plan(plan);
|
||||
rfft_plan plan = make_rfft_plan(nrings);
|
||||
rfft_backward(plan,weight,1.);
|
||||
destroy_rfft_plan(plan);
|
||||
|
||||
for (int m=0; m<(nrings+1)/2; ++m)
|
||||
{
|
||||
|
@ -208,9 +206,9 @@ void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
|
|||
for (int k=1; k<=(n/2-1); ++k)
|
||||
weight[2*k-1]=2./(1.-4.*k*k) + dw;
|
||||
weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1. -dw*((2-(n&1))*n-1);
|
||||
real_plan plan = make_real_plan(n);
|
||||
real_plan_backward_fftpack(plan,weight);
|
||||
kill_real_plan(plan);
|
||||
rfft_plan plan = make_rfft_plan(n);
|
||||
rfft_backward(plan,weight,1.);
|
||||
destroy_rfft_plan(plan);
|
||||
weight[n]=weight[0];
|
||||
|
||||
for (int m=0; m<(nrings+1)/2; ++m)
|
||||
|
@ -256,9 +254,9 @@ void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
|
|||
for (int k=1; k<=(n/2-1); ++k)
|
||||
weight[2*k-1]=2./(1.-4.*k*k);
|
||||
weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1.;
|
||||
real_plan plan = make_real_plan(n);
|
||||
real_plan_backward_fftpack(plan,weight);
|
||||
kill_real_plan(plan);
|
||||
rfft_plan plan = make_rfft_plan(n);
|
||||
rfft_backward(plan,weight,1.);
|
||||
destroy_rfft_plan(plan);
|
||||
for (int m=0; m<nrings; ++m)
|
||||
weight[m]=weight[m+1];
|
||||
|
||||
|
|
|
@ -25,14 +25,14 @@
|
|||
/*! \file sharp_geomhelpers.h
|
||||
* SHARP helper function for the creation of grid geometries
|
||||
*
|
||||
* Copyright (C) 2006-2013 Max-Planck-Society
|
||||
* Copyright (C) 2006-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_GEOMHELPERS_H
|
||||
#define PLANCK_SHARP_GEOMHELPERS_H
|
||||
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/*! \file sharp_internal.h
|
||||
* Internally used functionality for the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2006-2013 Max-Planck-Society
|
||||
* Copyright (C) 2006-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
|
@ -36,7 +36,9 @@
|
|||
#error This header file cannot be included from C++, only from C
|
||||
#endif
|
||||
|
||||
#include <complex.h>
|
||||
#include "sharp.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
|
||||
#define SHARP_MAXTRANS 100
|
||||
|
||||
|
@ -55,12 +57,17 @@ typedef struct
|
|||
const sharp_geom_info *ginfo;
|
||||
const sharp_alm_info *ainfo;
|
||||
double time;
|
||||
int ntrans;
|
||||
unsigned long long opcnt;
|
||||
} sharp_job;
|
||||
|
||||
int sharp_get_nv_max (void);
|
||||
int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans);
|
||||
int sharp_get_mlim (int lmax, int spin, double sth, double cth);
|
||||
|
||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim);
|
||||
|
||||
int sharp_veclen(void);
|
||||
int sharp_max_nvec(int spin);
|
||||
const char *sharp_architecture(void);
|
||||
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,176 +0,0 @@
|
|||
/*
|
||||
|
||||
NOTE NOTE NOTE
|
||||
|
||||
This file is edited in sharp_legendre.c.in which is then preprocessed.
|
||||
Do not make manual modifications to sharp_legendre.c.
|
||||
|
||||
NOTE NOTE NOTE
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre.c.in
|
||||
*
|
||||
* Copyright (C) 2015 University of Oslo
|
||||
* \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef NO_LEGENDRE
|
||||
#if (VLEN==8)
|
||||
#error This code is not tested with MIC; please compile with -DNO_LEGENDRE
|
||||
/* ...or test it (it probably works) and remove this check */
|
||||
#endif
|
||||
|
||||
#ifndef SHARP_LEGENDRE_CS
|
||||
#define SHARP_LEGENDRE_CS 4
|
||||
#endif
|
||||
|
||||
#define MAX_CS 6
|
||||
#if (SHARP_LEGENDRE_CS > MAX_CS)
|
||||
#error (SHARP_LEGENDRE_CS > MAX_CS)
|
||||
#endif
|
||||
|
||||
#include "sharp_legendre.h"
|
||||
#include "sharp_vecsupport.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
/*{ for cs in range(1, 7) }*/
|
||||
static void legendre_transform_vec{{cs}}{{T}}({{scalar}} *recfacs, {{scalar}} *bl, ptrdiff_t lmax,
|
||||
{{scalar}} xarr[({{cs}}) * VLEN{{T}}],
|
||||
{{scalar}} out[({{cs}}) * VLEN{{T}}]) {
|
||||
/*{ for i in range(cs) }*/
|
||||
Tv{{T}} P_{{i}}, Pm1_{{i}}, Pm2_{{i}}, x{{i}}, y{{i}};
|
||||
/*{ endfor }*/
|
||||
Tv{{T}} W1, W2, b, R;
|
||||
ptrdiff_t l;
|
||||
|
||||
/*{ for i in range(cs) }*/
|
||||
x{{i}} = vloadu{{T}}(xarr + {{i}} * VLEN{{T}});
|
||||
Pm1_{{i}} = vload{{T}}(1.0);
|
||||
P_{{i}} = x{{i}};
|
||||
b = vload{{T}}(*bl);
|
||||
y{{i}} = vmul{{T}}(Pm1_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
b = vload{{T}}(*(bl + 1));
|
||||
/*{ for i in range(cs) }*/
|
||||
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
for (l = 2; l <= lmax; ++l) {
|
||||
b = vload{{T}}(*(bl + l));
|
||||
R = vload{{T}}(*(recfacs + l));
|
||||
|
||||
/*
|
||||
P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
|
||||
*/
|
||||
/*{ for i in range(cs) }*/
|
||||
Pm2_{{i}} = Pm1_{{i}}; Pm1_{{i}} = P_{{i}};
|
||||
W1 = vmul{{T}}(x{{i}}, Pm1_{{i}});
|
||||
W2 = W1;
|
||||
W2 = vsub{{T}}(W2, Pm2_{{i}});
|
||||
P_{{i}} = W1;
|
||||
vfmaeq{{T}}(P_{{i}}, W2, R);
|
||||
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
}
|
||||
/*{ for i in range(cs) }*/
|
||||
vstoreu{{T}}(out + {{i}} * VLEN{{T}}, y{{i}});
|
||||
/*{ endfor }*/
|
||||
}
|
||||
/*{ endfor }*/
|
||||
/*{ endfor }*/
|
||||
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
void sharp_legendre_transform_recfac{{T}}({{scalar}} *r, ptrdiff_t lmax) {
|
||||
/* (l - 1) / l, for l >= 2 */
|
||||
ptrdiff_t l;
|
||||
r[0] = 0;
|
||||
r[1] = 1;
|
||||
for (l = 2; l <= lmax; ++l) {
|
||||
r[l] = ({{scalar}})(l - 1) / ({{scalar}})l;
|
||||
}
|
||||
}
|
||||
/*{ endfor }*/
|
||||
|
||||
/*
|
||||
Compute sum_l b_l P_l(x_i) for all i.
|
||||
*/
|
||||
|
||||
#define LEN (SHARP_LEGENDRE_CS * VLEN)
|
||||
#define LEN_s (SHARP_LEGENDRE_CS * VLEN_s)
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
void sharp_legendre_transform{{T}}({{scalar}} *bl,
|
||||
{{scalar}} *recfac,
|
||||
ptrdiff_t lmax,
|
||||
{{scalar}} *x, {{scalar}} *out, ptrdiff_t nx) {
|
||||
{{scalar}} xchunk[MAX_CS * VLEN{{T}}], outchunk[MAX_CS * LEN{{T}}];
|
||||
int compute_recfac;
|
||||
ptrdiff_t i, j, len;
|
||||
|
||||
compute_recfac = (recfac == NULL);
|
||||
if (compute_recfac) {
|
||||
recfac = malloc(sizeof({{scalar}}) * (lmax + 1));
|
||||
sharp_legendre_transform_recfac{{T}}(recfac, lmax);
|
||||
}
|
||||
|
||||
for (j = 0; j != LEN{{T}}; ++j) xchunk[j] = 0;
|
||||
|
||||
for (i = 0; i < nx; i += LEN{{T}}) {
|
||||
len = (i + (LEN{{T}}) <= nx) ? (LEN{{T}}) : (nx - i);
|
||||
for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
|
||||
switch ((len + VLEN{{T}} - 1) / VLEN{{T}}) {
|
||||
case 6: legendre_transform_vec6{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 5: legendre_transform_vec5{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 4: legendre_transform_vec4{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 3: legendre_transform_vec3{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 2: legendre_transform_vec2{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 1:
|
||||
case 0:
|
||||
legendre_transform_vec1{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
}
|
||||
for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
|
||||
}
|
||||
if (compute_recfac) {
|
||||
free(recfac);
|
||||
}
|
||||
}
|
||||
/*{ endfor }*/
|
||||
|
||||
#endif
|
|
@ -1,62 +0,0 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre.h
|
||||
* Interface for the Legendre transform parts of the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2015 University of Oslo
|
||||
* \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef SHARP_LEGENDRE_H
|
||||
#define SHARP_LEGENDRE_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef NO_LEGENDRE
|
||||
|
||||
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||
double *out, ptrdiff_t nx);
|
||||
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||
float *out, ptrdiff_t nx);
|
||||
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax);
|
||||
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
/*! \file sharp_legendre_roots.h
|
||||
*
|
||||
* Copyright (C) 2006-2012 Max-Planck-Society
|
||||
* Copyright (C) 2006-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,97 +0,0 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre_table.h
|
||||
* Interface for computing tables of the normalized associated Legendre transform
|
||||
*
|
||||
* Copyright (C) 2017 Dag Sverre Seljebotn
|
||||
* \author Dag Sverre Seljebotn
|
||||
*
|
||||
* Note: This code was mainly copied from libpsht; only a small high-level wrapper added
|
||||
*/
|
||||
|
||||
#ifndef SHARP_LEGENDRE_TABLE_H
|
||||
#define SHARP_LEGENDRE_TABLE_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef NO_LEGENDRE_TABLE
|
||||
|
||||
|
||||
/*! Returns a table of the normalized associated Legendre polynomials. m is a single
|
||||
fixed argument and a table for multiple l and cos(theta) is provided.
|
||||
(Internally, sin(theta) is also used for part of the computation, making theta
|
||||
the most convenient argument.)
|
||||
|
||||
NOTE: Support for spin-weighted Legendre functions is on the TODO-list. Only spin=0
|
||||
is supported now.
|
||||
|
||||
\param m The m-value to compute a table for; must be >= 0
|
||||
\param spin The spin parameter; pass 0 for the regular associated Legendre functions.
|
||||
NOTE: This is present for future compatability, currently only 0 is supported.
|
||||
\param lmax A table will be provided for l = m .. lmax
|
||||
\param ntheta How many theta values to evaluate for
|
||||
\param theta Contiguous 1D array of theta values
|
||||
\param theta_stride See below
|
||||
\param l_stride See below
|
||||
\param spin_stride See below. "ispin" will always be 0 if spin==0, or 0 for positive spin
|
||||
and 1 for the corresponding negative spin otherwise.
|
||||
\param out Contiguous 3D array that will receive the output. Each output entry
|
||||
is assigned to out[itheta * theta_stride + (l - m) * l_stride + ispin * spin_stride].
|
||||
*/
|
||||
void sharp_normalized_associated_legendre_table(
|
||||
ptrdiff_t m,
|
||||
int spin,
|
||||
ptrdiff_t lmax,
|
||||
ptrdiff_t ntheta,
|
||||
/* contiguous 1D array of theta values to compute for,
|
||||
contains ntheta values */
|
||||
double *theta,
|
||||
/* contiguous 2D array, in "theta-major ordering". Has `ntheta`
|
||||
rows and `ncols` columns. Indexed as out[itheta * ncols + (l - m)].
|
||||
If `ncols > lmax - m` then those entries are not accessed.
|
||||
*/
|
||||
ptrdiff_t theta_stride,
|
||||
ptrdiff_t l_stride,
|
||||
ptrdiff_t spin_stride,
|
||||
double *out
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,272 +0,0 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_lowlevel.h
|
||||
* Low-level, portable interface for the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef PLANCK_SHARP_LOWLEVEL_H
|
||||
#define PLANCK_SHARP_LOWLEVEL_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*! \internal
|
||||
Helper type containing information about a single ring. */
|
||||
typedef struct
|
||||
{
|
||||
double theta, phi0, weight, cth, sth;
|
||||
ptrdiff_t ofs;
|
||||
int nph, stride;
|
||||
} sharp_ringinfo;
|
||||
|
||||
/*! \internal
|
||||
Helper type containing information about a pair of rings with colatitudes
|
||||
symmetric around the equator. */
|
||||
typedef struct
|
||||
{
|
||||
sharp_ringinfo r1,r2;
|
||||
} sharp_ringpair;
|
||||
|
||||
/*! \internal
|
||||
Type holding all required information about a map geometry. */
|
||||
typedef struct
|
||||
{
|
||||
sharp_ringpair *pair;
|
||||
int npairs, nphmax;
|
||||
} sharp_geom_info;
|
||||
|
||||
/*! \defgroup almgroup Helpers for dealing with a_lm */
|
||||
/*! \{ */
|
||||
|
||||
/*! \internal
|
||||
Helper type for index calculation in a_lm arrays. */
|
||||
typedef struct
|
||||
{
|
||||
/*! Maximum \a l index of the array */
|
||||
int lmax;
|
||||
/*! Number of different \a m values in this object */
|
||||
int nm;
|
||||
/*! Array with \a nm entries containing the individual m values */
|
||||
int *mval;
|
||||
/*! Combination of flags from sharp_almflags */
|
||||
int flags;
|
||||
/*! Array with \a nm entries containing the (hypothetical) indices of
|
||||
the coefficients with quantum numbers 0,\a mval[i] */
|
||||
ptrdiff_t *mvstart;
|
||||
/*! Stride between a_lm and a_(l+1),m */
|
||||
ptrdiff_t stride;
|
||||
} sharp_alm_info;
|
||||
|
||||
/*! alm_info flags */
|
||||
typedef enum { SHARP_PACKED = 1,
|
||||
/*!< m=0-coefficients are packed so that the (zero) imaginary part is
|
||||
not present. mvstart is in units of *real* float/double for all
|
||||
m; stride is in units of reals for m=0 and complex for m!=0 */
|
||||
SHARP_REAL_HARMONICS = 1<<6
|
||||
/*!< Use the real spherical harmonic convention. For
|
||||
m==0, the alm are treated exactly the same as in
|
||||
the complex case. For m!=0, alm[i] represent a
|
||||
pair (+abs(m), -abs(m)) instead of (real, imag),
|
||||
and the coefficients are scaled by a factor of
|
||||
sqrt(2) relative to the complex case. In other
|
||||
words, (sqrt(.5) * alm[i]) recovers the
|
||||
corresponding complex coefficient (when accessed
|
||||
as complex).
|
||||
*/
|
||||
} sharp_almflags;
|
||||
|
||||
|
||||
|
||||
/*! Creates an a_lm data structure from the following parameters:
|
||||
\param lmax maximum \a l quantum number (>=0)
|
||||
\param mmax maximum \a m quantum number (0<= \a mmax <= \a lmax)
|
||||
\param stride the stride between entries with identical \a m, and \a l
|
||||
differing by 1.
|
||||
\param mstart the index of the (hypothetical) coefficient with the
|
||||
quantum numbers 0,\a m. Must have \a mmax+1 entries.
|
||||
\param alm_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_alm_info (int lmax, int mmax, int stride,
|
||||
const ptrdiff_t *mstart, sharp_alm_info **alm_info);
|
||||
/*! Creates an a_lm data structure which from the following parameters:
|
||||
\param lmax maximum \a l quantum number (\a >=0)
|
||||
\param nm number of different \a m (\a 0<=nm<=lmax+1)
|
||||
\param stride the stride between entries with identical \a m, and \a l
|
||||
differing by 1.
|
||||
\param mval array with \a nm entries containing the individual m values
|
||||
\param mvstart array with \a nm entries containing the (hypothetical)
|
||||
indices of the coefficients with the quantum numbers 0,\a mval[i]
|
||||
\param flags a combination of sharp_almflags (pass 0 unless you know you need this)
|
||||
\param alm_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
|
||||
const ptrdiff_t *mvstart, int flags, sharp_alm_info **alm_info);
|
||||
/*! Returns the index of the coefficient with quantum numbers \a l,
|
||||
\a mval[mi].
|
||||
\note for a \a sharp_alm_info generated by sharp_make_alm_info() this is
|
||||
the index for the coefficient with the quantum numbers \a l, \a mi. */
|
||||
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi);
|
||||
/*! Returns the number of alm coefficients described by \a self. If the SHARP_PACKED
|
||||
flag is set, this is number of "real" coeffecients (for m < 0 and m >= 0),
|
||||
otherwise it is the number of complex coefficients (with m>=0). */
|
||||
ptrdiff_t sharp_alm_count(const sharp_alm_info *self);
|
||||
/*! Deallocates the a_lm info object. */
|
||||
void sharp_destroy_alm_info (sharp_alm_info *info);
|
||||
|
||||
/*! \} */
|
||||
|
||||
/*! \defgroup geominfogroup Functions for dealing with geometry information */
|
||||
/*! \{ */
|
||||
|
||||
/*! Creates a geometry information from a set of ring descriptions.
|
||||
All arrays passed to this function must have \a nrings elements.
|
||||
\param nrings the number of rings in the map
|
||||
\param nph the number of pixels in each ring
|
||||
\param ofs the index of the first pixel in each ring in the map array
|
||||
\param stride the stride between consecutive pixels
|
||||
\param phi0 the azimuth (in radians) of the first pixel in each ring
|
||||
\param theta the colatitude (in radians) of each ring
|
||||
\param wgt the pixel weight to be used for the ring in map2alm
|
||||
and adjoint map2alm transforms.
|
||||
Pass NULL to use 1.0 as weight for all rings.
|
||||
\param geom_info will hold a pointer to the newly created data structure
|
||||
*/
|
||||
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
|
||||
const int *stride, const double *phi0, const double *theta,
|
||||
const double *wgt, sharp_geom_info **geom_info);
|
||||
|
||||
/*! Counts the number of grid points needed for (the local part of) a map described
|
||||
by \a info.
|
||||
*/
|
||||
ptrdiff_t sharp_map_size(const sharp_geom_info *info);
|
||||
|
||||
/*! Deallocates the geometry information in \a info. */
|
||||
void sharp_destroy_geom_info (sharp_geom_info *info);
|
||||
|
||||
/*! \} */
|
||||
|
||||
/*! \defgroup lowlevelgroup Low-level libsharp SHT interface */
|
||||
/*! \{ */
|
||||
|
||||
/*! Enumeration of SHARP job types. */
|
||||
typedef enum { SHARP_YtW=0, /*!< analysis */
|
||||
SHARP_MAP2ALM=SHARP_YtW, /*!< analysis */
|
||||
SHARP_Y=1, /*!< synthesis */
|
||||
SHARP_ALM2MAP=SHARP_Y, /*!< synthesis */
|
||||
SHARP_Yt=2, /*!< adjoint synthesis */
|
||||
SHARP_WY=3, /*!< adjoint analysis */
|
||||
SHARP_ALM2MAP_DERIV1=4 /*!< synthesis of first derivatives */
|
||||
} sharp_jobtype;
|
||||
|
||||
/*! Job flags */
|
||||
typedef enum { SHARP_DP = 1<<4,
|
||||
/*!< map and a_lm are in double precision */
|
||||
SHARP_ADD = 1<<5,
|
||||
/*!< results are added to the output arrays, instead of
|
||||
overwriting them */
|
||||
|
||||
/* NOTE: SHARP_REAL_HARMONICS, 1<<6, is also available in sharp_jobflags,
|
||||
but its use here is deprecated in favor of having it in the sharp_alm_info */
|
||||
|
||||
SHARP_NO_FFT = 1<<7,
|
||||
|
||||
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
|
||||
SHARP_NO_OPENMP = 1<<21, /* internal use only */
|
||||
SHARP_NVMAX = (1<<4)-1 /* internal use only */
|
||||
} sharp_jobflags;
|
||||
|
||||
/*! Performs a libsharp SHT job. The interface deliberately does not use
|
||||
the C99 "complex" data type, in order to be callable from C89 and C++.
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||
alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
|
||||
depends on whether the SHARP_DP flag is set.
|
||||
\param map contains pointers to the maps. If \a spin==0,
|
||||
map[0] points to the map of the first SHT, map[1] to that of the second
|
||||
etc. If \a spin>0, or \a type is SHARP_ALM2MAP_DERIV1, map[0] and map[1]
|
||||
point to the maps of the first SHT, map[2] and map[3] to those of the
|
||||
second, etc. The exact data type of \a map depends on whether the SHARP_DP
|
||||
flag is set.
|
||||
\param geom_info A \c sharp_geom_info object compatible with the provided
|
||||
\a map arrays.
|
||||
\param alm_info A \c sharp_alm_info object compatible with the provided
|
||||
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
||||
exactly once.
|
||||
\param ntrans the number of simultaneous SHTs
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
types are "complex float **" and "float **", respectively.
|
||||
\param time If not NULL, the wall clock time required for this SHT
|
||||
(in seconds) will be written here.
|
||||
\param opcnt If not NULL, a conservative estimate of the total floating point
|
||||
operation count for this SHT will be written here. */
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
||||
const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
|
||||
int flags, double *time, unsigned long long *opcnt);
|
||||
|
||||
void sharp_set_chunksize_min(int new_chunksize_min);
|
||||
void sharp_set_nchunks_max(int new_nchunks_max);
|
||||
|
||||
|
||||
typedef enum { SHARP_ERROR_NO_MPI = 1,
|
||||
/*!< libsharp not compiled with MPI support */
|
||||
} sharp_errors;
|
||||
|
||||
/*! Works like sharp_execute_mpi, but is always present whether or not libsharp
|
||||
is compiled with USE_MPI. This is primarily useful for wrapper code etc.
|
||||
|
||||
Note that \a pcomm has the type MPI_Comm*, except we declare void* to avoid
|
||||
pulling in MPI headers. I.e., the comm argument of sharp_execute_mpi
|
||||
is *(MPI_Comm*)pcomm.
|
||||
|
||||
Other parameters are the same as sharp_execute_mpi.
|
||||
|
||||
Returns 0 if successful, or SHARP_ERROR_NO_MPI if MPI is not available
|
||||
(in which case nothing is done).
|
||||
*/
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
|
||||
|
||||
|
||||
/*! \} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -25,7 +25,7 @@
|
|||
/*! \file sharp_mpi.c
|
||||
* Functionality only needed for MPI-parallel transforms
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* Copyright (C) 2012-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
|
@ -101,7 +101,7 @@ static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job,
|
|||
DEALLOC(theta_tmp);
|
||||
DEALLOC(ispair_tmp);
|
||||
|
||||
minfo->nph=2*job->nmaps*job->ntrans;
|
||||
minfo->nph=2*job->nmaps;
|
||||
|
||||
minfo->almcount=RALLOC(int,minfo->ntasks);
|
||||
minfo->almdisp=RALLOC(int,minfo->ntasks+1);
|
||||
|
@ -184,8 +184,8 @@ static void alloc_phase_mpi (sharp_job *job, int nm, int ntheta,
|
|||
{
|
||||
ptrdiff_t phase_size = (job->type==SHARP_MAP2ALM) ?
|
||||
(ptrdiff_t)(nmfull)*ntheta : (ptrdiff_t)(nm)*nthetafull;
|
||||
job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*phase_size);
|
||||
job->s_m=2*job->ntrans*job->nmaps;
|
||||
job->phase=RALLOC(dcmplx,2*job->nmaps*phase_size);
|
||||
job->s_m=2*job->nmaps;
|
||||
job->s_th = job->s_m * ((job->type==SHARP_MAP2ALM) ? nmfull : nm);
|
||||
}
|
||||
|
||||
|
@ -315,12 +315,12 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
|||
|
||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
sharp_job job;
|
||||
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
|
||||
ntrans, flags);
|
||||
flags);
|
||||
|
||||
sharp_execute_job_mpi (&job, comm);
|
||||
if (time!=NULL) *time = job.time;
|
||||
|
@ -331,15 +331,15 @@ void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
|||
without declaring it in C header as it should not be available to C code */
|
||||
void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt)
|
||||
{
|
||||
sharp_execute_mpi(MPI_Comm_f2c(comm), type, spin, alm, map, geom_info,
|
||||
alm_info, ntrans, flags, time, opcnt);
|
||||
alm_info, flags, time, opcnt);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/*! \file sharp_mpi.h
|
||||
* Interface for the spherical transform library with MPI support.
|
||||
*
|
||||
* Copyright (C) 2011,2012 Max-Planck-Society
|
||||
* Copyright (C) 2011-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
|
@ -33,28 +33,25 @@
|
|||
#define PLANCK_SHARP_MPI_H
|
||||
|
||||
#include <mpi.h>
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*! Performs an MPI parallel libsharp SHT job. The interface deliberately does
|
||||
not use the C99 "complex" data type, in order to be callable from C.
|
||||
not use the C99 "complex" data type, in order to be callable from C89 and C++.
|
||||
\param comm the MPI communicator to be used for this SHT
|
||||
\param type the type of SHT
|
||||
\param spin the spin of the quantities to be transformed
|
||||
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
|
||||
alm[0] points to the a_lm of the first SHT, alm[1] to those of the second
|
||||
etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT,
|
||||
alm[2] and alm[3] to those of the second, etc. The exact data type of \a alm
|
||||
alm[0] points to the a_lm of the SHT. If \a spin>0, alm[0] and alm[1]
|
||||
point to the two a_lm sets of the SHT. The exact data type of \a alm
|
||||
depends on whether the SHARP_DP flag is set.
|
||||
\param map contains pointers to the maps. If \a spin==0,
|
||||
map[0] points to the map of the first SHT, map[1] to that of the second
|
||||
etc. If \a spin>0, or \a type is SHARP_ALM2MAP_DERIV1, map[0] and map[1]
|
||||
point to the maps of the first SHT, map[2] and map[3] to those of the
|
||||
second, etc. The exact data type of \a map depends on whether the SHARP_DP
|
||||
flag is set.
|
||||
map[0] points to the map of the SHT. If \a spin>0, or \a type is
|
||||
SHARP_ALM2MAP_DERIV1, map[0] and map[1] point to the two maps of the SHT.
|
||||
The exact data type of \a map depends on whether the SHARP_DP flag is set.
|
||||
\param geom_info A \c sharp_geom_info object compatible with the provided
|
||||
\a map arrays. The total map geometry is the union of all \a geom_info
|
||||
objects over the participating MPI tasks.
|
||||
|
@ -62,7 +59,6 @@ extern "C" {
|
|||
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
|
||||
exactly once in the union of all \a alm_info objects over the participating
|
||||
MPI tasks.
|
||||
\param ntrans the number of simultaneous SHTs
|
||||
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
|
||||
\a alm is expected to have the type "complex double **" and \a map is
|
||||
expected to have the type "double **"; otherwise, the expected
|
||||
|
@ -73,7 +69,7 @@ extern "C" {
|
|||
operation count for this SHT will be written here. */
|
||||
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, const sharp_geom_info *geom_info,
|
||||
const sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
const sharp_alm_info *alm_info, int flags, double *time,
|
||||
unsigned long long *opcnt);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
*/
|
||||
|
||||
/* \file sharp_testsuite.c
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
*
|
||||
* Copyright (C) 2012-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
|
@ -42,17 +42,68 @@
|
|||
#include "sharp_geomhelpers.h"
|
||||
#include "sharp_almhelpers.h"
|
||||
#include "c_utils.h"
|
||||
#include "sharp_announce.h"
|
||||
#include "memusage.h"
|
||||
#include "sharp_vecsupport.h"
|
||||
|
||||
static void OpenMP_status(void)
|
||||
{
|
||||
#ifndef _OPENMP
|
||||
printf("OpenMP: not supported by this binary\n");
|
||||
#else
|
||||
int threads = omp_get_max_threads();
|
||||
if (threads>1)
|
||||
printf("OpenMP active: max. %d threads.\n",threads);
|
||||
else
|
||||
printf("OpenMP active, but running with 1 thread only.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void MPI_status(void)
|
||||
{
|
||||
#ifndef USE_MPI
|
||||
printf("MPI: not supported by this binary\n");
|
||||
#else
|
||||
int tasks;
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&tasks);
|
||||
if (tasks>1)
|
||||
printf("MPI active with %d tasks.\n",tasks);
|
||||
else
|
||||
printf("MPI active, but running with 1 task only.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void sharp_announce (const char *name)
|
||||
{
|
||||
size_t m, nlen=strlen(name);
|
||||
printf("\n+-");
|
||||
for (m=0; m<nlen; ++m) printf("-");
|
||||
printf("-+\n");
|
||||
printf("| %s |\n", name);
|
||||
printf("+-");
|
||||
for (m=0; m<nlen; ++m) printf("-");
|
||||
printf("-+\n\n");
|
||||
printf("Detected hardware architecture: %s\n", sharp_architecture());
|
||||
printf("Supported vector length: %d\n", sharp_veclen());
|
||||
OpenMP_status();
|
||||
MPI_status();
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void sharp_module_startup (const char *name, int argc, int argc_expected,
|
||||
const char *argv_expected, int verbose)
|
||||
{
|
||||
if (verbose) sharp_announce (name);
|
||||
if (argc==argc_expected) return;
|
||||
if (verbose) fprintf(stderr, "Usage: %s %s\n", name, argv_expected);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
int ntasks, mytask;
|
||||
|
||||
static double drand (double min, double max, int *state)
|
||||
static double drand (double min, double max, unsigned *state)
|
||||
{
|
||||
*state = (((*state) * 1103515245) + 12345) & 0x7fffffff;
|
||||
*state = (((*state) * 1103515245u) + 12345u) & 0x7fffffffu;
|
||||
return min + (max-min)*(*state)/(0x7fffffff+1.0);
|
||||
}
|
||||
|
||||
|
@ -65,7 +116,7 @@ static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
|
|||
for (mi=0;mi<helper->nm; ++mi)
|
||||
{
|
||||
int m=helper->mval[mi];
|
||||
int state=1234567*cnt+8912*m; // random seed
|
||||
unsigned state=1234567u*(unsigned)cnt+8912u*(unsigned)m; // random seed
|
||||
for (int l=m;l<=helper->lmax; ++l)
|
||||
{
|
||||
if ((l<spin)&&(m<spin))
|
||||
|
@ -242,13 +293,14 @@ static int good_fft_size(int n)
|
|||
}
|
||||
|
||||
static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
|
||||
int *gpar2, sharp_geom_info **ginfo, sharp_alm_info **ainfo)
|
||||
int *gpar2, sharp_geom_info **ginfo, sharp_alm_info **ainfo, int verbose)
|
||||
{
|
||||
UTIL_ASSERT(lmax>=0,"lmax must not be negative");
|
||||
if (*mmax<0) *mmax=lmax;
|
||||
UTIL_ASSERT(*mmax<=lmax,"mmax larger than lmax");
|
||||
|
||||
if (mytask==0) printf ("lmax: %d, mmax: %d\n",lmax,*mmax);
|
||||
verbose &= (mytask==0);
|
||||
if (verbose) printf ("lmax: %d, mmax: %d\n",lmax,*mmax);
|
||||
|
||||
sharp_make_triangular_alm_info(lmax,*mmax,1,ainfo);
|
||||
#ifdef USE_MPI
|
||||
|
@ -260,14 +312,14 @@ static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
|
|||
if (*gpar1<1) *gpar1=lmax/2;
|
||||
if (*gpar1==0) ++(*gpar1);
|
||||
sharp_make_healpix_geom_info (*gpar1, 1, ginfo);
|
||||
if (mytask==0) printf ("HEALPix grid, nside=%d\n",*gpar1);
|
||||
if (verbose) printf ("HEALPix grid, nside=%d\n",*gpar1);
|
||||
}
|
||||
else if (strcmp(gname,"gauss")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_gauss_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0)
|
||||
if (verbose)
|
||||
printf ("Gauss-Legendre grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"fejer1")==0)
|
||||
|
@ -275,21 +327,21 @@ static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
|
|||
if (*gpar1<1) *gpar1=2*lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_fejer1_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0) printf ("Fejer1 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
if (verbose) printf ("Fejer1 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"fejer2")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=2*lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_fejer2_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0) printf ("Fejer2 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
if (verbose) printf ("Fejer2 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"cc")==0)
|
||||
{
|
||||
if (*gpar1<1) *gpar1=2*lmax+1;
|
||||
if (*gpar2<1) *gpar2=2*(*mmax)+1;
|
||||
sharp_make_cc_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
|
||||
if (mytask==0)
|
||||
if (verbose)
|
||||
printf("Clenshaw-Curtis grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
|
||||
}
|
||||
else if (strcmp(gname,"smallgauss")==0)
|
||||
|
@ -319,7 +371,7 @@ static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
|
|||
ofs+=pring;
|
||||
}
|
||||
}
|
||||
if (mytask==0)
|
||||
if (verbose)
|
||||
{
|
||||
ptrdiff_t npix=get_npix(*ginfo);
|
||||
printf("Small Gauss grid, nlat=%d, npix=%ld, savings=%.2f%%\n",
|
||||
|
@ -358,97 +410,83 @@ static void check_sign_scale(void)
|
|||
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
|
||||
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
|
||||
|
||||
for (int ntrans=1; ntrans<10; ++ntrans)
|
||||
{
|
||||
double **map;
|
||||
ALLOC2D(map,double,2*ntrans,npix);
|
||||
double **map;
|
||||
ALLOC2D(map,double,2,npix);
|
||||
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,2*ntrans,nalms);
|
||||
for (int i=0; i<2*ntrans; ++i)
|
||||
for (int j=0; j<nalms; ++j)
|
||||
alm[i][j]=1.+_Complex_I;
|
||||
dcmplx **alm;
|
||||
ALLOC2D(alm,dcmplx,2,nalms);
|
||||
for (int i=0; i<2; ++i)
|
||||
for (int j=0; j<nalms; ++j)
|
||||
alm[i][j]=1.+_Complex_I;
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[it][0 ], 3.588246976618616912e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[it][npix/2], 4.042209792157496651e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[it][npix-1],-1.234675107554816442e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ], 2.750897760535633285e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2], 3.137704477368562905e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-8.405730859837063917e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-2.398026536095463346e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-4.961140548331700728e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.412765834230440021e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,SHARP_DP,
|
||||
NULL,NULL);
|
||||
UTIL_ASSERT(FAPPROX(map[0][0 ], 3.588246976618616912e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix/2], 4.042209792157496651e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.234675107554816442e+01,1e-12),
|
||||
"error");
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,ntrans,SHARP_DP,
|
||||
NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-1.398186224727334448e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.456676000884031197e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.516249174408820863e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-3.173406200299964119e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-5.831327404513146462e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1],-1.863257892248353897e+01,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,SHARP_DP,
|
||||
NULL,NULL);
|
||||
UTIL_ASSERT(FAPPROX(map[0][0 ], 2.750897760535633285e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix/2], 3.137704477368562905e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix-1],-8.405730859837063917e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][0 ],-2.398026536095463346e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix/2],-4.961140548331700728e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix-1],-1.412765834230440021e+01,1e-12),
|
||||
"error");
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||
SHARP_DP,NULL,NULL);
|
||||
for (int it=0; it<ntrans; ++it)
|
||||
{
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][0 ],-6.859393905369091105e-01,1e-11),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix/2],-2.103947835973212364e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it ][npix-1],-1.092463246472086439e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][0 ],-1.411433220713928165e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix/2],-1.146122859381925082e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[2*it+1][npix-1], 7.821618677689795049e+02,1e-12),
|
||||
"error");
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,SHARP_DP,
|
||||
NULL,NULL);
|
||||
UTIL_ASSERT(FAPPROX(map[0][0 ],-1.398186224727334448e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix/2],-2.456676000884031197e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.516249174408820863e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][0 ],-3.173406200299964119e+00,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix/2],-5.831327404513146462e+01,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix-1],-1.863257892248353897e+01,1e-12),
|
||||
"error");
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
}
|
||||
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,
|
||||
SHARP_DP,NULL,NULL);
|
||||
UTIL_ASSERT(FAPPROX(map[0][0 ],-6.859393905369091105e-01,1e-11),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix/2],-2.103947835973212364e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.092463246472086439e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][0 ],-1.411433220713928165e+02,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix/2],-1.146122859381925082e+03,1e-12),
|
||||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix-1], 7.821618677689795049e+02,1e-12),
|
||||
"error");
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
|
||||
sharp_destroy_alm_info(alms);
|
||||
sharp_destroy_geom_info(tinfo);
|
||||
}
|
||||
|
||||
static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
||||
int spin, int ntrans, int nv, double **err_abs, double **err_rel,
|
||||
int spin, double **err_abs, double **err_rel,
|
||||
double *t_a2m, double *t_m2a, unsigned long long *op_a2m,
|
||||
unsigned long long *op_m2a)
|
||||
{
|
||||
ptrdiff_t nalms = get_nalms(ainfo);
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
int ncomp = (spin==0) ? 1 : 2;
|
||||
|
||||
size_t npix = get_npix(ginfo);
|
||||
double **map;
|
||||
|
@ -463,20 +501,20 @@ static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
|||
|
||||
#ifdef USE_MPI
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,
|
||||
ainfo,ntrans, SHARP_DP|SHARP_ADD|nv,t_a2m,op_a2m);
|
||||
ainfo, SHARP_DP|SHARP_ADD,t_a2m,op_a2m);
|
||||
#else
|
||||
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,ainfo,ntrans,
|
||||
SHARP_DP|nv,t_a2m,op_a2m);
|
||||
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,ainfo,
|
||||
SHARP_DP,t_a2m,op_a2m);
|
||||
#endif
|
||||
if (t_a2m!=NULL) *t_a2m=maxTime(*t_a2m);
|
||||
if (op_a2m!=NULL) *op_a2m=totalops(*op_a2m);
|
||||
double *sqsum=get_sqsum_and_invert(alm,nalms,ncomp);
|
||||
#ifdef USE_MPI
|
||||
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,
|
||||
ainfo,ntrans,SHARP_DP|SHARP_ADD|nv,t_m2a,op_m2a);
|
||||
ainfo,SHARP_DP|SHARP_ADD,t_m2a,op_m2a);
|
||||
#else
|
||||
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,ainfo,ntrans,
|
||||
SHARP_DP|SHARP_ADD|nv,t_m2a,op_m2a);
|
||||
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,ainfo,
|
||||
SHARP_DP|SHARP_ADD,t_m2a,op_m2a);
|
||||
#endif
|
||||
if (t_m2a!=NULL) *t_m2a=maxTime(*t_m2a);
|
||||
if (op_m2a!=NULL) *op_m2a=totalops(*op_m2a);
|
||||
|
@ -488,11 +526,11 @@ static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
|||
}
|
||||
|
||||
static void check_accuracy (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
||||
int spin, int ntrans, int nv)
|
||||
int spin)
|
||||
{
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
int ncomp = (spin==0) ? 1 : 2;
|
||||
double *err_abs, *err_rel;
|
||||
do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel, NULL, NULL,
|
||||
do_sht (ginfo, ainfo, spin, &err_abs, &err_rel, NULL, NULL,
|
||||
NULL, NULL);
|
||||
for (int i=0; i<ncomp; ++i)
|
||||
UTIL_ASSERT((err_rel[i]<1e-10) && (err_abs[i]<1e-10),"error");
|
||||
|
@ -500,6 +538,16 @@ static void check_accuracy (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
|
|||
DEALLOC(err_abs);
|
||||
}
|
||||
|
||||
static void run(int lmax, int mmax, int nlat, int nlon, int spin)
|
||||
{
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
get_infos ("gauss", lmax, &mmax, &nlat, &nlon, &ginfo, &ainfo, 0);
|
||||
check_accuracy(ginfo,ainfo,spin);
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
}
|
||||
|
||||
static void sharp_acctest(void)
|
||||
{
|
||||
if (mytask==0) sharp_module_startup("sharp_acctest",1,1,"",1);
|
||||
|
@ -510,43 +558,36 @@ static void sharp_acctest(void)
|
|||
|
||||
if (mytask==0) printf("Testing map analysis accuracy.\n");
|
||||
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
int lmax=127, mmax=127, nlat=128, nlon=256;
|
||||
get_infos ("gauss", lmax, &mmax, &nlat, &nlon, &ginfo, &ainfo);
|
||||
for (int nv=1; nv<=6; ++nv)
|
||||
for (int ntrans=1; ntrans<=6; ++ntrans)
|
||||
{
|
||||
check_accuracy(ginfo,ainfo,0,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,1,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,2,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,3,ntrans,nv);
|
||||
check_accuracy(ginfo,ainfo,30,ntrans,nv);
|
||||
}
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
run(127, 127, 128, 256, 0);
|
||||
run(127, 127, 128, 256, 1);
|
||||
run(127, 127, 128, 256, 2);
|
||||
run(127, 127, 128, 256, 3);
|
||||
run(127, 127, 128, 256, 30);
|
||||
run(5, 0, 6, 1, 0);
|
||||
run(5, 0, 7, 2, 0);
|
||||
run(8, 8, 9, 17, 0);
|
||||
run(8, 8, 9, 17, 2);
|
||||
if (mytask==0) printf("Passed.\n\n");
|
||||
}
|
||||
|
||||
static void sharp_test (int argc, const char **argv)
|
||||
{
|
||||
if (mytask==0) sharp_announce("sharp_test");
|
||||
UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans");
|
||||
UTIL_ASSERT(argc>=8,"usage: grid lmax mmax geom1 geom2 spin");
|
||||
int lmax=atoi(argv[3]);
|
||||
int mmax=atoi(argv[4]);
|
||||
int gpar1=atoi(argv[5]);
|
||||
int gpar2=atoi(argv[6]);
|
||||
int spin=atoi(argv[7]);
|
||||
int ntrans=atoi(argv[8]);
|
||||
|
||||
if (mytask==0) printf("Testing map analysis accuracy.\n");
|
||||
if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans);
|
||||
if (mytask==0) printf("spin=%d\n", spin);
|
||||
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo);
|
||||
get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo, 1);
|
||||
|
||||
int ncomp = ntrans*((spin==0) ? 1 : 2);
|
||||
int ncomp = (spin==0) ? 1 : 2;
|
||||
double t_a2m=1e30, t_m2a=1e30;
|
||||
unsigned long long op_a2m, op_m2a;
|
||||
double *err_abs,*err_rel;
|
||||
|
@ -557,7 +598,7 @@ static void sharp_test (int argc, const char **argv)
|
|||
{
|
||||
++nrpt;
|
||||
double ta2m2, tm2a2;
|
||||
do_sht (ginfo, ainfo, spin, ntrans, 0, &err_abs, &err_rel, &ta2m2, &tm2a2,
|
||||
do_sht (ginfo, ainfo, spin, &err_abs, &err_rel, &ta2m2, &tm2a2,
|
||||
&op_a2m, &op_m2a);
|
||||
if (ta2m2<t_a2m) t_a2m=ta2m2;
|
||||
if (tm2a2<t_m2a) t_m2a=tm2a2;
|
||||
|
@ -607,79 +648,16 @@ static void sharp_test (int argc, const char **argv)
|
|||
}
|
||||
|
||||
if (mytask==0)
|
||||
printf("%-12s %-10s %2d %d %2d %3d %6d %6d %6d %6d %2d %.2e %7.2f %.2e %7.2f"
|
||||
printf("%-12s %-10s %2d %d %2d %3d %6d %6d %6d %6d %.2e %7.2f %.2e %7.2f"
|
||||
" %9.2f %6.2f %.2e %.2e\n",
|
||||
getenv("HOST"),argv[2],spin,VLEN,nomp,ntasks,lmax,mmax,gpar1,gpar2,
|
||||
ntrans,t_a2m,1e-9*op_a2m/t_a2m,t_m2a,1e-9*op_m2a/t_m2a,tmem/(1<<20),
|
||||
getenv("HOST"),argv[2],spin,sharp_veclen(),nomp,ntasks,lmax,mmax,gpar1,gpar2,
|
||||
t_a2m,1e-9*op_a2m/t_a2m,t_m2a,1e-9*op_m2a/t_m2a,tmem/(1<<20),
|
||||
100.*(1.-iosize/tmem),maxerel,maxeabs);
|
||||
|
||||
DEALLOC(err_abs);
|
||||
DEALLOC(err_rel);
|
||||
}
|
||||
|
||||
static void sharp_bench (int argc, const char **argv)
|
||||
{
|
||||
if (mytask==0) sharp_announce("sharp_bench");
|
||||
UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans");
|
||||
int lmax=atoi(argv[3]);
|
||||
int mmax=atoi(argv[4]);
|
||||
int gpar1=atoi(argv[5]);
|
||||
int gpar2=atoi(argv[6]);
|
||||
int spin=atoi(argv[7]);
|
||||
int ntrans=atoi(argv[8]);
|
||||
|
||||
if (mytask==0) printf("Testing map analysis accuracy.\n");
|
||||
if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans);
|
||||
|
||||
sharp_geom_info *ginfo;
|
||||
sharp_alm_info *ainfo;
|
||||
get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo);
|
||||
|
||||
double ta2m_auto=1e30, tm2a_auto=1e30, ta2m_min=1e30, tm2a_min=1e30;
|
||||
unsigned long long opa2m_min=0, opm2a_min=0;
|
||||
int nvmin_a2m=-1, nvmin_m2a=-1;
|
||||
for (int nv=0; nv<=6; ++nv)
|
||||
{
|
||||
int ntries=0;
|
||||
double tacc=0;
|
||||
do
|
||||
{
|
||||
double t_a2m, t_m2a;
|
||||
unsigned long long op_a2m, op_m2a;
|
||||
double *err_abs,*err_rel;
|
||||
do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel,
|
||||
&t_a2m, &t_m2a, &op_a2m, &op_m2a);
|
||||
|
||||
DEALLOC(err_abs);
|
||||
DEALLOC(err_rel);
|
||||
tacc+=t_a2m+t_m2a;
|
||||
++ntries;
|
||||
if (nv==0)
|
||||
{
|
||||
if (t_a2m<ta2m_auto) ta2m_auto=t_a2m;
|
||||
if (t_m2a<tm2a_auto) tm2a_auto=t_m2a;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (t_a2m<ta2m_min) { nvmin_a2m=nv; ta2m_min=t_a2m; opa2m_min=op_a2m; }
|
||||
if (t_m2a<tm2a_min) { nvmin_m2a=nv; tm2a_min=t_m2a; opm2a_min=op_m2a; }
|
||||
}
|
||||
} while((ntries<2)||(tacc<3.));
|
||||
}
|
||||
if (mytask==0)
|
||||
{
|
||||
printf("a2m: nvmin=%d tmin=%fs speedup=%.2f%% perf=%.2fGFlops/s\n",
|
||||
nvmin_a2m,ta2m_min,100.*(ta2m_auto-ta2m_min)/ta2m_auto,
|
||||
1e-9*opa2m_min/ta2m_min);
|
||||
printf("m2a: nvmin=%d tmin=%fs speedup=%.2f%% perf=%.2fGFlops/s\n",
|
||||
nvmin_m2a,tm2a_min,100.*(tm2a_auto-tm2a_min)/tm2a_auto,
|
||||
1e-9*opm2a_min/tm2a_min);
|
||||
}
|
||||
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
|
@ -696,8 +674,6 @@ int main(int argc, const char **argv)
|
|||
sharp_acctest();
|
||||
else if (strcmp(argv[1],"test")==0)
|
||||
sharp_test(argc,argv);
|
||||
else if (strcmp(argv[1],"bench")==0)
|
||||
sharp_bench(argc,argv);
|
||||
else
|
||||
UTIL_FAIL("unknown command");
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/* \file sharp_vecsupport.h
|
||||
* Convenience functions for vector arithmetics
|
||||
*
|
||||
* Copyright (C) 2012,2013 Max-Planck-Society
|
||||
* Copyright (C) 2012-2019 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
|
@ -33,38 +33,36 @@
|
|||
#define SHARP_VECSUPPORT_H
|
||||
|
||||
#include <math.h>
|
||||
#include "sharp_vecutil.h"
|
||||
|
||||
#ifndef VLEN
|
||||
|
||||
#if (defined(__AVX512F__))
|
||||
#define VLEN 8
|
||||
#elif (defined (__AVX__))
|
||||
#define VLEN 4
|
||||
#elif (defined (__SSE2__))
|
||||
#define VLEN 2
|
||||
#else
|
||||
#define VLEN 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
typedef double Ts;
|
||||
|
||||
#if (VLEN==1)
|
||||
|
||||
typedef double Tv;
|
||||
typedef float Tv_s;
|
||||
typedef int Tm;
|
||||
|
||||
#define vadd(a,b) ((a)+(b))
|
||||
#define vadd_s(a,b) ((a)+(b))
|
||||
#define vaddeq(a,b) ((a)+=(b))
|
||||
#define vaddeq_mask(mask,a,b) if (mask) (a)+=(b);
|
||||
#define vsub(a,b) ((a)-(b))
|
||||
#define vsub_s(a,b) ((a)-(b))
|
||||
#define vsubeq(a,b) ((a)-=(b))
|
||||
#define vsubeq_mask(mask,a,b) if (mask) (a)-=(b);
|
||||
#define vmul(a,b) ((a)*(b))
|
||||
#define vmul_s(a,b) ((a)*(b))
|
||||
#define vmuleq(a,b) ((a)*=(b))
|
||||
#define vmuleq_mask(mask,a,b) if (mask) (a)*=(b);
|
||||
#define vfmaeq(a,b,c) ((a)+=(b)*(c))
|
||||
#define vfmaeq_s(a,b,c) ((a)+=(b)*(c))
|
||||
#define vfmseq(a,b,c) ((a)-=(b)*(c))
|
||||
#define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
|
||||
#define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
|
||||
#define vneg(a) (-(a))
|
||||
#define vload(a) (a)
|
||||
#define vload_s(a) (a)
|
||||
#define vloadu(p) (*(p))
|
||||
#define vloadu_s(p) (*(p))
|
||||
#define vzero 0.
|
||||
#define vone 1.
|
||||
|
||||
#define vaddeq_mask(mask,a,b) if (mask) (a)+=(b);
|
||||
#define vsubeq_mask(mask,a,b) if (mask) (a)-=(b);
|
||||
#define vmuleq_mask(mask,a,b) if (mask) (a)*=(b);
|
||||
#define vneg(a) (-(a))
|
||||
#define vabs(a) fabs(a)
|
||||
#define vsqrt(a) sqrt(a)
|
||||
#define vlt(a,b) ((a)<(b))
|
||||
|
@ -72,16 +70,16 @@ typedef int Tm;
|
|||
#define vge(a,b) ((a)>=(b))
|
||||
#define vne(a,b) ((a)!=(b))
|
||||
#define vand_mask(a,b) ((a)&&(b))
|
||||
#define vstoreu(p, a) (*(p)=a)
|
||||
#define vstoreu_s(p, a) (*(p)=a)
|
||||
|
||||
#define vor_mask(a,b) ((a)||(b))
|
||||
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
|
||||
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
||||
|
||||
#define vanyTrue(a) (a)
|
||||
#define vallTrue(a) (a)
|
||||
#define vzero 0.
|
||||
#define vone 1.
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
_Complex double * restrict cc)
|
||||
{ cc[0] += a+_Complex_I*b; cc[1] += c+_Complex_I*d; }
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -97,7 +95,6 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
|||
#endif
|
||||
|
||||
typedef __m128d Tv;
|
||||
typedef __m128 Tv_s;
|
||||
typedef __m128d Tm;
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
|
@ -106,110 +103,84 @@ typedef __m128d Tm;
|
|||
static inline Tv vblend__(Tv m, Tv a, Tv b)
|
||||
{ return _mm_or_pd(_mm_and_pd(a,m),_mm_andnot_pd(m,b)); }
|
||||
#endif
|
||||
#define vzero _mm_setzero_pd()
|
||||
#define vone _mm_set1_pd(1.)
|
||||
|
||||
#define vadd(a,b) _mm_add_pd(a,b)
|
||||
#define vadd_s(a,b) _mm_add_ps(a,b)
|
||||
#define vaddeq(a,b) a=_mm_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm_add_pd(a,vblend__(mask,b,vzero))
|
||||
#define vsub(a,b) _mm_sub_pd(a,b)
|
||||
#define vsub_s(a,b) _mm_sub_ps(a,b)
|
||||
#define vsubeq(a,b) a=_mm_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm_sub_pd(a,vblend__(mask,b,vzero))
|
||||
#define vmul(a,b) _mm_mul_pd(a,b)
|
||||
#define vmul_s(a,b) _mm_mul_ps(a,b)
|
||||
#define vmuleq(a,b) a=_mm_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm_mul_pd(a,vblend__(mask,b,vone))
|
||||
#define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
|
||||
#define vfmaeq_s(a,b,c) a=_mm_add_ps(a,_mm_mul_ps(b,c))
|
||||
#define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
|
||||
#define vfmaaeq(a,b,c,d,e) \
|
||||
a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||
#define vfmaseq(a,b,c,d,e) \
|
||||
a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||
#define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
|
||||
#define vload(a) _mm_set1_pd(a)
|
||||
#define vload_s(a) _mm_set1_ps(a)
|
||||
#define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
|
||||
#define vzero _mm_setzero_pd()
|
||||
#define vone vload(1.)
|
||||
|
||||
#define vaddeq_mask(mask,a,b) a+=vblend__(mask,b,vzero)
|
||||
#define vsubeq_mask(mask,a,b) a-=vblend__(mask,b,vzero)
|
||||
#define vmuleq_mask(mask,a,b) a*=vblend__(mask,b,vone)
|
||||
#define vneg(a) _mm_xor_pd(vload(-0.),a)
|
||||
#define vabs(a) _mm_andnot_pd(vload(-0.),a)
|
||||
#define vsqrt(a) _mm_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm_cmplt_pd(a,b)
|
||||
#define vgt(a,b) _mm_cmpgt_pd(a,b)
|
||||
#define vge(a,b) _mm_cmpge_pd(a,b)
|
||||
#define vne(a,b) _mm_cmpneq_pd(a,b)
|
||||
#define vand_mask(a,b) _mm_and_pd(a,b)
|
||||
#define vor_mask(a,b) _mm_or_pd(a,b)
|
||||
#define vmin(a,b) _mm_min_pd(a,b)
|
||||
#define vmax(a,b) _mm_max_pd(a,b);
|
||||
#define vanyTrue(a) (_mm_movemask_pd(a)!=0)
|
||||
#define vallTrue(a) (_mm_movemask_pd(a)==3)
|
||||
#define vloadu(p) _mm_loadu_pd(p)
|
||||
#define vloadu_s(p) _mm_loadu_ps(p)
|
||||
#define vstoreu(p, v) _mm_storeu_pd(p, v)
|
||||
#define vstoreu_s(p, v) _mm_storeu_ps(p, v)
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c,
|
||||
Tv d, _Complex double * restrict cc)
|
||||
{
|
||||
union {Tv v; _Complex double c; } u1, u2;
|
||||
#if defined(__SSE3__)
|
||||
u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
|
||||
#else
|
||||
u1.v = _mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)) +
|
||||
_mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0));
|
||||
u2.v = _mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)) +
|
||||
_mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0));
|
||||
#endif
|
||||
cc[0]+=u1.c; cc[1]+=u2.c;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==4)
|
||||
|
||||
#include <immintrin.h>
|
||||
#if (USE_FMA4)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
typedef __m256d Tv;
|
||||
typedef __m256 Tv_s;
|
||||
typedef __m256d Tm;
|
||||
|
||||
#define vblend__(m,a,b) _mm256_blendv_pd(b,a,m)
|
||||
#define vzero _mm256_setzero_pd()
|
||||
#define vone _mm256_set1_pd(1.)
|
||||
|
||||
#define vadd(a,b) _mm256_add_pd(a,b)
|
||||
#define vadd_s(a,b) _mm256_add_ps(a,b)
|
||||
#define vaddeq(a,b) a=_mm256_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm256_add_pd(a,vblend__(mask,b,vzero))
|
||||
#define vsub(a,b) _mm256_sub_pd(a,b)
|
||||
#define vsub_s(a,b) _mm256_sub_ps(a,b)
|
||||
#define vsubeq(a,b) a=_mm256_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm256_sub_pd(a,vblend__(mask,b,vzero))
|
||||
#define vmul(a,b) _mm256_mul_pd(a,b)
|
||||
#define vmul_s(a,b) _mm256_mul_ps(a,b)
|
||||
#define vmuleq(a,b) a=_mm256_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm256_mul_pd(a,vblend__(mask,b,vone))
|
||||
#if (USE_FMA4)
|
||||
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
|
||||
#define vfmaeq_s(a,b,c) a=_mm256_macc_ps(b,c,a)
|
||||
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
|
||||
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#else
|
||||
#define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
|
||||
#define vfmaeq_s(a,b,c) a=_mm256_add_ps(a,_mm256_mul_ps(b,c))
|
||||
#define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
|
||||
#define vfmaaeq(a,b,c,d,e) \
|
||||
a=_mm256_add_pd(a,_mm256_add_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
|
||||
#define vfmaseq(a,b,c,d,e) \
|
||||
a=_mm256_add_pd(a,_mm256_sub_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
|
||||
#endif
|
||||
#define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
|
||||
#define vload(a) _mm256_set1_pd(a)
|
||||
#define vload_s(a) _mm256_set1_ps(a)
|
||||
#define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
|
||||
#define vzero _mm256_setzero_pd()
|
||||
#define vone vload(1.)
|
||||
|
||||
#define vaddeq_mask(mask,a,b) a+=vblend__(mask,b,vzero)
|
||||
#define vsubeq_mask(mask,a,b) a-=vblend__(mask,b,vzero)
|
||||
#define vmuleq_mask(mask,a,b) a*=vblend__(mask,b,vone)
|
||||
#define vneg(a) _mm256_xor_pd(vload(-0.),a)
|
||||
#define vabs(a) _mm256_andnot_pd(vload(-0.),a)
|
||||
#define vsqrt(a) _mm256_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
|
||||
#define vgt(a,b) _mm256_cmp_pd(a,b,_CMP_GT_OQ)
|
||||
#define vge(a,b) _mm256_cmp_pd(a,b,_CMP_GE_OQ)
|
||||
#define vne(a,b) _mm256_cmp_pd(a,b,_CMP_NEQ_OQ)
|
||||
#define vand_mask(a,b) _mm256_and_pd(a,b)
|
||||
#define vor_mask(a,b) _mm256_or_pd(a,b)
|
||||
#define vmin(a,b) _mm256_min_pd(a,b)
|
||||
#define vmax(a,b) _mm256_max_pd(a,b)
|
||||
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
|
||||
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
|
||||
|
||||
#define vloadu(p) _mm256_loadu_pd(p)
|
||||
#define vloadu_s(p) _mm256_loadu_ps(p)
|
||||
#define vstoreu(p, v) _mm256_storeu_pd(p, v)
|
||||
#define vstoreu_s(p, v) _mm256_storeu_ps(p, v)
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
_Complex double * restrict cc)
|
||||
{
|
||||
Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
|
||||
Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
|
||||
tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
|
||||
tmp1=tmp3+tmp4;
|
||||
union {Tv v; _Complex double c[2]; } u;
|
||||
u.v=tmp1;
|
||||
cc[0]+=u.c[0]; cc[1]+=u.c[1];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -220,35 +191,33 @@ typedef __m256d Tm;
|
|||
typedef __m512d Tv;
|
||||
typedef __mmask8 Tm;
|
||||
|
||||
#define vadd(a,b) _mm512_add_pd(a,b)
|
||||
#define vaddeq(a,b) a=_mm512_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm512_mask_add_pd(a,mask,a,b);
|
||||
#define vsub(a,b) _mm512_sub_pd(a,b)
|
||||
#define vsubeq(a,b) a=_mm512_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm512_mask_sub_pd(a,mask,a,b);
|
||||
#define vmul(a,b) _mm512_mul_pd(a,b)
|
||||
#define vmuleq(a,b) a=_mm512_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm512_mask_mul_pd(a,mask,a,b);
|
||||
#define vfmaeq(a,b,c) a=_mm512_fmadd_pd(b,c,a)
|
||||
#define vfmseq(a,b,c) a=_mm512_fnmadd_pd(b,c,a)
|
||||
#define vfmaaeq(a,b,c,d,e) a=_mm512_fmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) a=_mm512_fnmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
|
||||
#define vneg(a) _mm512_mul_pd(a,_mm512_set1_pd(-1.))
|
||||
#define vload(a) _mm512_set1_pd(a)
|
||||
#define vabs(a) (__m512d)_mm512_andnot_epi64((__m512i)_mm512_set1_pd(-0.),(__m512i)a)
|
||||
#define vzero _mm512_setzero_pd()
|
||||
#define vone vload(1.)
|
||||
|
||||
#define vaddeq_mask(mask,a,b) a=_mm512_mask_add_pd(a,mask,a,b);
|
||||
#define vsubeq_mask(mask,a,b) a=_mm512_mask_sub_pd(a,mask,a,b);
|
||||
#define vmuleq_mask(mask,a,b) a=_mm512_mask_mul_pd(a,mask,a,b);
|
||||
#define vneg(a) _mm512_mul_pd(a,vload(-1.))
|
||||
#define vabs(a) (__m512d)_mm512_andnot_epi64((__m512i)vload(-0.),(__m512i)a)
|
||||
#define vsqrt(a) _mm512_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm512_cmplt_pd_mask(a,b)
|
||||
#define vgt(a,b) _mm512_cmpnle_pd_mask(a,b)
|
||||
#define vge(a,b) _mm512_cmpnlt_pd_mask(a,b)
|
||||
#define vne(a,b) _mm512_cmpneq_pd_mask(a,b)
|
||||
#define vlt(a,b) _mm512_cmp_pd_mask(a,b,_CMP_LT_OQ)
|
||||
#define vgt(a,b) _mm512_cmp_pd_mask(a,b,_CMP_GT_OQ)
|
||||
#define vge(a,b) _mm512_cmp_pd_mask(a,b,_CMP_GE_OQ)
|
||||
#define vne(a,b) _mm512_cmp_pd_mask(a,b,_CMP_NEQ_OQ)
|
||||
#define vand_mask(a,b) ((a)&(b))
|
||||
#define vor_mask(a,b) ((a)|(b))
|
||||
#define vmin(a,b) _mm512_min_pd(a,b)
|
||||
#define vmax(a,b) _mm512_max_pd(a,b)
|
||||
#define vanyTrue(a) (a!=0)
|
||||
#define vallTrue(a) (a==255)
|
||||
|
||||
#define vzero _mm512_setzero_pd()
|
||||
#define vone _mm512_set1_pd(1.)
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
_Complex double * restrict cc)
|
||||
{
|
||||
cc[0] += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b);
|
||||
cc[1] += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1,63 +0,0 @@
|
|||
/*
|
||||
* This file is part of libc_utils.
|
||||
*
|
||||
* libc_utils is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libc_utils is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libc_utils; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_vecutil.h
|
||||
* Functionality related to vector instruction support
|
||||
*
|
||||
* Copyright (C) 2012,2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef SHARP_VECUTIL_H
|
||||
#define SHARP_VECUTIL_H
|
||||
|
||||
#ifndef VLEN
|
||||
|
||||
#if (defined (__MIC__))
|
||||
#define VLEN 8
|
||||
#elif (defined (__AVX__))
|
||||
#define VLEN 4
|
||||
#elif (defined (__SSE2__))
|
||||
#define VLEN 2
|
||||
#else
|
||||
#define VLEN 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==1)
|
||||
#define VLEN_s 1
|
||||
#else
|
||||
#define VLEN_s (2*VLEN)
|
||||
#endif
|
||||
|
||||
#ifndef USE_FMA4
|
||||
#ifdef __FMA4__
|
||||
#define USE_FMA4 1
|
||||
#else
|
||||
#define USE_FMA4 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -25,7 +25,7 @@
|
|||
/*
|
||||
* Helper code for efficient calculation of Y_lm(theta,phi=0)
|
||||
*
|
||||
* Copyright (C) 2005-2014 Max-Planck-Society
|
||||
* Copyright (C) 2005-2019 Max-Planck-Society
|
||||
* Author: Martin Reinecke
|
||||
*/
|
||||
|
||||
|
@ -59,35 +59,44 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
|
|||
gen->cf[m]=gen->cf[m+1]*sharp_fsmall;
|
||||
for (int m=-sharp_minscale+1; m<(sharp_maxscale-sharp_minscale+1); ++m)
|
||||
gen->cf[m]=gen->cf[m-1]*sharp_fbig;
|
||||
gen->powlimit=RALLOC(double,m_max+spin+1);
|
||||
gen->powlimit[0]=0.;
|
||||
const double ln2 = 0.6931471805599453094172321214581766;
|
||||
const double expo=-400*ln2;
|
||||
for (int m=1; m<=m_max+spin; ++m)
|
||||
gen->powlimit[m]=exp(expo/m);
|
||||
|
||||
gen->m = -1;
|
||||
if (spin==0)
|
||||
{
|
||||
gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+1);
|
||||
gen->mfac = RALLOC(double,gen->mmax+1);
|
||||
gen->mfac[0] = inv_sqrt4pi;
|
||||
for (int m=1; m<=gen->mmax; ++m)
|
||||
gen->mfac[m] = gen->mfac[m-1]*sqrt((2*m+1.)/(2*m));
|
||||
gen->root = RALLOC(double,2*gen->lmax+5);
|
||||
gen->iroot = RALLOC(double,2*gen->lmax+5);
|
||||
for (int m=0; m<2*gen->lmax+5; ++m)
|
||||
gen->root = RALLOC(double,2*gen->lmax+8);
|
||||
gen->iroot = RALLOC(double,2*gen->lmax+8);
|
||||
for (int m=0; m<2*gen->lmax+8; ++m)
|
||||
{
|
||||
gen->root[m] = sqrt(m);
|
||||
gen->iroot[m] = (m==0) ? 0. : 1./gen->root[m];
|
||||
}
|
||||
gen->eps=RALLOC(double, gen->lmax+4);
|
||||
gen->alpha=RALLOC(double, gen->lmax/2+2);
|
||||
gen->coef=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+2);
|
||||
}
|
||||
else
|
||||
{
|
||||
gen->m=gen->mlo=gen->mhi=-1234567890;
|
||||
ALLOC(gen->fx,sharp_ylmgen_dbl3,gen->lmax+2);
|
||||
for (int m=0; m<gen->lmax+2; ++m)
|
||||
gen->fx[m].f[0]=gen->fx[m].f[1]=gen->fx[m].f[2]=0.;
|
||||
ALLOC(gen->inv,double,gen->lmax+1);
|
||||
ALLOC(gen->coef,sharp_ylmgen_dbl2,gen->lmax+3);
|
||||
for (int m=0; m<gen->lmax+3; ++m)
|
||||
gen->coef[m].a=gen->coef[m].b=0.;
|
||||
ALLOC(gen->alpha,double,gen->lmax+3);
|
||||
ALLOC(gen->inv,double,gen->lmax+2);
|
||||
gen->inv[0]=0;
|
||||
for (int m=1; m<gen->lmax+1; ++m) gen->inv[m]=1./m;
|
||||
ALLOC(gen->flm1,double,2*gen->lmax+1);
|
||||
ALLOC(gen->flm2,double,2*gen->lmax+1);
|
||||
for (int m=0; m<2*gen->lmax+1; ++m)
|
||||
for (int m=1; m<gen->lmax+2; ++m) gen->inv[m]=1./m;
|
||||
ALLOC(gen->flm1,double,2*gen->lmax+3);
|
||||
ALLOC(gen->flm2,double,2*gen->lmax+3);
|
||||
for (int m=0; m<2*gen->lmax+3; ++m)
|
||||
{
|
||||
gen->flm1[m] = sqrt(1./(m+1.));
|
||||
gen->flm2[m] = sqrt(m/(m+1.));
|
||||
|
@ -124,16 +133,18 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
|
|||
void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen)
|
||||
{
|
||||
DEALLOC(gen->cf);
|
||||
DEALLOC(gen->powlimit);
|
||||
DEALLOC(gen->alpha);
|
||||
DEALLOC(gen->coef);
|
||||
if (gen->s==0)
|
||||
{
|
||||
DEALLOC(gen->rf);
|
||||
DEALLOC(gen->mfac);
|
||||
DEALLOC(gen->root);
|
||||
DEALLOC(gen->iroot);
|
||||
DEALLOC(gen->eps);
|
||||
}
|
||||
else
|
||||
{
|
||||
DEALLOC(gen->fx);
|
||||
DEALLOC(gen->prefac);
|
||||
DEALLOC(gen->fscale);
|
||||
DEALLOC(gen->flm1);
|
||||
|
@ -150,13 +161,20 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
|
|||
|
||||
if (gen->s==0)
|
||||
{
|
||||
gen->rf[m].f[0] = gen->root[2*m+3];
|
||||
gen->rf[m].f[1] = 0.;
|
||||
for (int l=m+1; l<=gen->lmax; ++l)
|
||||
gen->eps[m] = 0.;
|
||||
for (int l=m+1; l<gen->lmax+4; ++l)
|
||||
gen->eps[l] = gen->root[l+m]*gen->root[l-m]
|
||||
*gen->iroot[2*l+1]*gen->iroot[2*l-1];
|
||||
gen->alpha[0] = 1./gen->eps[m+1];
|
||||
gen->alpha[1] = gen->eps[m+1]/(gen->eps[m+2]*gen->eps[m+3]);
|
||||
for (int il=1, l=m+2; l<gen->lmax+1; ++il, l+=2)
|
||||
gen->alpha[il+1]= ((il&1) ? -1 : 1)
|
||||
/(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]);
|
||||
for (int il=0, l=m; l<gen->lmax+2; ++il, l+=2)
|
||||
{
|
||||
double tmp=gen->root[2*l+3]*gen->iroot[l+1+m]*gen->iroot[l+1-m];
|
||||
gen->rf[l].f[0] = tmp*gen->root[2*l+1];
|
||||
gen->rf[l].f[1] = tmp*gen->root[l+m]*gen->root[l-m]*gen->iroot[2*l-1];
|
||||
gen->coef[il].a = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il];
|
||||
double t1 = gen->eps[l+2], t2 = gen->eps[l+1];
|
||||
gen->coef[il].b = -gen->coef[il].a*(t1*t1+t2*t2);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -169,17 +187,25 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m)
|
|||
|
||||
if (!ms_similar)
|
||||
{
|
||||
for (int l=gen->mhi; l<gen->lmax; ++l)
|
||||
gen->alpha[gen->mhi] = 1.;
|
||||
gen->coef[gen->mhi].a = gen->coef[gen->mhi].b = 0.;
|
||||
for (int l=gen->mhi; l<gen->lmax+1; ++l)
|
||||
{
|
||||
double t = gen->flm1[l+gen->m]*gen->flm1[l-gen->m]
|
||||
*gen->flm1[l+gen->s]*gen->flm1[l-gen->s];
|
||||
double lt = 2*l+1;
|
||||
double l1 = l+1;
|
||||
gen->fx[l+1].f[0]=l1*lt*t;
|
||||
gen->fx[l+1].f[1]=gen->m*gen->s*gen->inv[l]*gen->inv[l+1];
|
||||
double flp10=l1*lt*t;
|
||||
double flp11=gen->m*gen->s*gen->inv[l]*gen->inv[l+1];
|
||||
t = gen->flm2[l+gen->m]*gen->flm2[l-gen->m]
|
||||
*gen->flm2[l+gen->s]*gen->flm2[l-gen->s];
|
||||
gen->fx[l+1].f[2]=t*l1*gen->inv[l];
|
||||
double flp12=t*l1*gen->inv[l];
|
||||
if (l>gen->mhi)
|
||||
gen->alpha[l+1] = gen->alpha[l-1]*flp12;
|
||||
else
|
||||
gen->alpha[l+1] = 1.;
|
||||
gen->coef[l+1].a = flp10*gen->alpha[l]/gen->alpha[l+1];
|
||||
gen->coef[l+1].b = flp11*gen->coef[l+1].a;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
/*! \file sharp_ylmgen_c.h
|
||||
* Code for efficient calculation of Y_lm(phi=0,theta)
|
||||
*
|
||||
* Copyright (C) 2005-2012 Max-Planck-Society
|
||||
* Copyright (C) 2005-2019 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
|
@ -41,27 +41,28 @@ static const double sharp_fbig=0x1p+800,sharp_fsmall=0x1p-800;
|
|||
static const double sharp_ftol=0x1p-60;
|
||||
static const double sharp_fbighalf=0x1p+400;
|
||||
|
||||
typedef struct { double f[2]; } sharp_ylmgen_dbl2;
|
||||
typedef struct { double f[3]; } sharp_ylmgen_dbl3;
|
||||
typedef struct { double a, b; } sharp_ylmgen_dbl2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* for public use; immutable during lifetime */
|
||||
int lmax, mmax, s;
|
||||
double *cf;
|
||||
double *powlimit;
|
||||
|
||||
/* for public use; will typically change after call to Ylmgen_prepare() */
|
||||
int m;
|
||||
|
||||
double *alpha;
|
||||
sharp_ylmgen_dbl2 *coef;
|
||||
|
||||
/* used if s==0 */
|
||||
double *mfac;
|
||||
sharp_ylmgen_dbl2 *rf;
|
||||
double *mfac, *eps;
|
||||
|
||||
/* used if s!=0 */
|
||||
int sinPow, cosPow, preMinus_p, preMinus_m;
|
||||
double *prefac;
|
||||
int *fscale;
|
||||
sharp_ylmgen_dbl3 *fx;
|
||||
|
||||
/* internal usage only */
|
||||
/* used if s==0 */
|
||||
|
|
2190
pocketfft/pocketfft.c
Normal file
2190
pocketfft/pocketfft.c
Normal file
File diff suppressed because it is too large
Load diff
34
pocketfft/pocketfft.h
Normal file
34
pocketfft/pocketfft.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* This file is part of pocketfft.
|
||||
* Licensed under a 3-clause BSD style license - see LICENSE.md
|
||||
*/
|
||||
|
||||
/*! \file pocketfft.h
|
||||
* Public interface of the pocketfft library
|
||||
*
|
||||
* Copyright (C) 2008-2018 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef POCKETFFT_H
|
||||
#define POCKETFFT_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
struct cfft_plan_i;
|
||||
typedef struct cfft_plan_i * cfft_plan;
|
||||
cfft_plan make_cfft_plan (size_t length);
|
||||
void destroy_cfft_plan (cfft_plan plan);
|
||||
int cfft_backward(cfft_plan plan, double c[], double fct);
|
||||
int cfft_forward(cfft_plan plan, double c[], double fct);
|
||||
size_t cfft_length(cfft_plan plan);
|
||||
|
||||
struct rfft_plan_i;
|
||||
typedef struct rfft_plan_i * rfft_plan;
|
||||
rfft_plan make_rfft_plan (size_t length);
|
||||
void destroy_rfft_plan (rfft_plan plan);
|
||||
int rfft_backward(rfft_plan plan, double c[], double fct);
|
||||
int rfft_forward(rfft_plan plan, double c[], double fct);
|
||||
size_t rfft_length(rfft_plan plan);
|
||||
|
||||
#endif
|
|
@ -1 +0,0 @@
|
|||
# work around broken setuptools monkey patching
|
|
@ -1 +0,0 @@
|
|||
build_ext = "yes, it's there!"
|
|
@ -1 +0,0 @@
|
|||
# work around broken setuptools monkey patching
|
|
@ -1,2 +0,0 @@
|
|||
This directory is here to fool setuptools into building .pyx files
|
||||
even if Pyrex is not installed. See ../setup.py.
|
|
@ -1 +0,0 @@
|
|||
from .libsharp import *
|
|
@ -1,92 +0,0 @@
|
|||
cdef extern from "sharp.h":
|
||||
|
||||
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||
float *out, ptrdiff_t nx)
|
||||
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||
double *out, ptrdiff_t nx)
|
||||
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax)
|
||||
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax)
|
||||
void sharp_legendre_roots(int n, double *x, double *w)
|
||||
|
||||
# sharp_lowlevel.h
|
||||
ctypedef struct sharp_alm_info:
|
||||
# Maximum \a l index of the array
|
||||
int lmax
|
||||
# Number of different \a m values in this object
|
||||
int nm
|
||||
# Array with \a nm entries containing the individual m values
|
||||
int *mval
|
||||
# Combination of flags from sharp_almflags
|
||||
int flags
|
||||
# Array with \a nm entries containing the (hypothetical) indices of
|
||||
# the coefficients with quantum numbers 0,\a mval[i]
|
||||
long *mvstart
|
||||
# Stride between a_lm and a_(l+1),m
|
||||
long stride
|
||||
|
||||
ctypedef struct sharp_geom_info:
|
||||
pass
|
||||
|
||||
void sharp_make_alm_info (int lmax, int mmax, int stride,
|
||||
ptrdiff_t *mvstart, sharp_alm_info **alm_info)
|
||||
|
||||
void sharp_make_geom_info (int nrings, int *nph, ptrdiff_t *ofs,
|
||||
int *stride, double *phi0, double *theta,
|
||||
double *wgt, sharp_geom_info **geom_info)
|
||||
|
||||
void sharp_destroy_alm_info(sharp_alm_info *info)
|
||||
void sharp_destroy_geom_info(sharp_geom_info *info)
|
||||
|
||||
ptrdiff_t sharp_map_size(sharp_geom_info *info)
|
||||
ptrdiff_t sharp_alm_count(sharp_alm_info *self)
|
||||
|
||||
|
||||
ctypedef enum sharp_jobtype:
|
||||
SHARP_YtW
|
||||
SHARP_Yt
|
||||
SHARP_WY
|
||||
SHARP_Y
|
||||
|
||||
ctypedef enum:
|
||||
SHARP_DP
|
||||
SHARP_ADD
|
||||
|
||||
void sharp_execute(sharp_jobtype type_,
|
||||
int spin,
|
||||
void *alm,
|
||||
void *map,
|
||||
sharp_geom_info *geom_info,
|
||||
sharp_alm_info *alm_info,
|
||||
int ntrans,
|
||||
int flags,
|
||||
double *time,
|
||||
unsigned long long *opcnt) nogil
|
||||
|
||||
ctypedef enum:
|
||||
SHARP_ERROR_NO_MPI
|
||||
|
||||
int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
|
||||
void *alm, void *map, sharp_geom_info *geom_info,
|
||||
sharp_alm_info *alm_info, int ntrans, int flags, double *time,
|
||||
unsigned long long *opcnt) nogil
|
||||
|
||||
void sharp_normalized_associated_legendre_table(int m, int spin, int lmax, int ntheta,
|
||||
double *theta, int theta_stride, int l_stride, int spin_stride, double *out) nogil
|
||||
|
||||
|
||||
cdef extern from "sharp_geomhelpers.h":
|
||||
void sharp_make_subset_healpix_geom_info(
|
||||
int nside, int stride, int nrings,
|
||||
int *rings, double *weight, sharp_geom_info **geom_info)
|
||||
void sharp_make_gauss_geom_info(
|
||||
int nrings, int nphi, double phi0,
|
||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||
|
||||
cdef extern from "sharp_almhelpers.h":
|
||||
void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
|
||||
sharp_alm_info **alm_info)
|
||||
void sharp_make_rectangular_alm_info (int lmax, int mmax, int stride,
|
||||
sharp_alm_info **alm_info)
|
||||
void sharp_make_mmajor_real_packed_alm_info (int lmax, int stride,
|
||||
int nm, const int *ms, sharp_alm_info **alm_info)
|
||||
|
|
@ -1,324 +0,0 @@
|
|||
import numpy as np
|
||||
cimport numpy as np
|
||||
cimport cython
|
||||
|
||||
__all__ = ['legendre_transform', 'legendre_roots', 'sht', 'synthesis', 'adjoint_synthesis',
|
||||
'analysis', 'adjoint_analysis', 'healpix_grid', 'triangular_order', 'rectangular_order',
|
||||
'packed_real_order', 'normalized_associated_legendre_table']
|
||||
|
||||
|
||||
def legendre_transform(x, bl, out=None):
|
||||
if out is None:
|
||||
out = np.empty_like(x)
|
||||
if out.shape[0] == 0:
|
||||
return out
|
||||
elif x.dtype == np.float64:
|
||||
if bl.dtype != np.float64:
|
||||
bl = bl.astype(np.float64)
|
||||
return _legendre_transform(x, bl, out=out)
|
||||
elif x.dtype == np.float32:
|
||||
if bl.dtype != np.float32:
|
||||
bl = bl.astype(np.float32)
|
||||
return _legendre_transform_s(x, bl, out=out)
|
||||
else:
|
||||
raise ValueError("unsupported dtype")
|
||||
|
||||
|
||||
def _legendre_transform(double[::1] x, double[::1] bl, double[::1] out):
|
||||
if out.shape[0] != x.shape[0]:
|
||||
raise ValueError('x and out must have same shape')
|
||||
sharp_legendre_transform(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
|
||||
return np.asarray(out)
|
||||
|
||||
|
||||
def _legendre_transform_s(float[::1] x, float[::1] bl, float[::1] out):
|
||||
if out.shape[0] != x.shape[0]:
|
||||
raise ValueError('x and out must have same shape')
|
||||
sharp_legendre_transform_s(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
|
||||
return np.asarray(out)
|
||||
|
||||
|
||||
def legendre_roots(n):
|
||||
x = np.empty(n, np.double)
|
||||
w = np.empty(n, np.double)
|
||||
cdef double[::1] x_buf = x, w_buf = w
|
||||
if not (x_buf.shape[0] == w_buf.shape[0] == n):
|
||||
raise AssertionError()
|
||||
if n > 0:
|
||||
sharp_legendre_roots(n, &x_buf[0], &w_buf[0])
|
||||
return x, w
|
||||
|
||||
|
||||
JOBTYPE_TO_CONST = {
|
||||
'Y': SHARP_Y,
|
||||
'Yt': SHARP_Yt,
|
||||
'WY': SHARP_WY,
|
||||
'YtW': SHARP_YtW
|
||||
}
|
||||
|
||||
def sht(jobtype, geom_info ginfo, alm_info ainfo, double[:, :, ::1] input,
|
||||
int spin=0, comm=None, add=False):
|
||||
cdef void *comm_ptr
|
||||
cdef int flags = SHARP_DP | (SHARP_ADD if add else 0)
|
||||
cdef int r
|
||||
cdef sharp_jobtype jobtype_i
|
||||
cdef double[:, :, ::1] output_buf
|
||||
cdef int ntrans = input.shape[0]
|
||||
cdef int ntotcomp = ntrans * input.shape[1]
|
||||
cdef int i, j
|
||||
|
||||
if spin == 0 and input.shape[1] != 1:
|
||||
raise ValueError('For spin == 0, we need input.shape[1] == 1')
|
||||
elif spin != 0 and input.shape[1] != 2:
|
||||
raise ValueError('For spin != 0, we need input.shape[1] == 2')
|
||||
|
||||
|
||||
cdef size_t[::1] ptrbuf = np.empty(2 * ntotcomp, dtype=np.uintp)
|
||||
cdef double **alm_ptrs = <double**>&ptrbuf[0]
|
||||
cdef double **map_ptrs = <double**>&ptrbuf[ntotcomp]
|
||||
|
||||
try:
|
||||
jobtype_i = JOBTYPE_TO_CONST[jobtype]
|
||||
except KeyError:
|
||||
raise ValueError('jobtype must be one of: %s' % ', '.join(sorted(JOBTYPE_TO_CONST.keys())))
|
||||
|
||||
if jobtype_i == SHARP_Y or jobtype_i == SHARP_WY:
|
||||
output = np.empty((input.shape[0], input.shape[1], ginfo.local_size()), dtype=np.float64)
|
||||
output_buf = output
|
||||
for i in range(input.shape[0]):
|
||||
for j in range(input.shape[1]):
|
||||
alm_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
|
||||
map_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
|
||||
else:
|
||||
output = np.empty((input.shape[0], input.shape[1], ainfo.local_size()), dtype=np.float64)
|
||||
output_buf = output
|
||||
for i in range(input.shape[0]):
|
||||
for j in range(input.shape[1]):
|
||||
alm_ptrs[i * input.shape[1] + j] = &output_buf[i, j, 0]
|
||||
map_ptrs[i * input.shape[1] + j] = &input[i, j, 0]
|
||||
|
||||
if comm is None:
|
||||
with nogil:
|
||||
sharp_execute (
|
||||
jobtype_i,
|
||||
geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
|
||||
spin=spin, alm=alm_ptrs, map=map_ptrs,
|
||||
ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
|
||||
else:
|
||||
from mpi4py import MPI
|
||||
if not isinstance(comm, MPI.Comm):
|
||||
raise TypeError('comm must be an mpi4py communicator')
|
||||
from .libsharp_mpi import _addressof
|
||||
comm_ptr = <void*><size_t>_addressof(comm)
|
||||
with nogil:
|
||||
r = sharp_execute_mpi_maybe (
|
||||
comm_ptr, jobtype_i,
|
||||
geom_info=ginfo.ginfo, alm_info=ainfo.ainfo,
|
||||
spin=spin, alm=alm_ptrs, map=map_ptrs,
|
||||
ntrans=ntrans, flags=flags, time=NULL, opcnt=NULL)
|
||||
if r == SHARP_ERROR_NO_MPI:
|
||||
raise Exception('MPI requested, but not available')
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def synthesis(*args, **kw):
|
||||
return sht('Y', *args, **kw)
|
||||
|
||||
def adjoint_synthesis(*args, **kw):
|
||||
return sht('Yt', *args, **kw)
|
||||
|
||||
def analysis(*args, **kw):
|
||||
return sht('YtW', *args, **kw)
|
||||
|
||||
def adjoint_analysis(*args, **kw):
|
||||
return sht('WY', *args, **kw)
|
||||
|
||||
|
||||
#
|
||||
# geom_info
|
||||
#
|
||||
class NotInitializedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
cdef class geom_info:
|
||||
cdef sharp_geom_info *ginfo
|
||||
|
||||
def __cinit__(self, *args, **kw):
|
||||
self.ginfo = NULL
|
||||
|
||||
def local_size(self):
|
||||
if self.ginfo == NULL:
|
||||
raise NotInitializedError()
|
||||
return sharp_map_size(self.ginfo)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.ginfo != NULL:
|
||||
sharp_destroy_geom_info(self.ginfo)
|
||||
self.ginfo = NULL
|
||||
|
||||
|
||||
cdef class healpix_grid(geom_info):
|
||||
|
||||
_weight_cache = {} # { (nside, 'T'/'Q'/'U') -> numpy array of ring weights cached from file }
|
||||
|
||||
def __init__(self, int nside, stride=1, int[::1] rings=None, double[::1] weights=None):
|
||||
if weights is not None and weights.shape[0] != 2 * nside:
|
||||
raise ValueError('weights must have length 2 * nside')
|
||||
sharp_make_subset_healpix_geom_info(nside, stride,
|
||||
nrings=4 * nside - 1 if rings is None else rings.shape[0],
|
||||
rings=NULL if rings is None else &rings[0],
|
||||
weight=NULL if weights is None else &weights[0],
|
||||
geom_info=&self.ginfo)
|
||||
|
||||
@classmethod
|
||||
def load_ring_weights(cls, nside, fields):
|
||||
"""
|
||||
Loads HEALPix ring weights from file. The environment variable
|
||||
HEALPIX should be set, and this routine will look in the `data`
|
||||
subdirectory.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
nside: int
|
||||
HEALPix nside parameter
|
||||
|
||||
fields: tuple of str
|
||||
Which weights to extract; pass ('T',) to only get scalar
|
||||
weights back, or ('T', 'Q', 'U') to get all the weights
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
List of NumPy arrays, according to fields parameter.
|
||||
|
||||
"""
|
||||
import os
|
||||
from astropy.io import fits
|
||||
data_path = os.path.join(os.environ['HEALPIX'], 'data')
|
||||
fits_field_names = {
|
||||
'T': 'TEMPERATURE WEIGHTS',
|
||||
'Q': 'Q-POLARISATION WEIGHTS',
|
||||
'U': 'U-POLARISATION WEIGHTS'}
|
||||
|
||||
must_load = [field for field in fields if (nside, field) not in cls._weight_cache]
|
||||
|
||||
if must_load:
|
||||
hdulist = fits.open(os.path.join(data_path, 'weight_ring_n%05d.fits' % nside))
|
||||
try:
|
||||
for field in must_load:
|
||||
w = hdulist[1].data.field(fits_field_names[field]).ravel().astype(np.double)
|
||||
w += 1
|
||||
cls._weight_cache[nside, field] = w
|
||||
finally:
|
||||
hdulist.close()
|
||||
return [cls._weight_cache[(nside, field)].copy() for field in fields]
|
||||
|
||||
#
|
||||
# alm_info
|
||||
#
|
||||
|
||||
|
||||
cdef class alm_info:
|
||||
cdef sharp_alm_info *ainfo
|
||||
|
||||
def __cinit__(self, *args, **kw):
|
||||
self.ainfo = NULL
|
||||
|
||||
def local_size(self):
|
||||
if self.ainfo == NULL:
|
||||
raise NotInitializedError()
|
||||
return sharp_alm_count(self.ainfo)
|
||||
|
||||
def mval(self):
|
||||
if self.ainfo == NULL:
|
||||
raise NotInitializedError()
|
||||
return np.asarray(<int[:self.ainfo.nm]> self.ainfo.mval)
|
||||
|
||||
def mvstart(self):
|
||||
if self.ainfo == NULL:
|
||||
raise NotInitializedError()
|
||||
return np.asarray(<long[:self.ainfo.nm]> self.ainfo.mvstart)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.ainfo != NULL:
|
||||
sharp_destroy_alm_info(self.ainfo)
|
||||
self.ainfo = NULL
|
||||
|
||||
@cython.boundscheck(False)
|
||||
def almxfl(self, np.ndarray[double, ndim=3, mode='c'] alm, np.ndarray[double, ndim=2, mode='c'] fl):
|
||||
"""Multiply Alm by a Ell based array
|
||||
|
||||
|
||||
Parameters
|
||||
----------
|
||||
alm : np.ndarray
|
||||
input alm, 3 dimensions = (different signal x polarizations x lm-ordering)
|
||||
fl : np.ndarray
|
||||
either 1 dimension, e.g. gaussian beam, or 2 dimensions e.g. a polarized beam
|
||||
|
||||
Returns
|
||||
-------
|
||||
None, it modifies alms in-place
|
||||
|
||||
"""
|
||||
cdef int mvstart = 0
|
||||
cdef bint has_multiple_beams = alm.shape[2] > 1 and fl.shape[1] > 1
|
||||
cdef int f, i_m, m, num_ells, i_l, i_signal, i_pol, i_mv
|
||||
|
||||
for i_m in range(self.ainfo.nm):
|
||||
m = self.ainfo.mval[i_m]
|
||||
f = 1 if (m==0) else 2
|
||||
num_ells = self.ainfo.lmax + 1 - m
|
||||
|
||||
if not has_multiple_beams:
|
||||
for i_signal in range(alm.shape[0]):
|
||||
for i_pol in range(alm.shape[1]):
|
||||
for i_l in range(num_ells):
|
||||
l = m + i_l
|
||||
for i_mv in range(mvstart + f*i_l, mvstart + f*i_l +f):
|
||||
alm[i_signal, i_pol, i_mv] *= fl[l, 0]
|
||||
else:
|
||||
for i_signal in range(alm.shape[0]):
|
||||
for i_pol in range(alm.shape[1]):
|
||||
for i_l in range(num_ells):
|
||||
l = m + i_l
|
||||
for i_mv in range(mvstart + f*i_l, mvstart + f*i_l +f):
|
||||
alm[i_signal, i_pol, i_mv] *= fl[l, i_pol]
|
||||
mvstart += f * num_ells
|
||||
|
||||
cdef class triangular_order(alm_info):
|
||||
def __init__(self, int lmax, mmax=None, stride=1):
|
||||
mmax = mmax if mmax is not None else lmax
|
||||
sharp_make_triangular_alm_info(lmax, mmax, stride, &self.ainfo)
|
||||
|
||||
|
||||
cdef class rectangular_order(alm_info):
|
||||
def __init__(self, int lmax, mmax=None, stride=1):
|
||||
mmax = mmax if mmax is not None else lmax
|
||||
sharp_make_rectangular_alm_info(lmax, mmax, stride, &self.ainfo)
|
||||
|
||||
|
||||
cdef class packed_real_order(alm_info):
|
||||
def __init__(self, int lmax, stride=1, int[::1] ms=None):
|
||||
sharp_make_mmajor_real_packed_alm_info(lmax=lmax, stride=stride,
|
||||
nm=lmax + 1 if ms is None else ms.shape[0],
|
||||
ms=NULL if ms is None else &ms[0],
|
||||
alm_info=&self.ainfo)
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
@cython.boundscheck(False)
|
||||
def normalized_associated_legendre_table(int lmax, int m, theta):
|
||||
cdef double[::1] theta_ = np.ascontiguousarray(theta, dtype=np.double)
|
||||
out = np.zeros((theta_.shape[0], lmax - m + 1), np.double)
|
||||
cdef double[:, ::1] out_ = out
|
||||
if lmax < m:
|
||||
raise ValueError("lmax < m")
|
||||
with nogil:
|
||||
sharp_normalized_associated_legendre_table(m, 0, lmax, theta_.shape[0], &theta_[0], lmax - m + 1, 1, 1, &out_[0,0])
|
||||
return out
|
|
@ -1,17 +0,0 @@
|
|||
cdef extern from "mpi.h":
|
||||
ctypedef void *MPI_Comm
|
||||
|
||||
cdef extern from "Python.h":
|
||||
object PyLong_FromVoidPtr(void*)
|
||||
|
||||
cdef extern:
|
||||
ctypedef class mpi4py.MPI.Comm [object PyMPICommObject]:
|
||||
cdef MPI_Comm ob_mpi
|
||||
cdef unsigned flags
|
||||
|
||||
# For compatibility with mpi4py <= 1.3.1
|
||||
# Newer versions could use the MPI._addressof function
|
||||
def _addressof(Comm comm):
|
||||
cdef void *ptr = NULL
|
||||
ptr = <void*>&comm.ob_mpi
|
||||
return PyLong_FromVoidPtr(ptr)
|
|
@ -1 +0,0 @@
|
|||
# empty
|
|
@ -1,58 +0,0 @@
|
|||
import numpy as np
|
||||
from scipy.special import legendre
|
||||
from scipy.special import p_roots
|
||||
import libsharp
|
||||
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
|
||||
def check_legendre_transform(lmax, ntheta):
|
||||
l = np.arange(lmax + 1)
|
||||
if lmax >= 1:
|
||||
sigma = -np.log(1e-3) / lmax / (lmax + 1)
|
||||
bl = np.exp(-sigma*l*(l+1))
|
||||
bl *= (2 * l + 1)
|
||||
else:
|
||||
bl = np.asarray([1], dtype=np.double)
|
||||
|
||||
theta = np.linspace(0, np.pi, ntheta, endpoint=True)
|
||||
x = np.cos(theta)
|
||||
|
||||
# Compute truth using scipy.special.legendre
|
||||
P = np.zeros((ntheta, lmax + 1))
|
||||
for l in range(lmax + 1):
|
||||
P[:, l] = legendre(l)(x)
|
||||
y0 = np.dot(P, bl)
|
||||
|
||||
|
||||
# double-precision
|
||||
y = libsharp.legendre_transform(x, bl)
|
||||
|
||||
assert_allclose(y, y0, rtol=1e-12, atol=1e-12)
|
||||
|
||||
# single-precision
|
||||
y32 = libsharp.legendre_transform(x.astype(np.float32), bl)
|
||||
assert_allclose(y, y0, rtol=1e-5, atol=1e-5)
|
||||
|
||||
|
||||
def test_legendre_transform():
|
||||
nthetas_to_try = [0, 9, 17, 19] + list(np.random.randint(500, size=20))
|
||||
for ntheta in nthetas_to_try:
|
||||
for lmax in [0, 1, 2, 3, 20] + list(np.random.randint(50, size=4)):
|
||||
yield check_legendre_transform, lmax, ntheta
|
||||
|
||||
def check_legendre_roots(n):
|
||||
xs, ws = ([], []) if n == 0 else p_roots(n) # from SciPy
|
||||
xl, wl = libsharp.legendre_roots(n)
|
||||
assert_allclose(xs, xl, rtol=1e-14, atol=1e-14)
|
||||
assert_allclose(ws, wl, rtol=1e-14, atol=1e-14)
|
||||
|
||||
def test_legendre_roots():
|
||||
"""
|
||||
Test the Legendre root-finding algorithm from libsharp by comparing it with
|
||||
the SciPy version.
|
||||
"""
|
||||
yield check_legendre_roots, 0
|
||||
yield check_legendre_roots, 1
|
||||
yield check_legendre_roots, 32
|
||||
yield check_legendre_roots, 33
|
|
@ -1,36 +0,0 @@
|
|||
from __future__ import print_function
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_almost_equal
|
||||
from nose.tools import eq_, ok_
|
||||
|
||||
from libsharp import normalized_associated_legendre_table
|
||||
from scipy.special import sph_harm, p_roots
|
||||
|
||||
def test_compare_legendre_table_with_scipy():
|
||||
def test(theta, m, lmax):
|
||||
Plm = normalized_associated_legendre_table(lmax, m, theta)
|
||||
|
||||
Plm_p = sph_harm(m, np.arange(m, lmax + 1), 0, theta)[None, :]
|
||||
if not np.allclose(Plm_p, Plm):
|
||||
print(Plm_p)
|
||||
print(Plm)
|
||||
return ok_, np.allclose(Plm_p, Plm)
|
||||
|
||||
yield test(np.pi/2, 0, 10)
|
||||
yield test(np.pi/4, 0, 10)
|
||||
yield test(3 * np.pi/4, 0, 10)
|
||||
yield test(np.pi/4, 1, 4)
|
||||
yield test(np.pi/4, 2, 4)
|
||||
yield test(np.pi/4, 50, 50)
|
||||
yield test(np.pi/2, 49, 50)
|
||||
|
||||
|
||||
def test_legendre_table_wrapper_logic():
|
||||
# tests the SSE 2 logic in the high-level wrapper by using an odd number of thetas
|
||||
theta = np.asarray([np.pi/2, np.pi/4, 3 * np.pi / 4])
|
||||
m = 3
|
||||
lmax = 10
|
||||
Plm = normalized_associated_legendre_table(lmax, m, theta)
|
||||
assert np.allclose(Plm[1, :], normalized_associated_legendre_table(lmax, m, np.pi/4)[0, :])
|
||||
assert np.allclose(Plm[2, :], normalized_associated_legendre_table(lmax, m, 3 * np.pi/4)[0, :])
|
|
@ -1,32 +0,0 @@
|
|||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import libsharp
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def test_basic():
|
||||
lmax = 10
|
||||
nside = 8
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
ms = np.arange(rank, lmax + 1, MPI.COMM_WORLD.Get_size(), dtype=np.int32)
|
||||
|
||||
order = libsharp.packed_real_order(lmax, ms=ms)
|
||||
grid = libsharp.healpix_grid(nside)
|
||||
|
||||
|
||||
alm = np.zeros(order.local_size())
|
||||
if rank == 0:
|
||||
alm[0] = 1
|
||||
elif rank == 1:
|
||||
alm[0] = 1
|
||||
|
||||
|
||||
map = libsharp.synthesis(grid, order, np.repeat(alm[None, None, :], 3, 0), comm=MPI.COMM_WORLD)
|
||||
assert np.all(map[2, :] == map[1, :]) and np.all(map[1, :] == map[0, :])
|
||||
map = map[0, 0, :]
|
||||
print(rank, "shape", map.shape)
|
||||
print(rank, "mean", map.mean())
|
||||
|
||||
if __name__=="__main__":
|
||||
test_basic()
|
|
@ -1,137 +0,0 @@
|
|||
# This test needs to be run with:
|
||||
|
||||
# mpirun -np X python test_smoothing_noise_pol_mpi.py
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
import numpy as np
|
||||
|
||||
import healpy as hp
|
||||
|
||||
import libsharp
|
||||
|
||||
mpi = True
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
nside = 256
|
||||
npix = hp.nside2npix(nside)
|
||||
|
||||
np.random.seed(100)
|
||||
input_map = np.random.normal(size=(3, npix))
|
||||
fwhm_deg = 10
|
||||
lmax = 512
|
||||
|
||||
nrings = 4 * nside - 1 # four missing pixels
|
||||
|
||||
if rank == 0:
|
||||
print("total rings", nrings)
|
||||
|
||||
n_mpi_processes = MPI.COMM_WORLD.Get_size()
|
||||
rings_per_process = nrings // n_mpi_processes + 1
|
||||
# ring indices are 1-based
|
||||
|
||||
ring_indices_emisphere = np.arange(2*nside, dtype=np.int32) + 1
|
||||
local_ring_indices = ring_indices_emisphere[rank::n_mpi_processes]
|
||||
|
||||
# to improve performance, simmetric rings north/south need to be in the same rank
|
||||
# therefore we use symmetry to create the full ring indexing
|
||||
|
||||
if local_ring_indices[-1] == 2 * nside:
|
||||
# has equator ring
|
||||
local_ring_indices = np.concatenate(
|
||||
[local_ring_indices[:-1],
|
||||
nrings - local_ring_indices[::-1] + 1]
|
||||
)
|
||||
else:
|
||||
# does not have equator ring
|
||||
local_ring_indices = np.concatenate(
|
||||
[local_ring_indices,
|
||||
nrings - local_ring_indices[::-1] + 1]
|
||||
)
|
||||
|
||||
print("rank", rank, "n_rings", len(local_ring_indices))
|
||||
|
||||
if not mpi:
|
||||
local_ring_indices = None
|
||||
grid = libsharp.healpix_grid(nside, rings=local_ring_indices)
|
||||
|
||||
# returns start index of the ring and number of pixels
|
||||
startpix, ringpix, _, _, _ = hp.ringinfo(nside, local_ring_indices.astype(np.int64))
|
||||
|
||||
local_npix = grid.local_size()
|
||||
|
||||
def expand_pix(startpix, ringpix, local_npix):
|
||||
"""Turn first pixel index and number of pixel in full array of pixels
|
||||
|
||||
to be optimized with cython or numba
|
||||
"""
|
||||
local_pix = np.empty(local_npix, dtype=np.int64)
|
||||
i = 0
|
||||
for start, num in zip(startpix, ringpix):
|
||||
local_pix[i:i+num] = np.arange(start, start+num)
|
||||
i += num
|
||||
return local_pix
|
||||
|
||||
local_pix = expand_pix(startpix, ringpix, local_npix)
|
||||
|
||||
local_map = input_map[:, local_pix]
|
||||
|
||||
local_hitmap = np.zeros(npix)
|
||||
local_hitmap[local_pix] = 1
|
||||
hp.write_map("hitmap_{}.fits".format(rank), local_hitmap, overwrite=True)
|
||||
|
||||
print("rank", rank, "npix", npix, "local_npix", local_npix, "local_map len", len(local_map), "unique pix", len(np.unique(local_pix)))
|
||||
|
||||
local_m_indices = np.arange(rank, lmax + 1, MPI.COMM_WORLD.Get_size(), dtype=np.int32)
|
||||
if not mpi:
|
||||
local_m_indices = None
|
||||
|
||||
order = libsharp.packed_real_order(lmax, ms=local_m_indices)
|
||||
local_nl = order.local_size()
|
||||
print("rank", rank, "local_nl", local_nl, "mval", order.mval())
|
||||
|
||||
mpi_comm = MPI.COMM_WORLD if mpi else None
|
||||
|
||||
# map2alm
|
||||
# maps in libsharp are 3D, 2nd dimension is IQU, 3rd is pixel
|
||||
|
||||
alm_sharp_I = libsharp.analysis(grid, order,
|
||||
np.ascontiguousarray(local_map[0].reshape((1, 1, -1))),
|
||||
spin=0, comm=mpi_comm)
|
||||
alm_sharp_P = libsharp.analysis(grid, order,
|
||||
np.ascontiguousarray(local_map[1:].reshape((1, 2, -1))),
|
||||
spin=2, comm=mpi_comm)
|
||||
|
||||
beam = hp.gauss_beam(fwhm=np.radians(fwhm_deg), lmax=lmax, pol=True)
|
||||
|
||||
print("Smooth")
|
||||
# smooth in place (zonca implemented this function)
|
||||
order.almxfl(alm_sharp_I, np.ascontiguousarray(beam[:, 0:1]))
|
||||
order.almxfl(alm_sharp_P, np.ascontiguousarray(beam[:, (1, 2)]))
|
||||
|
||||
# alm2map
|
||||
|
||||
new_local_map_I = libsharp.synthesis(grid, order, alm_sharp_I, spin=0, comm=mpi_comm)
|
||||
new_local_map_P = libsharp.synthesis(grid, order, alm_sharp_P, spin=2, comm=mpi_comm)
|
||||
|
||||
# Transfer map to first process for writing
|
||||
|
||||
local_full_map = np.zeros(input_map.shape, dtype=np.float64)
|
||||
local_full_map[0, local_pix] = new_local_map_I
|
||||
local_full_map[1:, local_pix] = new_local_map_P
|
||||
|
||||
output_map = np.zeros(input_map.shape, dtype=np.float64) if rank == 0 else None
|
||||
mpi_comm.Reduce(local_full_map, output_map, root=0, op=MPI.SUM)
|
||||
|
||||
if rank == 0:
|
||||
# hp.write_map("sharp_smoothed_map.fits", output_map, overwrite=True)
|
||||
# hp_smoothed = hp.alm2map(hp.map2alm(input_map, lmax=lmax), nside=nside) # transform only
|
||||
hp_smoothed = hp.smoothing(input_map, fwhm=np.radians(fwhm_deg), lmax=lmax)
|
||||
std_diff = (hp_smoothed-output_map).std()
|
||||
print("Std of difference between libsharp and healpy", std_diff)
|
||||
# hp.write_map(
|
||||
# "healpy_smoothed_map.fits",
|
||||
# hp_smoothed,
|
||||
# overwrite=True
|
||||
# )
|
||||
assert std_diff < 1e-5
|
|
@ -1,83 +0,0 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
descr = """Spherical Harmionic transforms package
|
||||
|
||||
Python API for the libsharp spherical harmonic transforms library
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
DISTNAME = 'libsharp'
|
||||
DESCRIPTION = 'libsharp library for fast Spherical Harmonic Transforms'
|
||||
LONG_DESCRIPTION = descr
|
||||
MAINTAINER = 'Dag Sverre Seljebotn',
|
||||
MAINTAINER_EMAIL = 'd.s.seljebotn@astro.uio.no',
|
||||
URL = 'http://sourceforge.net/projects/libsharp/'
|
||||
LICENSE = 'GPL'
|
||||
DOWNLOAD_URL = "http://sourceforge.net/projects/libsharp/"
|
||||
VERSION = '0.1'
|
||||
|
||||
# Add our fake Pyrex at the end of the Python search path
|
||||
# in order to fool setuptools into allowing compilation of
|
||||
# pyx files to C files. Importing Cython.Distutils then
|
||||
# makes Cython the tool of choice for this rather than
|
||||
# (the possibly nonexisting) Pyrex.
|
||||
project_path = os.path.split(__file__)[0]
|
||||
sys.path.append(os.path.join(project_path, 'fake_pyrex'))
|
||||
|
||||
from setuptools import setup, find_packages, Extension
|
||||
from Cython.Build import cythonize
|
||||
import numpy as np
|
||||
|
||||
libsharp = os.environ.get('LIBSHARP', None)
|
||||
libsharp_include = os.environ.get('LIBSHARP_INCLUDE', libsharp and os.path.join(libsharp, 'include'))
|
||||
libsharp_lib = os.environ.get('LIBSHARP_LIB', libsharp and os.path.join(libsharp, 'lib'))
|
||||
|
||||
if libsharp_include is None or libsharp_lib is None:
|
||||
sys.stderr.write('Please set LIBSHARP environment variable to the install directly of libsharp, '
|
||||
'this script will refer to the lib and include sub-directories. Alternatively '
|
||||
'set LIBSHARP_INCLUDE and LIBSHARP_LIB\n')
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup(install_requires = ['numpy'],
|
||||
packages = find_packages(),
|
||||
test_suite="nose.collector",
|
||||
# Well, technically zipping the package will work, but since it's
|
||||
# all compiled code it'll just get unzipped again at runtime, which
|
||||
# is pointless:
|
||||
zip_safe = False,
|
||||
name = DISTNAME,
|
||||
version = VERSION,
|
||||
maintainer = MAINTAINER,
|
||||
maintainer_email = MAINTAINER_EMAIL,
|
||||
description = DESCRIPTION,
|
||||
license = LICENSE,
|
||||
url = URL,
|
||||
download_url = DOWNLOAD_URL,
|
||||
long_description = LONG_DESCRIPTION,
|
||||
classifiers =
|
||||
[ 'Development Status :: 3 - Alpha',
|
||||
'Environment :: Console',
|
||||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: Science/Research',
|
||||
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||
'Topic :: Scientific/Engineering'],
|
||||
ext_modules = cythonize([
|
||||
Extension("libsharp.libsharp",
|
||||
["libsharp/libsharp.pyx"],
|
||||
libraries=["sharp", "fftpack", "c_utils"],
|
||||
include_dirs=[libsharp_include, np.get_include()],
|
||||
library_dirs=[libsharp_lib],
|
||||
extra_link_args=["-fopenmp"],
|
||||
),
|
||||
Extension("libsharp.libsharp_mpi",
|
||||
["libsharp/libsharp_mpi.pyx"],
|
||||
libraries=["sharp", "fftpack", "c_utils"],
|
||||
include_dirs=[libsharp_include, np.get_include()],
|
||||
library_dirs=[libsharp_lib],
|
||||
extra_link_args=["-fopenmp"],
|
||||
),
|
||||
]),
|
||||
)
|
19
runjinja.py
19
runjinja.py
|
@ -1,19 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Preprocesses foo.c.in to foo.c. Reads STDIN and writes STDOUT.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import hashlib
|
||||
from jinja2 import Template, Environment
|
||||
|
||||
env = Environment(block_start_string='/*{',
|
||||
block_end_string='}*/',
|
||||
variable_start_string='{{',
|
||||
variable_end_string='}}')
|
||||
|
||||
extra_vars = dict(len=len)
|
||||
input = sys.stdin.read()
|
||||
sys.stdout.write('/* DO NOT EDIT. md5sum of source: %s */' % hashlib.md5(input.encode()).hexdigest())
|
||||
sys.stdout.write(env.from_string(input).render(**extra_vars))
|
4
runtest.sh
Executable file
4
runtest.sh
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
|
||||
|
||||
./sharp_testsuite acctest
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue