From 540e7e44f8dfc61d64debf5ae42aebb011444b5d Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Wed, 27 Feb 2019 10:44:38 +0100 Subject: [PATCH] doc improvements and a pragma, which probably does nothing --- COMPILE | 6 +++--- libsharp/sharp_core_inc.c | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/COMPILE b/COMPILE index 5b1c5b2..b63ae7f 100644 --- a/COMPILE +++ b/COMPILE @@ -15,7 +15,7 @@ flags. Fast math --------- -Specifying "-ffast-math" is important for all compilers, since it allows the +Specifying "-ffast-math" or "-ffp-contract=fast" is important for all compilers, since it allows the compiler to fuse multiplications and additions into FMA instructions, which is forbidden by the C99 standard. Since FMAs are a central aspect of the algorithm, they are needed for optimum performance. @@ -25,8 +25,8 @@ to the C99 standard, you should still be able to compile libsharp with "-ffast-math" without any problems. -Runtime CPU selection with gcc ------------------------------- +Runtime CPU selection with gcc and clang +---------------------------------------- When using a recent gcc (6.0 and newer) or a recent clang (successfully tested with versions 6 and 7) on an x86_64 platform, the build machinery can compile diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c index 331f8f7..682558c 100644 --- a/libsharp/sharp_core_inc.c +++ b/libsharp/sharp_core_inc.c @@ -42,6 +42,11 @@ #include "libsharp/sharp_internal.h" #include "c_utils/c_utils.h" +// In the following, we explicitly allow the compiler to contract floating +// point operations, like multiply-and-add. +// Unfortunately, most compilers don't act on this pragma yet. +#pragma STDC FP_CONTRACT ON + typedef complex double dcmplx; #define nv0 (128/VLEN)