From 540e7e44f8dfc61d64debf5ae42aebb011444b5d Mon Sep 17 00:00:00 2001
From: Martin Reinecke <martin@mpa-garching.mpg.de>
Date: Wed, 27 Feb 2019 10:44:38 +0100
Subject: [PATCH] doc improvements and a pragma, which probably does nothing

---
 COMPILE                   | 6 +++---
 libsharp/sharp_core_inc.c | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/COMPILE b/COMPILE
index 5b1c5b2..b63ae7f 100644
--- a/COMPILE
+++ b/COMPILE
@@ -15,7 +15,7 @@ flags.
 Fast math
 ---------
 
-Specifying "-ffast-math" is important for all compilers, since it allows the
+Specifying "-ffast-math" or "-ffp-contract=fast" is important for all compilers, since it allows the
 compiler to fuse multiplications and additions into FMA instructions, which is
 forbidden by the C99 standard. Since FMAs are a central aspect of the algorithm,
 they are needed for optimum performance.
@@ -25,8 +25,8 @@ to the C99 standard, you should still be able to compile libsharp with
 "-ffast-math" without any problems.
 
 
-Runtime CPU selection with gcc
-------------------------------
+Runtime CPU selection with gcc and clang
+----------------------------------------
 
 When using a recent gcc (6.0 and newer) or a recent clang (successfully tested
 with versions 6 and 7) on an x86_64 platform, the build machinery can compile
diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c
index 331f8f7..682558c 100644
--- a/libsharp/sharp_core_inc.c
+++ b/libsharp/sharp_core_inc.c
@@ -42,6 +42,11 @@
 #include "libsharp/sharp_internal.h"
 #include "c_utils/c_utils.h"
 
+// Explicitly allow the compiler to contract floating-point operations in the
+// code below, e.g. by fusing a multiply and an add into one FMA instruction.
+// Unfortunately, most compilers do not act on this pragma yet.
+#pragma STDC FP_CONTRACT ON
+
 typedef complex double dcmplx;
 
 #define nv0 (128/VLEN)