From c9684732b84e39c87fc21288049dc0b46bf128e6 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Thu, 10 Jan 2019 13:30:29 +0100 Subject: [PATCH] cleanup --- Makefile.am | 1 - libsharp/sharp.h | 4 +- libsharp/sharp_almhelpers.h | 2 +- libsharp/sharp_complex_hacks.h | 112 --------------------------------- libsharp/sharp_core.c | 1 - libsharp/sharp_vecsupport.h | 39 ++++++++++++ 6 files changed, 41 insertions(+), 118 deletions(-) delete mode 100644 libsharp/sharp_complex_hacks.h diff --git a/Makefile.am b/Makefile.am index 163fcd0..26b41ad 100644 --- a/Makefile.am +++ b/Makefile.am @@ -15,7 +15,6 @@ src_sharp = \ libsharp/sharp_legendre_roots.c \ libsharp/sharp_ylmgen_c.c \ libsharp/sharp_announce.h \ - libsharp/sharp_complex_hacks.h \ libsharp/sharp_internal.h \ libsharp/sharp_legendre_roots.h \ libsharp/sharp_vecsupport.h \ diff --git a/libsharp/sharp.h b/libsharp/sharp.h index 35a0cb5..ef9cafb 100644 --- a/libsharp/sharp.h +++ b/libsharp/sharp.h @@ -25,7 +25,7 @@ /*! \file sharp.h * Portable interface for the spherical transform library. * - * Copyright (C) 2012-2018 Max-Planck-Society + * Copyright (C) 2012-2019 Max-Planck-Society * \author Martin Reinecke \author Dag Sverre Seljebotn */ @@ -259,8 +259,6 @@ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin, const sharp_alm_info *alm_info, int flags, double *time, unsigned long long *opcnt); - - /*! \} */ #ifdef __cplusplus diff --git a/libsharp/sharp_almhelpers.h b/libsharp/sharp_almhelpers.h index c17028a..06bee8f 100644 --- a/libsharp/sharp_almhelpers.h +++ b/libsharp/sharp_almhelpers.h @@ -25,7 +25,7 @@ /*! \file sharp_almhelpers.h * SHARP helper function for the creation of a_lm data structures * - * Copyright (C) 2008-2016 Max-Planck-Society + * Copyright (C) 2008-2019 Max-Planck-Society * \author Martin Reinecke */ diff --git a/libsharp/sharp_complex_hacks.h b/libsharp/sharp_complex_hacks.h deleted file mode 100644 index d50eabe..0000000 --- a/libsharp/sharp_complex_hacks.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * This file is part of libsharp. - * - * libsharp is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * libsharp is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with libsharp; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * libsharp is being developed at the Max-Planck-Institut fuer Astrophysik - * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt - * (DLR). - */ - -/* \file sharp_complex_hacks.h - * support for converting vector types and complex numbers - * - * Copyright (C) 2012-2018 Max-Planck-Society - * Author: Martin Reinecke - */ - -#ifndef SHARP_COMPLEX_HACKS_H -#define SHARP_COMPLEX_HACKS_H - -#include -#include "sharp_vecsupport.h" - -#define UNSAFE_CODE - -#if (VLEN==1) - -static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict cc) - { cc[0] += a+_Complex_I*b; cc[1] += c+_Complex_I*d; } - -#endif - -#if (VLEN==2) - -static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, - Tv d, _Complex double * restrict c1, _Complex double * restrict c2) - { -#ifdef UNSAFE_CODE -#if defined(__SSE3__) - *((__m128d *)c1) += _mm_hadd_pd(a,b); - *((__m128d *)c2) += _mm_hadd_pd(c,d); -#else - *((__m128d *)c1) += _mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)) + - _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)); - *((__m128d *)c2) += _mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)) + - _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)); -#endif -#else - union {Tv v; _Complex double c; } u1, u2; -#if defined(__SSE3__) - u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d); -#else - u1.v = _mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)) + - _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)); - u2.v = _mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)) + - _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)); -#endif - *c1+=u1.c; *c2+=u2.c; -#endif - } - -static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict cc) - { vhsum_cmplx2(a,b,c,d,cc,cc+1); } - -#endif - -#if (VLEN==4) - -static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict cc) - { - Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d); - Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49), - tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32); - tmp1=tmp3+tmp4; -#ifdef UNSAFE_CODE - _mm256_storeu_pd((double *)cc, - _mm256_add_pd(_mm256_loadu_pd((double *)cc),tmp1)); -#else - union {Tv v; _Complex double c[2]; } u; - u.v=tmp1; - cc[0]+=u.c[0]; cc[1]+=u.c[1]; -#endif - } - -#endif - -#if (VLEN==8) - -static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict cc) - { vhsum_cmplx2(a,b,c,d,cc,cc+1); } - -#endif - -#endif diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c index 7307202..b619ed3 100644 --- a/libsharp/sharp_core.c +++ b/libsharp/sharp_core.c @@ -33,7 +33,6 @@ #include #include #include "sharp_vecsupport.h" -#include "sharp_complex_hacks.h" #include "sharp.h" #include "sharp_internal.h" #include "c_utils.h" diff --git a/libsharp/sharp_vecsupport.h b/libsharp/sharp_vecsupport.h index b70143d..e4bfc4f 100644 --- a/libsharp/sharp_vecsupport.h +++ b/libsharp/sharp_vecsupport.h @@ -76,6 +76,11 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; } #define vanyTrue(a) (a) #define vallTrue(a) (a) +static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, + _Complex double * restrict cc) + { cc[0] += a+_Complex_I*b; cc[1] += c+_Complex_I*d; } + + #endif #if (VLEN==2) @@ -119,6 +124,21 @@ static inline Tv vblend__(Tv m, Tv a, Tv b) #define vanyTrue(a) (_mm_movemask_pd(a)!=0) #define vallTrue(a) (_mm_movemask_pd(a)==3) +static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, + Tv d, _Complex double * restrict cc) + { + union {Tv v; _Complex double c; } u1, u2; +#if defined(__SSE3__) + u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d); +#else + u1.v = _mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)) + + _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)); + u2.v = _mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)) + + _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)); +#endif + cc[0]+=u1.c; cc[1]+=u2.c; + } + #endif #if (VLEN==4) @@ -150,6 +170,18 @@ typedef __m256d Tm; #define vanyTrue(a) (_mm256_movemask_pd(a)!=0) #define vallTrue(a) (_mm256_movemask_pd(a)==15) +static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, + _Complex double * restrict cc) + { + Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d); + Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49), + tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32); + tmp1=tmp3+tmp4; + union {Tv v; _Complex double c[2]; } u; + u.v=tmp1; + cc[0]+=u.c[0]; cc[1]+=u.c[1]; + } + #endif #if (VLEN==8) @@ -180,6 +212,13 @@ typedef __mmask8 Tm; #define vanyTrue(a) (a!=0) #define vallTrue(a) (a==255) +static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, + _Complex double * restrict cc) + { + cc[0] += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b); + cc[1] += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d); + } + #endif #endif