vide_public/external/healpix/c_utils/sse_utils.h
2012-10-30 13:56:48 -04:00

137 lines
3 KiB
C

/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
* (DLR).
*/
/*! \file sse_utils.h
* SSE/SSE2/SSE3-related functionality
*
* Copyright (C) 2010 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef PLANCK_SSE_UTILS_H
#define PLANCK_SSE_UTILS_H
#ifndef PLANCK_DISABLE_SSE
#if (defined(__SSE__))
#include <xmmintrin.h>
#define PLANCK_HAVE_SSE
#ifdef __cplusplus
extern "C" {
#endif
/*! SSE1 vector type holding four single-precision values. */
typedef __m128 v4sf; /* vector of 4 floats (SSE1) */
/*! Union overlaying a \a v4sf with an array of four floats, so that the
    individual elements of the vector can be accessed by index through \a f
    while the same storage is used as a whole vector through \a v. */
typedef union {
float f[4];
v4sf v;
} V4SF;
/*! Packs four scalars into a \a v4sf.
    \a a ends up in the lowest element, followed by \a b, \a c and \a d,
    i.e. in the order in which read_v4sf() hands them back. */
static inline v4sf build_v4sf (float a, float b, float c, float d)
  {
  /* _mm_setr_ps takes its arguments in ascending element order. */
  return _mm_setr_ps(a,b,c,d);
  }
/*! Unpacks the four elements of \a v into the scalars pointed to by
    \a a, \a b, \a c and \a d (element 0 goes to \a a, element 3 to \a d).
    Any of the pointers may be null; the corresponding element is then
    simply not written. */
static inline void read_v4sf (v4sf v, float *a, float *b, float *c, float *d)
  {
  float lane[4];
  float *dest[4];
  int i;

  /* Spill the vector to plain memory; no alignment is required here. */
  _mm_storeu_ps(lane,v);

  dest[0]=a; dest[1]=b; dest[2]=c; dest[3]=d;
  for (i=0; i<4; ++i)
    {
    if (dest[i])
      *dest[i]=lane[i];
    }
  }
#ifdef __cplusplus
}
#endif
#endif
#if (defined(__SSE2__))
#include <emmintrin.h>
#define PLANCK_HAVE_SSE2
#ifdef __cplusplus
extern "C" {
#endif
/*! SSE2 vector type holding two double-precision values. */
typedef __m128d v2df; /* vector of 2 doubles (SSE2) */
/*! Union overlaying a \a v2df with an array of two doubles, so that the
    individual elements of the vector can be accessed by index through \a d
    while the same storage is used as a whole vector through \a v. */
typedef union {
double d[2];
v2df v;
} V2DF;
/*! Pair of \a v2df vectors, named \a a and \a b. */
typedef struct {
v2df a,b;
} v2df2;
/*! Pair of \a V2DF unions, named \a a and \a b; the element-accessible
    counterpart of \a v2df2 (see to_V2DF2() and to_v2df2() for conversion). */
typedef struct {
V2DF a,b;
} V2DF2;
/*! Vector of two doubles in which only the sign bit (bit 63) of each
    element is set. Combining it with a value via _mm_andnot_pd() clears the
    sign bits of that value, i.e. yields the element-wise absolute value.

    The high 32-bit word of each element is written as -0x7fffffff-1 rather
    than -0x80000000: the constant 0x80000000 does not fit into an int and is
    therefore unsigned, so negating it and passing the result to an int
    parameter relies on an out-of-range conversion. The spelling used here
    stays inside the range of int and produces the same bit pattern. */
#define V2DF_SIGNMASK \
  _mm_castsi128_pd(_mm_set_epi32(-0x7fffffff-1,0,-0x7fffffff-1,0))
/*! Packs two scalars into a \a v2df.
    \a a ends up in the low element and \a b in the high element, i.e. in
    the order in which read_v2df() hands them back. */
static inline v2df build_v2df (double a, double b)
  {
  /* _mm_setr_pd takes its arguments in ascending element order. */
  return _mm_setr_pd(a,b);
  }
/*! Unpacks the two elements of \a v: the low element is stored to \a *a,
    the high element to \a *b.
    Either pointer may be null, in which case the corresponding element is
    not written. This matches the behaviour of read_v4sf(), so callers that
    only need one of the two values can pass a null pointer for the other. */
static inline void read_v2df (v2df v, double *a, double *b)
  {
  if (a) _mm_store_sd(a,v);   /* low element */
  if (b) _mm_storeh_pd(b,v);  /* high element */
  }
/*! Returns 1 if, for at least one of the two elements, the absolute value
    of \a a is greater than the corresponding element of \a b; 0 otherwise.
    Note that only \a a is taken by absolute value, \a b is used as given. */
static inline int v2df_any_gt (v2df a, v2df b)
  {
  /* Clearing the sign bits gives |a| element-wise. */
  const v2df abs_a = _mm_andnot_pd(V2DF_SIGNMASK,a);
  const v2df is_gt = _mm_cmpgt_pd(abs_a,b);
  /* One mask bit per element; any bit set means some element matched. */
  const int hits = _mm_movemask_pd(is_gt);
  return hits!=0;
  }
/*! Returns 1 if for neither of the two elements the absolute value of \a a
    is less than the corresponding element of \a b; 0 otherwise.
    Note that only \a a is taken by absolute value, \a b is used as given.
    NOTE(review): the check is phrased as "no element compares less-than",
    so elements containing NaN presumably count as passing - confirm that
    this is what callers expect. */
static inline int v2df_all_ge (v2df a, v2df b)
  {
  /* Clearing the sign bits gives |a| element-wise. */
  const v2df abs_a = _mm_andnot_pd(V2DF_SIGNMASK,a);
  const v2df is_lt = _mm_cmplt_pd(abs_a,b);
  /* One mask bit per element; no bit set means no element was smaller. */
  const int misses = _mm_movemask_pd(is_lt);
  return misses==0;
  }
/*! Wraps the vector \a x into a \a V2DF union, giving indexed access to
    its two elements. */
static inline V2DF to_V2DF (v2df x)
  {
  V2DF wrapped;
  wrapped.v = x;
  return wrapped;
  }
/*! Converts a pair of plain vectors (\a v2df2) into the corresponding pair
    of \a V2DF unions; member \a a maps to \a a and \a b to \a b. */
static inline V2DF2 to_V2DF2 (v2df2 x)
  {
  V2DF2 out;
  out.a.v = x.a;
  out.b.v = x.b;
  return out;
  }
/*! Converts a pair of \a V2DF unions (\a V2DF2) back into a pair of plain
    vectors; member \a a maps to \a a and \a b to \a b. */
static inline v2df2 to_v2df2 (V2DF2 X)
  {
  v2df2 out;
  out.a = X.a.v;
  out.b = X.b.v;
  return out;
  }
static inline v2df2 zero_v2df2(void)
{ v2df2 x; x.a=x.b=_mm_setzero_pd(); return x; }
#ifdef __cplusplus
}
#endif
#endif
#if (defined(__SSE3__))
#include <pmmintrin.h>
#define PLANCK_HAVE_SSE3
#endif
#endif
#endif