360 lines
11 KiB
C++
360 lines
11 KiB
C++
/*+
|
|
ARES/HADES/BORG Package -- ./libLSS/tools/fused_reduce.hpp
|
|
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
|
|
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
|
|
|
|
Additional contributions from:
|
|
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
|
|
|
|
+*/
|
|
#ifndef __LIBLSS_FUSED_REDUCTION_HPP
|
|
#define __LIBLSS_FUSED_REDUCTION_HPP
|
|
|
|
#include <type_traits>
|
|
#include <boost/type_traits/has_trivial_constructor.hpp>
|
|
|
|
// This include file defines the reduction operation on
|
|
// virtual arrays as defined by fused_array.hpp
|
|
// The goal is to be able to combine virtual arrays and
|
|
// apply parallel reduction operation on it.
|
|
// A straightforward example is given in test_fuse_reduce.cpp
|
|
//
|
|
// r = LibLSS::reduce_sum(
|
|
// b_fused_idx (
|
|
// [](int i, int j)->int {return i*j;},
|
|
// extents[N][M] )
|
|
// );
|
|
//
|
|
// Which computes a \sum_{i=0,j=0}^{i=N-1,j=M-1} i*j, with openmp.
|
|
// However arrays can be folded in that.
|
|
//
|
|
// r = LibLSS::reduce_sum(
|
|
// b_fused_idx (
|
|
// [](int i, int j)->int {return i*j;},
|
|
// extents[N][M] )
|
|
// );
|
|
|
|
//
|
|
|
|
namespace LibLSS {
|
|
|
|
namespace FUSE_details {
|
|
template<std::size_t N, typename T, bool parallel>
|
|
struct OperatorReduction {};
|
|
|
|
|
|
// ======================
|
|
// MAX OPERATOR REDUCTION
|
|
template<std::size_t N, typename T, bool parallel>
|
|
struct MaxOperatorReduction {};
|
|
|
|
template<std::size_t N,typename T>
|
|
struct MaxOperatorReduction<N,T,false> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
T r = -std::numeric_limits<T>::infinity();
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
MaxOperatorReduction<N-1,T,false> op;
|
|
r = std::max(r, op.reduce(a[i], m[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
template<std::size_t N,typename T>
|
|
struct MaxOperatorReduction<N,T,true> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
typename boost::remove_reference<A>::type const *a_ptr = &a;
|
|
T r = -std::numeric_limits<T>::infinity();
|
|
|
|
#pragma omp parallel for reduction(max:r)
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
MaxOperatorReduction<N-1,T,false> op;
|
|
r = std::max(r, op.reduce((*a_ptr)[i], m[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct MaxOperatorReduction<1,T,false> {
|
|
template<typename A,typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
T r = -std::numeric_limits<T>::infinity();
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
if (m[i])
|
|
r = std::max(r, T(a[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T>
|
|
struct MaxOperatorReduction<1,T,true> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
typename boost::remove_reference<A>::type const *a_ptr = &a;
|
|
T r = -std::numeric_limits<T>::infinity();
|
|
|
|
#pragma omp parallel for reduction(max:r)
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
if (m[i])
|
|
r = std::max(r, T((*a_ptr)[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T, bool parallel>
|
|
struct MaxOperatorReduction<0,T,parallel> {
|
|
template<typename A>
|
|
static T reduce(const A& a) {
|
|
return a;
|
|
}
|
|
};
|
|
|
|
// ===============================
|
|
//
|
|
// ======================
|
|
// MIN OPERATOR REDUCTION
|
|
template<std::size_t N, typename T, bool parallel>
|
|
struct MinOperatorReduction {};
|
|
|
|
template<std::size_t N,typename T>
|
|
struct MinOperatorReduction<N,T,false> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
T r = std::numeric_limits<T>::infinity();
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
MinOperatorReduction<N-1,T,false> op;
|
|
r = std::min(r, op.reduce(a[i], m[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
template<std::size_t N,typename T>
|
|
struct MinOperatorReduction<N,T,true> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
typename boost::remove_reference<A>::type const *a_ptr = &a;
|
|
T r = std::numeric_limits<T>::infinity();
|
|
|
|
#pragma omp parallel for reduction(min:r)
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
MinOperatorReduction<N-1,T,false> op;
|
|
r = std::min(r, op.reduce((*a_ptr)[i], m[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct MinOperatorReduction<1,T,false> {
|
|
template<typename A,typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
T r = std::numeric_limits<T>::infinity();
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
if (m[i])
|
|
r = std::min(r, T(a[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T>
|
|
struct MinOperatorReduction<1,T,true> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
typename boost::remove_reference<A>::type const *a_ptr = &a;
|
|
T r = std::numeric_limits<T>::infinity();
|
|
|
|
#pragma omp parallel for reduction(min:r)
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
if (m[i])
|
|
r = std::min(r, T((*a_ptr)[i]));
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T, bool parallel>
|
|
struct MinOperatorReduction<0,T,parallel> {
|
|
template<typename A>
|
|
static T reduce(const A& a) {
|
|
return a;
|
|
}
|
|
};
|
|
|
|
// ===============================
|
|
|
|
template<std::size_t N,typename T>
|
|
struct OperatorReduction<N,T,false> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
T r = 0;
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
OperatorReduction<N-1,T,false> op;
|
|
r += op.reduce(a[i], m[i]);
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
template<std::size_t N,typename T>
|
|
struct OperatorReduction<N,T,true> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
typename boost::remove_reference<A>::type const *a_ptr = &a;
|
|
T r = 0;
|
|
|
|
#pragma omp parallel for reduction(+:r)
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
OperatorReduction<N-1,T,false> op;
|
|
r += op.reduce((*a_ptr)[i], m[i]);
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T>
|
|
struct OperatorReduction<1,T,false> {
|
|
template<typename A,typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
T r = 0;
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
if (m[i])
|
|
r += a[i];
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T>
|
|
struct OperatorReduction<1,T,true> {
|
|
template<typename A, typename M>
|
|
static T reduce(const A& a, const M& m) {
|
|
std::size_t s = a.index_bases()[0], e = a.shape()[0];
|
|
typename boost::remove_reference<A>::type const *a_ptr = &a;
|
|
T r = 0;
|
|
|
|
#pragma omp parallel for reduction(+:r)
|
|
for (std::size_t i = s; i < s+e; i++) {
|
|
if (m[i])
|
|
r += (*a_ptr)[i];
|
|
}
|
|
return r;
|
|
}
|
|
};
|
|
|
|
|
|
template<typename T>
|
|
struct OperatorReduction<0,T,false> {
|
|
template<typename A>
|
|
static T reduce(const A& a) {
|
|
return a;
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct OperatorReduction<0,T,true> {
|
|
template<typename A>
|
|
static T reduce(const A& a) {
|
|
return a;
|
|
}
|
|
};
|
|
|
|
|
|
|
|
template<typename T,typename InArray,typename MaskArray>
|
|
typename std::enable_if<!std::is_same<MaskArray,bool>::value, T>::type
|
|
reduce_min(const InArray& A, const MaskArray& mask, bool openmp=true) {
|
|
typedef typename boost::remove_reference<InArray>::type PureArray;
|
|
if (openmp) {
|
|
MinOperatorReduction<InArray::dimensionality,T,true> op;
|
|
return op.template reduce(A, mask);
|
|
} else {
|
|
MinOperatorReduction<InArray::dimensionality,T,false> op;
|
|
return op.template reduce(A, mask);
|
|
}
|
|
}
|
|
|
|
template<typename T,typename InArray,typename MaskArray>
|
|
typename std::enable_if<!std::is_same<MaskArray,bool>::value, T>::type
|
|
reduce_max(const InArray& A, const MaskArray& mask, bool openmp=true) {
|
|
typedef typename boost::remove_reference<InArray>::type PureArray;
|
|
if (openmp) {
|
|
MaxOperatorReduction<InArray::dimensionality,T,true> op;
|
|
return op.template reduce(A, mask);
|
|
} else {
|
|
MaxOperatorReduction<InArray::dimensionality,T,false> op;
|
|
return op.template reduce(A, mask);
|
|
}
|
|
}
|
|
|
|
template<typename T,typename InArray,typename MaskArray>
|
|
typename std::enable_if<!std::is_same<MaskArray,bool>::value, T>::type
|
|
reduce_sum(const InArray& A, const MaskArray& mask, bool openmp=true) {
|
|
typedef typename boost::remove_reference<InArray>::type PureArray;
|
|
if (openmp) {
|
|
OperatorReduction<InArray::dimensionality,T,true> op;
|
|
return op.template reduce(A, mask);
|
|
} else {
|
|
OperatorReduction<InArray::dimensionality,T,false> op;
|
|
return op.template reduce(A, mask);
|
|
}
|
|
}
|
|
|
|
struct noMaskDummy {
|
|
template<typename... Args>
|
|
bool operator()(Args&&... t) const {
|
|
return true;
|
|
}
|
|
};
|
|
|
|
template<typename T,typename InArray>
|
|
T reduce_sum(const InArray& A, bool openmp=true) {
|
|
static_assert(DetectShaped<InArray>::Shaped, "Array has no shape");
|
|
return reduce_sum<T>(A, b_va_fused<bool,InArray::dimensionality>(noMaskDummy()), openmp);
|
|
}
|
|
|
|
template<typename T,typename InArray>
|
|
T reduce_min(const InArray& A, bool openmp=true) {
|
|
static_assert(DetectShaped<InArray>::Shaped, "Array has no shape");
|
|
return reduce_min<T>(A, b_va_fused<bool,InArray::dimensionality>(noMaskDummy()), openmp);
|
|
}
|
|
|
|
template<typename T,typename InArray>
|
|
T reduce_max(const InArray& A, bool openmp=true) {
|
|
static_assert(DetectShaped<InArray>::Shaped, "Array has no shape");
|
|
return reduce_max<T>(A, b_va_fused<bool,InArray::dimensionality>(noMaskDummy()), openmp);
|
|
}
|
|
|
|
|
|
}
|
|
|
|
using FUSE_details::reduce_sum;
|
|
using FUSE_details::reduce_min;
|
|
using FUSE_details::reduce_max;
|
|
};
|
|
|
|
#endif
|