/*+ ARES/HADES/BORG Package -- ./libLSS/tools/fused_reduce.hpp Copyright (C) 2014-2020 Guilhem Lavaux Copyright (C) 2009-2020 Jens Jasche Additional contributions from: Guilhem Lavaux (2023) +*/ #ifndef __LIBLSS_FUSED_REDUCTION_HPP #define __LIBLSS_FUSED_REDUCTION_HPP #include #include // This include file defines the reduction operation on // virtual arrays as defined by fused_array.hpp // The goal is to be able to combine virtual arrays and // apply parallel reduction operation on it. // A straightforward example is given in test_fuse_reduce.cpp // // r = LibLSS::reduce_sum( // b_fused_idx ( // [](int i, int j)->int {return i*j;}, // extents[N][M] ) // ); // // Which computes a \sum_{i=0,j=0}^{i=N-1,j=M-1} i*j, with openmp. // However arrays can be folded in that. // // r = LibLSS::reduce_sum( // b_fused_idx ( // [](int i, int j)->int {return i*j;}, // extents[N][M] ) // ); // namespace LibLSS { namespace FUSE_details { template struct OperatorReduction {}; // ====================== // MAX OPERATOR REDUCTION template struct MaxOperatorReduction {}; template struct MaxOperatorReduction { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; T r = -std::numeric_limits::infinity(); for (std::size_t i = s; i < s+e; i++) { MaxOperatorReduction op; r = std::max(r, op.reduce(a[i], m[i])); } return r; } }; template struct MaxOperatorReduction { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; typename boost::remove_reference::type const *a_ptr = &a; T r = -std::numeric_limits::infinity(); #pragma omp parallel for reduction(max:r) for (std::size_t i = s; i < s+e; i++) { MaxOperatorReduction op; r = std::max(r, op.reduce((*a_ptr)[i], m[i])); } return r; } }; template struct MaxOperatorReduction<1,T,false> { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; T r = -std::numeric_limits::infinity(); for (std::size_t i = s; i < s+e; i++) { if (m[i]) r = std::max(r, T(a[i])); } return r; } }; template struct MaxOperatorReduction<1,T,true> { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; typename boost::remove_reference::type const *a_ptr = &a; T r = -std::numeric_limits::infinity(); #pragma omp parallel for reduction(max:r) for (std::size_t i = s; i < s+e; i++) { if (m[i]) r = std::max(r, T((*a_ptr)[i])); } return r; } }; template struct MaxOperatorReduction<0,T,parallel> { template static T reduce(const A& a) { return a; } }; // =============================== // // ====================== // MIN OPERATOR REDUCTION template struct MinOperatorReduction {}; template struct MinOperatorReduction { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; T r = std::numeric_limits::infinity(); for (std::size_t i = s; i < s+e; i++) { MinOperatorReduction op; r = std::min(r, op.reduce(a[i], m[i])); } return r; } }; template struct MinOperatorReduction { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; typename boost::remove_reference::type const *a_ptr = &a; T r = std::numeric_limits::infinity(); #pragma omp parallel for reduction(min:r) for (std::size_t i = s; i < s+e; i++) { MinOperatorReduction op; r = std::min(r, op.reduce((*a_ptr)[i], m[i])); } return r; } }; template struct MinOperatorReduction<1,T,false> { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; T r = std::numeric_limits::infinity(); for (std::size_t i = s; i < s+e; i++) { if (m[i]) r = std::min(r, T(a[i])); } return r; } }; template struct MinOperatorReduction<1,T,true> { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; typename boost::remove_reference::type const *a_ptr = &a; T r = std::numeric_limits::infinity(); #pragma omp parallel for reduction(min:r) for (std::size_t i = s; i < s+e; i++) { if (m[i]) r = std::min(r, T((*a_ptr)[i])); } return r; } }; template struct MinOperatorReduction<0,T,parallel> { template static T reduce(const A& a) { return a; } }; // =============================== template struct OperatorReduction { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; T r = 0; for (std::size_t i = s; i < s+e; i++) { OperatorReduction op; r += op.reduce(a[i], m[i]); } return r; } }; template struct OperatorReduction { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; typename boost::remove_reference::type const *a_ptr = &a; T r = 0; #pragma omp parallel for reduction(+:r) for (std::size_t i = s; i < s+e; i++) { OperatorReduction op; r += op.reduce((*a_ptr)[i], m[i]); } return r; } }; template struct OperatorReduction<1,T,false> { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; T r = 0; for (std::size_t i = s; i < s+e; i++) { if (m[i]) r += a[i]; } return r; } }; template struct OperatorReduction<1,T,true> { template static T reduce(const A& a, const M& m) { std::size_t s = a.index_bases()[0], e = a.shape()[0]; typename boost::remove_reference::type const *a_ptr = &a; T r = 0; #pragma omp parallel for reduction(+:r) for (std::size_t i = s; i < s+e; i++) { if (m[i]) r += (*a_ptr)[i]; } return r; } }; template struct OperatorReduction<0,T,false> { template static T reduce(const A& a) { return a; } }; template struct OperatorReduction<0,T,true> { template static T reduce(const A& a) { return a; } }; template typename std::enable_if::value, T>::type reduce_min(const InArray& A, const MaskArray& mask, bool openmp=true) { typedef typename boost::remove_reference::type PureArray; if (openmp) { MinOperatorReduction op; return op.template reduce(A, mask); } else { MinOperatorReduction op; return op.template reduce(A, mask); } } template typename std::enable_if::value, T>::type reduce_max(const InArray& A, const MaskArray& mask, bool openmp=true) { typedef typename boost::remove_reference::type PureArray; if (openmp) { MaxOperatorReduction op; return op.template reduce(A, mask); } else { MaxOperatorReduction op; return op.template reduce(A, mask); } } template typename std::enable_if::value, T>::type reduce_sum(const InArray& A, const MaskArray& mask, bool openmp=true) { typedef typename boost::remove_reference::type PureArray; if (openmp) { OperatorReduction op; return op.template reduce(A, mask); } else { OperatorReduction op; return op.template reduce(A, mask); } } struct noMaskDummy { template bool operator()(Args&&... t) const { return true; } }; template T reduce_sum(const InArray& A, bool openmp=true) { static_assert(DetectShaped::Shaped, "Array has no shape"); return reduce_sum(A, b_va_fused(noMaskDummy()), openmp); } template T reduce_min(const InArray& A, bool openmp=true) { static_assert(DetectShaped::Shaped, "Array has no shape"); return reduce_min(A, b_va_fused(noMaskDummy()), openmp); } template T reduce_max(const InArray& A, bool openmp=true) { static_assert(DetectShaped::Shaped, "Array has no shape"); return reduce_max(A, b_va_fused(noMaskDummy()), openmp); } } using FUSE_details::reduce_sum; using FUSE_details::reduce_min; using FUSE_details::reduce_max; }; #endif