ra-optimize.H 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. // (c) Daniel Llorens - 2015
  2. // This library is free software; you can redistribute it and/or modify it under
  3. // the terms of the GNU Lesser General Public License as published by the Free
  4. // Software Foundation; either version 3 of the License, or (at your option) any
  5. // later version.
  6. #ifndef RA_OPTIMIZE_H
  7. #define RA_OPTIMIZE_H
  8. /// @file ra-optimize.H
/// @brief Naive optimization pass over expression templates (ETs).
#include <cstring>
#include "ra/ra-expr.H"
#include "ra/ra-small.H"
// Fold arithmetic on Iota operands into a new Iota. There is no real downside to enabling this.
  13. #ifndef RA_OPTIMIZE_IOTA
  14. #define RA_OPTIMIZE_IOTA 1
  15. #endif
  16. // benchmark shows it's not good by default; probably requires optimizing also +=, etc.
  17. #ifndef RA_OPTIMIZE_SMALLVECTOR
  18. #define RA_OPTIMIZE_SMALLVECTOR 0
  19. #endif
  20. namespace ra {
  21. template <class E, int a=0> inline decltype(auto) optimize(E && e) { return std::forward<E>(e); }
  22. // These are named to match & transform Expr<OPNAME, ...> later on, and used by operator+ etc.
  23. #define DEFINE_NAMED_BINARY_OP(OP, OPNAME) \
  24. struct OPNAME \
  25. { \
  26. template <class A, class B> \
  27. decltype(auto) operator()(A && a, B && b) { return std::forward<A>(a) OP std::forward<B>(b); } \
  28. };
  29. DEFINE_NAMED_BINARY_OP(+, plus)
  30. DEFINE_NAMED_BINARY_OP(-, minus)
  31. DEFINE_NAMED_BINARY_OP(*, times)
  32. DEFINE_NAMED_BINARY_OP(/, slash)
  33. #undef DEFINE_NAMED_BINARY_OP
  34. // @TODO need something to handle the & variants...
// ITEM(i) is shorthand for element i of the operand tuple e.t. It only makes sense inside
// a function that has a variable named e with a tuple member t. The macro is #undef'd
// again near the end of this header.
#define ITEM(i) std::get<(i)>(e.t)
  36. #if RA_OPTIMIZE_IOTA==1
// IS_IOTA(I) names a std::is_same<> type that is true when decay_t<I> is exactly
// Iota<typename I::T>. Because it spells typename I::T, I must be a class type with a
// nested type T (not a reference to one); for any other I the expression is ill-formed.
#define IS_IOTA(I) std::is_same<std::decay_t<I>, Iota<typename I::T> >
// Trait selecting the non-Iota operand accepted by the Iota optimizations below.
// value is true when X satisfies ra::is_zero_or_scalar and the element obtained by
// dereferencing start(X).flat() has an integer type according to std::numeric_limits.
// The second condition is to disallow Iota(3)*float, since Iota doesn't support float yet. @TODO Eventually allow.
template <class X>
struct iota_operand
{
constexpr static bool value = ra::is_zero_or_scalar<X>::value
&& std::numeric_limits<std::decay_t<decltype(*(start(std::declval<X>()).flat()))> >::is_integer;
};
  45. // --------------
  46. // plus
  47. // --------------
  48. template <class I, class J, enableif_<mp::And<IS_IOTA(I), iota_operand<J> >, int> =0>
  49. inline constexpr auto optimize(Expr<ra::plus, std::tuple<I, J> > && e)
  50. {
  51. return Iota<decltype(ITEM(0).org_+ITEM(1))> { ITEM(0).size_, ITEM(0).org_+ITEM(1), ITEM(0).stride_ };
  52. }
  53. template <class I, class J, enableif_<mp::And<iota_operand<I>, IS_IOTA(J)>, int> =0>
  54. inline constexpr auto optimize(Expr<ra::plus, std::tuple<I, J> > && e)
  55. {
  56. return Iota<decltype(ITEM(0)+ITEM(1).org_)> { ITEM(1).size_, ITEM(0)+ITEM(1).org_, ITEM(1).stride_ };
  57. }
  58. template <class I, class J, enableif_<mp::And<IS_IOTA(I), IS_IOTA(J)>, int> =0>
  59. inline constexpr auto optimize(Expr<ra::plus, std::tuple<I, J> > && e)
  60. {
  61. assert(ITEM(0).size_==ITEM(1).size_ && "size mismatch");
  62. return Iota<decltype(ITEM(0).org_+ITEM(1).org_)> { ITEM(0).size_, ITEM(0).org_+ITEM(1).org_, ITEM(0).stride_+ITEM(1).stride_ };
  63. }
  64. // --------------
  65. // minus
  66. // --------------
  67. template <class I, class J, enableif_<mp::And<IS_IOTA(I), iota_operand<J> >, int> =0>
  68. inline constexpr auto optimize(Expr<ra::minus, std::tuple<I, J> > && e)
  69. {
  70. return Iota<decltype(ITEM(0).org_-ITEM(1))> { ITEM(0).size_, ITEM(0).org_-ITEM(1), ITEM(0).stride_ };
  71. }
  72. template <class I, class J, enableif_<mp::And<iota_operand<I>, IS_IOTA(J)>, int> =0>
  73. inline constexpr auto optimize(Expr<ra::minus, std::tuple<I, J> > && e)
  74. {
  75. return Iota<decltype(ITEM(0)-ITEM(1).org_)> { ITEM(1).size_, ITEM(0)-ITEM(1).org_, -ITEM(1).stride_ };
  76. }
  77. template <class I, class J, enableif_<mp::And<IS_IOTA(I), IS_IOTA(J)>, int> =0>
  78. inline constexpr auto optimize(Expr<ra::minus, std::tuple<I, J> > && e)
  79. {
  80. assert(ITEM(0).size_==ITEM(1).size_ && "size mismatch");
  81. return Iota<decltype(ITEM(0).org_-ITEM(1).org_)> { ITEM(0).size_, ITEM(0).org_-ITEM(1).org_, ITEM(0).stride_-ITEM(1).stride_ };
  82. }
  83. // --------------
  84. // times
  85. // --------------
  86. template <class I, class J, enableif_<mp::And<IS_IOTA(I), iota_operand<J> >, int> =0>
  87. inline constexpr auto optimize(Expr<ra::times, std::tuple<I, J> > && e)
  88. {
  89. return Iota<decltype(ITEM(0).org_*ITEM(1))> { ITEM(0).size_, ITEM(0).org_*ITEM(1), ITEM(0).stride_*ITEM(1) };
  90. }
  91. template <class I, class J, enableif_<mp::And<iota_operand<I>, IS_IOTA(J)>, int> =0>
  92. inline constexpr auto optimize(Expr<ra::times, std::tuple<I, J> > && e)
  93. {
  94. return Iota<decltype(ITEM(0)*ITEM(1).org_)> { ITEM(1).size_, ITEM(0)*ITEM(1).org_, ITEM(0)*ITEM(1).stride_ };
  95. }
  96. #undef IS_IOTA
  97. #endif // RA_OPTIMIZE_IOTA
  98. #if RA_OPTIMIZE_SMALLVECTOR==1
  99. template <class T, int N> using extvector __attribute__((vector_size(N*sizeof(T)))) = T;
  100. inline auto
  101. optimize(ra::Expr<ra::plus,
  102. std::tuple<decltype(start(std::declval<ra::Small<double, 4> >())),
  103. decltype(start(std::declval<ra::Small<double, 4> >()))> > && e)
  104. {
  105. ra::Small<double, 4> val;
  106. (extvector<double, 4> &)(val) = ((extvector<double, 4> &)(*(ITEM(0).p)) + (extvector<double, 4> &)(*(ITEM(1).p)));
  107. return val;
  108. }
  109. inline auto
  110. optimize(ra::Expr<ra::times,
  111. std::tuple<decltype(start(std::declval<ra::Small<double, 4> >())),
  112. decltype(start(std::declval<ra::Small<double, 4> >()))> > && e)
  113. {
  114. ra::Small<double, 4> val;
  115. (extvector<double, 4> &)(val) = ((extvector<double, 4> &)(*(ITEM(0).p)) * (extvector<double, 4> &)(*(ITEM(1).p)));
  116. return val;
  117. }
  118. #endif // RA_OPTIMIZE_SMALLVECTOR
  119. #undef ITEM
  120. } // namespace ra
  121. #endif // RA_OPTIMIZE_H