optimize.hh 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra - Naive optimization pass over expression templates.
  3. // (c) Daniel Llorens - 2015-2023
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #pragma once
  9. #include "small.hh"
  10. namespace ra {
  11. template <class E> constexpr decltype(auto) optimize(E && e) { return std::forward<E>(e); }
  12. // These are named to match & transform Expr<OPNAME, ...> later on, and used by operator+ etc.
  13. #define DEFINE_NAMED_BINARY_OP(OP, OPNAME) \
  14. struct OPNAME \
  15. { \
  16. template <class A, class B> \
  17. constexpr decltype(auto) \
  18. operator()(A && a, B && b) const { return std::forward<A>(a) OP std::forward<B>(b); } \
  19. };
  20. // FIXME don't know why gcc 12.1 flags this. See also Expr::Flat
  21. #pragma GCC diagnostic push
  22. #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
  23. DEFINE_NAMED_BINARY_OP(+, plus)
  24. DEFINE_NAMED_BINARY_OP(-, minus)
  25. DEFINE_NAMED_BINARY_OP(*, times)
  26. DEFINE_NAMED_BINARY_OP(/, slash)
  27. #undef DEFINE_NAMED_BINARY_OP
  28. #pragma GCC diagnostic pop
  29. // TODO need something to handle the & variants...
  30. #define ITEM(i) std::get<(i)>(e.t)
  31. #if RA_DO_OPT_IOTA==1
  32. // TODO iota(int)*real is not opt to iota(real) since a+a+... != n*a.
  33. template <class X> constexpr bool iota_op = ra::is_zero_or_scalar<X> && std::numeric_limits<value_t<X>>::is_integer;
  34. // --------------
  35. // plus
  36. // --------------
  37. template <class I, class J> requires (is_iota<I> && iota_op<J>)
  38. constexpr auto
  39. optimize(Expr<ra::plus, std::tuple<I, J>> && e)
  40. {
  41. return ITEM(0).set(ITEM(0).i + ITEM(1));
  42. }
  43. template <class I, class J> requires (iota_op<I> && is_iota<J>)
  44. constexpr auto
  45. optimize(Expr<ra::plus, std::tuple<I, J>> && e)
  46. {
  47. return ITEM(1).set(ITEM(1).i + ITEM(0));
  48. }
  49. template <class I, class J> requires (is_iota<I> && is_iota<J>)
  50. constexpr auto
  51. optimize(Expr<ra::plus, std::tuple<I, J>> && e)
  52. {
  53. return iota(e.len(0), ITEM(0).i+ITEM(1).i, ITEM(0).gets()+ITEM(1).gets());
  54. }
  55. // --------------
  56. // minus
  57. // --------------
  58. template <class I, class J> requires (is_iota<I> && iota_op<J>)
  59. constexpr auto
  60. optimize(Expr<ra::minus, std::tuple<I, J>> && e)
  61. {
  62. return ITEM(0).set(ITEM(0).i - ITEM(1));
  63. }
  64. template <class I, class J> requires (iota_op<I> && is_iota<J>)
  65. constexpr auto
  66. optimize(Expr<ra::minus, std::tuple<I, J>> && e)
  67. {
  68. return iota(e.len(0), ITEM(0)-ITEM(1).i, -ITEM(1).gets());
  69. }
  70. template <class I, class J> requires (is_iota<I> && is_iota<J>)
  71. constexpr auto
  72. optimize(Expr<ra::minus, std::tuple<I, J>> && e)
  73. {
  74. return iota(e.len(0), ITEM(0).i-ITEM(1).i, ITEM(0).gets()-ITEM(1).gets());
  75. }
  76. // --------------
  77. // times
  78. // --------------
  79. template <class I, class J> requires (is_iota<I> && iota_op<J>)
  80. constexpr auto
  81. optimize(Expr<ra::times, std::tuple<I, J>> && e)
  82. {
  83. return iota(e.len(0), ITEM(0).i*ITEM(1), ITEM(0).gets()*ITEM(1));
  84. }
  85. template <class I, class J> requires (iota_op<I> && is_iota<J>)
  86. constexpr auto
  87. optimize(Expr<ra::times, std::tuple<I, J>> && e)
  88. {
  89. return iota(e.len(0), ITEM(0)*ITEM(1).i, ITEM(0)*ITEM(1).gets());
  90. }
  91. #endif // RA_DO_OPT_IOTA
  92. #if RA_DO_OPT_SMALLVECTOR==1
  93. // FIXME find a way to peel qualifiers from parameter type of start(), to ignore SmallBase<SmallArray> vs SmallBase<SmallView> or const vs nonconst.
  94. template <class A, class T, dim_t N> constexpr bool match_smallvector =
  95. std::is_same_v<std::decay_t<A>, typename ra::Small<T, N>::template iterator<0>>
  96. || std::is_same_v<std::decay_t<A>, typename ra::Small<T, N>::template const_iterator<0>>;
  97. static_assert(match_smallvector<ra::CellSmall<ra::SmallBase<ra::SmallView, double, mp::int_list<4>, mp::int_list<1>>, 0>,
  98. double, 4>);
  99. #define RA_OPT_SMALLVECTOR_OP(OP, NAME, T, N) \
  100. template <class A, class B> \
  101. requires (match_smallvector<A, T, N> && match_smallvector<B, T, N>) \
  102. constexpr auto \
  103. optimize(ra::Expr<NAME, std::tuple<A, B>> && e) \
  104. { \
  105. alignas (alignof(extvector<T, N>)) ra::Small<T, N> val; \
  106. *(extvector<T, N> *)(&val) = *(extvector<T, N> *)((ITEM(0).c.p)) OP *(extvector<T, N> *)((ITEM(1).c.p)); \
  107. return val; \
  108. }
  109. #define RA_OPT_SMALLVECTOR_OP_FUNS(T, N) \
  110. static_assert(0==alignof(ra::Small<T, N>) % alignof(extvector<T, N>)); \
  111. RA_OPT_SMALLVECTOR_OP(+, ra::plus, T, N) \
  112. RA_OPT_SMALLVECTOR_OP(-, ra::minus, T, N) \
  113. RA_OPT_SMALLVECTOR_OP(/, ra::slash, T, N) \
  114. RA_OPT_SMALLVECTOR_OP(*, ra::times, T, N)
  115. #define RA_OPT_SMALLVECTOR_OP_SIZES(T) \
  116. RA_OPT_SMALLVECTOR_OP_FUNS(T, 2) \
  117. RA_OPT_SMALLVECTOR_OP_FUNS(T, 4) \
  118. RA_OPT_SMALLVECTOR_OP_FUNS(T, 8)
  119. RA_OPT_SMALLVECTOR_OP_SIZES(double)
  120. RA_OPT_SMALLVECTOR_OP_SIZES(float)
  121. #undef RA_OPT_SMALLVECTOR_OP_SIZES
  122. #undef RA_OPT_SMALLVECTOR_OP_FUNS
  123. #undef RA_OPT_SMALLVECTOR_OP_OP
  124. #endif // RA_DO_OPT_SMALLVECTOR
  125. #undef ITEM
  126. } // namespace ra