optimize.hh 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra - Naive optimization pass over expression templates.
  3. // (c) Daniel Llorens - 2015-2023
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #pragma once
  9. #include "small.hh"
  10. namespace ra {
  11. template <class E> constexpr decltype(auto) optimize(E && e) { return std::forward<E>(e); }
  12. // FIXME only reduces iota exprs as op'ed on in operators.hh, not a tree like WithLen does.
  13. #if RA_DO_OPT_IOTA==1
  14. // TODO maybe don't opt iota(int)*real -> iota(real) since a+a+... != n*a
  15. template <class X> constexpr bool iota_op = ra::is_zero_or_scalar<X> && std::is_arithmetic_v<value_t<X>>;
  16. // TODO need something to handle the & variants...
  17. #define ITEM(i) std::get<(i)>(e.t)
  18. // Make ct len iotas if the argument has ct len.
  19. template <class E>
  20. constexpr auto
  21. len0(E && e)
  22. {
  23. if constexpr (DIM_ANY==e.len_s(0)) {
  24. return e.len(0);
  25. } else {
  26. return int_c<e.len_s(0)>();
  27. }
  28. }
  29. // --------------
  30. // plus
  31. // --------------
  32. template <class I, class J> requires (is_iota<I> && iota_op<J>)
  33. constexpr auto
  34. optimize(Expr<std::plus<>, std::tuple<I, J>> && e)
  35. {
  36. return iota(len0(ITEM(0)), ITEM(0).i+ITEM(1), ITEM(0).gets());
  37. }
  38. template <class I, class J> requires (iota_op<I> && is_iota<J>)
  39. constexpr auto
  40. optimize(Expr<std::plus<>, std::tuple<I, J>> && e)
  41. {
  42. return iota(len0(ITEM(1)), ITEM(0)+ITEM(1).i, ITEM(1).gets());
  43. }
  44. template <class I, class J> requires (is_iota<I> && is_iota<J>)
  45. constexpr auto
  46. optimize(Expr<std::plus<>, std::tuple<I, J>> && e)
  47. {
  48. return iota(len0(e), ITEM(0).i+ITEM(1).i, ITEM(0).gets()+ITEM(1).gets());
  49. }
  50. // --------------
  51. // minus
  52. // --------------
  53. template <class I, class J> requires (is_iota<I> && iota_op<J>)
  54. constexpr auto
  55. optimize(Expr<std::minus<>, std::tuple<I, J>> && e)
  56. {
  57. return iota(len0(ITEM(0)), ITEM(0).i-ITEM(1), ITEM(0).gets());
  58. }
  59. template <class I, class J> requires (iota_op<I> && is_iota<J>)
  60. constexpr auto
  61. optimize(Expr<std::minus<>, std::tuple<I, J>> && e)
  62. {
  63. return iota(len0(ITEM(1)), ITEM(0)-ITEM(1).i, -ITEM(1).gets());
  64. }
  65. template <class I, class J> requires (is_iota<I> && is_iota<J>)
  66. constexpr auto
  67. optimize(Expr<std::minus<>, std::tuple<I, J>> && e)
  68. {
  69. return iota(len0(e), ITEM(0).i-ITEM(1).i, ITEM(0).gets()-ITEM(1).gets());
  70. }
  71. // --------------
  72. // times
  73. // --------------
  74. template <class I, class J> requires (is_iota<I> && iota_op<J>)
  75. constexpr auto
  76. optimize(Expr<std::multiplies<>, std::tuple<I, J>> && e)
  77. {
  78. return iota(len0(ITEM(0)), ITEM(0).i*ITEM(1), ITEM(0).gets()*ITEM(1));
  79. }
  80. template <class I, class J> requires (iota_op<I> && is_iota<J>)
  81. constexpr auto
  82. optimize(Expr<std::multiplies<>, std::tuple<I, J>> && e)
  83. {
  84. return iota(len0(ITEM(1)), ITEM(0)*ITEM(1).i, ITEM(0)*ITEM(1).gets());
  85. }
  86. // --------------
  87. // negate
  88. // --------------
  89. template <class I> requires (is_iota<I>)
  90. constexpr auto
  91. optimize(Expr<std::negate<>, std::tuple<I>> && e)
  92. {
  93. return iota(len0(ITEM(0)), -ITEM(0).i, -ITEM(0).gets());
  94. }
  95. #endif // RA_DO_OPT_IOTA
  96. #if RA_DO_OPT_SMALLVECTOR==1
  97. // FIXME find a way to peel qualifiers from parameter type of start(), to ignore SmallBase<SmallArray> vs SmallBase<SmallView> or const vs nonconst.
  98. template <class A, class T, dim_t N> constexpr bool match_smallvector =
  99. std::is_same_v<std::decay_t<A>, typename ra::Small<T, N>::template iterator<0>>
  100. || std::is_same_v<std::decay_t<A>, typename ra::Small<T, N>::template const_iterator<0>>;
  101. static_assert(match_smallvector<ra::CellSmall<ra::SmallBase<ra::SmallView, double, mp::int_list<4>, mp::int_list<1>>, 0>,
  102. double, 4>);
  103. #define RA_OPT_SMALLVECTOR_OP(OP, NAME, T, N) \
  104. template <class A, class B> \
  105. requires (match_smallvector<A, T, N> && match_smallvector<B, T, N>) \
  106. constexpr auto \
  107. optimize(ra::Expr<NAME, std::tuple<A, B>> && e) \
  108. { \
  109. alignas (alignof(extvector<T, N>)) ra::Small<T, N> val; \
  110. *(extvector<T, N> *)(&val) = *(extvector<T, N> *)((ITEM(0).c.p)) OP *(extvector<T, N> *)((ITEM(1).c.p)); \
  111. return val; \
  112. }
  113. #define RA_OPT_SMALLVECTOR_OP_FUNS(T, N) \
  114. static_assert(0==alignof(ra::Small<T, N>) % alignof(extvector<T, N>)); \
  115. RA_OPT_SMALLVECTOR_OP(+, std::plus<>, T, N) \
  116. RA_OPT_SMALLVECTOR_OP(-, std::minus<>, T, N) \
  117. RA_OPT_SMALLVECTOR_OP(/, std::divides<>, T, N) \
  118. RA_OPT_SMALLVECTOR_OP(*, std::multiplies<>, T, N)
  119. #define RA_OPT_SMALLVECTOR_OP_SIZES(T) \
  120. RA_OPT_SMALLVECTOR_OP_FUNS(T, 2) \
  121. RA_OPT_SMALLVECTOR_OP_FUNS(T, 4) \
  122. RA_OPT_SMALLVECTOR_OP_FUNS(T, 8)
  123. RA_OPT_SMALLVECTOR_OP_SIZES(double)
  124. RA_OPT_SMALLVECTOR_OP_SIZES(float)
  125. #undef RA_OPT_SMALLVECTOR_OP_SIZES
  126. #undef RA_OPT_SMALLVECTOR_OP_FUNS
  127. #undef RA_OPT_SMALLVECTOR_OP_OP
  128. #endif // RA_DO_OPT_SMALLVECTOR
  129. #undef ITEM
  130. } // namespace ra