bench-from.cc 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra/bench - Selection ops in ra::
  3. // (c) Daniel Llorens - 2015, 2017
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include <string>
  11. #include "ra/test.hh"
  12. #include "ra/bench.hh"
  13. using std::cout, std::endl, std::flush, ra::TestRecorder;
  14. using real = double;
  15. int main()
  16. {
  17. TestRecorder tr(cout);
  18. cout.precision(4);
  19. tr.section("rank1(rank1)");
  20. {
  21. auto rank1_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  22. {
  23. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  24. using Array1 = std::decay_t<decltype(A_)>;
  25. Array1 A = ra::iota(Asize);
  26. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  27. Array1 B({Isize}, 0);
  28. auto II = I.data();
  29. auto AA = A.data();
  30. auto BB = B.data();
  31. Benchmark bm { N, 3 };
  32. auto report = [&](std::string const & tag, auto && bv)
  33. {
  34. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  35. .test_eq(ra::iota(Isize)*Istep, B);
  36. };
  37. report("indexing on raw pointers",
  38. bm.run([&] {
  39. for (int i=0; i<Isize; ++i) {
  40. BB[i] = AA[II[i]];
  41. }
  42. }));
  43. report("vectorized selection",
  44. bm.run([&] {
  45. B = A(I);
  46. }));
  47. report("write out the indexing loop",
  48. bm.run([&] {
  49. for_each([&A](auto & b, auto i) { b = A(i); }, B, I);
  50. }));
  51. report("loop on scalar selection",
  52. bm.run([&]() {
  53. for (int i=0; i<Isize; ++i) {
  54. B(i) = A(I(i));
  55. }
  56. }));
  57. };
  58. tr.section("fixed rank");
  59. rank1_test(ra::Unique<real, 1>(), 10000, 500, 20, 5000);
  60. rank1_test(ra::Unique<real, 1>(), 1000, 50, 20, 10*5000);
  61. rank1_test(ra::Unique<real, 1>(), 100, 5, 20, 100*5000);
  62. rank1_test(ra::Unique<real, 1>(), 10000, 500, 2, 5000);
  63. rank1_test(ra::Unique<real, 1>(), 1000, 50, 2, 10*5000);
  64. rank1_test(ra::Unique<real, 1>(), 100, 5, 2, 100*5000);
  65. tr.section("var rank");
  66. rank1_test(ra::Unique<real>(), 10000, 500, 20, 5000);
  67. rank1_test(ra::Unique<real>(), 1000, 50, 20, 10*5000);
  68. rank1_test(ra::Unique<real>(), 100, 5, 20, 100*5000);
  69. rank1_test(ra::Unique<real>(), 10000, 500, 2, 5000);
  70. rank1_test(ra::Unique<real>(), 1000, 50, 2, 10*5000);
  71. rank1_test(ra::Unique<real>(), 100, 5, 2, 100*5000);
  72. }
  73. tr.section("rank2(rank1, rank1)");
  74. {
  75. auto rank1_11_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  76. {
  77. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  78. using Array2 = std::decay_t<decltype(A_)>;
  79. Array2 A({Asize, Asize}, ra::_0 + ra::_1);
  80. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  81. Array2 B({Isize, Isize}, 0);
  82. auto II = I.data();
  83. auto AA = A.data();
  84. auto BB = B.data();
  85. Benchmark bm { N, 3 };
  86. auto report = [&](std::string const &tag, auto && bv)
  87. {
  88. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  89. .test_eq(Istep*(ra::_0 + ra::_1), B);
  90. };
  91. report("2D indexing on raw pointers",
  92. bm.run([&] {
  93. for (int i=0; i<Isize; ++i) {
  94. for (int j=0; j<Isize; ++j) {
  95. BB[i*Isize + j] = AA[II[i]*Asize + II[j]];
  96. }
  97. }
  98. }));
  99. report("vectorized selection",
  100. bm.run([&] {
  101. B = A(I, I);
  102. }));
  103. };
  104. tr.section("fixed rank");
  105. rank1_11_test(ra::Unique<real, 2>(), 1000, 50, 20, 5000);
  106. rank1_11_test(ra::Unique<real, 2>(), 100, 5, 20, 10*10*5000);
  107. rank1_11_test(ra::Unique<real, 2>(), 1000, 50, 2, 5000);
  108. rank1_11_test(ra::Unique<real, 2>(), 100, 5, 2, 10*10*5000);
  109. rank1_11_test(ra::Unique<real, 2>(), 10, 5, 2, 10*10*5000);
  110. tr.section("var rank");
  111. rank1_11_test(ra::Unique<real>(), 1000, 50, 20, 5000);
  112. rank1_11_test(ra::Unique<real>(), 100, 5, 20, 10*10*5000);
  113. rank1_11_test(ra::Unique<real>(), 1000, 50, 2, 5000);
  114. rank1_11_test(ra::Unique<real>(), 100, 5, 2, 10*10*5000);
  115. rank1_11_test(ra::Unique<real>(), 10, 5, 2, 10*10*5000);
  116. }
  117. tr.section("rank3(rank1, rank1, rank1)");
  118. {
  119. auto rank1_111_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  120. {
  121. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  122. using Array3 = std::decay_t<decltype(A_)>;
  123. Array3 A({Asize, Asize, Asize}, 10000*ra::_0 + 100*ra::_1 + 1*ra::_2);
  124. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  125. Array3 B({Isize, Isize, Isize}, 0);
  126. auto II = I.data();
  127. auto AA = A.data();
  128. auto BB = B.data();
  129. Benchmark bm { N, 3 };
  130. auto report = [&](std::string const &tag, auto && bv)
  131. {
  132. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  133. .test_eq(Istep*(10000*ra::_0 + 100*ra::_1 + 1*ra::_2), B);
  134. };
  135. report("3D indexing on raw pointers",
  136. bm.run([&] {
  137. for (int i=0; i<Isize; ++i) {
  138. for (int j=0; j<Isize; ++j) {
  139. for (int k=0; k<Isize; ++k) {
  140. BB[k+Isize*(j+Isize*i)] = AA[II[k]+Asize*(II[j]+Asize*II[i])];
  141. }
  142. }
  143. }
  144. }));
  145. report("vectorized selection",
  146. bm.run([&] {
  147. B = A(I, I, I);
  148. }));
  149. };
  150. tr.section("fixed rank");
  151. rank1_111_test(ra::Unique<real, 3>(), 40, 20, 2, 2000);
  152. rank1_111_test(ra::Unique<real, 3>(), 100, 5, 20, 4*4*4*2000);
  153. rank1_111_test(ra::Unique<real, 3>(), 10, 5, 2, 4*4*4*2000);
  154. }
  155. tr.section("rank4(rank1, rank1, rank1, rank1)");
  156. {
  157. auto rank1_1111_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  158. {
  159. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  160. using Array4 = std::decay_t<decltype(A_)>;
  161. ra::Unique<real, 4> A(ra::Small<int, 4>(Asize), 1000000*ra::_0 + 10000*ra::_1 + 100*ra::_2 + 1*ra::_3);
  162. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  163. Array4 B(ra::Small<int, 4>(Isize), 0);
  164. auto II = I.data();
  165. auto AA = A.data();
  166. auto BB = B.data();
  167. Benchmark bm { N, 3 };
  168. auto report = [&](std::string const &tag, auto && bv)
  169. {
  170. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  171. .test_eq(Istep*(1000000*ra::_0 + 10000*ra::_1 + 100*ra::_2 + 1*ra::_3), B);
  172. };
  173. report("3D indexing on raw pointers",
  174. bm.run([&] {
  175. for (int i=0; i<Isize; ++i) {
  176. for (int j=0; j<Isize; ++j) {
  177. for (int k=0; k<Isize; ++k) {
  178. for (int l=0; l<Isize; ++l) {
  179. BB[l+Isize*(k+Isize*(j+Isize*i))] = AA[II[l]+Asize*(II[k]+Asize*(II[j]+Asize*II[i]))];
  180. }
  181. }
  182. }
  183. }
  184. }));
  185. report("vectorized selection",
  186. bm.run([&] {
  187. B = A(I, I, I, I);
  188. }));
  189. report("slice one axis at a time", // TODO one way A(i, i, i, i) could work
  190. bm.run([&] {
  191. for (int i=0; i<Isize; ++i) {
  192. for (int j=0; j<Isize; ++j) {
  193. for (int k=0; k<Isize; ++k) {
  194. B(i, j, k) = A(I[i], I[j], I[k])(I);
  195. }
  196. }
  197. }
  198. }));
  199. };
  200. tr.section("fixed rank");
  201. rank1_1111_test(ra::Unique<real, 4>(), 40, 20, 2, 100);
  202. rank1_1111_test(ra::Unique<real, 4>(), 10, 5, 2, 4*4*4*4*100);
  203. }
  204. return tr.summary();
  205. }