xatomic.hpp

/* Declarations for atomic operations.

   This file is part of xrcu.

   xrcu is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
#ifndef __XRCU_XATOMIC_HPP__
#define __XRCU_XATOMIC_HPP__   1

#include <cstdint>
/*
 * This file defines an interface for atomic operations that isn't (quite)
 * achievable with the standard C++ atomic API. Basically, instead of
 * wrapping values in a template class, we operate on raw pointers.
 *
 * This interface is needed because it is not generally possible to get
 * a pointer to the integer underlying a std::atomic (it may not even
 * exist as such).
 *
 * While we are at it, we also define a few additional operations that are
 * not present in the standard (double-width CAS, atomic spin).
 *
 * Note that these aren't template functions; we only require these atomic
 * ops to work on pointer-sized values, so we don't bother with anything else.
 */
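
/* Usage sketch (illustrative only; `obj_refcnt' and the functions below
 * are hypothetical, not part of xrcu). The fetch-and-modify operations
 * return the *previous* value of the word, so a reference count can be
 * managed like this:
 *
 *   static uintptr_t obj_refcnt = 1;
 *
 *   void obj_ref ()
 *     {
 *       xatomic_add (&obj_refcnt, 1);
 *     }
 *
 *   bool obj_unref ()   // True when the last reference was dropped.
 *     {
 *       return (xatomic_add (&obj_refcnt, -1) == 1);
 *     }
 */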
#if (defined (__GNUC__) && (__GNUC__ > 4 ||   \
    (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (defined (__clang__) &&   \
    defined (__clang_major__) && (__clang_major__ >= 4 ||   \
    (__clang_major__ == 3 && __clang_minor__ >= 8)))
namespace xrcu
{

// Compare-and-swap; returns the value seen (equal to EXP on success).
inline uintptr_t
xatomic_cas (uintptr_t *ptr, uintptr_t exp, uintptr_t nval)
{
  __atomic_compare_exchange_n (ptr, &exp, nval, 0,
    __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
  return (exp);
}

// Bitwise-OR VAL into *PTR; returns the previous value.
inline uintptr_t
xatomic_or (uintptr_t *ptr, uintptr_t val)
{
  return (__atomic_fetch_or (ptr, val, __ATOMIC_ACQ_REL));
}

// Bitwise-AND VAL into *PTR.
inline void
xatomic_and (uintptr_t *ptr, uintptr_t val)
{
  (void)__atomic_and_fetch (ptr, val, __ATOMIC_ACQ_REL);
}

// Store VAL in *PTR; returns the previous value.
inline uintptr_t
xatomic_swap (uintptr_t *ptr, uintptr_t val)
{
  return (__atomic_exchange_n (ptr, val, __ATOMIC_ACQ_REL));
}

// Add VAL to *PTR; returns the previous value.
inline uintptr_t
xatomic_add (uintptr_t *ptr, intptr_t val)
{
  return (__atomic_fetch_add (ptr, val, __ATOMIC_ACQ_REL));
}
#else

#include <atomic>

static_assert (sizeof (uintptr_t) == sizeof (std::atomic_uintptr_t) &&
  alignof (uintptr_t) == alignof (std::atomic_uintptr_t),
  "unsupported compiler (uintptr_t and atomic_uintptr_t mismatch)");

namespace xrcu
{

// Defined below, once the target architecture is known.
inline void xatomic_spin_nop ();

#define AS_ATOMIC(x)   ((std::atomic_uintptr_t *)(x))

inline uintptr_t
xatomic_cas (uintptr_t *ptr, uintptr_t exp, uintptr_t nval)
{
  /* Use the strong variant: a spurious failure would leave EXP equal to
   * the expected value and make callers believe the CAS succeeded.  */
  AS_ATOMIC(ptr)->compare_exchange_strong (exp, nval,
    std::memory_order_acq_rel, std::memory_order_relaxed);
  return (exp);
}
inline uintptr_t
xatomic_swap (uintptr_t *ptr, uintptr_t val)
{
  return (AS_ATOMIC(ptr)->exchange (val, std::memory_order_acq_rel));
}

inline uintptr_t
xatomic_add (uintptr_t *ptr, intptr_t val)
{
  return (AS_ATOMIC(ptr)->fetch_add (val, std::memory_order_acq_rel));
}

#undef AS_ATOMIC
// Emulate fetch-and-OR with a CAS loop; returns the previous value.
inline uintptr_t
xatomic_or (uintptr_t *ptr, uintptr_t val)
{
  while (true)
    {
      uintptr_t ret = *ptr;
      if (xatomic_cas (ptr, ret, ret | val) == ret)
        return (ret);

      xatomic_spin_nop ();
    }
}

// Emulate atomic AND with a CAS loop.
inline void
xatomic_and (uintptr_t *ptr, uintptr_t val)
{
  while (true)
    {
      uintptr_t ret = *ptr;
      if (xatomic_cas (ptr, ret, ret & val) == ret)
        return;

      xatomic_spin_nop ();
    }
}

#endif
#if defined (__GNUC__)

#  if defined (__i386__) || defined (__x86_64__)

inline void
xatomic_spin_nop ()
{
  __asm__ __volatile__ ("pause" : : : "memory");
}

#  elif defined (__aarch64__) || defined (__arm__)

inline void
xatomic_spin_nop ()
{
  __asm__ __volatile__ ("wfe" : : : "memory");
}

#  else

inline void
xatomic_spin_nop ()
{
  __atomic_thread_fence (__ATOMIC_ACQUIRE);
}

#  endif

#else

inline void
xatomic_spin_nop ()
{
  std::atomic_thread_fence (std::memory_order_acquire);
}

#endif
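
/* Usage sketch (illustrative; `flag' and BUSY_BIT are hypothetical). A
 * busy-wait should issue the hint between polls to stay polite to the
 * CPU; OR-ing with zero doubles as an atomic read of the word:
 *
 *   while (xatomic_or (&flag, 0) & BUSY_BIT)
 *     xatomic_spin_nop ();
 */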
// Boolean convenience wrapper around the CAS above.
inline bool
xatomic_cas_bool (uintptr_t *ptr, uintptr_t exp, uintptr_t nval)
{
  return (xatomic_cas (ptr, exp, nval) == exp);
}
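
/* Usage sketch (illustrative; `saturating_inc' is hypothetical): the
 * usual retry loop built from the primitives above.
 *
 *   void saturating_inc (uintptr_t *ptr)
 *     {
 *       while (true)
 *         {
 *           uintptr_t cur = *ptr;
 *           if (cur == UINTPTR_MAX || xatomic_cas_bool (ptr, cur, cur + 1))
 *             return;
 *
 *           xatomic_spin_nop ();
 *         }
 *     }
 */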
// Try to define double-width CAS.

#if defined (__GNUC__)

#  if defined (__amd64) || defined (__amd64__) ||   \
      defined (__x86_64) || defined (__x86_64__)

#    define XRCU_HAVE_XATOMIC_DCAS

#    if defined (_ILP32) || defined (__ILP32__)

// With 32-bit pointers, a double-width CAS fits in one 64-bit CAS.
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  uint64_t exp = ((uint64_t)ehi << 32) | elo;
  uint64_t nval = ((uint64_t)nhi << 32) | nlo;

  return (__atomic_compare_exchange_n ((uint64_t *)ptr,
    &exp, nval, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
}

#    else

inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  char r;

  __asm__ __volatile__
    (
      "lock; cmpxchg16b %0\n\t"
      "setz %1"
      : "+m" (*ptr), "=q" (r)
      : "d" (ehi), "a" (elo),
        "c" (nhi), "b" (nlo)
      : "memory"
    );

  return ((bool)r);
}

#    endif   // ILP32
#  elif defined (__i386) || defined (__i386__)

#    define XRCU_HAVE_XATOMIC_DCAS

#    if defined (__PIC__) && __GNUC__ < 5

/* Old PIC-enabled compilers reserve %ebx as the GOT pointer, so it can't
 * be named as an asm operand; save and restore it by hand.  */
inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  uintptr_t s;
  char r;

  __asm__ __volatile__
    (
      "movl %%ebx, %2\n\t"
      "leal %0, %%edi\n\t"
      "movl %7, %%ebx\n\t"
      "lock; cmpxchg8b (%%edi)\n\t"
      "movl %2, %%ebx\n\t"
      "setz %1"
      : "=m" (*ptr), "=a" (r), "=m" (s)
      : "m" (*ptr), "d" (ehi), "a" (elo),
        "c" (nhi), "m" (nlo)
      : "%edi", "memory"
    );

  return ((bool)r);
}

#    else

inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  char r;

  __asm__ __volatile__
    (
      "lock; cmpxchg8b %0\n\t"
      "setz %1"
      : "+m" (*ptr), "=a" (r)
      : "d" (ehi), "a" (elo),
        "c" (nhi), "b" (nlo)
      : "memory"
    );

  return ((bool)r);
}

#    endif   // PIC.
#  elif (defined (__arm__) || defined (__thumb__)) &&   \
      ((!defined (__thumb__) || (defined (__thumb2__) &&   \
        !defined (__ARM_ARCH_7__)) && !defined (__ARM_ARCH_7M__) &&   \
        !defined (__ARM_ARCH_7EM__)) && (!defined (__clang__) ||   \
        (__clang_major__ > 3 ||   \
         (__clang_major__ == 3 && __clang_minor__ >= 3))))

#    define XRCU_HAVE_XATOMIC_DCAS

inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  uint64_t qv = ((uint64_t)ehi << 32) | elo;
  uint64_t nv = ((uint64_t)nhi << 32) | nlo;

  while (true)
    {
      uint64_t tmp;

      __asm__ __volatile__
        (
          "ldrexd %0, %H0, [%1]"
          : "=&r" (tmp) : "r" (ptr)
        );

      if (tmp != qv)
        return (false);

      int r;

      __asm__ __volatile__
        (
          "strexd %0, %3, %H3, [%2]"
          : "=&r" (r), "+m" (*ptr)
          : "r" (ptr), "r" (nv)
          : "cc"
        );

      if (r == 0)
        return (true);
    }
}
#  elif defined (__aarch64__)

#    define XRCU_HAVE_XATOMIC_DCAS

inline bool
xatomic_dcas_bool (uintptr_t *ptr, uintptr_t elo,
  uintptr_t ehi, uintptr_t nlo, uintptr_t nhi)
{
  while (true)
    {
      uintptr_t t1, t2;

      __asm__ __volatile__
        (
          "ldaxp %0, %1, %2"
          : "=&r" (t1), "=&r" (t2)
          : "Q" (*ptr)
        );

      if (t1 != elo || t2 != ehi)
        return (false);

      int r;

      __asm__ __volatile__
        (
          "stxp %w0, %2, %3, %1"
          : "=&r" (r), "=Q" (*ptr)
          : "r" (nlo), "r" (nhi)
        );

      if (r == 0)
        return (true);
    }
}

#  endif
#endif
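
/* Usage sketch (illustrative; `tagged_ptr' and `tp_update' are
 * hypothetical, not part of xrcu). A double-width CAS lets a pointer and
 * a version tag be swapped in one shot, the classic ABA workaround. The
 * pair must be two adjacent uintptr_t's, aligned to twice the pointer
 * size.
 *
 * #ifdef XRCU_HAVE_XATOMIC_DCAS
 *   struct alignas (2 * sizeof (uintptr_t)) tagged_ptr
 *     {
 *       uintptr_t ptr;   // Low word.
 *       uintptr_t tag;   // High word; bumped on every update.
 *     };
 *
 *   bool tp_update (tagged_ptr *tp, uintptr_t old_p,
 *                   uintptr_t old_t, uintptr_t new_p)
 *     {
 *       return (xatomic_dcas_bool (&tp->ptr, old_p, old_t,
 *                                  new_p, old_t + 1));
 *     }
 * #endif
 */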
} // namespace xrcu

#endif   // __XRCU_XATOMIC_HPP__