// callin-recv.cpp
  1. #include "cxxcall.hpp"
  2. #include <cstring>
  3. #include <atomic>
  4. #include <cerrno>
/* Callback record: the user function and its opaque argument.
 * NOTE(review): member order (fn, then arg) presumably matches the layout
 * the arch-specific trampolines read -- confirm before reordering. */
struct cb_fct
{
  void (*fn) (cxxcall::cb_data *, void *);
  void *arg;
};
  10. #if defined (__clang__) && defined (__APPLE__)
  11. extern "C" void sys_icache_invalidate (void *, size_t);
  12. #endif
  13. #if defined (CXXCALL_AMD64)
  14. #include "sysdeps/x86-64/callin-recv.cpp"
  15. #elif defined (CXXCALL_I386)
  16. #include "sysdeps/i386/callin-recv.cpp"
  17. #elif defined (CXXCALL_ARM64)
  18. #include "sysdeps/aarch64/callin-recv.cpp"
  19. #elif defined (CXXCALL_ARM32)
  20. #include "sysdeps/arm/callin-recv.cpp"
  21. #else
  22. # error "unsupported platform"
  23. #endif
  24. static unsigned int cxx_pagesize;
  25. #ifndef _WIN32
  26. #include <sys/mman.h>
  27. #include <unistd.h>
  28. static inline void
  29. cxx_init_pagesize ()
  30. {
  31. if (!cxx_pagesize)
  32. cxx_pagesize = sysconf (_SC_PAGESIZE);
  33. }
  34. static inline void*
  35. cxx_alloc_rwx (bool exec_p)
  36. {
  37. cxx_init_pagesize ();
  38. int prot = PROT_READ | PROT_WRITE | (exec_p ? PROT_EXEC : 0);
  39. void *ret = mmap (nullptr, cxx_pagesize, prot,
  40. MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  41. return (ret == MAP_FAILED ? nullptr : ret);
  42. }
/* Sleep for the shortest interval so a contended locker yields the CPU
 * instead of burning a full timeslice. */
static inline void
cxx_yield_thr ()
{
  usleep (1);
}
/* Remap MEM's page to read+execute and flush the instruction cache so
 * freshly written trampoline code is visible to the CPU. Returns false
 * (errno set by mprotect) on failure. */
static inline bool
cxx_mprot_rx (void *mem)
{
  if (mprotect (mem, cxx_pagesize, PROT_READ | PROT_EXEC) < 0)
    return (false);
#if defined (__clang__) && defined (__APPLE__)
  /* Apple targets expose a dedicated icache-invalidation primitive. */
  sys_icache_invalidate (mem, cxx_pagesize);
#else
  __builtin___clear_cache ((char *)mem, (char *)mem + cxx_pagesize);
#endif
  return (true);
}
  60. static inline void
  61. cxx_dealloc (void *mem)
  62. {
  63. munmap (mem, cxx_pagesize);
  64. }
  65. #else
  66. # define WIN32_LEAN_AND_MEAN
  67. #include <windows.h>
  68. static inline void
  69. cxx_inig_pagesize ()
  70. {
  71. if (cxx_pagesize)
  72. return;
  73. SYSTEM_INFO info;
  74. GetSystemInfo (&info);
  75. cxx_pagesize = info.dwPagesize;
  76. }
  77. static inline void*
  78. cxx_alloc_rwx (bool exec_p)
  79. {
  80. cxx_init_pagesize ();
  81. DWORD prot = PAGE_READWRITE | (exec_p ? PAGE_EXECUTE : 0);
  82. return (VirtualAlloc (nullptr, cxx_pagesize,
  83. MEM_COMMIT | MEM_RESERVE, prot));
  84. }
/* Sleep for the shortest interval so a contended locker yields the CPU
 * instead of burning a full timeslice. */
static inline void
cxx_yield_thr ()
{
  Sleep (1);
}
  90. static inline bool
  91. cxx_mprot_rx (void *mem)
  92. {
  93. if (!VirtualProtect (mem, cxx_pagesize, PAGE_READ | PAGE_EXECUTE, nullptr))
  94. return (false);
  95. FlushInstructionCache (GetCurrentProcess (), mem, cxx_pagesize);
  96. return (true);
  97. }
/* Release the page at MEM back to the OS; with MEM_RELEASE the size
 * argument must be 0. */
static inline void
cxx_dealloc (void *mem)
{
  VirtualFree (mem, 0, MEM_RELEASE);
}
  103. #endif // _WIN32
  104. namespace cxxcall
  105. {
  106. #ifndef CXXCALL_DISABLE_WXPAGES
/* One trampoline slot. While free, the slot stores an intrusive freelist
 * link; while in use, it holds the generated code plus (presumably) the
 * fn/arg pair -- TRAMP_LEN comes from the sysdeps include above. */
union tramp_entry
{
  tramp_entry *next;                               // freelist link (slot unused)
  unsigned char buf[TRAMP_LEN + 2 * sizeof (void *)];  // trampoline payload
  std::max_align_t align;                          // force maximal alignment
};
/* Head of the trampoline-slot freelist; guarded by tramps_lock. */
static tramp_entry *tramps_freelist;
/* Spinlock word protecting tramps_freelist: 0 = unlocked, 1 = held. */
static std::atomic<size_t> tramps_lock;
  115. /* We use a simple spinlock that sleeps on contention instead of the standard
  116. * mutex types in order to avoid (potentially) pulling libpthread, which can
  117. * slow down programs considerably. */
  118. static void
  119. tramps_lock_grab (std::atomic<size_t>& lk)
  120. {
  121. size_t zero = 0;
  122. if (lk.compare_exchange_weak (zero, 1,
  123. std::memory_order_relaxed, std::memory_order_acq_rel))
  124. return;
  125. for (zero = 0 ; ; zero = 0)
  126. {
  127. int nspins = 1000;
  128. while (lk.load (std::memory_order_relaxed) != 0 && --nspins)
  129. std::atomic_thread_fence (std::memory_order_acquire);
  130. if (lk.compare_exchange_weak (zero, 1,
  131. std::memory_order_relaxed, std::memory_order_acq_rel))
  132. break;
  133. cxx_yield_thr ();
  134. }
  135. }
/* Release the spinlock; the release store publishes every freelist
 * mutation made inside the critical section to the next acquirer. */
static void
tramps_lock_drop (std::atomic<size_t>& lk)
{
  lk.store (0, std::memory_order_release);
}
  141. struct tramps_lock_guard
  142. {
  143. std::atomic<size_t>& lock;
  144. tramps_lock_guard (std::atomic<size_t>& lk) : lock (lk)
  145. {
  146. tramps_lock_grab (this->lock);
  147. }
  148. ~tramps_lock_guard ()
  149. {
  150. tramps_lock_drop (this->lock);
  151. }
  152. };
  153. static void*
  154. cb_data_raw_alloc ()
  155. {
  156. tramps_lock_guard g { tramps_lock };
  157. auto rv = tramps_freelist;
  158. if (rv)
  159. {
  160. tramps_freelist = rv->next;
  161. return (rv);
  162. }
  163. rv = (tramp_entry *)cxx_alloc_rwx (true);
  164. if (!rv)
  165. return (rv);
  166. void *end = (char *)rv + cxx_pagesize;
  167. auto tmp = rv + 1;
  168. for ( ; ; ++tmp)
  169. {
  170. if ((void *)(tmp + 1) >= end)
  171. break;
  172. tmp->next = tmp + 1;
  173. }
  174. tramps_freelist = rv + 1;
  175. return (rv);
  176. }
  177. void* cb_data::alloc (void (*fct) (cb_data *, void *), void *arg)
  178. {
  179. void *mem = cb_data_raw_alloc ();
  180. if (!mem)
  181. return (nullptr);
  182. cb_data::fill (mem, fct, arg);
  183. return (mem);
  184. }
  185. void cb_data::dealloc (void *mem)
  186. {
  187. if (!mem)
  188. return;
  189. auto tp = (tramp_entry *)mem;
  190. tramps_lock_guard g { tramps_lock };
  191. tp->next = tramps_freelist;
  192. tramps_freelist = tp;
  193. }
  194. #else
/* For platforms with W^X, we take a simpler approach and just allocate a
 * full page for each callin. This may seem a bit wasteful, but it's much
 * easier to maintain and reason about, and doesn't have tricky multithreaded
 * problems, nor does it require an open file for multiple mappings. */
  199. void* cb_data::alloc (void (*fct) (cb_data *, void *), void *arg)
  200. {
  201. void *mem = cxx_alloc_rwx (false);
  202. if (!mem)
  203. return (mem);
  204. cb_data::fill (mem, fct, arg);
  205. if (!cxx_mprot_rx (mem))
  206. {
  207. // Preserve errno.
  208. int tmp = errno;
  209. cxx_dealloc (mem);
  210. errno = tmp;
  211. mem = nullptr;
  212. }
  213. return (mem);
  214. }
  215. void cb_data::dealloc (void *mem)
  216. {
  217. if (mem)
  218. cxx_dealloc (mem);
  219. }
  220. #endif // CXXCALL_DISABLE_WXPAGES
  221. } // namespace cxxcall