123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- #include "cxxcall.hpp"
- #include <cstring>
- #include <atomic>
- #include <cerrno>
/* Callback function + user argument pair.
 * NOTE(review): presumably mirrors the layout that cb_data::fill embeds in a
 * trampoline (see the sysdeps callin-recv sources) — confirm against those
 * files, as nothing in this translation unit uses cb_fct directly. */
struct cb_fct
{
  void (*fn) (cxxcall::cb_data *, void *);
  void *arg;
};
#if defined (__clang__) && defined (__APPLE__)
// Apple's instruction-cache flush primitive; declared manually because no
// public C++ header exposes it.
extern "C" void sys_icache_invalidate (void *, size_t);
#endif
- #if defined (CXXCALL_AMD64)
- #include "sysdeps/x86-64/callin-recv.cpp"
- #elif defined (CXXCALL_I386)
- #include "sysdeps/i386/callin-recv.cpp"
- #elif defined (CXXCALL_ARM64)
- #include "sysdeps/aarch64/callin-recv.cpp"
- #elif defined (CXXCALL_ARM32)
- #include "sysdeps/arm/callin-recv.cpp"
- #else
- # error "unsupported platform"
- #endif
- static unsigned int cxx_pagesize;
- #ifndef _WIN32
- #include <sys/mman.h>
- #include <unistd.h>
- static inline void
- cxx_init_pagesize ()
- {
- if (!cxx_pagesize)
- cxx_pagesize = sysconf (_SC_PAGESIZE);
- }
- static inline void*
- cxx_alloc_rwx (bool exec_p)
- {
- cxx_init_pagesize ();
- int prot = PROT_READ | PROT_WRITE | (exec_p ? PROT_EXEC : 0);
- void *ret = mmap (nullptr, cxx_pagesize, prot,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- return (ret == MAP_FAILED ? nullptr : ret);
- }
/* Back off briefly (~1us) under lock contention, without pulling in any
 * pthread yield primitives (see the spinlock rationale below). */
static inline void
cxx_yield_thr ()
{
  usleep (1);
}
/* Flip MEM (one page) from writable to read+execute and flush the
 * instruction cache so freshly written trampoline code can be run.
 * Returns false on failure, with errno set by mprotect. */
static inline bool
cxx_mprot_rx (void *mem)
{
  if (mprotect (mem, cxx_pagesize, PROT_READ | PROT_EXEC) < 0)
    return (false);
#if defined (__clang__) && defined (__APPLE__)
  // Apple clang's __builtin___clear_cache is unreliable here; use the
  // libc primitive declared above instead.
  sys_icache_invalidate (mem, cxx_pagesize);
#else
  __builtin___clear_cache ((char *)mem, (char *)mem + cxx_pagesize);
#endif
  return (true);
}
/* Return a page obtained from cxx_alloc_rwx to the OS. */
static inline void
cxx_dealloc (void *mem)
{
  munmap (mem, cxx_pagesize);
}
- #else
- # define WIN32_LEAN_AND_MEAN
- #include <windows.h>
- static inline void
- cxx_inig_pagesize ()
- {
- if (cxx_pagesize)
- return;
- SYSTEM_INFO info;
- GetSystemInfo (&info);
- cxx_pagesize = info.dwPagesize;
- }
- static inline void*
- cxx_alloc_rwx (bool exec_p)
- {
- cxx_init_pagesize ();
- DWORD prot = PAGE_READWRITE | (exec_p ? PAGE_EXECUTE : 0);
- return (VirtualAlloc (nullptr, cxx_pagesize,
- MEM_COMMIT | MEM_RESERVE, prot));
- }
/* Back off briefly (~1ms, the Sleep granularity) under lock contention. */
static inline void
cxx_yield_thr ()
{
  Sleep (1);
}
- static inline bool
- cxx_mprot_rx (void *mem)
- {
- if (!VirtualProtect (mem, cxx_pagesize, PAGE_READ | PAGE_EXECUTE, nullptr))
- return (false);
- FlushInstructionCache (GetCurrentProcess (), mem, cxx_pagesize);
- return (true);
- }
/* Return a page obtained from cxx_alloc_rwx to the OS. */
static inline void
cxx_dealloc (void *mem)
{
  VirtualFree (mem, 0, MEM_RELEASE);
}
- #endif // _WIN32
- namespace cxxcall
- {
- #ifndef CXXCALL_DISABLE_WXPAGES
/* One trampoline slot.  While free, it is a freelist link; while live, its
 * buffer holds the trampoline code plus two pointers (presumably the fn/arg
 * pair written by cb_data::fill — confirm against the sysdeps sources, which
 * also define TRAMP_LEN).  max_align_t keeps every slot suitably aligned. */
union tramp_entry
{
  tramp_entry *next;
  unsigned char buf[TRAMP_LEN + 2 * sizeof (void *)];
  std::max_align_t align;
};
// Singly linked list of free trampoline slots, guarded by tramps_lock
// (0 = unlocked, 1 = locked; see the spinlock note below).
static tramp_entry *tramps_freelist;
static std::atomic<size_t> tramps_lock;
- /* We use a simple spinlock that sleeps on contention instead of the standard
- * mutex types in order to avoid (potentially) pulling libpthread, which can
- * slow down programs considerably. */
- static void
- tramps_lock_grab (std::atomic<size_t>& lk)
- {
- size_t zero = 0;
- if (lk.compare_exchange_weak (zero, 1,
- std::memory_order_relaxed, std::memory_order_acq_rel))
- return;
- for (zero = 0 ; ; zero = 0)
- {
- int nspins = 1000;
- while (lk.load (std::memory_order_relaxed) != 0 && --nspins)
- std::atomic_thread_fence (std::memory_order_acquire);
- if (lk.compare_exchange_weak (zero, 1,
- std::memory_order_relaxed, std::memory_order_acq_rel))
- break;
- cxx_yield_thr ();
- }
- }
/* Release the spinlock LK.  The release store makes every write done inside
 * the critical section visible to the next acquirer. */
static void
tramps_lock_drop (std::atomic<size_t>& lk)
{
  lk.store (0, std::memory_order_release);
}
/* RAII holder for the trampoline spinlock: grabs it on construction,
 * drops it on scope exit. */
struct tramps_lock_guard
{
  std::atomic<size_t>& lock;
  tramps_lock_guard (std::atomic<size_t>& lk) : lock (lk)
  {
    tramps_lock_grab (this->lock);
  }
  ~tramps_lock_guard ()
  {
    tramps_lock_drop (this->lock);
  }
};
- static void*
- cb_data_raw_alloc ()
- {
- tramps_lock_guard g { tramps_lock };
- auto rv = tramps_freelist;
- if (rv)
- {
- tramps_freelist = rv->next;
- return (rv);
- }
- rv = (tramp_entry *)cxx_alloc_rwx (true);
- if (!rv)
- return (rv);
- void *end = (char *)rv + cxx_pagesize;
- auto tmp = rv + 1;
- for ( ; ; ++tmp)
- {
- if ((void *)(tmp + 1) >= end)
- break;
- tmp->next = tmp + 1;
- }
- tramps_freelist = rv + 1;
- return (rv);
- }
- void* cb_data::alloc (void (*fct) (cb_data *, void *), void *arg)
- {
- void *mem = cb_data_raw_alloc ();
- if (!mem)
- return (nullptr);
- cb_data::fill (mem, fct, arg);
- return (mem);
- }
- void cb_data::dealloc (void *mem)
- {
- if (!mem)
- return;
- auto tp = (tramp_entry *)mem;
- tramps_lock_guard g { tramps_lock };
- tp->next = tramps_freelist;
- tramps_freelist = tp;
- }
- #else
- /* For platforms with W^X, we take a simpler approach and just allocate a
- * full page for each callin. This may seem a bit wasteful, but it's much
 * easier to maintain and reason about, and doesn't have tricky multithreaded
- * problems, nor does it require an open file for multiple mappings. */
- void* cb_data::alloc (void (*fct) (cb_data *, void *), void *arg)
- {
- void *mem = cxx_alloc_rwx (false);
- if (!mem)
- return (mem);
- cb_data::fill (mem, fct, arg);
- if (!cxx_mprot_rx (mem))
- {
- // Preserve errno.
- int tmp = errno;
- cxx_dealloc (mem);
- errno = tmp;
- mem = nullptr;
- }
- return (mem);
- }
- void cb_data::dealloc (void *mem)
- {
- if (mem)
- cxx_dealloc (mem);
- }
- #endif // CXXCALL_DISABLE_WXPAGES
- } // namespace cxxcall
|