/*
 * Copyright (c) 2011 Free Software Foundation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * Copyright (c) 2010, 2011 Richard Braun.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Object caching and general purpose memory allocator.
 *
 * This allocator is based on the paper "The Slab Allocator: An Object-Caching
 * Kernel Memory Allocator" by Jeff Bonwick.
 *
 * It allows the allocation of objects (i.e. fixed-size typed buffers) from
 * caches and is efficient in both space and time. This implementation follows
 * many of the recommendations from the paper mentioned above. The most
 * notable differences are outlined below.
 *
 * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
 * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by
 * a red-black tree storing slabs, sorted by address. The use of a
 * self-balancing tree for buffer-to-slab conversions provides a few advantages
 * over a hash table. Unlike a hash table, a BST provides a "lookup nearest"
 * operation, so obtaining the slab data (whether it is embedded in the slab or
 * off slab) from a buffer address simply consists of a "lookup nearest towards
 * 0" tree search. In addition, a self-balancing tree is a true self-scaling
 * data structure, whereas a hash table requires periodic maintenance and
 * complete resizing, which is expensive. The only drawback is that releasing
 * a buffer to the slab layer takes logarithmic time instead of constant time.
 *
 * This implementation uses per-CPU pools of objects, which service most
 * allocation requests. These pools act as caches (but are named differently
 * to avoid confusion with CPU caches) that reduce contention on multiprocessor
 * systems. When a pool is empty and cannot provide an object, it is filled by
 * transferring multiple objects from the slab layer. The symmetric case is
 * handled likewise.
 */
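
/*
 * Illustrative usage sketch (not part of the allocator itself): a typical
 * client creates a cache once, then allocates and releases objects from it.
 * The "foo" type and cache below are hypothetical.
 *
 *     static struct kmem_cache foo_cache;
 *
 *     void foo_module_init(void)
 *     {
 *         kmem_cache_init(&foo_cache, "foo", sizeof(struct foo), 0, NULL, 0);
 *     }
 *
 *     struct foo * foo_create(void)
 *     {
 *         return (struct foo *)kmem_cache_alloc(&foo_cache);
 *     }
 *
 *     void foo_destroy(struct foo *foo)
 *     {
 *         kmem_cache_free(&foo_cache, (vm_offset_t)foo);
 *     }
 *
 * General purpose allocations go through kalloc()/kfree() instead, which
 * dispatch to the kalloc_caches defined below.
 */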

#include <string.h>
#include <kern/assert.h>
#include <kern/mach_clock.h>
#include <kern/macros.h>
#include <kern/printf.h>
#include <kern/slab.h>
#include <kern/kalloc.h>
#include <kern/cpu_number.h>
#include <mach/vm_param.h>
#include <mach/machine/vm_types.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_types.h>
#include <sys/types.h>

#ifdef MACH_DEBUG
#include <mach_debug/slab_info.h>
#endif

/*
 * Utility macros.
 */
#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0)
#define ISP2(x) P2ALIGNED(x, x)
#define P2ALIGN(x, a) ((x) & -(a))
#define P2ROUND(x, a) (-(-(x) & -(a)))
#define P2END(x, a) (-(~(x) & -(a)))
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
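
/*
 * For instance (with a power-of-2 alignment, as these macros require):
 * P2ALIGN(13, 8) == 8, P2ROUND(13, 8) == 16, P2END(13, 8) == 16 and
 * P2END(16, 8) == 24, i.e. P2END yields the end of the aligned chunk
 * containing x.
 */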

/*
 * Minimum required alignment.
 */
#define KMEM_ALIGN_MIN 8

/*
 * Special buffer size under which slab data is unconditionally allocated
 * from its associated slab.
 */
#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8)

/*
 * Time (in ticks) between two garbage collection operations.
 */
#define KMEM_GC_INTERVAL (5 * hz)

/*
 * The transfer size of a CPU pool is computed by dividing the pool size by
 * this value.
 */
#define KMEM_CPU_POOL_TRANSFER_RATIO 2
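
/*
 * For example, a cache whose CPU pools hold up to 64 objects moves
 * ceil(64 / 2) == 32 objects per transfer between a pool and the slab
 * layer (see kmem_cpu_pool_build() below).
 */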

/*
 * Redzone guard word.
 */
#ifdef __LP64__
#if _HOST_BIG_ENDIAN
#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL
#else /* _HOST_BIG_ENDIAN */
#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL
#endif /* _HOST_BIG_ENDIAN */
#else /* __LP64__ */
#if _HOST_BIG_ENDIAN
#define KMEM_REDZONE_WORD 0xfeedfaceUL
#else /* _HOST_BIG_ENDIAN */
#define KMEM_REDZONE_WORD 0xcefaedfeUL
#endif /* _HOST_BIG_ENDIAN */
#endif /* __LP64__ */

/*
 * Redzone byte for padding.
 */
#define KMEM_REDZONE_BYTE 0xbb

/*
 * Shift for the first kalloc cache size.
 */
#define KALLOC_FIRST_SHIFT 5

/*
 * Number of caches backing general purpose allocations.
 */
#define KALLOC_NR_CACHES 13

/*
 * Values the buftag state member can take.
 */
#ifdef __LP64__
#if _HOST_BIG_ENDIAN
#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL
#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL
#else /* _HOST_BIG_ENDIAN */
#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL
#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL
#endif /* _HOST_BIG_ENDIAN */
#else /* __LP64__ */
#if _HOST_BIG_ENDIAN
#define KMEM_BUFTAG_ALLOC 0xa110c8edUL
#define KMEM_BUFTAG_FREE 0xf4eeb10cUL
#else /* _HOST_BIG_ENDIAN */
#define KMEM_BUFTAG_ALLOC 0xedc810a1UL
#define KMEM_BUFTAG_FREE 0x0cb1eef4UL
#endif /* _HOST_BIG_ENDIAN */
#endif /* __LP64__ */

/*
 * Free and uninitialized patterns.
 *
 * These values are unconditionally 64-bit wide since buffers are at least
 * 8-byte aligned.
 */
#if _HOST_BIG_ENDIAN
#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL
#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL
#else /* _HOST_BIG_ENDIAN */
#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL
#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL
#endif /* _HOST_BIG_ENDIAN */

/*
 * Cache flags.
 *
 * The flags don't change once set and can be tested without locking.
 */
#define KMEM_CF_SLAB_EXTERNAL   0x01    /* Slab data is off slab */
#define KMEM_CF_PHYSMEM         0x02    /* Allocate from physical memory */
#define KMEM_CF_DIRECT          0x04    /* Direct buf-to-slab translation
                                           (implies !KMEM_CF_SLAB_EXTERNAL) */
#define KMEM_CF_USE_TREE        0x08    /* Use red-black tree to track slab
                                           data */
#define KMEM_CF_USE_PAGE        0x10    /* Use page private data to track slab
                                           data (implies KMEM_CF_SLAB_EXTERNAL
                                           and KMEM_CF_PHYSMEM) */
#define KMEM_CF_VERIFY          0x20    /* Debugging facilities enabled
                                           (implies KMEM_CF_USE_TREE) */

/*
 * Options for kmem_cache_alloc_verify().
 */
#define KMEM_AV_NOCONSTRUCT 0
#define KMEM_AV_CONSTRUCT 1

/*
 * Error codes for kmem_cache_error().
 */
#define KMEM_ERR_INVALID    0   /* Invalid address being freed */
#define KMEM_ERR_DOUBLEFREE 1   /* Freeing already free address */
#define KMEM_ERR_BUFTAG     2   /* Invalid buftag content */
#define KMEM_ERR_MODIFIED   3   /* Buffer modified while free */
#define KMEM_ERR_REDZONE    4   /* Redzone violation */

#if SLAB_USE_CPU_POOLS
/*
 * Available CPU pool types.
 *
 * For each entry, the CPU pool size applies from the entry buf_size
 * (excluded) up to (and including) the buf_size of the preceding entry.
 *
 * See struct kmem_cpu_pool_type for a description of the values.
 */
static struct kmem_cpu_pool_type kmem_cpu_pool_types[] = {
    { 32768,   1, 0,           NULL },
    { 4096,    8, CPU_L1_SIZE, NULL },
    { 256,    64, CPU_L1_SIZE, NULL },
    { 0,     128, CPU_L1_SIZE, NULL }
};
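
/*
 * For instance, with the table above, a cache whose buffers are 512 bytes
 * falls in the (256, 4096] range and therefore gets 64-entry CPU pool
 * arrays, aligned on the L1 cache line size.
 */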

/*
 * Caches where CPU pool arrays are allocated from.
 */
static struct kmem_cache kmem_cpu_array_caches[ARRAY_SIZE(kmem_cpu_pool_types)];
#endif /* SLAB_USE_CPU_POOLS */

/*
 * Cache for off slab data.
 */
static struct kmem_cache kmem_slab_cache;

/*
 * General purpose caches array.
 */
static struct kmem_cache kalloc_caches[KALLOC_NR_CACHES];

/*
 * List of all caches managed by the allocator.
 */
static struct list kmem_cache_list;
static unsigned int kmem_nr_caches;
static simple_lock_data_t __attribute__((used)) kmem_cache_list_lock;

/*
 * Time of the last memory reclaim, in clock ticks.
 */
static unsigned long kmem_gc_last_tick;

#define kmem_error(format, ...)                             \
    panic("mem: error: %s(): " format "\n", __func__,       \
          ## __VA_ARGS__)

#define kmem_warn(format, ...)                              \
    printf("mem: warning: %s(): " format "\n", __func__,    \
           ## __VA_ARGS__)

#define kmem_print(format, ...) \
    printf(format "\n", ## __VA_ARGS__)

static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error,
                             void *arg);
static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache);
static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf);

static void * kmem_buf_verify_bytes(void *buf, void *pattern, size_t size)
{
    char *ptr, *pattern_ptr, *end;

    end = buf + size;

    for (ptr = buf, pattern_ptr = pattern; ptr < end; ptr++, pattern_ptr++)
        if (*ptr != *pattern_ptr)
            return ptr;

    return NULL;
}

static void * kmem_buf_verify(void *buf, uint64_t pattern, vm_size_t size)
{
    uint64_t *ptr, *end;

    assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
    assert(P2ALIGNED(size, sizeof(uint64_t)));

    end = buf + size;

    for (ptr = buf; ptr < end; ptr++)
        if (*ptr != pattern)
            return kmem_buf_verify_bytes(ptr, &pattern, sizeof(pattern));

    return NULL;
}

static void kmem_buf_fill(void *buf, uint64_t pattern, size_t size)
{
    uint64_t *ptr, *end;

    assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
    assert(P2ALIGNED(size, sizeof(uint64_t)));

    end = buf + size;

    for (ptr = buf; ptr < end; ptr++)
        *ptr = pattern;
}

static void * kmem_buf_verify_fill(void *buf, uint64_t old, uint64_t new,
                                   size_t size)
{
    uint64_t *ptr, *end;

    assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
    assert(P2ALIGNED(size, sizeof(uint64_t)));

    end = buf + size;

    for (ptr = buf; ptr < end; ptr++) {
        if (*ptr != old)
            return kmem_buf_verify_bytes(ptr, &old, sizeof(old));

        *ptr = new;
    }

    return NULL;
}

static inline union kmem_bufctl *
kmem_buf_to_bufctl(void *buf, struct kmem_cache *cache)
{
    return (union kmem_bufctl *)(buf + cache->bufctl_dist);
}

static inline struct kmem_buftag *
kmem_buf_to_buftag(void *buf, struct kmem_cache *cache)
{
    return (struct kmem_buftag *)(buf + cache->buftag_dist);
}

static inline void * kmem_bufctl_to_buf(union kmem_bufctl *bufctl,
                                        struct kmem_cache *cache)
{
    return (void *)bufctl - cache->bufctl_dist;
}

static vm_offset_t
kmem_pagealloc_physmem(vm_size_t size)
{
    struct vm_page *page;

    assert(size == PAGE_SIZE);

    for (;;) {
        page = vm_page_grab();

        if (page != NULL)
            break;

        VM_PAGE_WAIT(NULL);
    }

    return phystokv(vm_page_to_pa(page));
}

static void
kmem_pagefree_physmem(vm_offset_t addr, vm_size_t size)
{
    struct vm_page *page;

    assert(size == PAGE_SIZE);
    page = vm_page_lookup_pa(kvtophys(addr));
    assert(page != NULL);
    vm_page_release(page, FALSE, FALSE);
}

static vm_offset_t
kmem_pagealloc_virtual(vm_size_t size, vm_size_t align)
{
    vm_offset_t addr;
    kern_return_t kr;

    assert(size > PAGE_SIZE);
    size = vm_page_round(size);

    if (align <= PAGE_SIZE)
        kr = kmem_alloc_wired(kernel_map, &addr, size);
    else
        kr = kmem_alloc_aligned(kernel_map, &addr, size);

    if (kr != KERN_SUCCESS)
        return 0;

    return addr;
}

static void
kmem_pagefree_virtual(vm_offset_t addr, vm_size_t size)
{
    assert(size > PAGE_SIZE);
    size = vm_page_round(size);
    kmem_free(kernel_map, addr, size);
}

static vm_offset_t
kmem_pagealloc(vm_size_t size, vm_size_t align, int flags)
{
    assert(align <= size);
    return (flags & KMEM_CF_PHYSMEM)
           ? kmem_pagealloc_physmem(size)
           : kmem_pagealloc_virtual(size, align);
}

static void
kmem_pagefree(vm_offset_t addr, vm_size_t size, int flags)
{
    return (flags & KMEM_CF_PHYSMEM)
           ? kmem_pagefree_physmem(addr, size)
           : kmem_pagefree_virtual(addr, size);
}

static void kmem_slab_create_verify(struct kmem_slab *slab,
                                    struct kmem_cache *cache)
{
    struct kmem_buftag *buftag;
    size_t buf_size;
    unsigned long buffers;
    void *buf;

    buf_size = cache->buf_size;
    buf = slab->addr;
    buftag = kmem_buf_to_buftag(buf, cache);

    for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
        kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
        buftag->state = KMEM_BUFTAG_FREE;
        buf += buf_size;
        buftag = kmem_buf_to_buftag(buf, cache);
    }
}

/*
 * Create an empty slab for a cache.
 *
 * The caller must drop all locks before calling this function.
 */
static struct kmem_slab * kmem_slab_create(struct kmem_cache *cache,
                                           size_t color)
{
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;
    size_t buf_size;
    unsigned long buffers;
    vm_offset_t slab_buf;

    slab_buf = kmem_pagealloc(cache->slab_size, cache->align, cache->flags);

    if (slab_buf == 0)
        return NULL;

    if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
        slab = (struct kmem_slab *)kmem_cache_alloc(&kmem_slab_cache);

        if (slab == NULL) {
            kmem_pagefree(slab_buf, cache->slab_size, cache->flags);
            return NULL;
        }

        if (cache->flags & KMEM_CF_USE_PAGE) {
            struct vm_page *page;

            page = vm_page_lookup_pa(kvtophys(slab_buf));
            assert(page != NULL);
            vm_page_set_priv(page, slab);
        }
    } else {
        slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1;
    }

    slab->cache = cache;
    list_node_init(&slab->list_node);
    rbtree_node_init(&slab->tree_node);
    slab->nr_refs = 0;
    slab->first_free = NULL;
    slab->addr = (void *)(slab_buf + color);

    buf_size = cache->buf_size;
    bufctl = kmem_buf_to_bufctl(slab->addr, cache);

    for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
        bufctl->next = slab->first_free;
        slab->first_free = bufctl;
        bufctl = (union kmem_bufctl *)((void *)bufctl + buf_size);
    }

    if (cache->flags & KMEM_CF_VERIFY)
        kmem_slab_create_verify(slab, cache);

    return slab;
}

static void kmem_slab_destroy_verify(struct kmem_slab *slab,
                                     struct kmem_cache *cache)
{
    struct kmem_buftag *buftag;
    size_t buf_size;
    unsigned long buffers;
    void *buf, *addr;

    buf_size = cache->buf_size;
    buf = slab->addr;
    buftag = kmem_buf_to_buftag(buf, cache);

    for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
        if (buftag->state != KMEM_BUFTAG_FREE)
            kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);

        addr = kmem_buf_verify(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);

        if (addr != NULL)
            kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr);

        buf += buf_size;
        buftag = kmem_buf_to_buftag(buf, cache);
    }
}

/*
 * Destroy a slab.
 *
 * The caller must drop all locks before calling this function.
 */
static void kmem_slab_destroy(struct kmem_slab *slab, struct kmem_cache *cache)
{
    vm_offset_t slab_buf;

    assert(slab->nr_refs == 0);
    assert(slab->first_free != NULL);

    if (cache->flags & KMEM_CF_VERIFY)
        kmem_slab_destroy_verify(slab, cache);

    slab_buf = (vm_offset_t)P2ALIGN((unsigned long)slab->addr, PAGE_SIZE);

    if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
        if (cache->flags & KMEM_CF_USE_PAGE) {
            struct vm_page *page;

            /* Not strictly needed, but let's increase safety */
            page = vm_page_lookup_pa(kvtophys(slab_buf));
            assert(page != NULL);
            vm_page_set_priv(page, NULL);
        }

        kmem_cache_free(&kmem_slab_cache, (vm_offset_t)slab);
    }

    kmem_pagefree(slab_buf, cache->slab_size, cache->flags);
}

static inline int kmem_slab_cmp_lookup(const void *addr,
                                       const struct rbtree_node *node)
{
    struct kmem_slab *slab;

    slab = rbtree_entry(node, struct kmem_slab, tree_node);

    if (addr == slab->addr)
        return 0;
    else if (addr < slab->addr)
        return -1;
    else
        return 1;
}

static inline int kmem_slab_cmp_insert(const struct rbtree_node *a,
                                       const struct rbtree_node *b)
{
    struct kmem_slab *slab;

    slab = rbtree_entry(a, struct kmem_slab, tree_node);
    return kmem_slab_cmp_lookup(slab->addr, b);
}

#if SLAB_USE_CPU_POOLS
static void kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool,
                               struct kmem_cache *cache)
{
    simple_lock_init(&cpu_pool->lock);
    cpu_pool->flags = cache->flags;
    cpu_pool->size = 0;
    cpu_pool->transfer_size = 0;
    cpu_pool->nr_objs = 0;
    cpu_pool->array = NULL;
}

/*
 * Return a CPU pool.
 *
 * This function will generally return the pool matching the CPU running the
 * calling thread. Because of context switches and thread migration, the
 * caller might be running on another processor after this function returns.
 * Although not optimal, this should rarely happen, and it doesn't affect the
 * allocator operations in any other way, as CPU pools are always valid, and
 * their access is serialized by a lock.
 */
static inline struct kmem_cpu_pool * kmem_cpu_pool_get(struct kmem_cache *cache)
{
    return &cache->cpu_pools[cpu_number()];
}

static inline void kmem_cpu_pool_build(struct kmem_cpu_pool *cpu_pool,
                                       struct kmem_cache *cache, void **array)
{
    cpu_pool->size = cache->cpu_pool_type->array_size;
    cpu_pool->transfer_size = (cpu_pool->size
                               + KMEM_CPU_POOL_TRANSFER_RATIO - 1)
                              / KMEM_CPU_POOL_TRANSFER_RATIO;
    cpu_pool->array = array;
}

static inline void * kmem_cpu_pool_pop(struct kmem_cpu_pool *cpu_pool)
{
    cpu_pool->nr_objs--;
    return cpu_pool->array[cpu_pool->nr_objs];
}

static inline void kmem_cpu_pool_push(struct kmem_cpu_pool *cpu_pool, void *obj)
{
    cpu_pool->array[cpu_pool->nr_objs] = obj;
    cpu_pool->nr_objs++;
}

static int kmem_cpu_pool_fill(struct kmem_cpu_pool *cpu_pool,
                              struct kmem_cache *cache)
{
    kmem_cache_ctor_t ctor;
    void *buf;
    int i;

    ctor = (cpu_pool->flags & KMEM_CF_VERIFY) ? NULL : cache->ctor;

    simple_lock(&cache->lock);

    for (i = 0; i < cpu_pool->transfer_size; i++) {
        buf = kmem_cache_alloc_from_slab(cache);

        if (buf == NULL)
            break;

        if (ctor != NULL)
            ctor(buf);

        kmem_cpu_pool_push(cpu_pool, buf);
    }

    simple_unlock(&cache->lock);

    return i;
}

static void kmem_cpu_pool_drain(struct kmem_cpu_pool *cpu_pool,
                                struct kmem_cache *cache)
{
    void *obj;
    int i;

    simple_lock(&cache->lock);

    for (i = cpu_pool->transfer_size; i > 0; i--) {
        obj = kmem_cpu_pool_pop(cpu_pool);
        kmem_cache_free_to_slab(cache, obj);
    }

    simple_unlock(&cache->lock);
}
#endif /* SLAB_USE_CPU_POOLS */

static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error,
                             void *arg)
{
    struct kmem_buftag *buftag;

    kmem_warn("cache: %s, buffer: %p", cache->name, (void *)buf);

    switch(error) {
    case KMEM_ERR_INVALID:
        kmem_error("freeing invalid address");
        break;
    case KMEM_ERR_DOUBLEFREE:
        kmem_error("attempting to free the same address twice");
        break;
    case KMEM_ERR_BUFTAG:
        buftag = arg;
        kmem_error("invalid buftag content, buftag state: %p",
                   (void *)buftag->state);
        break;
    case KMEM_ERR_MODIFIED:
        kmem_error("free buffer modified, fault address: %p, "
                   "offset in buffer: %td", arg, arg - buf);
        break;
    case KMEM_ERR_REDZONE:
        kmem_error("write beyond end of buffer, fault address: %p, "
                   "offset in buffer: %td", arg, arg - buf);
        break;
    default:
        kmem_error("unknown error");
    }

    /*
     * Never reached.
     */
}

/*
 * Compute properties such as slab size for the given cache.
 *
 * Once the slab size is known, this function sets the related properties
 * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx
 * flags depending on the resulting layout.
 */
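/*
 * For instance, assuming a 4 KiB page size, a cache of 1 KiB buffers without
 * special flags keeps the page-sized slab: the slab data does not fit in the
 * wasted space, so it goes off slab (KMEM_CF_SLAB_EXTERNAL), giving 4 buffers
 * per slab and no color range.
 */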
static void kmem_cache_compute_properties(struct kmem_cache *cache, int flags)
{
    size_t size, waste;
    int embed;

    if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD)
        flags |= KMEM_CACHE_NOOFFSLAB;

    cache->slab_size = PAGE_SIZE;

    for (;;) {
        if (flags & KMEM_CACHE_NOOFFSLAB)
            embed = 1;
        else {
            waste = cache->slab_size % cache->buf_size;
            embed = (sizeof(struct kmem_slab) <= waste);
        }

        size = cache->slab_size;

        if (embed)
            size -= sizeof(struct kmem_slab);

        if (size >= cache->buf_size)
            break;

        cache->slab_size += PAGE_SIZE;
    }

    cache->bufs_per_slab = size / cache->buf_size;
    cache->color_max = size % cache->buf_size;

    if (cache->color_max >= PAGE_SIZE)
        cache->color_max = 0;

    if (!embed)
        cache->flags |= KMEM_CF_SLAB_EXTERNAL;

    if ((flags & KMEM_CACHE_PHYSMEM) || (cache->slab_size == PAGE_SIZE)) {
        cache->flags |= KMEM_CF_PHYSMEM;

        /*
         * Avoid using larger-than-page slabs backed by the direct physical
         * mapping to completely prevent physical memory fragmentation from
         * making slab allocations fail.
         */
        if (cache->slab_size != PAGE_SIZE)
            panic("slab: invalid cache parameters");
    }

    if (cache->flags & KMEM_CF_VERIFY)
        cache->flags |= KMEM_CF_USE_TREE;

    if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
        if (cache->flags & KMEM_CF_PHYSMEM)
            cache->flags |= KMEM_CF_USE_PAGE;
        else
            cache->flags |= KMEM_CF_USE_TREE;
    } else {
        if (cache->slab_size == PAGE_SIZE)
            cache->flags |= KMEM_CF_DIRECT;
        else
            cache->flags |= KMEM_CF_USE_TREE;
    }
}

void kmem_cache_init(struct kmem_cache *cache, const char *name,
                     size_t obj_size, size_t align,
                     kmem_cache_ctor_t ctor, int flags)
{
#if SLAB_USE_CPU_POOLS
    struct kmem_cpu_pool_type *cpu_pool_type;
    size_t i;
#endif /* SLAB_USE_CPU_POOLS */
    size_t buf_size;

#if SLAB_VERIFY
    cache->flags = KMEM_CF_VERIFY;
#else /* SLAB_VERIFY */
    cache->flags = 0;
#endif /* SLAB_VERIFY */

    if (flags & KMEM_CACHE_VERIFY)
        cache->flags |= KMEM_CF_VERIFY;

    if (align < KMEM_ALIGN_MIN)
        align = KMEM_ALIGN_MIN;

    assert(obj_size > 0);
    assert(ISP2(align));

    buf_size = P2ROUND(obj_size, align);

    simple_lock_init(&cache->lock);
    list_node_init(&cache->node);
    list_init(&cache->partial_slabs);
    list_init(&cache->free_slabs);
    rbtree_init(&cache->active_slabs);
    cache->obj_size = obj_size;
    cache->align = align;
    cache->buf_size = buf_size;
    cache->bufctl_dist = buf_size - sizeof(union kmem_bufctl);
    cache->color = 0;
    cache->nr_objs = 0;
    cache->nr_bufs = 0;
    cache->nr_slabs = 0;
    cache->nr_free_slabs = 0;
    cache->ctor = ctor;
    strncpy(cache->name, name, sizeof(cache->name));
    cache->name[sizeof(cache->name) - 1] = '\0';
    cache->buftag_dist = 0;
    cache->redzone_pad = 0;
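
    /*
     * In verify mode, each buffer is extended with debugging metadata. The
     * resulting layout is roughly:
     *
     *   | object | redzone padding | bufctl (redzone word when allocated) | buftag |
     *
     * where bufctl_dist and buftag_dist record the offsets of the bufctl and
     * the buftag from the start of the buffer.
     */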
    if (cache->flags & KMEM_CF_VERIFY) {
        cache->bufctl_dist = buf_size;
        cache->buftag_dist = cache->bufctl_dist + sizeof(union kmem_bufctl);
        cache->redzone_pad = cache->bufctl_dist - cache->obj_size;
        buf_size += sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag);
        buf_size = P2ROUND(buf_size, align);
        cache->buf_size = buf_size;
    }

    kmem_cache_compute_properties(cache, flags);

#if SLAB_USE_CPU_POOLS
    for (cpu_pool_type = kmem_cpu_pool_types;
         buf_size <= cpu_pool_type->buf_size;
         cpu_pool_type++);

    cache->cpu_pool_type = cpu_pool_type;

    for (i = 0; i < ARRAY_SIZE(cache->cpu_pools); i++)
        kmem_cpu_pool_init(&cache->cpu_pools[i], cache);
#endif /* SLAB_USE_CPU_POOLS */

    simple_lock(&kmem_cache_list_lock);
    list_insert_tail(&kmem_cache_list, &cache->node);
    kmem_nr_caches++;
    simple_unlock(&kmem_cache_list_lock);
}

static inline int kmem_cache_empty(struct kmem_cache *cache)
{
    return cache->nr_objs == cache->nr_bufs;
}

static int kmem_cache_grow(struct kmem_cache *cache)
{
    struct kmem_slab *slab;
    size_t color;
    int empty;

    simple_lock(&cache->lock);

    if (!kmem_cache_empty(cache)) {
        simple_unlock(&cache->lock);
        return 1;
    }

    color = cache->color;
    cache->color += cache->align;

    if (cache->color > cache->color_max)
        cache->color = 0;

    simple_unlock(&cache->lock);

    slab = kmem_slab_create(cache, color);

    simple_lock(&cache->lock);

    if (slab != NULL) {
        list_insert_head(&cache->free_slabs, &slab->list_node);
        cache->nr_bufs += cache->bufs_per_slab;
        cache->nr_slabs++;
        cache->nr_free_slabs++;
    }

    /*
     * Even if our slab creation failed, another thread might have succeeded
     * in growing the cache.
     */
    empty = kmem_cache_empty(cache);

    simple_unlock(&cache->lock);

    return !empty;
}

static void kmem_cache_reap(struct kmem_cache *cache, struct list *dead_slabs)
{
    simple_lock(&cache->lock);
    list_concat(dead_slabs, &cache->free_slabs);
    list_init(&cache->free_slabs);
    cache->nr_bufs -= cache->bufs_per_slab * cache->nr_free_slabs;
    cache->nr_slabs -= cache->nr_free_slabs;
    cache->nr_free_slabs = 0;
    simple_unlock(&cache->lock);
}

/*
 * Allocate a raw (unconstructed) buffer from the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache)
{
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;

    if (!list_empty(&cache->partial_slabs))
        slab = list_first_entry(&cache->partial_slabs, struct kmem_slab,
                                list_node);
    else if (!list_empty(&cache->free_slabs))
        slab = list_first_entry(&cache->free_slabs, struct kmem_slab,
                                list_node);
    else
        return NULL;

    bufctl = slab->first_free;
    assert(bufctl != NULL);
    slab->first_free = bufctl->next;
    slab->nr_refs++;
    cache->nr_objs++;

    if (slab->nr_refs == cache->bufs_per_slab) {
        /* The slab has become complete */
        list_remove(&slab->list_node);

        if (slab->nr_refs == 1)
            cache->nr_free_slabs--;
    } else if (slab->nr_refs == 1) {
        /*
         * The slab has become partial. Insert the new slab at the end of
         * the list to reduce fragmentation.
         */
        list_remove(&slab->list_node);
        list_insert_tail(&cache->partial_slabs, &slab->list_node);
        cache->nr_free_slabs--;
    }

    if ((slab->nr_refs == 1) && (cache->flags & KMEM_CF_USE_TREE))
        rbtree_insert(&cache->active_slabs, &slab->tree_node,
                      kmem_slab_cmp_insert);

    return kmem_bufctl_to_buf(bufctl, cache);
}

/*
 * Release a buffer to the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
{
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;

    if (cache->flags & KMEM_CF_DIRECT) {
        assert(cache->slab_size == PAGE_SIZE);
        slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
               - 1;
    } else if (cache->flags & KMEM_CF_USE_PAGE) {
        struct vm_page *page;

        page = vm_page_lookup_pa(kvtophys((vm_offset_t)buf));
        assert(page != NULL);
        slab = vm_page_get_priv(page);
    } else {
        struct rbtree_node *node;

        assert(cache->flags & KMEM_CF_USE_TREE);
        node = rbtree_lookup_nearest(&cache->active_slabs, buf,
                                     kmem_slab_cmp_lookup, RBTREE_LEFT);
        assert(node != NULL);
        slab = rbtree_entry(node, struct kmem_slab, tree_node);
    }

    assert((unsigned long)buf >= (unsigned long)slab->addr);
    assert(((unsigned long)buf + cache->buf_size)
           <= vm_page_trunc((unsigned long)slab->addr + cache->slab_size));
    assert(slab->nr_refs >= 1);
    assert(slab->nr_refs <= cache->bufs_per_slab);

    bufctl = kmem_buf_to_bufctl(buf, cache);
    bufctl->next = slab->first_free;
    slab->first_free = bufctl;
    slab->nr_refs--;
    cache->nr_objs--;

    if (slab->nr_refs == 0) {
        /* The slab has become free */

        if (cache->flags & KMEM_CF_USE_TREE)
            rbtree_remove(&cache->active_slabs, &slab->tree_node);

        if (cache->bufs_per_slab > 1)
            list_remove(&slab->list_node);

        list_insert_head(&cache->free_slabs, &slab->list_node);
        cache->nr_free_slabs++;
    } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) {
        /* The slab has become partial */
        list_insert_head(&cache->partial_slabs, &slab->list_node);
    }
}

static void kmem_cache_alloc_verify(struct kmem_cache *cache, void *buf,
                                    int construct)
{
    struct kmem_buftag *buftag;
    union kmem_bufctl *bufctl;
    void *addr;

    buftag = kmem_buf_to_buftag(buf, cache);

    if (buftag->state != KMEM_BUFTAG_FREE)
        kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);

    addr = kmem_buf_verify_fill(buf, KMEM_FREE_PATTERN, KMEM_UNINIT_PATTERN,
                                cache->bufctl_dist);

    if (addr != NULL)
        kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr);

    addr = buf + cache->obj_size;
    memset(addr, KMEM_REDZONE_BYTE, cache->redzone_pad);

    bufctl = kmem_buf_to_bufctl(buf, cache);
    bufctl->redzone = KMEM_REDZONE_WORD;
    buftag->state = KMEM_BUFTAG_ALLOC;

    if (construct && (cache->ctor != NULL))
        cache->ctor(buf);
}

vm_offset_t kmem_cache_alloc(struct kmem_cache *cache)
{
    int filled;
    void *buf;

#if SLAB_USE_CPU_POOLS
    struct kmem_cpu_pool *cpu_pool;

    cpu_pool = kmem_cpu_pool_get(cache);

    if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL)
        goto slab_alloc;

    simple_lock(&cpu_pool->lock);

fast_alloc:
    if (likely(cpu_pool->nr_objs > 0)) {
        buf = kmem_cpu_pool_pop(cpu_pool);
        simple_unlock(&cpu_pool->lock);

        if (cpu_pool->flags & KMEM_CF_VERIFY)
            kmem_cache_alloc_verify(cache, buf, KMEM_AV_CONSTRUCT);

        return (vm_offset_t)buf;
    }

    if (cpu_pool->array != NULL) {
        filled = kmem_cpu_pool_fill(cpu_pool, cache);

        if (!filled) {
            simple_unlock(&cpu_pool->lock);

            filled = kmem_cache_grow(cache);

            if (!filled)
                return 0;

            simple_lock(&cpu_pool->lock);
        }

        goto fast_alloc;
    }

    simple_unlock(&cpu_pool->lock);
#endif /* SLAB_USE_CPU_POOLS */

slab_alloc:
    simple_lock(&cache->lock);
    buf = kmem_cache_alloc_from_slab(cache);
    simple_unlock(&cache->lock);

    if (buf == NULL) {
        filled = kmem_cache_grow(cache);

        if (!filled)
            return 0;

        goto slab_alloc;
    }

    if (cache->flags & KMEM_CF_VERIFY)
        kmem_cache_alloc_verify(cache, buf, KMEM_AV_NOCONSTRUCT);

    if (cache->ctor != NULL)
        cache->ctor(buf);

    return (vm_offset_t)buf;
}

static void kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
{
    struct rbtree_node *node;
    struct kmem_buftag *buftag;
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;
    unsigned char *redzone_byte;
    unsigned long slabend;

    assert(cache->flags & KMEM_CF_USE_TREE);

    simple_lock(&cache->lock);
    node = rbtree_lookup_nearest(&cache->active_slabs, buf,
                                 kmem_slab_cmp_lookup, RBTREE_LEFT);
    simple_unlock(&cache->lock);

    if (node == NULL)
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);

    slab = rbtree_entry(node, struct kmem_slab, tree_node);
    slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE);

    if ((unsigned long)buf >= slabend)
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);

    if ((((unsigned long)buf - (unsigned long)slab->addr) % cache->buf_size)
        != 0)
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);

    /*
     * As the buffer address is valid, accessing its buftag is safe.
     */
    buftag = kmem_buf_to_buftag(buf, cache);

    if (buftag->state != KMEM_BUFTAG_ALLOC) {
        if (buftag->state == KMEM_BUFTAG_FREE)
            kmem_cache_error(cache, buf, KMEM_ERR_DOUBLEFREE, NULL);
        else
            kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);
    }

    redzone_byte = buf + cache->obj_size;
    bufctl = kmem_buf_to_bufctl(buf, cache);

    while (redzone_byte < (unsigned char *)bufctl) {
        if (*redzone_byte != KMEM_REDZONE_BYTE)
            kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);

        redzone_byte++;
    }

    if (bufctl->redzone != KMEM_REDZONE_WORD) {
        unsigned long word;

        word = KMEM_REDZONE_WORD;
        redzone_byte = kmem_buf_verify_bytes(&bufctl->redzone, &word,
                                             sizeof(bufctl->redzone));
        kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);
    }

    kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
    buftag->state = KMEM_BUFTAG_FREE;
}

void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj)
{
#if SLAB_USE_CPU_POOLS
    struct kmem_cpu_pool *cpu_pool;
    void **array;

    cpu_pool = kmem_cpu_pool_get(cache);

    if (cpu_pool->flags & KMEM_CF_VERIFY) {
#else /* SLAB_USE_CPU_POOLS */
    if (cache->flags & KMEM_CF_VERIFY) {
#endif /* SLAB_USE_CPU_POOLS */
        kmem_cache_free_verify(cache, (void *)obj);
    }

#if SLAB_USE_CPU_POOLS
    if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL)
        goto slab_free;

    simple_lock(&cpu_pool->lock);

fast_free:
    if (likely(cpu_pool->nr_objs < cpu_pool->size)) {
        kmem_cpu_pool_push(cpu_pool, (void *)obj);
        simple_unlock(&cpu_pool->lock);
        return;
    }

    if (cpu_pool->array != NULL) {
        kmem_cpu_pool_drain(cpu_pool, cache);
        goto fast_free;
    }

    simple_unlock(&cpu_pool->lock);

    array = (void *)kmem_cache_alloc(cache->cpu_pool_type->array_cache);

    if (array != NULL) {
        simple_lock(&cpu_pool->lock);

        /*
         * Another thread may have built the CPU pool while the lock was
         * dropped.
         */
        if (cpu_pool->array != NULL) {
            simple_unlock(&cpu_pool->lock);
            kmem_cache_free(cache->cpu_pool_type->array_cache,
                            (vm_offset_t)array);
            simple_lock(&cpu_pool->lock);
            goto fast_free;
        }

        kmem_cpu_pool_build(cpu_pool, cache, array);
        goto fast_free;
    }

slab_free:
#endif /* SLAB_USE_CPU_POOLS */

    simple_lock(&cache->lock);
    kmem_cache_free_to_slab(cache, (void *)obj);
    simple_unlock(&cache->lock);
}

void slab_collect(void)
{
    struct kmem_cache *cache;
    struct kmem_slab *slab;
    struct list dead_slabs;

    if (elapsed_ticks <= (kmem_gc_last_tick + KMEM_GC_INTERVAL))
        return;

    kmem_gc_last_tick = elapsed_ticks;

    list_init(&dead_slabs);

    simple_lock(&kmem_cache_list_lock);

    list_for_each_entry(&kmem_cache_list, cache, node)
        kmem_cache_reap(cache, &dead_slabs);

    simple_unlock(&kmem_cache_list_lock);

    while (!list_empty(&dead_slabs)) {
        slab = list_first_entry(&dead_slabs, struct kmem_slab, list_node);
        list_remove(&slab->list_node);
        kmem_slab_destroy(slab, slab->cache);
    }
}

void slab_bootstrap(void)
{
    /* Make sure a bufctl can always be stored in a buffer */
    assert(sizeof(union kmem_bufctl) <= KMEM_ALIGN_MIN);

    list_init(&kmem_cache_list);
    simple_lock_init(&kmem_cache_list_lock);
}

void slab_init(void)
{
#if SLAB_USE_CPU_POOLS
    struct kmem_cpu_pool_type *cpu_pool_type;
    char name[KMEM_CACHE_NAME_SIZE];
    size_t i, size;
#endif /* SLAB_USE_CPU_POOLS */

#if SLAB_USE_CPU_POOLS
    for (i = 0; i < ARRAY_SIZE(kmem_cpu_pool_types); i++) {
        cpu_pool_type = &kmem_cpu_pool_types[i];
        cpu_pool_type->array_cache = &kmem_cpu_array_caches[i];
        sprintf(name, "kmem_cpu_array_%d", cpu_pool_type->array_size);
        size = sizeof(void *) * cpu_pool_type->array_size;
        kmem_cache_init(cpu_pool_type->array_cache, name, size,
                        cpu_pool_type->array_align, NULL, 0);
    }
#endif /* SLAB_USE_CPU_POOLS */

    /*
     * Prevent off slab data for the slab cache to avoid infinite recursion.
     */
    kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab),
                    0, NULL, KMEM_CACHE_NOOFFSLAB);
}

void kalloc_init(void)
{
    char name[KMEM_CACHE_NAME_SIZE];
    size_t i, size;

    size = 1 << KALLOC_FIRST_SHIFT;

    for (i = 0; i < ARRAY_SIZE(kalloc_caches); i++) {
        sprintf(name, "kalloc_%lu", size);
        kmem_cache_init(&kalloc_caches[i], name, size, 0, NULL, 0);
        size <<= 1;
    }
}

/*
 * Return the kalloc cache index matching the given allocation size, which
 * must be strictly greater than 0.
 */
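/*
 * For example, with KALLOC_FIRST_SHIFT == 5 (32-byte smallest cache), sizes
 * 1-32 map to index 0, 33-64 to index 1, and a 100-byte request maps to
 * index 2, i.e. the 128-byte cache.
 */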
static inline size_t kalloc_get_index(unsigned long size)
{
    assert(size != 0);
    size = (size - 1) >> KALLOC_FIRST_SHIFT;

    if (size == 0)
        return 0;
    else
        return (sizeof(long) * 8) - __builtin_clzl(size);
}

static void kalloc_verify(struct kmem_cache *cache, void *buf, size_t size)
{
    size_t redzone_size;
    void *redzone;

    assert(size <= cache->obj_size);

    redzone = buf + size;
    redzone_size = cache->obj_size - size;
    memset(redzone, KMEM_REDZONE_BYTE, redzone_size);
}

vm_offset_t kalloc(vm_size_t size)
{
    size_t index;
    void *buf;

    if (size == 0)
        return 0;

    index = kalloc_get_index(size);

    if (index < ARRAY_SIZE(kalloc_caches)) {
        struct kmem_cache *cache;

        cache = &kalloc_caches[index];
        buf = (void *)kmem_cache_alloc(cache);

        if ((buf != 0) && (cache->flags & KMEM_CF_VERIFY))
            kalloc_verify(cache, buf, size);
    } else {
        buf = (void *)kmem_pagealloc_virtual(size, 0);
    }

    return (vm_offset_t)buf;
}

static void kfree_verify(struct kmem_cache *cache, void *buf, size_t size)
{
    unsigned char *redzone_byte, *redzone_end;

    assert(size <= cache->obj_size);

    redzone_byte = buf + size;
    redzone_end = buf + cache->obj_size;

    while (redzone_byte < redzone_end) {
        if (*redzone_byte != KMEM_REDZONE_BYTE)
            kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);

        redzone_byte++;
    }
}

void kfree(vm_offset_t data, vm_size_t size)
{
    size_t index;

    if ((data == 0) || (size == 0))
        return;

    index = kalloc_get_index(size);

    if (index < ARRAY_SIZE(kalloc_caches)) {
        struct kmem_cache *cache;

        cache = &kalloc_caches[index];

        if (cache->flags & KMEM_CF_VERIFY)
            kfree_verify(cache, (void *)data, size);

        kmem_cache_free(cache, data);
    } else {
        kmem_pagefree_virtual(data, size);
    }
}

static void _slab_info(int (printx)(const char *fmt, ...))
{
    struct kmem_cache *cache;
    vm_size_t mem_usage, mem_reclaimable, mem_total, mem_total_reclaimable;

    mem_total = 0;
    mem_total_reclaimable = 0;

    printx("cache                      obj slab  bufs   objs   bufs "
           "   total reclaimable\n"
           "name                 flags size size /slab  usage  count "
           "  memory      memory\n");

    simple_lock(&kmem_cache_list_lock);

    list_for_each_entry(&kmem_cache_list, cache, node) {
        simple_lock(&cache->lock);

        mem_usage = (cache->nr_slabs * cache->slab_size) >> 10;
        mem_reclaimable = (cache->nr_free_slabs * cache->slab_size) >> 10;

        printx("%-20s %04x %7lu %3luk %4lu %6lu %6lu %7uk %10uk\n",
               cache->name, cache->flags, cache->obj_size,
               cache->slab_size >> 10,
               cache->bufs_per_slab, cache->nr_objs, cache->nr_bufs,
               mem_usage, mem_reclaimable);

        simple_unlock(&cache->lock);

        mem_total += mem_usage;
        mem_total_reclaimable += mem_reclaimable;
    }

    simple_unlock(&kmem_cache_list_lock);

    printx("total: %uk, reclaimable: %uk\n",
           mem_total, mem_total_reclaimable);
}

void slab_info(void)
{
    _slab_info(printf);
}

#if MACH_KDB
#include <ddb/db_output.h>
void db_show_slab_info(void)
{
    _slab_info(db_printf);
}
#endif /* MACH_KDB */

#if MACH_DEBUG
kern_return_t host_slab_info(host_t host, cache_info_array_t *infop,
                             unsigned int *infoCntp)
{
    struct kmem_cache *cache;
    cache_info_t *info;
    unsigned int i, nr_caches;
    vm_size_t info_size;
    kern_return_t kr;

    if (host == HOST_NULL)
        return KERN_INVALID_HOST;

    /* Assume the cache list is mostly unaltered once the kernel is ready */

retry:
    /* Harmless unsynchronized access, real value checked later */
    nr_caches = kmem_nr_caches;
    info_size = nr_caches * sizeof(*info);
    info = (cache_info_t *)kalloc(info_size);

    if (info == NULL)
        return KERN_RESOURCE_SHORTAGE;

    i = 0;

    simple_lock(&kmem_cache_list_lock);

    if (nr_caches != kmem_nr_caches) {
        simple_unlock(&kmem_cache_list_lock);
        kfree((vm_offset_t)info, info_size);
        goto retry;
    }

    list_for_each_entry(&kmem_cache_list, cache, node) {
        simple_lock(&cache->lock);

        info[i].flags = cache->flags;
#if SLAB_USE_CPU_POOLS
        info[i].cpu_pool_size = cache->cpu_pool_type->array_size;
#else /* SLAB_USE_CPU_POOLS */
        info[i].cpu_pool_size = 0;
#endif /* SLAB_USE_CPU_POOLS */
        info[i].obj_size = cache->obj_size;
        info[i].align = cache->align;
        info[i].buf_size = cache->buf_size;
        info[i].slab_size = cache->slab_size;
        info[i].bufs_per_slab = cache->bufs_per_slab;
        info[i].nr_objs = cache->nr_objs;
        info[i].nr_bufs = cache->nr_bufs;
        info[i].nr_slabs = cache->nr_slabs;
        info[i].nr_free_slabs = cache->nr_free_slabs;
        strncpy(info[i].name, cache->name, sizeof(info[i].name));
        info[i].name[sizeof(info[i].name) - 1] = '\0';

        simple_unlock(&cache->lock);

        i++;
    }

    simple_unlock(&kmem_cache_list_lock);

    if (nr_caches <= *infoCntp) {
        memcpy(*infop, info, info_size);
    } else {
        vm_offset_t info_addr;
        vm_size_t total_size;
        vm_map_copy_t copy;

        kr = kmem_alloc_pageable(ipc_kernel_map, &info_addr, info_size);

        if (kr != KERN_SUCCESS)
            goto out;

        memcpy((char *)info_addr, info, info_size);
        total_size = round_page(info_size);

        if (info_size < total_size)
            memset((char *)(info_addr + info_size),
                   0, total_size - info_size);

        kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size, TRUE, &copy);
        assert(kr == KERN_SUCCESS);
        *infop = (cache_info_t *)copy;
    }

    *infoCntp = nr_caches;
    kr = KERN_SUCCESS;

out:
    kfree((vm_offset_t)info, info_size);
    return kr;
}
#endif /* MACH_DEBUG */