vm_page.c

  1. /*
  2. * Copyright (c) 2010-2014 Richard Braun.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. *
  18. * This implementation uses the binary buddy system to manage its heap.
  19. * Descriptions of the buddy system can be found in the following works :
  20. * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
  21. * - "Dynamic Storage Allocation: A Survey and Critical Review",
  22. * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
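*
* For instance, assuming 4 KiB pages, the buddy of a free order-0 block at
* physical address 0x5000 lies at 0x5000 ^ 0x1000 = 0x4000; when both blocks
* are free, they are merged into a single order-1 block starting at 0x4000.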
  23. *
  24. * In addition, this allocator uses per-CPU pools of pages for order 0
  25. * (i.e. single page) allocations. These pools act as caches (but are named
  26. * differently to avoid confusion with CPU caches) that reduce contention on
  27. * multiprocessor systems. When a pool is empty and cannot provide a page,
  28. * it is filled by transferring multiple pages from the backend buddy system.
  29. * The symmetric case is handled likewise.
  30. *
  31. * TODO Limit number of dirty pages, block allocations above a top limit.
  32. */
  33. #include <string.h>
  34. #include <kern/assert.h>
  35. #include <kern/counters.h>
  36. #include <kern/cpu_number.h>
  37. #include <kern/debug.h>
  38. #include <kern/list.h>
  39. #include <kern/lock.h>
  40. #include <kern/macros.h>
  41. #include <kern/printf.h>
  42. #include <kern/thread.h>
  43. #include <mach/vm_param.h>
  44. #include <machine/pmap.h>
  45. #include <sys/types.h>
  46. #include <vm/memory_object.h>
  47. #include <vm/vm_page.h>
  48. #include <vm/vm_pageout.h>
  49. #define DEBUG 0
  50. #define __init
  51. #define __initdata
  52. #define __read_mostly
  53. #define thread_pin()
  54. #define thread_unpin()
  55. /*
  56. * Number of free block lists per segment.
  57. */
  58. #define VM_PAGE_NR_FREE_LISTS 11
  59. /*
  60. * The size of a CPU pool is computed by dividing the number of pages in its
  61. * containing segment by this value.
  62. */
  63. #define VM_PAGE_CPU_POOL_RATIO 1024
  64. /*
  65. * Maximum number of pages in a CPU pool.
  66. */
  67. #define VM_PAGE_CPU_POOL_MAX_SIZE 128
  68. /*
  69. * The transfer size of a CPU pool is computed by dividing the pool size by
  70. * this value.
  71. */
  72. #define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2
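/*
* As an illustration, assuming 4 KiB pages, a 512 MiB segment contains
* 131072 pages, so each CPU pool is sized 131072 / 1024 = 128 pages (the
* maximum), and pool refills and drains move 128 / 2 = 64 pages at a time
* between the pool and the buddy allocator.
*/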
  73. /*
  74. * Per-processor cache of pages.
  75. */
  76. struct vm_page_cpu_pool {
  77. simple_lock_data_t lock;
  78. int size;
  79. int transfer_size;
  80. int nr_pages;
  81. struct list pages;
  82. } __aligned(CPU_L1_SIZE);
  83. /*
  84. * Special order value for pages that aren't in a free list. Such pages are
  85. * either allocated, or part of a free block of pages but not the head page.
  86. */
  87. #define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1)
  88. /*
  89. * Doubly-linked list of free blocks.
  90. */
  91. struct vm_page_free_list {
  92. unsigned long size;
  93. struct list blocks;
  94. };
  95. /*
  96. * XXX Because of a potential deadlock involving the default pager (see
  97. * vm_map_lock()), it's currently impossible to reliably determine the
  98. * minimum number of free pages required for successful pageout. Since
  99. * that process is dependent on the amount of physical memory, we scale
  100. * the minimum number of free pages from it, in the hope that memory
  101. * exhaustion happens as rarely as possible...
  102. */
  103. /*
  104. * Ratio used to compute the minimum number of pages in a segment.
  105. */
  106. #define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5
  107. #define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
  108. /*
  109. * Number of pages reserved for privileged allocations in a segment.
  110. */
  111. #define VM_PAGE_SEG_THRESHOLD_MIN 500
  112. /*
  113. * Ratio used to compute the threshold below which pageout is started.
  114. */
  115. #define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6
  116. #define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
  117. /*
  118. * Minimum value the low threshold can have for a segment.
  119. */
  120. #define VM_PAGE_SEG_THRESHOLD_LOW 600
  121. #if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
  122. #error VM_PAGE_SEG_THRESHOLD_LOW invalid
  123. #endif /* VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN */
  124. /*
  125. * Ratio used to compute the threshold above which pageout is stopped.
  126. */
  127. #define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10
  128. #define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
  129. /*
  130. * Minimum value the high threshold can have for a segment.
  131. */
  132. #define VM_PAGE_SEG_THRESHOLD_HIGH 1000
  133. #if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
  134. #error VM_PAGE_SEG_THRESHOLD_HIGH invalid
  135. #endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
  136. /*
  137. * Minimum number of pages allowed for a segment.
  138. */
  139. #define VM_PAGE_SEG_MIN_PAGES 2000
  140. #if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
  141. #error VM_PAGE_SEG_MIN_PAGES invalid
  142. #endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
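/*
* As a worked example of the ratios above, a segment of 262144 pages
* (1 GiB, assuming 4 KiB pages) gets min = 262144 * 5 / 100 = 13107,
* low = 262144 * 6 / 100 = 15728 and high = 262144 * 10 / 100 = 26214
* free pages; the static floors of 500, 600 and 1000 pages only take
* over for much smaller segments.
*/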
  143. /*
  144. * Ratio used to compute the threshold of active pages beyond which
  145. * to refill the inactive queue.
  146. */
  147. #define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1
  148. #define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3
  149. /*
  150. * Page cache queue.
  151. *
  152. * XXX The current implementation hardcodes a preference to evict external
  153. * pages first and keep internal ones as much as possible. This is because
  154. * the Hurd default pager implementation suffers from bugs that can easily
  155. * cause the system to freeze.
  156. */
  157. struct vm_page_queue {
  158. struct list internal_pages;
  159. struct list external_pages;
  160. };
  161. /*
  162. * Segment name buffer size.
  163. */
  164. #define VM_PAGE_NAME_SIZE 16
  165. /*
  166. * Segment of contiguous memory.
  167. *
  168. * XXX Per-segment locking is probably useless, since one or both of the
  169. * page queues lock and the free page queue lock is held on any access.
  170. * However it should first be made clear which lock protects access to
  171. * which members of a segment.
  172. */
  173. struct vm_page_seg {
  174. struct vm_page_cpu_pool cpu_pools[NCPUS];
  175. phys_addr_t start;
  176. phys_addr_t end;
  177. struct vm_page *pages;
  178. struct vm_page *pages_end;
  179. simple_lock_data_t lock;
  180. struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
  181. unsigned long nr_free_pages;
  182. /* Free memory thresholds */
  183. unsigned long min_free_pages; /* Privileged allocations only */
  184. unsigned long low_free_pages; /* Pageout daemon starts scanning */
  185. unsigned long high_free_pages; /* Pageout daemon stops scanning,
  186. unprivileged allocations resume */
  187. /* Page cache related data */
  188. struct vm_page_queue active_pages;
  189. unsigned long nr_active_pages;
  190. unsigned long high_active_pages;
  191. struct vm_page_queue inactive_pages;
  192. unsigned long nr_inactive_pages;
  193. };
  194. /*
  195. * Bootstrap information about a segment.
  196. */
  197. struct vm_page_boot_seg {
  198. phys_addr_t start;
  199. phys_addr_t end;
  200. boolean_t heap_present;
  201. phys_addr_t avail_start;
  202. phys_addr_t avail_end;
  203. };
  204. static int vm_page_is_ready __read_mostly;
  205. /*
  206. * Segment table.
  207. *
  208. * The system supports a maximum of 4 segments :
  209. * - DMA: suitable for DMA
  210. * - DMA32: suitable for DMA when devices support 32-bits addressing
  211. * - DIRECTMAP: direct physical mapping, allows direct access from
  212. * the kernel with a simple offset translation
  213. * - HIGHMEM: must be mapped before it can be accessed
  214. *
  215. * Segments are ordered by priority, 0 being the lowest priority. Their
  216. * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some segments
  217. * may actually be aliases for others, e.g. if DMA is always possible from
  218. * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP,
  219. * in which case the segment table contains DIRECTMAP and HIGHMEM only.
  220. */
  221. static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS];
  222. /*
  223. * Bootstrap segment table.
  224. */
  225. static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
  226. /*
  227. * Number of loaded segments.
  228. */
  229. static unsigned int vm_page_segs_size __read_mostly;
  230. /*
  231. * If true, unprivileged allocations are blocked, disregarding any other
  232. * condition.
  233. *
  234. * This variable is also used to resume clients once pages are available.
  235. *
  236. * The free page queue lock must be held when accessing this variable.
  237. */
  238. static boolean_t vm_page_alloc_paused;
  239. static void __init
  240. vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
  241. {
  242. memset(page, 0, sizeof(*page));
  243. vm_page_init(page); /* vm_resident members */
  244. page->type = VM_PT_RESERVED;
  245. page->seg_index = seg_index;
  246. page->order = VM_PAGE_ORDER_UNLISTED;
  247. page->priv = NULL;
  248. page->phys_addr = pa;
  249. }
  250. void
  251. vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
  252. {
  253. unsigned int i, nr_pages;
  254. nr_pages = 1 << order;
  255. for (i = 0; i < nr_pages; i++)
  256. page[i].type = type;
  257. }
  258. static boolean_t
  259. vm_page_pageable(const struct vm_page *page)
  260. {
  261. return (page->object != NULL)
  262. && (page->wire_count == 0)
  263. && (page->active || page->inactive);
  264. }
  265. static boolean_t
  266. vm_page_can_move(const struct vm_page *page)
  267. {
  268. /*
  269. * This function is called on pages pulled from the page queues,
  270. * implying they're pageable, which is why the wire count isn't
  271. * checked here.
  272. */
  273. return !page->busy
  274. && !page->wanted
  275. && !page->absent
  276. && page->object->alive;
  277. }
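/*
* Make the page inaccessible: mark it busy, revoke every physical mapping
* through the pmap layer, and record whether the hardware dirtied it.
*/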
  278. static void
  279. vm_page_remove_mappings(struct vm_page *page)
  280. {
  281. page->busy = TRUE;
  282. pmap_page_protect(page->phys_addr, VM_PROT_NONE);
  283. if (!page->dirty) {
  284. page->dirty = pmap_is_modified(page->phys_addr);
  285. }
  286. }
  287. static void __init
  288. vm_page_free_list_init(struct vm_page_free_list *free_list)
  289. {
  290. free_list->size = 0;
  291. list_init(&free_list->blocks);
  292. }
  293. static inline void
  294. vm_page_free_list_insert(struct vm_page_free_list *free_list,
  295. struct vm_page *page)
  296. {
  297. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  298. free_list->size++;
  299. list_insert_head(&free_list->blocks, &page->node);
  300. }
  301. static inline void
  302. vm_page_free_list_remove(struct vm_page_free_list *free_list,
  303. struct vm_page *page)
  304. {
  305. assert(page->order != VM_PAGE_ORDER_UNLISTED);
  306. free_list->size--;
  307. list_remove(&page->node);
  308. }
  309. static struct vm_page *
  310. vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
  311. {
  312. struct vm_page_free_list *free_list = free_list;
  313. struct vm_page *page, *buddy;
  314. unsigned int i;
  315. assert(order < VM_PAGE_NR_FREE_LISTS);
  316. if (vm_page_alloc_paused && current_thread()
  317. && !current_thread()->vm_privilege) {
  318. return NULL;
  319. } else if (seg->nr_free_pages <= seg->low_free_pages) {
  320. vm_pageout_start();
  321. if ((seg->nr_free_pages <= seg->min_free_pages)
  322. && current_thread() && !current_thread()->vm_privilege) {
  323. vm_page_alloc_paused = TRUE;
  324. return NULL;
  325. }
  326. }
  327. for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
  328. free_list = &seg->free_lists[i];
  329. if (free_list->size != 0)
  330. break;
  331. }
  332. if (i == VM_PAGE_NR_FREE_LISTS)
  333. return NULL;
  334. page = list_first_entry(&free_list->blocks, struct vm_page, node);
  335. vm_page_free_list_remove(free_list, page);
  336. page->order = VM_PAGE_ORDER_UNLISTED;
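/*
* Split the block until it matches the requested order, returning the
* upper half (the buddy) of each split to the free list of its order.
*/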
  337. while (i > order) {
  338. i--;
  339. buddy = &page[1 << i];
  340. vm_page_free_list_insert(&seg->free_lists[i], buddy);
  341. buddy->order = i;
  342. }
  343. seg->nr_free_pages -= (1 << order);
  344. if (seg->nr_free_pages < seg->min_free_pages) {
  345. vm_page_alloc_paused = TRUE;
  346. }
  347. return page;
  348. }
  349. static void
  350. vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
  351. unsigned int order)
  352. {
  353. struct vm_page *buddy;
  354. phys_addr_t pa, buddy_pa;
  355. unsigned int nr_pages;
  356. assert(page >= seg->pages);
  357. assert(page < seg->pages_end);
  358. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  359. assert(order < VM_PAGE_NR_FREE_LISTS);
  360. nr_pages = (1 << order);
  361. pa = page->phys_addr;
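/*
* Coalesce with free buddies of increasing order. The buddy of a block of
* 2^order pages at address pa is at pa ^ (2^order * PAGE_SIZE); merging is
* only possible if it lies within the segment and is itself a free block
* of the same order.
*/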
  362. while (order < (VM_PAGE_NR_FREE_LISTS - 1)) {
  363. buddy_pa = pa ^ vm_page_ptoa(1 << order);
  364. if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
  365. break;
  366. buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
  367. if (buddy->order != order)
  368. break;
  369. vm_page_free_list_remove(&seg->free_lists[order], buddy);
  370. buddy->order = VM_PAGE_ORDER_UNLISTED;
  371. order++;
  372. pa &= -vm_page_ptoa(1 << order);
  373. page = &seg->pages[vm_page_atop(pa - seg->start)];
  374. }
  375. vm_page_free_list_insert(&seg->free_lists[order], page);
  376. page->order = order;
  377. seg->nr_free_pages += nr_pages;
  378. }
  379. static void __init
  380. vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size)
  381. {
  382. simple_lock_init(&cpu_pool->lock);
  383. cpu_pool->size = size;
  384. cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1)
  385. / VM_PAGE_CPU_POOL_TRANSFER_RATIO;
  386. cpu_pool->nr_pages = 0;
  387. list_init(&cpu_pool->pages);
  388. }
  389. static inline struct vm_page_cpu_pool *
  390. vm_page_cpu_pool_get(struct vm_page_seg *seg)
  391. {
  392. return &seg->cpu_pools[cpu_number()];
  393. }
  394. static inline struct vm_page *
  395. vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool)
  396. {
  397. struct vm_page *page;
  398. assert(cpu_pool->nr_pages != 0);
  399. cpu_pool->nr_pages--;
  400. page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
  401. list_remove(&page->node);
  402. return page;
  403. }
  404. static inline void
  405. vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
  406. {
  407. assert(cpu_pool->nr_pages < cpu_pool->size);
  408. cpu_pool->nr_pages++;
  409. list_insert_head(&cpu_pool->pages, &page->node);
  410. }
  411. static int
  412. vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool,
  413. struct vm_page_seg *seg)
  414. {
  415. struct vm_page *page;
  416. int i;
  417. assert(cpu_pool->nr_pages == 0);
  418. simple_lock(&seg->lock);
  419. for (i = 0; i < cpu_pool->transfer_size; i++) {
  420. page = vm_page_seg_alloc_from_buddy(seg, 0);
  421. if (page == NULL)
  422. break;
  423. vm_page_cpu_pool_push(cpu_pool, page);
  424. }
  425. simple_unlock(&seg->lock);
  426. return i;
  427. }
  428. static void
  429. vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
  430. struct vm_page_seg *seg)
  431. {
  432. struct vm_page *page;
  433. int i;
  434. assert(cpu_pool->nr_pages == cpu_pool->size);
  435. simple_lock(&seg->lock);
  436. for (i = cpu_pool->transfer_size; i > 0; i--) {
  437. page = vm_page_cpu_pool_pop(cpu_pool);
  438. vm_page_seg_free_to_buddy(seg, page, 0);
  439. }
  440. simple_unlock(&seg->lock);
  441. }
  442. static void
  443. vm_page_queue_init(struct vm_page_queue *queue)
  444. {
  445. list_init(&queue->internal_pages);
  446. list_init(&queue->external_pages);
  447. }
  448. static void
  449. vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
  450. {
  451. if (page->external) {
  452. list_insert_tail(&queue->external_pages, &page->node);
  453. } else {
  454. list_insert_tail(&queue->internal_pages, &page->node);
  455. }
  456. }
  457. static void
  458. vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
  459. {
  460. (void)queue;
  461. list_remove(&page->node);
  462. }
  463. static struct vm_page *
  464. vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
  465. {
  466. struct vm_page *page;
  467. if (!list_empty(&queue->external_pages)) {
  468. page = list_first_entry(&queue->external_pages, struct vm_page, node);
  469. return page;
  470. }
  471. if (!external_only && !list_empty(&queue->internal_pages)) {
  472. page = list_first_entry(&queue->internal_pages, struct vm_page, node);
  473. return page;
  474. }
  475. return NULL;
  476. }
  477. static struct vm_page_seg *
  478. vm_page_seg_get(unsigned short index)
  479. {
  480. assert(index < vm_page_segs_size);
  481. return &vm_page_segs[index];
  482. }
  483. static unsigned int
  484. vm_page_seg_index(const struct vm_page_seg *seg)
  485. {
  486. unsigned int index;
  487. index = seg - vm_page_segs;
  488. assert(index < vm_page_segs_size);
  489. return index;
  490. }
  491. static phys_addr_t __init
  492. vm_page_seg_size(struct vm_page_seg *seg)
  493. {
  494. return seg->end - seg->start;
  495. }
  496. static int __init
  497. vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
  498. {
  499. phys_addr_t size;
  500. size = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO;
  501. if (size == 0)
  502. size = 1;
  503. else if (size > VM_PAGE_CPU_POOL_MAX_SIZE)
  504. size = VM_PAGE_CPU_POOL_MAX_SIZE;
  505. return size;
  506. }
  507. static void __init
  508. vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
  509. {
  510. unsigned long nr_pages;
  511. nr_pages = vm_page_atop(vm_page_seg_size(seg));
  512. if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
  513. panic("vm_page: segment too small");
  514. }
  515. seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
  516. / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
  517. if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
  518. seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
  519. }
  520. seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
  521. / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
  522. if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
  523. seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
  524. }
  525. seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
  526. / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
  527. if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
  528. seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
  529. }
  530. }
  531. static void __init
  532. vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
  533. struct vm_page *pages)
  534. {
  535. phys_addr_t pa;
  536. int pool_size;
  537. unsigned int i;
  538. seg->start = start;
  539. seg->end = end;
  540. pool_size = vm_page_seg_compute_pool_size(seg);
  541. for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
  542. vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size);
  543. seg->pages = pages;
  544. seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg));
  545. simple_lock_init(&seg->lock);
  546. for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
  547. vm_page_free_list_init(&seg->free_lists[i]);
  548. seg->nr_free_pages = 0;
  549. vm_page_seg_compute_pageout_thresholds(seg);
  550. vm_page_queue_init(&seg->active_pages);
  551. seg->nr_active_pages = 0;
  552. vm_page_queue_init(&seg->inactive_pages);
  553. seg->nr_inactive_pages = 0;
  554. i = vm_page_seg_index(seg);
  555. for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
  556. vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
  557. }
  558. static struct vm_page *
  559. vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order,
  560. unsigned short type)
  561. {
  562. struct vm_page_cpu_pool *cpu_pool;
  563. struct vm_page *page;
  564. int filled;
  565. assert(order < VM_PAGE_NR_FREE_LISTS);
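/*
* Single page allocations are served from the per-CPU pool, refilling it
* from the buddy system when empty; larger allocations go directly to the
* buddy free lists under the segment lock.
*/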
  566. if (order == 0) {
  567. thread_pin();
  568. cpu_pool = vm_page_cpu_pool_get(seg);
  569. simple_lock(&cpu_pool->lock);
  570. if (cpu_pool->nr_pages == 0) {
  571. filled = vm_page_cpu_pool_fill(cpu_pool, seg);
  572. if (!filled) {
  573. simple_unlock(&cpu_pool->lock);
  574. thread_unpin();
  575. return NULL;
  576. }
  577. }
  578. page = vm_page_cpu_pool_pop(cpu_pool);
  579. simple_unlock(&cpu_pool->lock);
  580. thread_unpin();
  581. } else {
  582. simple_lock(&seg->lock);
  583. page = vm_page_seg_alloc_from_buddy(seg, order);
  584. simple_unlock(&seg->lock);
  585. if (page == NULL)
  586. return NULL;
  587. }
  588. assert(page->type == VM_PT_FREE);
  589. vm_page_set_type(page, order, type);
  590. return page;
  591. }
  592. static void
  593. vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
  594. unsigned int order)
  595. {
  596. struct vm_page_cpu_pool *cpu_pool;
  597. assert(page->type != VM_PT_FREE);
  598. assert(order < VM_PAGE_NR_FREE_LISTS);
  599. vm_page_set_type(page, order, VM_PT_FREE);
  600. if (order == 0) {
  601. thread_pin();
  602. cpu_pool = vm_page_cpu_pool_get(seg);
  603. simple_lock(&cpu_pool->lock);
  604. if (cpu_pool->nr_pages == cpu_pool->size)
  605. vm_page_cpu_pool_drain(cpu_pool, seg);
  606. vm_page_cpu_pool_push(cpu_pool, page);
  607. simple_unlock(&cpu_pool->lock);
  608. thread_unpin();
  609. } else {
  610. simple_lock(&seg->lock);
  611. vm_page_seg_free_to_buddy(seg, page, order);
  612. simple_unlock(&seg->lock);
  613. }
  614. }
  615. static void
  616. vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
  617. {
  618. assert(page->object != NULL);
  619. assert(page->seg_index == vm_page_seg_index(seg));
  620. assert(page->type != VM_PT_FREE);
  621. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  622. assert(!page->free && !page->active && !page->inactive);
  623. page->active = TRUE;
  624. page->reference = TRUE;
  625. vm_page_queue_push(&seg->active_pages, page);
  626. seg->nr_active_pages++;
  627. vm_page_active_count++;
  628. }
  629. static void
  630. vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
  631. {
  632. assert(page->object != NULL);
  633. assert(page->seg_index == vm_page_seg_index(seg));
  634. assert(page->type != VM_PT_FREE);
  635. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  636. assert(!page->free && page->active && !page->inactive);
  637. page->active = FALSE;
  638. vm_page_queue_remove(&seg->active_pages, page);
  639. seg->nr_active_pages--;
  640. vm_page_active_count--;
  641. }
  642. static void
  643. vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
  644. {
  645. assert(page->object != NULL);
  646. assert(page->seg_index == vm_page_seg_index(seg));
  647. assert(page->type != VM_PT_FREE);
  648. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  649. assert(!page->free && !page->active && !page->inactive);
  650. page->inactive = TRUE;
  651. vm_page_queue_push(&seg->inactive_pages, page);
  652. seg->nr_inactive_pages++;
  653. vm_page_inactive_count++;
  654. }
  655. static void
  656. vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
  657. {
  658. assert(page->object != NULL);
  659. assert(page->seg_index == vm_page_seg_index(seg));
  660. assert(page->type != VM_PT_FREE);
  661. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  662. assert(!page->free && !page->active && page->inactive);
  663. page->inactive = FALSE;
  664. vm_page_queue_remove(&seg->inactive_pages, page);
  665. seg->nr_inactive_pages--;
  666. vm_page_inactive_count--;
  667. }
  668. /*
  669. * Attempt to pull an active page.
  670. *
  671. * If successful, the object containing the page is locked.
  672. */
  673. static struct vm_page *
  674. vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
  675. {
  676. struct vm_page *page, *first;
  677. boolean_t locked;
  678. first = NULL;
  679. for (;;) {
  680. page = vm_page_queue_first(&seg->active_pages, external_only);
  681. if (page == NULL) {
  682. break;
  683. } else if (first == NULL) {
  684. first = page;
  685. } else if (first == page) {
  686. break;
  687. }
  688. vm_page_seg_remove_active_page(seg, page);
  689. locked = vm_object_lock_try(page->object);
  690. if (!locked) {
  691. vm_page_seg_add_active_page(seg, page);
  692. continue;
  693. }
  694. if (!vm_page_can_move(page)) {
  695. vm_page_seg_add_active_page(seg, page);
  696. vm_object_unlock(page->object);
  697. continue;
  698. }
  699. return page;
  700. }
  701. return NULL;
  702. }
  703. /*
  704. * Attempt to pull an inactive page.
  705. *
  706. * If successful, the object containing the page is locked.
  707. *
  708. * XXX See vm_page_seg_pull_active_page (duplicated code).
  709. */
  710. static struct vm_page *
  711. vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
  712. {
  713. struct vm_page *page, *first;
  714. boolean_t locked;
  715. first = NULL;
  716. for (;;) {
  717. page = vm_page_queue_first(&seg->inactive_pages, external_only);
  718. if (page == NULL) {
  719. break;
  720. } else if (first == NULL) {
  721. first = page;
  722. } else if (first == page) {
  723. break;
  724. }
  725. vm_page_seg_remove_inactive_page(seg, page);
  726. locked = vm_object_lock_try(page->object);
  727. if (!locked) {
  728. vm_page_seg_add_inactive_page(seg, page);
  729. continue;
  730. }
  731. if (!vm_page_can_move(page)) {
  732. vm_page_seg_add_inactive_page(seg, page);
  733. vm_object_unlock(page->object);
  734. continue;
  735. }
  736. return page;
  737. }
  738. return NULL;
  739. }
  740. /*
  741. * Attempt to pull a page cache page.
  742. *
  743. * If successful, the object containing the page is locked.
  744. */
  745. static struct vm_page *
  746. vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
  747. boolean_t external_only,
  748. boolean_t *was_active)
  749. {
  750. struct vm_page *page;
  751. page = vm_page_seg_pull_inactive_page(seg, external_only);
  752. if (page != NULL) {
  753. *was_active = FALSE;
  754. return page;
  755. }
  756. page = vm_page_seg_pull_active_page(seg, external_only);
  757. if (page != NULL) {
  758. *was_active = TRUE;
  759. return page;
  760. }
  761. return NULL;
  762. }
  763. static boolean_t
  764. vm_page_seg_page_available(const struct vm_page_seg *seg)
  765. {
  766. return (seg->nr_free_pages > seg->high_free_pages);
  767. }
  768. static boolean_t
  769. vm_page_seg_usable(const struct vm_page_seg *seg)
  770. {
  771. if ((seg->nr_active_pages + seg->nr_inactive_pages) == 0) {
  772. /* Nothing to page out, assume segment is usable */
  773. return TRUE;
  774. }
  775. return (seg->nr_free_pages >= seg->high_free_pages);
  776. }
  777. static void
  778. vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
  779. {
  780. assert(seg1 != seg2);
  781. if (seg1 < seg2) {
  782. simple_lock(&seg1->lock);
  783. simple_lock(&seg2->lock);
  784. } else {
  785. simple_lock(&seg2->lock);
  786. simple_lock(&seg1->lock);
  787. }
  788. }
  789. static void
  790. vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
  791. {
  792. simple_unlock(&seg1->lock);
  793. simple_unlock(&seg2->lock);
  794. }
  795. /*
  796. * Attempt to balance a segment by moving one page to another segment.
  797. *
  798. * Return TRUE if a page was actually moved.
  799. */
  800. static boolean_t
  801. vm_page_seg_balance_page(struct vm_page_seg *seg,
  802. struct vm_page_seg *remote_seg)
  803. {
  804. struct vm_page *src, *dest;
  805. vm_object_t object;
  806. vm_offset_t offset;
  807. boolean_t was_active;
  808. vm_page_lock_queues();
  809. simple_lock(&vm_page_queue_free_lock);
  810. vm_page_seg_double_lock(seg, remote_seg);
  811. if (vm_page_seg_usable(seg)
  812. || !vm_page_seg_page_available(remote_seg)) {
  813. goto error;
  814. }
  815. src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
  816. if (src == NULL) {
  817. goto error;
  818. }
  819. assert(src->object != NULL);
  820. assert(!src->fictitious && !src->private);
  821. assert(src->wire_count == 0);
  822. assert(src->type != VM_PT_FREE);
  823. assert(src->order == VM_PAGE_ORDER_UNLISTED);
  824. dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
  825. assert(dest != NULL);
  826. vm_page_seg_double_unlock(seg, remote_seg);
  827. simple_unlock(&vm_page_queue_free_lock);
  828. if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
  829. src->reference = TRUE;
  830. }
  831. object = src->object;
  832. offset = src->offset;
  833. vm_page_remove(src);
  834. vm_page_remove_mappings(src);
  835. vm_page_set_type(dest, 0, src->type);
  836. memcpy(&dest->vm_page_header, &src->vm_page_header,
  837. sizeof(*dest) - VM_PAGE_HEADER_SIZE);
  838. vm_page_copy(src, dest);
  839. if (!src->dirty) {
  840. pmap_clear_modify(dest->phys_addr);
  841. }
  842. dest->busy = FALSE;
  843. simple_lock(&vm_page_queue_free_lock);
  844. vm_page_init(src);
  845. src->free = TRUE;
  846. simple_lock(&seg->lock);
  847. vm_page_set_type(src, 0, VM_PT_FREE);
  848. vm_page_seg_free_to_buddy(seg, src, 0);
  849. simple_unlock(&seg->lock);
  850. simple_unlock(&vm_page_queue_free_lock);
  851. vm_page_insert(dest, object, offset);
  852. vm_object_unlock(object);
  853. if (was_active) {
  854. vm_page_activate(dest);
  855. } else {
  856. vm_page_deactivate(dest);
  857. }
  858. vm_page_unlock_queues();
  859. return TRUE;
  860. error:
  861. vm_page_seg_double_unlock(seg, remote_seg);
  862. simple_unlock(&vm_page_queue_free_lock);
  863. vm_page_unlock_queues();
  864. return FALSE;
  865. }
  866. static boolean_t
  867. vm_page_seg_balance(struct vm_page_seg *seg)
  868. {
  869. struct vm_page_seg *remote_seg;
  870. unsigned int i;
  871. boolean_t balanced;
  872. /*
  873. * It's important here that pages are moved to lower priority
  874. * segments first.
  875. */
  876. for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
  877. remote_seg = vm_page_seg_get(i);
  878. if (remote_seg == seg) {
  879. continue;
  880. }
  881. balanced = vm_page_seg_balance_page(seg, remote_seg);
  882. if (balanced) {
  883. return TRUE;
  884. }
  885. }
  886. return FALSE;
  887. }
  888. static boolean_t
  889. vm_page_seg_evict(struct vm_page_seg *seg, boolean_t external_only,
  890. boolean_t alloc_paused)
  891. {
  892. struct vm_page *page;
  893. boolean_t reclaim, double_paging;
  894. vm_object_t object;
  895. boolean_t was_active;
  896. page = NULL;
  897. object = NULL;
  898. double_paging = FALSE;
  899. restart:
  900. vm_page_lock_queues();
  901. simple_lock(&seg->lock);
  902. if (page != NULL) {
  903. vm_object_lock(page->object);
  904. } else {
  905. page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
  906. if (page == NULL) {
  907. goto out;
  908. }
  909. }
  910. assert(page->object != NULL);
  911. assert(!page->fictitious && !page->private);
  912. assert(page->wire_count == 0);
  913. assert(page->type != VM_PT_FREE);
  914. assert(page->order == VM_PAGE_ORDER_UNLISTED);
  915. object = page->object;
  916. if (!was_active
  917. && (page->reference || pmap_is_referenced(page->phys_addr))) {
  918. vm_page_seg_add_active_page(seg, page);
  919. simple_unlock(&seg->lock);
  920. vm_object_unlock(object);
  921. vm_stat.reactivations++;
  922. current_task()->reactivations++;
  923. vm_page_unlock_queues();
  924. page = NULL;
  925. goto restart;
  926. }
  927. vm_page_remove_mappings(page);
  928. if (!page->dirty && !page->precious) {
  929. reclaim = TRUE;
  930. goto out;
  931. }
  932. reclaim = FALSE;
  933. /*
  934. * If we are very low on memory, then we can't rely on an external
  935. * pager to clean a dirty page, because external pagers are not
  936. * vm-privileged.
  937. *
  938. * The laundry bit tells vm_pageout_setup not to do any special
  939. * processing of this page since it's immediately going to be
  940. * double paged out to the default pager. The laundry bit is
  941. * reset and the page is inserted into an internal object by
  942. * vm_pageout_setup before the second double paging pass.
  943. *
  944. * There is one important special case: the default pager can
  945. * back external memory objects. When receiving the first
  946. * pageout request, where the page is no longer present, a
  947. * fault could occur, during which the map would be locked.
  948. * This fault would cause a new paging request to the default
  949. * pager. Receiving that request would deadlock when trying to
  950. * lock the map again. Instead, the page isn't double paged
  951. * and vm_pageout_setup wires the page down, trusting the
  952. * default pager as for internal pages.
  953. */
  954. assert(!page->laundry);
  955. assert(!(double_paging && page->external));
  956. if (object->internal || !alloc_paused ||
  957. memory_manager_default_port(object->pager)) {
  958. double_paging = FALSE;
  959. } else {
  960. double_paging = page->laundry = TRUE;
  961. }
  962. out:
  963. simple_unlock(&seg->lock);
  964. if (object == NULL) {
  965. vm_page_unlock_queues();
  966. return FALSE;
  967. }
  968. if (reclaim) {
  969. vm_page_free(page);
  970. vm_page_unlock_queues();
  971. if (vm_object_collectable(object)) {
  972. vm_object_collect(object);
  973. } else {
  974. vm_object_unlock(object);
  975. }
  976. return TRUE;
  977. }
  978. vm_page_unlock_queues();
  979. /*
  980. * If there is no memory object for the page, create one and hand it
  981. * to the default pager. First try to collapse, so we don't create
  982. * one unnecessarily.
  983. */
  984. if (!object->pager_initialized) {
  985. vm_object_collapse(object);
  986. }
  987. if (!object->pager_initialized) {
  988. vm_object_pager_create(object);
  989. }
  990. if (!object->pager_initialized) {
  991. panic("vm_page_seg_evict");
  992. }
  993. vm_pageout_page(page, FALSE, TRUE); /* flush it */
  994. vm_object_unlock(object);
  995. if (double_paging) {
  996. goto restart;
  997. }
  998. return TRUE;
  999. }
  1000. static void
  1001. vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
  1002. {
  1003. unsigned long nr_pages;
  1004. nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
  1005. seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
  1006. / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
  1007. }
  1008. static void
  1009. vm_page_seg_refill_inactive(struct vm_page_seg *seg)
  1010. {
  1011. struct vm_page *page;
  1012. simple_lock(&seg->lock);
  1013. vm_page_seg_compute_high_active_page(seg);
  1014. while (seg->nr_active_pages > seg->high_active_pages) {
  1015. page = vm_page_seg_pull_active_page(seg, FALSE);
  1016. if (page == NULL) {
  1017. break;
  1018. }
  1019. page->reference = FALSE;
  1020. pmap_clear_reference(page->phys_addr);
  1021. vm_page_seg_add_inactive_page(seg, page);
  1022. vm_object_unlock(page->object);
  1023. }
  1024. simple_unlock(&seg->lock);
  1025. }
  1026. void __init
  1027. vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
  1028. {
  1029. struct vm_page_boot_seg *seg;
  1030. assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
  1031. assert(vm_page_aligned(start));
  1032. assert(vm_page_aligned(end));
  1033. assert(start < end);
  1034. assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
  1035. seg = &vm_page_boot_segs[seg_index];
  1036. seg->start = start;
  1037. seg->end = end;
  1038. seg->heap_present = FALSE;
  1039. #if DEBUG
  1040. printf("vm_page: load: %s: %llx:%llx\n",
  1041. vm_page_seg_name(seg_index),
  1042. (unsigned long long)start, (unsigned long long)end);
  1043. #endif
  1044. vm_page_segs_size++;
  1045. }
  1046. void
  1047. vm_page_load_heap(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
  1048. {
  1049. struct vm_page_boot_seg *seg;
  1050. assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
  1051. assert(vm_page_aligned(start));
  1052. assert(vm_page_aligned(end));
  1053. seg = &vm_page_boot_segs[seg_index];
  1054. assert(seg->start <= start);
  1055. assert(end <= seg->end);
  1056. seg->avail_start = start;
  1057. seg->avail_end = end;
  1058. seg->heap_present = TRUE;
  1059. #if DEBUG
  1060. printf("vm_page: heap: %s: %llx:%llx\n",
  1061. vm_page_seg_name(seg_index),
  1062. (unsigned long long)start, (unsigned long long)end);
  1063. #endif
  1064. }
  1065. int
  1066. vm_page_ready(void)
  1067. {
  1068. return vm_page_is_ready;
  1069. }
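/*
* Convert an allocation selector into a segment index, clamped to the last
* (highest priority) loaded segment so that selectors naming segments not
* present on this machine fall back to an existing one.
*/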
  1070. static unsigned int
  1071. vm_page_select_alloc_seg(unsigned int selector)
  1072. {
  1073. unsigned int seg_index;
  1074. switch (selector) {
  1075. case VM_PAGE_SEL_DMA:
  1076. seg_index = VM_PAGE_SEG_DMA;
  1077. break;
  1078. case VM_PAGE_SEL_DMA32:
  1079. seg_index = VM_PAGE_SEG_DMA32;
  1080. break;
  1081. case VM_PAGE_SEL_DIRECTMAP:
  1082. seg_index = VM_PAGE_SEG_DIRECTMAP;
  1083. break;
  1084. case VM_PAGE_SEL_HIGHMEM:
  1085. seg_index = VM_PAGE_SEG_HIGHMEM;
  1086. break;
  1087. default:
  1088. panic("vm_page: invalid selector");
  1089. }
  1090. return MIN(vm_page_segs_size - 1, seg_index);
  1091. }
  1092. static int __init
  1093. vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
  1094. {
  1095. return (seg->end != 0);
  1096. }
  1097. static void __init
  1098. vm_page_check_boot_segs(void)
  1099. {
  1100. unsigned int i;
  1101. int expect_loaded;
  1102. if (vm_page_segs_size == 0)
  1103. panic("vm_page: no physical memory loaded");
  1104. for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
  1105. expect_loaded = (i < vm_page_segs_size);
  1106. if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
  1107. continue;
  1108. panic("vm_page: invalid boot segment table");
  1109. }
  1110. }
  1111. static phys_addr_t __init
  1112. vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
  1113. {
  1114. return seg->end - seg->start;
  1115. }
  1116. static phys_addr_t __init
  1117. vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
  1118. {
  1119. return seg->avail_end - seg->avail_start;
  1120. }
  1121. unsigned long __init
  1122. vm_page_bootalloc(size_t size)
  1123. {
  1124. struct vm_page_boot_seg *seg;
  1125. phys_addr_t pa;
  1126. unsigned int i;
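/*
* Scan segments from DIRECTMAP down to the lowest priority one; the
* unsigned counter wraps around past zero, which terminates the loop.
*/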
  1127. for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
  1128. i < vm_page_segs_size;
  1129. i--) {
  1130. seg = &vm_page_boot_segs[i];
  1131. if (size <= vm_page_boot_seg_avail_size(seg)) {
  1132. pa = seg->avail_start;
  1133. seg->avail_start += vm_page_round(size);
  1134. return pa;
  1135. }
  1136. }
  1137. panic("vm_page: no physical memory available");
  1138. }
  1139. void __init
  1140. vm_page_setup(void)
  1141. {
  1142. struct vm_page_boot_seg *boot_seg;
  1143. struct vm_page_seg *seg;
  1144. struct vm_page *table, *page, *end;
  1145. size_t nr_pages, table_size;
  1146. unsigned long va;
  1147. unsigned int i;
  1148. phys_addr_t pa;
  1149. vm_page_check_boot_segs();
  1150. /*
  1151. * Compute the page table size.
  1152. */
  1153. nr_pages = 0;
  1154. for (i = 0; i < vm_page_segs_size; i++)
  1155. nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
  1156. table_size = vm_page_round(nr_pages * sizeof(struct vm_page));
  1157. printf("vm_page: page table size: %lu entries (%luk)\n", nr_pages,
  1158. table_size >> 10);
  1159. table = (struct vm_page *)pmap_steal_memory(table_size);
  1160. va = (unsigned long)table;
  1161. /*
  1162. * Initialize the segments, associating them to the page table. When
  1163. * the segments are initialized, all their pages are set allocated.
  1164. * Pages are then released, which populates the free lists.
  1165. */
  1166. for (i = 0; i < vm_page_segs_size; i++) {
  1167. seg = &vm_page_segs[i];
  1168. boot_seg = &vm_page_boot_segs[i];
  1169. vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table);
  1170. page = seg->pages + vm_page_atop(boot_seg->avail_start
  1171. - boot_seg->start);
  1172. end = seg->pages + vm_page_atop(boot_seg->avail_end
  1173. - boot_seg->start);
  1174. while (page < end) {
  1175. page->type = VM_PT_FREE;
  1176. vm_page_seg_free_to_buddy(seg, page, 0);
  1177. page++;
  1178. }
  1179. table += vm_page_atop(vm_page_seg_size(seg));
  1180. }
  1181. while (va < (unsigned long)table) {
  1182. pa = pmap_extract(kernel_pmap, va);
  1183. page = vm_page_lookup_pa(pa);
  1184. assert((page != NULL) && (page->type == VM_PT_RESERVED));
  1185. page->type = VM_PT_TABLE;
  1186. va += PAGE_SIZE;
  1187. }
  1188. vm_page_is_ready = 1;
  1189. }
  1190. void __init
  1191. vm_page_manage(struct vm_page *page)
  1192. {
  1193. assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
  1194. assert(page->type == VM_PT_RESERVED);
  1195. vm_page_set_type(page, 0, VM_PT_FREE);
  1196. vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0);
  1197. }
  1198. struct vm_page *
  1199. vm_page_lookup_pa(phys_addr_t pa)
  1200. {
  1201. struct vm_page_seg *seg;
  1202. unsigned int i;
  1203. for (i = 0; i < vm_page_segs_size; i++) {
  1204. seg = &vm_page_segs[i];
  1205. if ((pa >= seg->start) && (pa < seg->end))
  1206. return &seg->pages[vm_page_atop(pa - seg->start)];
  1207. }
  1208. return NULL;
  1209. }
  1210. static struct vm_page_seg *
  1211. vm_page_lookup_seg(const struct vm_page *page)
  1212. {
  1213. struct vm_page_seg *seg;
  1214. unsigned int i;
  1215. for (i = 0; i < vm_page_segs_size; i++) {
  1216. seg = &vm_page_segs[i];
  1217. if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
  1218. return seg;
  1219. }
  1220. }
  1221. return NULL;
  1222. }
  1223. void vm_page_check(const struct vm_page *page)
  1224. {
  1225. if (page->fictitious) {
  1226. if (page->private) {
  1227. panic("vm_page: page both fictitious and private");
  1228. }
  1229. if (page->phys_addr != vm_page_fictitious_addr) {
  1230. panic("vm_page: invalid fictitious page");
  1231. }
  1232. } else {
  1233. struct vm_page_seg *seg;
  1234. if (page->phys_addr == vm_page_fictitious_addr) {
  1235. panic("vm_page: real page has fictitious address");
  1236. }
  1237. seg = vm_page_lookup_seg(page);
  1238. if (seg == NULL) {
  1239. if (!page->private) {
  1240. panic("vm_page: page claims it's managed but not in any segment");
  1241. }
  1242. } else {
  1243. if (page->private) {
  1244. struct vm_page *real_page;
  1245. if (vm_page_pageable(page)) {
  1246. panic("vm_page: private page is pageable");
  1247. }
  1248. real_page = vm_page_lookup_pa(page->phys_addr);
  1249. if (vm_page_pageable(real_page)) {
  1250. panic("vm_page: page underlying private page is pageable");
  1251. }
  1252. if ((real_page->type == VM_PT_FREE)
  1253. || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
  1254. panic("vm_page: page underlying private page is free");
  1255. }
  1256. } else {
  1257. unsigned int index;
  1258. index = vm_page_seg_index(seg);
  1259. if (index != page->seg_index) {
  1260. panic("vm_page: page segment mismatch");
  1261. }
  1262. }
  1263. }
  1264. }
  1265. }
  1266. struct vm_page *
  1267. vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
  1268. {
  1269. struct vm_page *page;
  1270. unsigned int i;
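/*
* Try the selected segment first, then lower priority ones; the unsigned
* counter wrapping past zero ends the loop.
*/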
  1271. for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
  1272. page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
  1273. if (page != NULL)
  1274. return page;
  1275. }
  1276. if (!current_thread() || current_thread()->vm_privilege)
  1277. panic("vm_page: privileged thread unable to allocate page");
  1278. return NULL;
  1279. }
  1280. void
  1281. vm_page_free_pa(struct vm_page *page, unsigned int order)
  1282. {
  1283. assert(page != NULL);
  1284. assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
  1285. vm_page_seg_free(&vm_page_segs[page->seg_index], page, order);
  1286. }
  1287. const char *
  1288. vm_page_seg_name(unsigned int seg_index)
  1289. {
  1290. /* Don't use a switch statement since segments can be aliased */
  1291. if (seg_index == VM_PAGE_SEG_HIGHMEM)
  1292. return "HIGHMEM";
  1293. else if (seg_index == VM_PAGE_SEG_DIRECTMAP)
  1294. return "DIRECTMAP";
  1295. else if (seg_index == VM_PAGE_SEG_DMA32)
  1296. return "DMA32";
  1297. else if (seg_index == VM_PAGE_SEG_DMA)
  1298. return "DMA";
  1299. else
  1300. panic("vm_page: invalid segment index");
  1301. }
  1302. void
  1303. vm_page_info_all(void)
  1304. {
  1305. struct vm_page_seg *seg;
  1306. unsigned long pages;
  1307. unsigned int i;
  1308. for (i = 0; i < vm_page_segs_size; i++) {
  1309. seg = &vm_page_segs[i];
  1310. pages = (unsigned long)(seg->pages_end - seg->pages);
  1311. printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
  1312. vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
  1313. seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
  1314. printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
  1315. vm_page_seg_name(vm_page_seg_index(seg)),
  1316. seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
  1317. }
  1318. }
  1319. phys_addr_t
  1320. vm_page_seg_end(unsigned int selector)
  1321. {
  1322. return vm_page_segs[vm_page_select_alloc_seg(selector)].end;
  1323. }
  1324. static unsigned long
  1325. vm_page_boot_table_size(void)
  1326. {
  1327. unsigned long nr_pages;
  1328. unsigned int i;
  1329. nr_pages = 0;
  1330. for (i = 0; i < vm_page_segs_size; i++) {
  1331. nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
  1332. }
  1333. return nr_pages;
  1334. }
  1335. unsigned long
  1336. vm_page_table_size(void)
  1337. {
  1338. unsigned long nr_pages;
  1339. unsigned int i;
  1340. if (!vm_page_is_ready) {
  1341. return vm_page_boot_table_size();
  1342. }
  1343. nr_pages = 0;
  1344. for (i = 0; i < vm_page_segs_size; i++) {
  1345. nr_pages += vm_page_atop(vm_page_seg_size(&vm_page_segs[i]));
  1346. }
  1347. return nr_pages;
  1348. }
  1349. unsigned long
  1350. vm_page_table_index(phys_addr_t pa)
  1351. {
  1352. struct vm_page_seg *seg;
  1353. unsigned long index;
  1354. unsigned int i;
  1355. index = 0;
  1356. for (i = 0; i < vm_page_segs_size; i++) {
  1357. seg = &vm_page_segs[i];
  1358. if ((pa >= seg->start) && (pa < seg->end)) {
  1359. return index + vm_page_atop(pa - seg->start);
  1360. }
  1361. index += vm_page_atop(vm_page_seg_size(seg));
  1362. }
  1363. panic("vm_page: invalid physical address");
  1364. }
  1365. phys_addr_t
  1366. vm_page_mem_size(void)
  1367. {
  1368. phys_addr_t total;
  1369. unsigned int i;
  1370. total = 0;
  1371. for (i = 0; i < vm_page_segs_size; i++) {
  1372. total += vm_page_seg_size(&vm_page_segs[i]);
  1373. }
  1374. return total;
  1375. }
  1376. unsigned long
  1377. vm_page_mem_free(void)
  1378. {
  1379. unsigned long total;
  1380. unsigned int i;
  1381. total = 0;
  1382. for (i = 0; i < vm_page_segs_size; i++) {
  1383. total += vm_page_segs[i].nr_free_pages;
  1384. }
  1385. return total;
  1386. }
  1387. /*
  1388. * Mark this page as wired down by yet another map, removing it
  1389. * from paging queues as necessary.
  1390. *
  1391. * The page's object and the page queues must be locked.
  1392. */
  1393. void
  1394. vm_page_wire(struct vm_page *page)
  1395. {
  1396. VM_PAGE_CHECK(page);
  1397. if (page->wire_count == 0) {
  1398. vm_page_queues_remove(page);
  1399. if (!page->private && !page->fictitious) {
  1400. vm_page_wire_count++;
  1401. }
  1402. }
  1403. page->wire_count++;
  1404. }
  1405. /*
  1406. * Release one wiring of this page, potentially enabling it to be paged again.
  1407. *
  1408. * The page's object and the page queues must be locked.
  1409. */
  1410. void
  1411. vm_page_unwire(struct vm_page *page)
  1412. {
  1413. struct vm_page_seg *seg;
  1414. VM_PAGE_CHECK(page);
  1415. assert(page->wire_count != 0);
  1416. page->wire_count--;
  1417. if ((page->wire_count != 0)
  1418. || page->fictitious
  1419. || page->private) {
  1420. return;
  1421. }
  1422. seg = vm_page_seg_get(page->seg_index);
  1423. simple_lock(&seg->lock);
  1424. vm_page_seg_add_active_page(seg, page);
  1425. simple_unlock(&seg->lock);
  1426. vm_page_wire_count--;
  1427. }
/*
 * Returns the given page to the inactive list, indicating that
 * no physical maps have access to this page.
 * [Used by the physical mapping system.]
 *
 * The page queues must be locked.
 */
void
vm_page_deactivate(struct vm_page *page)
{
    struct vm_page_seg *seg;

    VM_PAGE_CHECK(page);

    /*
     * This page is no longer very interesting. If it was
     * interesting (active or inactive/referenced), then we
     * clear the reference bit and (re)enter it in the
     * inactive queue. Note wired pages should not have
     * their reference bit cleared.
     */
    if (page->active || (page->inactive && page->reference)) {
        if (!page->fictitious && !page->private && !page->absent) {
            pmap_clear_reference(page->phys_addr);
        }

        page->reference = FALSE;
        vm_page_queues_remove(page);
    }

    if ((page->wire_count == 0) && !page->fictitious
        && !page->private && !page->inactive) {
        seg = vm_page_seg_get(page->seg_index);

        simple_lock(&seg->lock);
        vm_page_seg_add_inactive_page(seg, page);
        simple_unlock(&seg->lock);
    }
}
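#if 0
/*
 * Illustrative sketch, not part of the build: stripping all physical
 * mappings of a page before pushing it to the inactive list, as the
 * physical mapping system would. The example_deactivate() wrapper is made
 * up for this sketch; pmap_page_protect() is the standard pmap interface
 * for removing mappings.
 */
static void
example_deactivate(struct vm_page *page)
{
    vm_page_lock_queues();
    pmap_page_protect(page->phys_addr, VM_PROT_NONE);
    vm_page_deactivate(page);
    vm_page_unlock_queues();
}
#endif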
/*
 * Put the specified page on the active list (if appropriate).
 *
 * The page queues must be locked.
 */
void
vm_page_activate(struct vm_page *page)
{
    struct vm_page_seg *seg;

    VM_PAGE_CHECK(page);

    /*
     * Unconditionally remove so that, even if the page was already
     * active, it gets back to the end of the active queue.
     */
    vm_page_queues_remove(page);

    if ((page->wire_count == 0) && !page->fictitious && !page->private) {
        seg = vm_page_seg_get(page->seg_index);

        if (page->active)
            panic("vm_page_activate: already active");

        simple_lock(&seg->lock);
        vm_page_seg_add_active_page(seg, page);
        simple_unlock(&seg->lock);
    }
}
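#if 0
/*
 * Illustrative sketch, not part of the build: re-activating a page that is
 * found referenced, e.g. when a soft fault hits a page sitting on the
 * inactive queue. The example_reference_page() wrapper is made up for this
 * sketch.
 */
static void
example_reference_page(struct vm_page *page)
{
    vm_page_lock_queues();

    if (!page->active && !page->inactive) {
        vm_page_activate(page);
    }

    page->reference = TRUE;
    vm_page_unlock_queues();
}
#endif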
void
vm_page_queues_remove(struct vm_page *page)
{
    struct vm_page_seg *seg;

    assert(!page->active || !page->inactive);

    if (!page->active && !page->inactive) {
        return;
    }

    seg = vm_page_seg_get(page->seg_index);

    simple_lock(&seg->lock);

    if (page->active) {
        vm_page_seg_remove_active_page(seg, page);
    } else {
        vm_page_seg_remove_inactive_page(seg, page);
    }

    simple_unlock(&seg->lock);
}
/*
 * Check whether segments are all usable for unprivileged allocations.
 *
 * If all segments are usable, resume pending unprivileged allocations
 * and return TRUE.
 *
 * This function acquires vm_page_queue_free_lock, which is held on return.
 */
static boolean_t
vm_page_check_usable(void)
{
    struct vm_page_seg *seg;
    boolean_t usable;
    unsigned int i;

    simple_lock(&vm_page_queue_free_lock);

    for (i = 0; i < vm_page_segs_size; i++) {
        seg = vm_page_seg_get(i);

        simple_lock(&seg->lock);
        usable = vm_page_seg_usable(seg);
        simple_unlock(&seg->lock);

        if (!usable) {
            return FALSE;
        }
    }

    vm_page_external_laundry_count = -1;
    vm_page_alloc_paused = FALSE;
    thread_wakeup(&vm_page_alloc_paused);

    return TRUE;
}
static boolean_t
vm_page_may_balance(void)
{
    struct vm_page_seg *seg;
    boolean_t page_available;
    unsigned int i;

    for (i = 0; i < vm_page_segs_size; i++) {
        seg = vm_page_seg_get(i);

        simple_lock(&seg->lock);
        page_available = vm_page_seg_page_available(seg);
        simple_unlock(&seg->lock);

        if (page_available) {
            return TRUE;
        }
    }

    return FALSE;
}
static boolean_t
vm_page_balance_once(void)
{
    boolean_t balanced;
    unsigned int i;

    /*
     * It's important here that pages are moved from higher priority
     * segments first.
     */
    for (i = 0; i < vm_page_segs_size; i++) {
        balanced = vm_page_seg_balance(vm_page_seg_get(i));

        if (balanced) {
            return TRUE;
        }
    }

    return FALSE;
}
boolean_t
vm_page_balance(void)
{
    boolean_t balanced;

    while (vm_page_may_balance()) {
        balanced = vm_page_balance_once();

        if (!balanced) {
            break;
        }
    }

    return vm_page_check_usable();
}
static boolean_t
vm_page_evict_once(boolean_t external_only, boolean_t alloc_paused)
{
    boolean_t evicted;
    unsigned int i;

    /*
     * It's important here that pages are evicted from lower priority
     * segments first.
     */

    /* Walk the segments in reverse order; the unsigned index wraps
       around past zero, which terminates the loop. */
    for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
        evicted = vm_page_seg_evict(vm_page_seg_get(i),
                                    external_only, alloc_paused);

        if (evicted) {
            return TRUE;
        }
    }

    return FALSE;
}

#define VM_PAGE_MAX_LAUNDRY     5
#define VM_PAGE_MAX_EVICTIONS   5
boolean_t
vm_page_evict(boolean_t *should_wait)
{
    boolean_t pause, evicted, external_only, alloc_paused;
    unsigned int i;

    *should_wait = TRUE;
    external_only = TRUE;

    simple_lock(&vm_page_queue_free_lock);
    vm_page_external_laundry_count = 0;
    alloc_paused = vm_page_alloc_paused;
    simple_unlock(&vm_page_queue_free_lock);

again:
    vm_page_lock_queues();
    pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
    vm_page_unlock_queues();

    if (pause) {
        simple_lock(&vm_page_queue_free_lock);
        return FALSE;
    }

    for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
        evicted = vm_page_evict_once(external_only, alloc_paused);

        if (!evicted) {
            break;
        }
    }

    simple_lock(&vm_page_queue_free_lock);

    /*
     * Keep in mind eviction may not cause pageouts, since non-precious
     * clean pages are simply released.
     */
    if ((vm_page_laundry_count == 0) && (vm_page_external_laundry_count == 0)) {
        /*
         * No pageout, but some clean pages were freed. Start a complete
         * scan again without waiting.
         */
        if (evicted) {
            *should_wait = FALSE;
            return FALSE;
        }

        /*
         * Eviction failed, consider pages from internal objects on the
         * next attempt.
         */
        if (external_only) {
            simple_unlock(&vm_page_queue_free_lock);
            external_only = FALSE;
            goto again;
        }

        /*
         * TODO Find out what could cause this and how to deal with it.
         * This will likely require an out-of-memory killer.
         */
        panic("vm_page: unable to recycle any page");
    }

    simple_unlock(&vm_page_queue_free_lock);

    return vm_page_check_usable();
}
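#if 0
/*
 * Illustrative sketch, not part of the build: how a pageout daemon might
 * drive the balancing and eviction primitives above. This is not the
 * actual vm_pageout implementation; example_pageout_loop() and its control
 * flow are assumptions made for this sketch. Note that vm_page_balance()
 * and vm_page_evict() both return with vm_page_queue_free_lock held.
 */
static void
example_pageout_loop(void)
{
    boolean_t done, should_wait;

    for (;;) {
        /* Wait here until the allocator signals memory pressure (omitted). */

        /* First try to satisfy demand by moving pages between segments. */
        done = vm_page_balance();
        simple_unlock(&vm_page_queue_free_lock);    /* held on return */

        if (done) {
            continue;
        }

        /* Balancing was not enough: evict pages, possibly queuing pageouts. */
        done = vm_page_evict(&should_wait);
        simple_unlock(&vm_page_queue_free_lock);    /* held on return */

        if (!done && should_wait) {
            /* Wait for pending laundry to complete before rescanning
               (wait channel omitted in this sketch). */
        }
    }
}
#endif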
void
vm_page_refill_inactive(void)
{
    unsigned int i;

    vm_page_lock_queues();

    for (i = 0; i < vm_page_segs_size; i++) {
        vm_page_seg_refill_inactive(vm_page_seg_get(i));
    }

    vm_page_unlock_queues();
}
void
vm_page_wait(void (*continuation)(void))
{
    assert(!current_thread()->vm_privilege);

    simple_lock(&vm_page_queue_free_lock);

    if (!vm_page_alloc_paused) {
        simple_unlock(&vm_page_queue_free_lock);
        return;
    }

    assert_wait(&vm_page_alloc_paused, FALSE);
    simple_unlock(&vm_page_queue_free_lock);

    if (continuation != 0) {
        counter(c_vm_page_wait_block_user++);
        thread_block(continuation);
    } else {
        counter(c_vm_page_wait_block_kernel++);
        thread_block((void (*)(void)) 0);
    }
}
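#if 0
/*
 * Illustrative sketch, not part of the build: an unprivileged allocation
 * path retrying after blocking in vm_page_wait(). The example_alloc_page()
 * wrapper and the example_grab_page() stand-in for the real allocation
 * call are assumptions made for this sketch.
 */

/* Hypothetical allocator stand-in for this sketch. */
static struct vm_page *example_grab_page(void);

static struct vm_page *
example_alloc_page(void)
{
    struct vm_page *page;

    for (;;) {
        page = example_grab_page();

        if (page != NULL) {
            return page;
        }

        /* Block until paused allocations are resumed, then retry. */
        vm_page_wait((void (*)(void)) 0);
    }
}
#endif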