#include <sys/kernel.h>
#include <sys/blockcount.h>
#include <sys/eventhandler.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
    int starting_page_shortage);
SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
    &page_kp);
#define VM_LAUNDER_RATE		10
#define VM_INACT_SCAN_RATE	10
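/*
 * Both rates are in passes per second: the laundry thread paces itself to
 * VM_LAUNDER_RATE laundering passes per second, and the page daemon's
 * inactive scan runs at VM_INACT_SCAN_RATE.  Their ratio bounds how many
 * laundering passes fit into one inactive-scan interval, which is how the
 * shortfall cycle budget below is derived.
 */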
SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
    "Panic on the given number of out-of-memory errors instead of "
    "killing the largest process");
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RWTUN,
    &vm_pageout_update_period, 0, "Maximum active LRU update period");
SYSCTL_INT(_vm, OID_AUTO, pageout_cpus_per_thread, CTLFLAG_RDTUN,
    &pageout_cpus_per_thread, 0,
    "Number of CPUs per pagedaemon worker thread");
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
    "Low memory callback period");
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, CTLFLAG_RWTUN,
    &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD,
    &pageout_lock_miss, 0, "vget() lock misses during pageout");
SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, CTLFLAG_RWTUN,
    &vm_pageout_oom_seq, 0,
    "back-to-back calls to oom detector to start OOM");
SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN,
    &act_scan_laundry_weight, 0,
    "weight given to clean vs. dirty pages in active queue scans");
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
    &vm_background_launder_rate, 0,
    "background laundering rate, in kilobytes per second");
SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN,
    &vm_background_launder_max, 0, "background laundering cap, in kilobytes");
SYSCTL_ULONG(_vm, OID_AUTO, max_user_wired, CTLFLAG_RW,
    &vm_page_max_user_wired, 0, "system-wide limit to user-wired page count");
static u_int isqrt(u_int num);
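/*
 * Queue scans are driven by marker pages: vm_pageout_init_scan() links a
 * marker into the page queue at the chosen starting point, which lets a scan
 * drop and reacquire the page queue lock without losing its place.
 */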
static void
vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq,
    vm_page_t marker, vm_page_t after, int maxscan)
{
	KASSERT((marker->a.flags & PGA_ENQUEUED) == 0,
	    ("marker %p already enqueued", marker));

	if (after == NULL)
		TAILQ_INSERT_HEAD(&pq->pq_pl, marker, plinks.q);
	else
		TAILQ_INSERT_AFTER(&pq->pq_pl, after, marker, plinks.q);
	/* ... */
}
static void
vm_pageout_end_scan(struct scan_state *ss)
{
	/* ... */
	KASSERT((ss->marker->a.flags & PGA_ENQUEUED) != 0,
	    ("marker %p not enqueued", ss->marker));
	/* ... */
}
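/*
 * vm_pageout_collect_batch() transfers a run of pages from the queue into
 * the scan state's batch queue under a single page queue lock acquisition,
 * so the scan proper can examine pages without holding the heavily
 * contended queue lock.
 */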
static __always_inline void
vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue)
{
	struct vm_pagequeue *pq;
	vm_page_t m, marker, n;

	marker = ss->marker;
	pq = ss->pq;
	KASSERT((marker->a.flags & PGA_ENQUEUED) != 0,
	    ("marker %p not enqueued", ss->marker));

	vm_pagequeue_lock(pq);
	for (m = TAILQ_NEXT(marker, plinks.q); m != NULL &&
	    ss->scanned < ss->maxscan &&
	    ss->bq.bq_cnt < VM_BATCHQUEUE_SIZE; m = n, ss->scanned++) {
		n = TAILQ_NEXT(m, plinks.q);
		if ((m->flags & PG_MARKER) == 0) {
			KASSERT((m->a.flags & PGA_ENQUEUED) != 0,
			    ("page %p not enqueued", m));
			KASSERT((m->flags & PG_FICTITIOUS) == 0,
			    ("Fictitious page %p cannot be in page queue", m));
			KASSERT((m->oflags & VPO_UNMANAGED) == 0,
			    ("Unmanaged page %p cannot be in page queue", m));
		} else if (dequeue)
			continue;
		(void)vm_batchqueue_insert(&ss->bq, m);
		if (dequeue) {
			TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
			vm_page_aflag_clear(m, PGA_ENQUEUED);
		}
	}
	/* Move the marker to just before the first unscanned page. */
	TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
	if (__predict_true(m != NULL))
		TAILQ_INSERT_BEFORE(m, marker, plinks.q);
	else
		TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q);
	if (dequeue)
		vm_pagequeue_cnt_add(pq, -ss->bq.bq_cnt);
	vm_pagequeue_unlock(pq);
}
static __always_inline vm_page_t
vm_pageout_next(struct scan_state *ss, const bool dequeue)
{
	if (ss->bq.bq_cnt == 0)
		vm_pageout_collect_batch(ss, dequeue);
	return (vm_batchqueue_pop(&ss->bq));
}
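/*
 * vm_pageout_defer() rechecks a page's atomically loaded queue state after
 * it comes out of a batch: if the page changed queues or its enqueued flag
 * no longer matches what the scan expects, or a dequeue is pending, the
 * caller skips the page rather than operating on stale state.
 */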
static __always_inline bool
vm_pageout_defer(vm_page_t m, const uint8_t queue, const bool enqueued)
{
	vm_page_astate_t as;

	as = vm_page_astate_load(m);
	if (__predict_false(as.queue != queue ||
	    ((as.flags & PGA_ENQUEUED) != 0) != enqueued))
		return (true);
	/* ... */
}
static int
vm_pageout_cluster(vm_page_t m)
{
	/* ... */
	int ib, is, page_base, pageout_count;

	/* ... walk backward from m, prepending eligible dirty pages: */
		mc[--page_base] = pb = p;
	/* ... then walk forward while the cluster and the object allow: */
	while (pageout_count < vm_pageout_page_count &&
	    pindex + is < object->size) {
		/* ... */
		mc[page_base + pageout_count] = ps = p;
		/* ... */
	}
	/* ... */
}
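/*
 * The mc[] array is seeded at its midpoint with the original page and then
 * filled outward: pb walks backward (inserting at --page_base) and ps walks
 * forward (appending at page_base + pageout_count), so the finished cluster
 * occupies mc[page_base .. page_base + pageout_count - 1] and can be handed
 * to vm_pageout_flush() as one contiguous pageout request.
 */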
int
vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
    boolean_t *eio)
{
	/* ... */
	int pageout_status[count];

	for (i = 0; i < count; i++) {
		KASSERT(vm_page_all_valid(mc[i]),
		    ("vm_pageout_flush: partially invalid page %p index %d/%d",
		    mc[i], i, count));
		KASSERT((mc[i]->a.flags & PGA_WRITEABLE) == 0,
		    ("vm_pageout_flush: writeable page %p", mc[i]));
		/* ... */
	}
	/* ... hand the run to the pager ... */

	runlen = count - mreq;
	for (i = 0; i < count; i++) {
		vm_page_t mt = mc[i];

		KASSERT(pageout_status[i] == VM_PAGER_PEND ||
		    !pmap_page_is_write_mapped(mt),
		    ("vm_pageout_flush: page %p is not write protected", mt));
		switch (pageout_status[i]) {
		/* ... */
		case VM_PAGER_ERROR:
		case VM_PAGER_FAIL:
			/* ... */
			if (eio != NULL && i >= mreq && i - mreq < runlen)
				*eio = TRUE;
			break;
		case VM_PAGER_AGAIN:
			if (i >= mreq && i - mreq < runlen)
				runlen = i - mreq;
			break;
		}
		/* ... */
	}
	/* ... */
	return (numpagedout);
}
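/*
 * Note the mreq/runlen convention above: mreq indexes the page the caller
 * actually needs written, and runlen, initially count - mreq, is trimmed at
 * the first VM_PAGER_AGAIN so it reports how many pages starting at mreq
 * completed; *eio is raised only for hard errors landing inside that run.
 */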
static int
vm_pageout_clean(vm_page_t m, int *numpagedout)
{
	/* ... */
		if (vp->v_type == VREG &&
		    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
			/* ... fail with EDEADLK ... */
		}
		KASSERT(mp != NULL,
		    ("vp %p with NULL v_mount", vp));
		/* ... */
		if (vget(vp, vn_lktype_write(NULL, vp) | LK_TIMELOCK) != 0) {
			/* ... count a lock miss, fail with EDEADLK ... */
		}
		/* The vnode may have lost its object while unlocked. */
		if (vp->v_object != object) {
			/* ... fail with ENOENT ... */
		}
		/* The page may have been cleaned or repurposed meanwhile. */
		if (!vm_page_in_laundry(m) || m->object != object ||
		    m->pindex != pindex || m->dirty == 0) {
			/* ... fail with ENXIO ... */
		}
	/* ... */
	vn_finished_write(mp);
	/* ... */
}
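/*
 * vm_pageout_launder() below is the laundry thread's scan loop: it walks the
 * laundry (or unswappable) queue looking for dirty pages to push out through
 * vm_pageout_clean().  An EDEADLK return from the cleaning path above means
 * a vnode lock could not be acquired; the scan records the miss and nudges
 * the syncer rather than stalling on the lock.
 */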
static int
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
{
	/* ... */
	int act_delta, error, numpagedout, queue, refs, starting_target;

	starting_target = launder;
	/* ... */
		if (__predict_false((m->flags & PG_MARKER) != 0))
			continue;

		/* Lock the page's object, re-validating the page's identity. */
		if (object == NULL || object != m->object) {
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);
			object = atomic_load_ptr(&m->object);
			if (__predict_false(object == NULL))
				/* The page is being freed by another thread. */
				continue;
			VM_OBJECT_WLOCK(object);
			if (__predict_false(m->object != object)) {
				VM_OBJECT_WUNLOCK(object);
				object = NULL;
				continue;
			}
		}
		/* ... */
			new.flags &= ~PGA_REFERENCED;
		/* ... */
			if (act_delta == 0) {
				/* ... */
			}
		/* ... reactivate a referenced page: */
			new.flags &= ~PGA_QUEUE_OP_MASK;
			/* ... */
			VM_CNT_INC(v_reactivated);
		/* ... clean the page and credit the progress: */
			launder -= numpagedout;
		} else if (error == EDEADLK) {
			pageout_lock_miss++;
			vnodes_skipped++;
		}
		/* ... */

	if (object != NULL) {
		VM_OBJECT_WUNLOCK(object);
		object = NULL;
	}

	/*
	 * Wakeup the sync daemon if we skipped a vnode and didn't launder
	 * enough pages.
	 */
	if (vnodes_skipped > 0 && launder > 0)
		(void)speedup_syncer();

	return (starting_target - launder);
}
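/*
 * isqrt() computes the integer square root with the classic digit-by-digit
 * (base-4) method: "bit" starts at the largest power of four not exceeding
 * num, and the loop repeatedly tries to subtract (root + bit), halving root
 * and dropping bit two positions each round.  In outline (a sketch, not
 * verbatim source):
 *
 *	root = 0;
 *	while (bit != 0) {
 *		tmp = root + bit;
 *		root >>= 1;
 *		if (num >= tmp) {
 *			num -= tmp;
 *			root += bit;
 *		}
 *		bit >>= 2;
 *	}
 *	return (root);
 *
 * For example, isqrt(9): bit = 4; first round subtracts 4 (root becomes 4),
 * second round subtracts 5 (root becomes 3), and 3 is returned.
 */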
static u_int
isqrt(u_int num)
{
	u_int bit, root, tmp;

	/* Start at the largest power of four that does not exceed num. */
	bit = num != 0 ? (1u << ((fls(num) - 1) & ~1)) : 0;
	/* ... */
}
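/*
 * The laundry worker below runs in one of two modes.  In shortfall, the page
 * daemon failed to meet its free target and laundering proceeds as fast as
 * possible, spreading the target over a VM_LAUNDER_RATE / VM_INACT_SCAN_RATE
 * cycle budget.  Otherwise, background laundering kicks in once the dirty
 * queue grows too large relative to the clean page count, pacing itself by
 * vm_background_launder_rate and capping each run at
 * vm_background_launder_max.
 */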
static void
vm_pageout_laundry_worker(void *arg)
{
	struct vm_domain *vmd;
	uint64_t nclean, ndirty, nfreed;
	int domain, last_target, launder, shortfall, shortfall_cycle, target;
	bool in_shortfall;

	domain = (uintptr_t)arg;
	vmd = VM_DOMAIN(domain);
	KASSERT(vmd->vmd_segs != 0, ("domain without segments"));

	shortfall = 0;
	in_shortfall = false;
	shortfall_cycle = 0;
	last_target = target = 0;
	nfreed = 0;

	(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
	    EVENTHANDLER_PRI_ANY);
	(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
	    EVENTHANDLER_PRI_ANY);

	for (;;) {
		KASSERT(target >= 0, ("negative target %d", target));
		KASSERT(shortfall_cycle >= 0,
		    ("negative cycle %d", shortfall_cycle));
		launder = 0;

		/*
		 * First determine whether we need to launder pages to meet a
		 * shortage of free pages.
		 */
		if (shortfall > 0) {
			in_shortfall = true;
			shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
			target = shortfall;
		} else if (!in_shortfall)
			goto trybackground;
		else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
			/* The shortfall was resolved; stop laundering. */
			in_shortfall = false;
			target = 0;
			goto trybackground;
		}
		launder = target / shortfall_cycle--;
		goto dolaundry;

trybackground:
		nclean = vmd->vmd_free_count +
		    vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
		ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
		if (target == 0 && ndirty * isqrt(howmany(nfreed + 1,
		    vmd->vmd_free_target - vmd->vmd_free_min)) >= nclean)
			target = vmd->vmd_background_launder_target;

		if (target > 0) {
			if (nfreed > 0) {
				nfreed = 0;
				last_target = target;
			} else if (last_target - target >=
			    vm_background_launder_max * PAGE_SIZE / 1024)
				target = 0;
			launder = vm_background_launder_rate * PAGE_SIZE / 1024;
			launder /= VM_LAUNDER_RATE;
			if (launder > target)
				launder = target;
		}

dolaundry:
		if (launder > 0) {
			target -= min(vm_pageout_launder(vmd, launder,
			    in_shortfall), target);
			pause("laundp", hz / VM_LAUNDER_RATE);
		}

		/* ... sleep until the page daemon posts a new request ... */
		if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
		    (!in_shortfall || shortfall_cycle == 0)) {
			shortfall = vm_laundry_target(vmd) +
			    vmd->vmd_pageout_deficit;
			target = 0;
		} else
			shortfall = 0;
		/* ... */
	}
}
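/*
 * The active queue scan below implements a two-handed CLOCK: the marker
 * pages vmd_clock[0] and vmd_clock[1] bracket the unscanned portion of the
 * queue.  Scans begin at the first hand and, when the second hand is
 * reached, both hands are reset to the head and tail of the queue and the
 * scan resumes, so pages that remain active need not be requeued on every
 * pass.
 */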
static void
vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
{
	/* ... */
	long min_scan;
	int act_delta, max_scan, ps_delta, refs, scan_tick;
	/* ... */

	if (min_scan > 0 || (page_shortage > 0 && pq->pq_cnt > 0))
		vmd->vmd_last_active_scan = scan_tick;

	max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan;
act_scan:
	vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
	while ((m = vm_pageout_next(&ss, false)) != NULL) {
		if (__predict_false(m == &vmd->vmd_clock[1])) {
			/* The hands met: reset them and restart the scan. */
			/* ... */
			goto act_scan;
		}
		if (__predict_false((m->flags & PG_MARKER) != 0))
			continue;
		/* ... */
		object = atomic_load_ptr(&m->object);
		if (__predict_false(object == NULL))
			continue;

		/* Deferred free of swap space. */
		if ((m->a.flags & PGA_SWAP_FREE) != 0 &&
		    VM_OBJECT_TRYWLOCK(object)) {
			if (m->object == object)
				vm_pager_page_unswapped(m);
			VM_OBJECT_WUNLOCK(object);
		}
		/* ... advance or decay the page's act_count: */
			if ((old.flags & PGA_REFERENCED) != 0) {
				new.flags &= ~PGA_REFERENCED;
				act_delta++;
			}
			if (act_delta != 0) {
				/* ... */
			} else
				new.act_count -= min(new.act_count,
				    ACT_DECLINE);

			if (new.act_count > 0) {
				/* ... keep the page in the active queue: */
				new.flags &= ~PGA_QUEUE_OP_MASK;
				/* ... */
			} else {
				/*
				 * Deactivate; clean pages are weighted
				 * against dirty ones, which must visit the
				 * laundry before they can be reused.
				 */
				if (page_shortage <= 0) {
					/* ... */
				} else if (m->dirty == 0) {
					/* ... */
					ps_delta = act_scan_laundry_weight;
				} else {
					/* ... move to PQ_LAUNDRY ... */
				}
				new.flags &= ~PGA_QUEUE_OP_MASK;
				/* ... */
			}
		/* ... */
		page_shortage -= ps_delta;
	}
	vm_pagequeue_lock(pq);
	TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
	TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
	vm_pageout_end_scan(&ss);
	vm_pagequeue_unlock(pq);
}

static int
vm_pageout_reinsert_inactive_page(struct vm_pagequeue *pq, vm_page_t marker,
    vm_page_t m)
{
	/* ... */
	TAILQ_INSERT_BEFORE(marker, m, plinks.q);
	/* ... */
}
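/*
 * vm_pageout_scan_inactive() below is the core reclamation loop: it dequeues
 * pages from PQ_INACTIVE in batches, frees clean unreferenced pages, and
 * moves dirty ones to the laundry.  Pages that cannot be processed are put
 * back near the marker by vm_pageout_reinsert_inactive_page() above.
 */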
static void
vm_pageout_scan_inactive(struct vm_domain *vmd, int page_shortage)
{
	struct timeval start, end;
	/* ... */
	vm_page_t m, marker;
	/* ... */
	int act_delta, addl_page_shortage, starting_page_shortage, refs;

	/* ... */
	getmicrouptime(&start);

	/*
	 * addl_page_shortage estimates the number of temporarily stuck pages
	 * in the inactive queue, to be discounted from the active scan target.
	 */
	addl_page_shortage = 0;

	starting_page_shortage = page_shortage;
	marker = &marker_page;
	vm_page_init_marker(marker, PQ_INACTIVE, 0);
	/* ... */
	while (page_shortage > 0 &&
	    (m = vm_pageout_next(&ss, true)) != NULL) {
		KASSERT((m->flags & PG_MARKER) == 0,
		    ("marker page %p was dequeued", m));
		/* ... */
		if (object == NULL || object != m->object) {
			/* ... */
			object = atomic_load_ptr(&m->object);
			if (__predict_false(object == NULL))
				continue;
			/* ... */
			if (__predict_false(m->object != object)) {
				/* ... */
			}
		}
		/* ... busy pages are skipped and counted as stuck: */
			addl_page_shortage++;
		/* ... */
			new.flags &= ~PGA_REFERENCED;
		/* ... */
			if (act_delta == 0) {
				/* ... */
			}
		/* ... reactivate a referenced page: */
			new.flags &= ~PGA_QUEUE_OP_MASK;
			/* ... */
			VM_CNT_INC(v_reactivated);
		/* ... free clean pages: */
		if (m->dirty == 0) {
			/* ... */
		}
		/* ... */
	}
	/* ... */
	getmicrouptime(&end);
	timevalsub(&end, &start);
	atomic_add_int(&vmd->vmd_inactive_us,
	    end.tv_sec * 1000000 + end.tv_usec);
	atomic_add_int(&vmd->vmd_addl_shortage, addl_page_shortage);
	atomic_add_int(&vmd->vmd_inactive_freed,
	    starting_page_shortage - page_shortage);
}
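/*
 * The atomic_add_int() calls above are what let the inactive scan run on
 * several threads at once: each helper folds its elapsed time, stuck-page
 * count, and pages freed into the domain-wide totals, which
 * vm_pageout_inactive_dispatch() reads back after waiting for every helper
 * to finish.
 */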
static int
vm_pageout_inactive_dispatch(struct vm_domain *vmd, int shortage)
{
	u_int freed, pps, slop, threads, us;

	/* ... divide the shortage among the helper threads: */
		slop = shortage % threads;
	/* ... run the local scan, then wait for the helpers ... */
	freed = atomic_load_int(&vmd->vmd_inactive_freed);
	VM_CNT_ADD(v_dfree, freed);

	/* Compute the paging rate, avoiding overflow and a zero divisor. */
	us = max(atomic_load_int(&vmd->vmd_inactive_us), 1);
	if (us > 1000000)
		pps = (freed * 10) / ((us * 10) / 1000000);
	else
		pps = (1000000 / us) * freed;
	/* ... */
	return (shortage - freed);
}
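/*
 * Worked example of the rate computation (illustrative figures, not
 * measurements): if a pass freed 5000 pages in us = 2500000 (2.5 s), the
 * first branch yields pps = 50000 / 25 = 2000 pages/sec.  For a fast pass,
 * say us = 200000, the second branch gives (1000000 / 200000) * 5000 =
 * 25000.  The split matters because for very short passes the first
 * branch's divisor would truncate to zero, while for long passes the
 * second branch's 1000000 / us would.
 */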
static int
vm_pageout_inactive(struct vm_domain *vmd, int shortage, int *addl_shortage)
{
	u_int addl_page_shortage, deficit, page_shortage;
	u_int starting_page_shortage;

	/* Include pages requested by recently failed allocations. */
	deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
	starting_page_shortage = shortage + deficit;
	/* ... run the inactive scan ... */

	/* Wake the laundry thread, in shortfall mode if the scan fell short. */
	if (starting_page_shortage > 0) {
		/* ... */
		if (page_shortage > 0) {
			/* ... request VM_LAUNDRY_SHORTFALL ... */
			VM_CNT_INC(v_pdshortfalls);
		}
		/* ... */
		vmd->vmd_clean_pages_freed +=
		    starting_page_shortage - page_shortage;
	}

	/* Wake the swapout daemon if we didn't free the targeted count. */
	if (page_shortage > 0)
		vm_swapout_run();
	/* ... */
	*addl_shortage = addl_page_shortage + deficit;

	return (page_shortage <= 0);
}
static void
vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
    int starting_page_shortage)
{
	/* ... */
	if (starting_page_shortage <= 0 || starting_page_shortage !=
	    page_shortage)
		vmd->vmd_oom_seq = 0;
	else
		vmd->vmd_oom_seq++;
	/* ... */
}
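/*
 * The OOM decision is made by voting: a domain whose scans have made no
 * progress for vm_pageout_oom_seq consecutive passes casts a vote via
 * vm_pageout_oom_vote, and only when every domain has voted is
 * vm_pageout_oom() actually invoked to pick a victim.
 */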
static long
vm_pageout_oom_pagecount(struct vmspace *vmspace)
{
	/* ... */
	sx_assert(&map->lock, SA_LOCKED);
	/* ... */
}
void
vm_pageout_oom(int shortage)
{
	const char *reason;
	struct proc *p, *bigproc;
	vm_offset_t size, bigsize;
	/* ... */

	bigproc = NULL;
	bigsize = 0;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		/* ... */

		/* Skip system, protected, killed, and exiting processes. */
		if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
		    P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 ||
		    p->p_pid == 1 || P_KILLED(p) ||
		    (p->p_pid < 48 && swap_pager_avail != 0)) {
			PROC_UNLOCK(p);
			continue;
		}

		/* Skip processes with no thread in a runnable-like state. */
		breakout = false;
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (!TD_ON_RUNQ(td) &&
			    !TD_IS_RUNNING(td) &&
			    !TD_IS_SLEEPING(td) &&
			    !TD_IS_SUSPENDED(td) &&
			    !TD_IS_SWAPPED(td)) {
				thread_unlock(td);
				breakout = true;
				break;
			}
			thread_unlock(td);
		}
		if (breakout) {
			PROC_UNLOCK(p);
			continue;
		}

		/* ... hold the process and measure its vmspace: */
		sx_sunlock(&allproc_lock);
		if (!vm_map_trylock_read(&vm->vm_map)) {
			/* ... */
			sx_slock(&allproc_lock);
			continue;
		}
		/* ... */
		sx_slock(&allproc_lock);

		/* Remember the largest process seen so far. */
		if (size > bigsize) {
			if (bigproc != NULL)
				PRELE(bigproc);
			bigproc = p;
			bigsize = size;
		} else
			PRELE(p);
	}
	sx_sunlock(&allproc_lock);

	if (bigproc != NULL) {
		switch (shortage) {
		case VM_OOM_MEM:
			reason = "failed to reclaim memory";
			break;
		case VM_OOM_MEM_PF:
			reason = "a thread waited too long to allocate a page";
			break;
		case VM_OOM_SWAPZ:
			reason = "out of swap space";
			break;
		default:
			panic("unknown OOM reason %d", shortage);
		}
		if (vm_panic_on_oom != 0 && --vm_panic_on_oom == 0)
			panic("%s", reason);
		PROC_LOCK(bigproc);
		killproc(bigproc, reason);
		sched_nice(bigproc, PRIO_MIN);
		/* ... */
		PROC_UNLOCK(bigproc);
	}
}
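/*
 * Note the allproc_lock choreography above: the shared lock is dropped
 * before measuring each candidate's vmspace and reacquired afterwards, so
 * the page daemon never sleeps on a vm map lock while holding the process
 * list lock.  The candidate itself is held, and the previous leader
 * released, so neither can exit during the window.
 */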
static bool
vm_pageout_lowmem(void)
{
	static int lowmem_ticks = 0;
	int last;

	last = atomic_load_int(&lowmem_ticks);
	while ((u_int)(ticks - last) / hz >= lowmem_period) {
		if (atomic_fcmpset_int(&lowmem_ticks, &last, ticks) == 0)
			continue;

		/* Decrease registered cache sizes. */
		SDT_PROBE0(vm, , , vm__lowmem_scan);
		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_PAGES);
		/* ... */
		return (true);
	}
	if (vm_page_count_severe())
		uma_reclaim_wakeup();
	return (false);
}
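/*
 * The atomic_fcmpset_int() loop above is a lock-free rate limiter: whichever
 * thread successfully swaps in the current tick value wins the right to run
 * the vm_lowmem handlers, and losers re-read the updated timestamp instead
 * of blocking, so at most one low-memory sweep runs per lowmem_period.
 */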
static void
vm_pageout_worker(void *arg)
{
	struct vm_domain *vmd;
	u_int ofree;
	int addl_shortage, domain, shortage;
	bool target_met;

	domain = (uintptr_t)arg;
	vmd = VM_DOMAIN(domain);
	KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
	/* ... */

	for (;;) {
		/* ... sleep until woken or until the next scan interval: */
			VM_CNT_INC(v_pdwakeups);
		/* ... */

		/* Use the controller to compute this interval's target. */
		shortage = pidctrl_daemon(&vmd->vmd_pid, vmd->vmd_free_count);
		if (shortage > 0) {
			ofree = vmd->vmd_free_count;
			if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree)
				shortage -= min(vmd->vmd_free_count - ofree,
				    (u_int)shortage);
			target_met = vm_pageout_inactive(vmd, shortage,
			    &addl_shortage);
		} else
			addl_shortage = 0;
		/* ... then scan the active queue ... */
	}
}
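/*
 * pidctrl_daemon() is the PID controller seeded in vm_pageout_init_domain()
 * with the domain's free target as its setpoint: feeding it the current free
 * count each interval yields a scan target that damps oscillation rather
 * than chasing the instantaneous shortage.  Any pages recovered by the
 * vm_lowmem handlers are credited against that target above.
 */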
static void
vm_pageout_helper(void *arg)
{
	/* ... */
	domain = (uintptr_t)arg;
	/* ... */
}
static int
get_pageout_threads_per_domain(const struct vm_domain *vmd)
{
	unsigned total_pageout_threads, eligible_cpus, domain_cpus;
	/* ... */

	total_pageout_threads = howmany(mp_ncpus, pageout_cpus_per_thread);
	domain_cpus = CPU_COUNT(&cpuset_domain[vmd->vmd_domain]);

	/* Pagedaemons are not run in empty domains. */
	eligible_cpus = mp_ncpus;
	for (i = 0; i < vm_ndomains; i++)
		if (VM_DOMAIN_EMPTY(i))
			eligible_cpus -= CPU_COUNT(&cpuset_domain[i]);

	/*
	 * Give this domain a share of the threads proportional to its share
	 * of the eligible CPUs.
	 */
	return (howmany(total_pageout_threads * domain_cpus, eligible_cpus));
}
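/*
 * Hypothetical example: on a 32-CPU system with pageout_cpus_per_thread = 4
 * and two equally sized, non-empty domains, total_pageout_threads =
 * howmany(32, 4) = 8 and each domain gets howmany(8 * 16, 32) = 4 scan
 * threads.  Were one 16-CPU domain empty, the other would receive all
 * howmany(8 * 16, 16) = 8 of them.
 */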
static void
vm_pageout_init_domain(int domain)
{
	struct vm_domain *vmd;
	struct sysctl_oid *oid;

	vmd = VM_DOMAIN(domain);
	/* ... compute the domain's free and laundering targets ... */

	/* Initialize the pageout daemon's PID controller. */
	pidctrl_init(&vmd->vmd_pid, hz / VM_INACT_SCAN_RATE,
	    vmd->vmd_free_target, PIDCTRL_BOUND,
	    PIDCTRL_KPD, PIDCTRL_KID, PIDCTRL_KDD);
	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
	    "pidctrl", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
	pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
	/* ... */
}
static void
vm_pageout_init(void)
{
	/* ... */

	/* Use a smaller pageout cluster on small-memory systems. */
	if (vm_cnt.v_page_count < 2000)
		vm_pageout_page_count = 8;
	/* ... sum the per-domain thresholds into the global counters: */
		freecount += vmd->vmd_free_count;
	/* ... */
}
static void
vm_pageout(void)
{
	struct proc *p;
	struct thread *td;
	int error, first, i, j, pageout_threads;

	p = curproc;
	td = curthread;
	/* ... */
	for (first = -1, i = 0; i < vm_ndomains; i++) {
		if (VM_DOMAIN_EMPTY(i)) {
			if (bootverbose)
				printf("domain %d empty; skipping pageout\n",
				    i);
			continue;
		}
		if (first == -1)
			first = i;
		else {
			error = kthread_add(vm_pageout_worker,
			    (void *)(uintptr_t)i, p, NULL, 0, 0, "dom%d", i);
			if (error != 0)
				panic("starting pageout for domain %d: %d\n",
				    i, error);
		}
		pageout_threads = VM_DOMAIN(i)->vmd_inactive_threads;
		for (j = 0; j < pageout_threads - 1; j++) {
			error = kthread_add(vm_pageout_helper,
			    (void *)(uintptr_t)i, p, NULL, 0, 0,
			    "dom%d helper%d", i, j);
			if (error != 0)
				panic("starting pageout helper %d for domain "
				    "%d: %d\n", j, i, error);
		}
		error = kthread_add(vm_pageout_laundry_worker,
		    (void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i);
		if (error != 0)
			panic("starting laundry for domain %d: %d", i, error);
	}
	error = kthread_add(uma_reclaim_worker, NULL, p, NULL, 0, 0, "uma");
	if (error != 0)
		panic("starting uma_reclaim helper, error %d\n", error);
	/* The bootstrap thread becomes the first domain's worker itself. */
	snprintf(td->td_name, sizeof(td->td_name), "dom%d", first);
	vm_pageout_worker((void *)(uintptr_t)first);
}
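/*
 * Thread topology recap, as wired up in vm_pageout() above: each non-empty
 * domain gets one "dom%d" pageout worker, vmd_inactive_threads - 1 "helper"
 * threads for parallel inactive scans, and one "laundry: dom%d" thread,
 * while a single "uma" worker handles asynchronous UMA reclamation
 * system-wide.
 */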