54#include <sys/bitset.h>
55#include <sys/boottrace.h>
58#include <sys/counter.h>
59#include <sys/devicestat.h>
60#include <sys/eventhandler.h>
63#include <sys/limits.h>
65#include <sys/malloc.h>
68#include <sys/kernel.h>
69#include <sys/kthread.h>
72#include <sys/refcount.h>
73#include <sys/resourcevar.h>
74#include <sys/rwlock.h>
76#include <sys/sysctl.h>
77#include <sys/syscallsubr.h>
79#include <sys/vmmeter.h>
81#include <sys/watchdog.h>
84#include <vm/vm_param.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_object.h>
87#include <vm/vm_page.h>
88#include <vm/vm_pageout.h>
89#include <vm/vm_pager.h>
90#include <vm/vm_extern.h>
92#include <vm/swap_pager.h>
99 .bop_name =
"buf_ops_bio",
110 uint16_t bq_subqueue;
114#define BQ_LOCKPTR(bq) (&(bq)->bq_lock)
115#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq)))
116#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq)))
117#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED)
138 int __aligned(CACHE_LINE_SIZE) bd_numdirtybuffers;
139 int __aligned(CACHE_LINE_SIZE) bd_running;
140 long __aligned(CACHE_LINE_SIZE) bd_bufspace;
141 int __aligned(CACHE_LINE_SIZE) bd_freebuffers;
144#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock)
145#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd)))
146#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd)))
147#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED)
148#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock)
149#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd)))
150#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
151#define BD_DOMAIN(bd) (bd - bdomain)
157 return ((
struct buf *)(
buf + (
sizeof(
struct buf) +
178 daddr_t lblkno, daddr_t blkno);
179static void breada(
struct vnode *, daddr_t *,
int *,
int,
struct ucred *,
int,
180 void (*)(
struct buf *));
188static int buf_import(
void *,
void **,
int,
int,
int);
196 const char *lockname);
205 "Use the VM system for directory writes");
208 "Amount of presently outstanding async buffer io");
209SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD,
213 "Kernel virtual memory used for buffers");
216 CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &
maxbufspace,
218 "Maximum allowed value of bufspace (including metadata)");
221 "Amount of malloced memory for buffers");
224 0,
"Maximum amount of malloced memory for buffers");
227 CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &
lobufspace,
229 "Minimum amount of buffers we want to have");
232 CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &
hibufspace,
234 "Maximum allowed value of bufspace (excluding metadata)");
239 "Bufspace consumed before waking the daemon to free some");
242 "Number of times we have freed the KVA space from some buffer");
245 "Number of times we have had to repeat buffer allocation to defragment");
249 "Minimum preferred space used for in-progress I/O");
253 "Maximum amount of space to use for in-progress I/O");
256 0,
"Number of bdwrite to bawrite conversions to limit dirty buffers");
259 0,
"Number of buffers supplied to bdwrite with snapshot deadlock risk");
269 "Number of buffers that are dirty (has unwritten changes) at the moment");
274 "How many buffers we want to have free before bufdaemon can sleep");
279 "When the number of dirty buffers is considered severe");
284 "Number of bdwrite to bawrite conversions to clear dirty buffers");
287 "Number of free buffers");
292 "Target number of free buffers");
297 "Threshold for clean buffer recycling");
304 "Number of times getnewbuf has had to restart a buffer acquisition");
308 "Number of times getblk has had to restart a buffer mapping for "
315 "Amount of work to do in flushbufqueues when helping bufdaemon");
318 "Number of dirty buffer flushes done by the bufdaemon helpers");
323 &unmapped_buf_allowed, 0,
324 "Permit the use of the unmapped i/o");
327 "Maximum size of a buffer cache block");
332static struct mtx_padalign __exclusive_cache_line
bdlock;
338static struct mtx_padalign __exclusive_cache_line
rbreqlock;
384#define QUEUE_SENTINEL 4
410 value = *(
long *)arg1;
412 if (error != 0 || req->newptr == NULL)
422 (
"%s: unknown arg1", __func__));
439 value = *(
int *)arg1;
441 if (error != 0 || req->newptr == NULL)
443 *(
int *)arg1 =
value;
445 *(
int *)(uintptr_t)(((uintptr_t)&
bdomain[i]) + arg2) =
458 value = *(
long *)arg1;
460 if (error != 0 || req->newptr == NULL)
462 *(
long *)arg1 =
value;
464 *(
long *)(uintptr_t)(((uintptr_t)&
bdomain[i]) + arg2) =
470#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
471 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
481 lvalue +=
bdomain[i].bd_bufspace;
482 if (
sizeof(
int) ==
sizeof(
long) || req->oldlen >=
sizeof(
long))
484 if (lvalue > INT_MAX)
499 lvalue +=
bdomain[i].bd_bufspace;
581 num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1);
605 num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1);
624 if (atomic_fetchadd_int(&bd->bd_running, 1) == 0) {
626 atomic_store_int(&bd->bd_running, 1);
645 KASSERT((bp->b_flags & B_MALLOC) == 0,
646 (
"bufspace_adjust: malloc buf %p", bp));
648 diff = bufsize - bp->b_bufsize;
650 atomic_subtract_long(&bd->bd_bufspace, -diff);
651 }
else if (diff > 0) {
652 space = atomic_fetchadd_long(&bd->bd_bufspace, diff);
658 bp->b_bufsize = bufsize;
677 space = atomic_fetchadd_long(&bd->bd_bufspace, size);
680 atomic_subtract_long(&bd->bd_bufspace, size);
700 atomic_subtract_long(&bd->bd_bufspace, size);
712 int slpflag,
int slptimeo)
715 int error, fl, norunbuf;
717 if ((gbflags & GB_NOWAIT_BD) != 0)
723 if (vp != NULL && vp->v_type != VCHR &&
724 (td->td_pflags & TDP_BUFNEED) == 0) {
734 norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
735 (td->td_pflags & TDP_NORUNNINGBUF);
745 td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
747 td->td_pflags &= norunbuf;
755 (PRIBIO + 4) | slpflag,
"newbuf", slptimeo);
772 "bufspace_shutdown", 60 *
hz);
775 printf(
"bufspacedaemon wait error: %d\n", error);
791 SHUTDOWN_PRI_LAST + 100);
839 PRIBIO|PDROP,
"bufspace",
hz/10);
855 atomic_store_int(&bd->bd_running, 0);
862 atomic_store_int(&bd->bd_running, 1);
881 KASSERT((bp->b_flags & B_MALLOC) != 0,
882 (
"bufmallocadjust: non-malloc buf %p", bp));
883 diff = bufsize - bp->b_bufsize;
888 bp->b_bufsize = bufsize;
919 bspace = bp->b_runningbufspace;
923 KASSERT(space >= bspace, (
"runningbufspace underflow %ld %ld",
925 bp->b_runningbufspace = 0;
970 vm_offset_t size, vm_page_t m)
978 if (bp->b_flags & B_CACHE) {
979 int base = (foff + off) & PAGE_MASK;
980 if (vm_page_is_valid(m, base, size) == 0)
981 bp->b_flags &= ~B_CACHE;
1041#define TRANSIENT_DENOM 5
1043#define TRANSIENT_DENOM 10
1056 long maxbuf, maxbuf_sz, buf_sz, biotmap_sz;
1064 physmem_est = (physmem_est * KASAN_SHADOW_SCALE) /
1065 (KASAN_SHADOW_SCALE + 1);
1073 unmapped_buf_allowed = 0;
1080 physmem_est = physmem_est * (PAGE_SIZE / 1024);
1094 int factor = 4 * BKVASIZE / 1024;
1097 if (physmem_est > 4096)
1098 nbuf += min((physmem_est - 4096) / factor,
1100 if (physmem_est > 65536)
1101 nbuf += min((physmem_est - 65536) * 2 / (factor * 5),
1102 32 * 1024 * 1024 / (factor * 5));
1111 maxbuf = (LONG_MAX / 3) / BKVASIZE;
1112 if (
nbuf > maxbuf) {
1114 printf(
"Warning: nbufs lowered from %d to %ld\n",
nbuf,
1134 buf_sz = (long)
nbuf * BKVASIZE;
1142 biotmap_sz = maxbuf_sz - buf_sz;
1150 buf_sz -= biotmap_sz;
1152 if (biotmap_sz / INT_MAX >
maxphys)
1163 nbuf = buf_sz / BKVASIZE;
1176 v = (caddr_t)
buf + (
sizeof(
struct buf) +
sizeof(vm_page_t) *
1196 (
"maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n",
maxbcachebuf,
1199 mtx_init(&
rbreqlock,
"runningbufspace lock", NULL, MTX_DEF);
1200 mtx_init(&
bdlock,
"buffer daemon lock", NULL, MTX_DEF);
1201 mtx_init(&
bdirtylock,
"dirty buf lock", NULL, MTX_DEF);
1206 for (i = 0; i <
nbuf; i++) {
1208 bzero(bp,
sizeof(*bp) +
sizeof(vm_page_t) * atop(
maxbcachebuf));
1209 bp->b_flags = B_INVAL;
1210 bp->b_rcred = NOCRED;
1211 bp->b_wcred = NOCRED;
1217 LIST_INIT(&bp->b_dep);
1248 16 * 1024 * 1024), 1024 * 1024);
1292 buf_zone = uma_zcache_create(
"buf free cache",
1310 bd->bd_bufspace = 0;
1315 bd->bd_numdirtybuffers = 0;
1334vfs_buf_check_mapped(
struct buf *bp)
1338 (
"mapped buf: b_kvabase was not updated %p", bp));
1340 (
"mapped buf: b_data was not updated %p", bp));
1342 maxphys, (
"b_data + b_offset unmapped %p", bp));
1346vfs_buf_check_unmapped(
struct buf *bp)
1350 (
"unmapped buf: corrupted b_data %p", bp));
1353#define BUF_CHECK_MAPPED(bp) vfs_buf_check_mapped(bp)
1354#define BUF_CHECK_UNMAPPED(bp) vfs_buf_check_unmapped(bp)
1356#define BUF_CHECK_MAPPED(bp) do {} while (0)
1357#define BUF_CHECK_UNMAPPED(bp) do {} while (0)
1363 if (((bp->b_flags & B_INVAL) == 0 && BUF_ISLOCKED(bp)) ||
1364 ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI))
1375 static int first_buf_printf = 1;
1377 int i, iter, nbusy, pbusy;
1385 wdog_kern_pat(WD_LASTVAL);
1393 for (iter = pbusy = 0; iter < 20; iter++) {
1395 for (i =
nbuf - 1; i >= 0; i--) {
1401 if (first_buf_printf)
1402 printf(
"All buffers synced.");
1405 if (first_buf_printf) {
1406 printf(
"Syncing disks, buffers remaining... ");
1407 first_buf_printf = 0;
1414 wdog_kern_pat(WD_LASTVAL);
1421 DELAY(50000 * iter);
1427 for (subiter = 0; subiter < 50 * iter; subiter++) {
1428 thread_lock(curthread);
1440 for (i =
nbuf - 1; i >= 0; i--) {
1445 if (bp->b_dev == NULL) {
1447 bp->b_vp->v_mount, mnt_list);
1454 "%d: buf:%p, vnode:%p, flags:%0x, blkno:%jd, lblkno:%jd, buflock:",
1455 nbusy, bp, bp->b_vp, bp->b_flags,
1456 (intmax_t)bp->b_blkno,
1457 (intmax_t)bp->b_lblkno);
1458 BUF_LOCKPRINTINFO(bp);
1470 BOOTTRACE(
"shutdown failed to sync buffers");
1471 printf(
"Giving up on %d buffers\n", nbusy);
1475 BOOTTRACE(
"shutdown sync complete");
1476 if (!first_buf_printf)
1477 printf(
"Final sync complete\n");
1491 if (!KERNEL_PANICKED()) {
1495 BOOTTRACE(
"shutdown unmounted all filesystems");
1510 bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
1511 pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
1512 bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
1513 (vm_offset_t)(bp->b_offset & PAGE_MASK));
1520 return (&
bdomain[bp->b_domain]);
1527 switch (bp->b_qindex) {
1541 panic(
"bufqueue(%p): Unhandled type %d\n", bp, bp->b_qindex);
1583 (
"binsfree: Invalid qindex %d", qindex));
1584 BUF_ASSERT_XLOCKED(bp);
1589 if (bp->b_flags & B_REMFREE) {
1590 if (bp->b_qindex == qindex) {
1591 bp->b_flags |= B_REUSE;
1592 bp->b_flags &= ~B_REMFREE;
1603 bq = &bd->
bd_subq[PCPU_GET(cpuid)];
1620 if (bp->b_flags & B_REMFREE)
1622 if (bp->b_vflags & BV_BKGRDINPROG)
1623 panic(
"losing buffer 1");
1624 if (bp->b_rcred != NOCRED) {
1626 bp->b_rcred = NOCRED;
1628 if (bp->b_wcred != NOCRED) {
1630 bp->b_wcred = NOCRED;
1632 if (!LIST_EMPTY(&bp->b_dep))
1635 atomic_add_int(&
bufdomain(bp)->bd_freebuffers, 1);
1636 MPASS((bp->b_flags & B_MAXPHYS) == 0);
1656 for (i = 0; i < cnt; i++) {
1657 bp = TAILQ_FIRST(&
bqempty.bq_queue);
1682 for (i = 0; i < cnt; i++) {
1685 TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist);
1686 bp->b_flags &= ~(B_AGE | B_REUSE);
1688 bp->b_qindex = bq->bq_index;
1702 int freebufs, error;
1710 freebufs = atomic_fetchadd_int(&bd->bd_freebuffers, -1);
1712 bp = uma_zalloc(
buf_zone, M_NOWAIT);
1714 atomic_add_int(&bd->bd_freebuffers, 1);
1725 error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
1726 KASSERT(error == 0, (
"%s: BUF_LOCK on free buf %p: %d.", __func__, bp,
1730 KASSERT(bp->b_vp == NULL,
1731 (
"bp: %p still has vnode %p.", bp, bp->b_vp));
1732 KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0,
1733 (
"invalid buffer %p flags %#x", bp, bp->b_flags));
1734 KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0,
1735 (
"bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags));
1736 KASSERT(bp->b_npages == 0,
1737 (
"bp: %p still has %d vm pages\n", bp, bp->b_npages));
1738 KASSERT(bp->b_kvasize == 0, (
"bp: %p still has kva\n", bp));
1739 KASSERT(bp->b_bufsize == 0, (
"bp: %p still has bufspace\n", bp));
1740 MPASS((bp->b_flags & B_MAXPHYS) == 0);
1748 bp->b_blkno = bp->b_lblkno = 0;
1749 bp->b_offset = NOOFFSET;
1755 bp->b_dirtyoff = bp->b_dirtyend = 0;
1756 bp->b_bufobj = NULL;
1758 bp->b_fsprivate1 = NULL;
1759 bp->b_fsprivate2 = NULL;
1760 bp->b_fsprivate3 = NULL;
1761 LIST_INIT(&bp->b_dep);
1777 struct buf *bp, *nbp;
1785 (
"buf_recycle: Locks don't match"));
1786 nbp = TAILQ_FIRST(&bq->bq_queue);
1792 while ((bp = nbp) != NULL) {
1797 nbp = TAILQ_NEXT(bp, b_freelist);
1803 if (kva && bp->b_kvasize == 0)
1806 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
1813 if ((bp->b_flags & B_REUSE) != 0) {
1814 TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist);
1815 TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist);
1816 bp->b_flags &= ~B_REUSE;
1824 if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
1830 (
"buf_recycle: inconsistent queue %d bp %p",
1833 (
"getnewbuf: queue domain %d doesn't match request %d",
1846 if ((bp->b_vflags & BV_BKGRDERR) != 0) {
1849 nbp = TAILQ_FIRST(&bq->bq_queue);
1852 bp->b_flags |= B_INVAL;
1872 CTR3(KTR_BUF,
"bremfree(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1873 KASSERT((bp->b_flags & B_REMFREE) == 0,
1874 (
"bremfree: buffer %p already marked for delayed removal.", bp));
1876 (
"bremfree: buffer %p not on a queue.", bp));
1877 BUF_ASSERT_XLOCKED(bp);
1879 bp->b_flags |= B_REMFREE;
1902 mtx_init(&bq->
bq_lock, lockname, NULL, MTX_DEF);
1903 TAILQ_INIT(&bq->bq_queue);
1905 bq->bq_index = qindex;
1906 bq->bq_subqueue = subqueue;
1919 "bufq clean subqueue lock");
1920 mtx_init(&bd->
bd_run_lock,
"bufspace daemon run lock", NULL, MTX_DEF);
1933 CTR3(KTR_BUF,
"bq_remove(%p) vp %p flags %X",
1934 bp, bp->b_vp, bp->b_flags);
1936 (
"bq_remove: buffer %p not on a queue.", bp));
1938 (
"bq_remove: Remove buffer %p from wrong queue.", bp));
1942 BUF_ASSERT_XLOCKED(bp);
1944 KASSERT(bq->bq_len >= 1,
1945 (
"queue %d underflow", bp->b_qindex));
1946 TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist);
1949 bp->b_flags &= ~(B_REMFREE | B_REUSE);
1960 while ((bp = TAILQ_FIRST(&bq->bq_queue)) != NULL) {
1961 TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist);
1962 TAILQ_INSERT_TAIL(&bd->
bd_cleanq->bq_queue, bp,
1964 bp->b_subqueue = bd->
bd_cleanq->bq_subqueue;
1989 if (bq->bq_len == 0)
2006 panic(
"bq_insert: free buffer %p onto another queue?", bp);
2009 if (bp->b_flags & B_AGE) {
2014 TAILQ_INSERT_HEAD(&bq->bq_queue, bp, b_freelist);
2017 TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist);
2019 bp->b_flags &= ~(B_AGE | B_REUSE);
2021 bp->b_qindex = bq->bq_index;
2022 bp->b_subqueue = bq->bq_subqueue;
2036 bq->bq_len >= bd->
bd_lim))
2053 if (bp->b_kvasize == 0) {
2056 (
"Leaked KVA space on %p", bp));
2057 }
else if (buf_mapped(bp))
2062 if (bp->b_kvasize == 0)
2065 vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize);
2083 KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0,
2084 (
"Invalid gbflags 0x%x in %s", gbflags, __func__));
2085 MPASS((bp->b_flags & B_MAXPHYS) == 0);
2087 (
"bufkva_alloc kva too large %d %u", maxsize,
maxbcachebuf));
2092 error =
vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &
addr);
2100 bp->b_kvabase = (caddr_t)
addr;
2101 bp->b_kvasize = maxsize;
2103 if ((gbflags & GB_UNMAPPED) != 0) {
2107 bp->b_data = bp->b_kvabase;
2127 for (i = 0; i < 5; i++) {
2143breada(
struct vnode * vp, daddr_t * rablkno,
int * rabsize,
int cnt,
2144 struct ucred * cred,
int flags,
void (*ckhashfunc)(
struct buf *))
2152 for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
2153 if (
inmem(vp, *rablkno))
2155 rabp =
getblk(vp, *rablkno, *rabsize, 0, 0, 0);
2156 if ((rabp->b_flags & B_CACHE) != 0) {
2163 racct_add_buf(curproc, rabp, 0);
2164 PROC_UNLOCK(curproc);
2167 td->td_ru.ru_inblock++;
2168 rabp->b_flags |= B_ASYNC;
2169 rabp->b_flags &= ~B_INVAL;
2170 if ((
flags & GB_CKHASH) != 0) {
2171 rabp->b_flags |= B_CKHASH;
2172 rabp->b_ckhashcalc = ckhashfunc;
2174 rabp->b_ioflags &= ~BIO_ERROR;
2175 rabp->b_iocmd = BIO_READ;
2176 if (rabp->b_rcred == NOCRED && cred != NOCRED)
2177 rabp->b_rcred =
crhold(cred);
2180 rabp->b_iooffset = dbtob(rabp->b_blkno);
2204 daddr_t *rablkno,
int *rabsize,
int cnt,
struct ucred *cred,
int flags,
2205 void (*ckhashfunc)(
struct buf *),
struct buf **bpp)
2209 int error, readwait, rv;
2211 CTR3(KTR_BUF,
"breadn(%p, %jd, %d)", vp, blkno, size);
2217 error =
getblkx(vp, blkno, dblkno, size, 0, 0,
flags, &bp);
2222 KASSERT(blkno == bp->b_lblkno,
2223 (
"getblkx returned buffer for blkno %jd instead of blkno %jd",
2224 (intmax_t)bp->b_lblkno, (intmax_t)blkno));
2225 flags &= ~GB_NOSPARSE;
2232 if ((bp->b_flags & B_CACHE) == 0) {
2235 PROC_LOCK(td->td_proc);
2236 racct_add_buf(td->td_proc, bp, 0);
2237 PROC_UNLOCK(td->td_proc);
2240 td->td_ru.ru_inblock++;
2241 bp->b_iocmd = BIO_READ;
2242 bp->b_flags &= ~B_INVAL;
2243 if ((
flags & GB_CKHASH) != 0) {
2244 bp->b_flags |= B_CKHASH;
2245 bp->b_ckhashcalc = ckhashfunc;
2247 if ((
flags & GB_CVTENXIO) != 0)
2248 bp->b_xflags |= BX_CVTENXIO;
2249 bp->b_ioflags &= ~BIO_ERROR;
2250 if (bp->b_rcred == NOCRED && cred != NOCRED)
2251 bp->b_rcred =
crhold(cred);
2253 bp->b_iooffset = dbtob(bp->b_blkno);
2261 breada(vp, rablkno, rabsize, cnt, cred,
flags, ckhashfunc);
2293 CTR3(KTR_BUF,
"bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2294 if ((bp->b_bufobj->bo_flag & BO_DEAD) != 0) {
2295 bp->b_flags |= B_INVAL | B_RELBUF;
2296 bp->b_flags &= ~B_CACHE;
2300 if (bp->b_flags & B_INVAL) {
2305 if (bp->b_flags & B_BARRIER)
2308 oldflags = bp->b_flags;
2310 KASSERT(!(bp->b_vflags & BV_BKGRDINPROG),
2311 (
"FFS background buffer should not get here %p", bp));
2315 vp_md = vp->v_vflag & VV_MD;
2328 bp->b_flags &= ~B_DONE;
2329 bp->b_ioflags &= ~BIO_ERROR;
2330 bp->b_flags |= B_CACHE;
2331 bp->b_iocmd = BIO_WRITE;
2338 bp->b_runningbufspace = bp->b_bufsize;
2339 space = atomic_fetchadd_long(&
runningbufspace, bp->b_runningbufspace);
2344 racct_add_buf(curproc, bp, 1);
2345 PROC_UNLOCK(curproc);
2348 curthread->td_ru.ru_oublock++;
2349 if (oldflags & B_ASYNC)
2351 bp->b_iooffset = dbtob(bp->b_blkno);
2352 buf_track(bp, __func__);
2355 if ((oldflags & B_ASYNC) == 0) {
2368 if ((curthread->td_pflags & TDP_NORUNNINGBUF) == 0 && !vp_md)
2383 (void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread);
2390 TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
2391 if ((nbp->b_vflags & BV_BKGRDINPROG) ||
2393 LK_EXCLUSIVE | LK_NOWAIT, NULL))
2396 panic(
"bdwrite: found ourselves");
2399 if (buf_countdeps(nbp, 0)) {
2404 if (nbp->b_flags & B_CLUSTEROK) {
2430 struct thread *td = curthread;
2434 CTR3(KTR_BUF,
"bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2435 KASSERT(bp->b_bufobj != NULL, (
"No b_bufobj %p", bp));
2436 KASSERT((bp->b_flags & B_BARRIER) == 0,
2437 (
"Barrier request in delayed write %p", bp));
2439 if (bp->b_flags & B_INVAL) {
2453 if ((td->td_pflags & (TDP_COWINPROGRESS|TDP_INBDFLUSH)) == 0) {
2454 td->td_pflags |= TDP_INBDFLUSH;
2456 td->td_pflags &= ~TDP_INBDFLUSH;
2465 bp->b_flags |= B_CACHE;
2476 if (vp->v_type != VCHR && bp->b_lblkno == bp->b_blkno) {
2477 VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
2480 buf_track(bp, __func__);
2524 CTR3(KTR_BUF,
"bdirty(%p) vp %p flags %X",
2525 bp, bp->b_vp, bp->b_flags);
2526 KASSERT(bp->b_bufobj != NULL, (
"No b_bufobj %p", bp));
2527 KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex ==
QUEUE_NONE,
2528 (
"bdirty: buffer %p still on queue %d", bp, bp->b_qindex));
2529 bp->b_flags &= ~(B_RELBUF);
2530 bp->b_iocmd = BIO_WRITE;
2532 if ((bp->b_flags & B_DELWRI) == 0) {
2533 bp->b_flags |= B_DELWRI;
2554 CTR3(KTR_BUF,
"bundirty(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2555 KASSERT(bp->b_bufobj != NULL, (
"No b_bufobj %p", bp));
2556 KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex ==
QUEUE_NONE,
2557 (
"bundirty: buffer %p still on queue %d", bp, bp->b_qindex));
2559 if (bp->b_flags & B_DELWRI) {
2560 bp->b_flags &= ~B_DELWRI;
2567 bp->b_flags &= ~B_DEFERRED;
2583 bp->b_flags |= B_ASYNC;
2600 bp->b_flags |= B_ASYNC | B_BARRIER;
2617 bp->b_flags |= B_BARRIER;
2618 return (bwrite(bp));
2665 struct mount *v_mnt;
2674 CTR3(KTR_BUF,
"brelse(%p) vp %p flags %X",
2675 bp, bp->b_vp, bp->b_flags);
2676 KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
2677 (
"brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
2678 KASSERT((bp->b_flags & B_VMIO) != 0 || (bp->b_flags & B_NOREUSE) == 0,
2679 (
"brelse: non-VMIO buffer marked NOREUSE"));
2681 if (BUF_LOCKRECURSED(bp)) {
2690 if (bp->b_flags & B_MANAGED) {
2695 if (LIST_EMPTY(&bp->b_dep)) {
2696 bp->b_flags &= ~B_IOSTARTED;
2698 KASSERT((bp->b_flags & B_IOSTARTED) == 0,
2699 (
"brelse: SU io not finished bp %p", bp));
2702 if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) {
2703 BO_LOCK(bp->b_bufobj);
2704 bp->b_vflags &= ~BV_BKGRDERR;
2705 BO_UNLOCK(bp->b_bufobj);
2709 if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
2710 (bp->b_flags & B_INVALONERR)) {
2717 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
2718 bp->b_flags &= ~(B_ASYNC | B_CACHE);
2721 if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
2722 (bp->b_error != ENXIO || !LIST_EMPTY(&bp->b_dep)) &&
2723 !(bp->b_flags & B_INVAL)) {
2745 bp->b_ioflags &= ~BIO_ERROR;
2747 }
else if ((bp->b_flags & (B_NOCACHE | B_INVAL)) ||
2748 (bp->b_ioflags & BIO_ERROR) || (bp->b_bufsize <= 0)) {
2754 bp->b_flags |= B_INVAL;
2755 if (!LIST_EMPTY(&bp->b_dep))
2757 if (bp->b_flags & B_DELWRI)
2759 bp->b_flags &= ~(B_DELWRI | B_CACHE);
2760 if ((bp->b_flags & B_VMIO) == 0) {
2776 if (bp->b_flags & B_DELWRI)
2777 bp->b_flags &= ~B_RELBUF;
2797 v_mnt = bp->b_vp != NULL ? bp->b_vp->v_mount : NULL;
2799 if ((bp->b_flags & B_VMIO) && (bp->b_flags & B_NOCACHE ||
2800 (bp->b_ioflags & BIO_ERROR && bp->b_iocmd == BIO_READ)) &&
2801 (v_mnt == NULL || (v_mnt->mnt_vfc->vfc_flags & VFCF_NETWORK) == 0 ||
2802 vn_isdisk(bp->b_vp) || (bp->b_flags & B_DELWRI) == 0)) {
2807 if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0 ||
2808 (bp->b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE) {
2810 bp->b_flags &= ~B_NOREUSE;
2811 if (bp->b_vp != NULL)
2820 if (bp->b_bufsize == 0 || (bp->b_ioflags & BIO_ERROR) != 0 ||
2821 (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF)) != 0)
2822 bp->b_flags |= B_INVAL;
2823 if (bp->b_flags & B_INVAL) {
2824 if (bp->b_flags & B_DELWRI)
2830 buf_track(bp, __func__);
2833 if (bp->b_bufsize == 0) {
2838 if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) ||
2839 (bp->b_ioflags & BIO_ERROR)) {
2840 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
2841 if (bp->b_vflags & BV_BKGRDINPROG)
2842 panic(
"losing buffer 2");
2844 bp->b_flags |= B_AGE;
2846 }
else if (bp->b_flags & B_DELWRI)
2851 if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
2852 panic(
"brelse: not dirty");
2854 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT);
2855 bp->b_xflags &= ~(BX_CVTENXIO);
2876 CTR3(KTR_BUF,
"bqrelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2877 KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
2878 (
"bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
2881 if (BUF_LOCKRECURSED(bp)) {
2886 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
2887 bp->b_xflags &= ~(BX_CVTENXIO);
2889 if (LIST_EMPTY(&bp->b_dep)) {
2890 bp->b_flags &= ~B_IOSTARTED;
2892 KASSERT((bp->b_flags & B_IOSTARTED) == 0,
2893 (
"bqrelse: SU io not finished bp %p", bp));
2896 if (bp->b_flags & B_MANAGED) {
2897 if (bp->b_flags & B_REMFREE)
2903 if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG |
2904 BV_BKGRDERR)) == BV_BKGRDERR) {
2905 BO_LOCK(bp->b_bufobj);
2906 bp->b_vflags &= ~BV_BKGRDERR;
2907 BO_UNLOCK(bp->b_bufobj);
2910 if ((bp->b_flags & B_DELWRI) == 0 &&
2911 (bp->b_xflags & BX_VNDIRTY))
2912 panic(
"bqrelse: not dirty");
2913 if ((bp->b_flags & B_NOREUSE) != 0) {
2919 buf_track(bp, __func__);
2925 buf_track(bp, __func__);
2940 struct vnode *vp __unused;
2941 int i, iosize, resid;
2944 obj = bp->b_bufobj->bo_object;
2945 KASSERT(blockcount_read(&obj->paging_in_progress) >= bp->b_npages,
2946 (
"vfs_vmio_iodone: paging in progress(%d) < b_npages(%d)",
2947 blockcount_read(&obj->paging_in_progress), bp->b_npages));
2950 VNPASS(vp->v_holdcnt > 0, vp);
2951 VNPASS(vp->v_object != NULL, vp);
2953 foff = bp->b_offset;
2954 KASSERT(bp->b_offset != NOOFFSET,
2955 (
"vfs_vmio_iodone: bp %p has no buffer offset", bp));
2958 iosize = bp->b_bcount - bp->b_resid;
2959 for (i = 0; i < bp->b_npages; i++) {
2960 resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
2968 if (m == bogus_page) {
2970 m = vm_page_relookup(obj, OFF_TO_IDX(foff));
2972 panic(
"biodone: page disappeared!");
2974 }
else if ((bp->b_iocmd == BIO_READ) && resid > 0) {
2980 KASSERT((m->dirty & vm_page_bits(foff & PAGE_MASK,
2981 resid)) == 0, (
"vfs_vmio_iodone: page %p "
2982 "has unexpected dirty bits", m));
2985 KASSERT(OFF_TO_IDX(foff) == m->pindex,
2986 (
"vfs_vmio_iodone: foff(%jd)/pindex(%ju) mismatch",
2987 (intmax_t)foff, (uintmax_t)m->pindex));
2990 foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
2993 vm_object_pip_wakeupn(obj, bp->b_npages);
2994 if (bogus && buf_mapped(bp)) {
2996 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
2997 bp->b_pages, bp->b_npages);
3010 int flags, i, resid, poffset, presid;
3012 if (buf_mapped(bp)) {
3014 pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages);
3029 flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
3030 obj = bp->b_bufobj->bo_object;
3031 resid = bp->b_bufsize;
3032 poffset = bp->b_offset & PAGE_MASK;
3033 VM_OBJECT_WLOCK(obj);
3034 for (i = 0; i < bp->b_npages; i++) {
3036 if (m == bogus_page)
3037 panic(
"vfs_vmio_invalidate: Unexpected bogus page.");
3038 bp->b_pages[i] = NULL;
3040 presid = resid > (PAGE_SIZE - poffset) ?
3041 (PAGE_SIZE - poffset) : resid;
3042 KASSERT(presid >= 0, (
"brelse: extra page"));
3043 vm_page_busy_acquire(m, VM_ALLOC_SBUSY);
3044 if (pmap_page_wired_mappings(m) == 0)
3045 vm_page_set_invalid(m, poffset, presid);
3047 vm_page_release_locked(m,
flags);
3051 VM_OBJECT_WUNLOCK(obj);
3065 if (bp->b_npages == desiredpages)
3068 if (buf_mapped(bp)) {
3070 pmap_qremove((vm_offset_t)trunc_page((vm_offset_t)bp->b_data) +
3071 (desiredpages << PAGE_SHIFT), bp->b_npages - desiredpages);
3078 flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
3079 if ((bp->b_flags & B_DIRECT) != 0) {
3080 flags |= VPR_TRYFREE;
3081 obj = bp->b_bufobj->bo_object;
3082 VM_OBJECT_WLOCK(obj);
3086 for (i = desiredpages; i < bp->b_npages; i++) {
3088 KASSERT(m != bogus_page, (
"allocbuf: bogus page found"));
3089 bp->b_pages[i] = NULL;
3091 vm_page_release_locked(m,
flags);
3093 vm_page_release(m,
flags);
3096 VM_OBJECT_WUNLOCK(obj);
3097 bp->b_npages = desiredpages;
3120 obj = bp->b_bufobj->bo_object;
3121 if (bp->b_npages < desiredpages) {
3123 (
"vfs_vmio_extend past maxbcachebuf %p %d %u",
3136 (void)vm_page_grab_pages_unlocked(obj,
3137 OFF_TO_IDX(bp->b_offset) + bp->b_npages,
3138 VM_ALLOC_SYSTEM | VM_ALLOC_IGN_SBUSY |
3139 VM_ALLOC_NOBUSY | VM_ALLOC_WIRED,
3140 &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages);
3141 bp->b_npages = desiredpages;
3158 toff = bp->b_bcount;
3159 tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK);
3160 while ((bp->b_flags & B_CACHE) && toff < size) {
3163 if (tinc > (size - toff))
3165 pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT;
3166 m = bp->b_pages[pi];
3194 if ((bpa =
gbincore(&vp->v_bufobj, lblkno)) == NULL)
3198 if (BUF_LOCK(bpa, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
3202 if ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) !=
3203 (B_DELWRI | B_CLUSTEROK))
3206 if (bpa->b_bufsize != size)
3213 if ((bpa->b_blkno != bpa->b_lblkno) && (bpa->b_blkno == blkno))
3234 daddr_t lblkno = bp->b_lblkno;
3235 struct vnode *vp = bp->b_vp;
3243 gbflags = (bp->b_data ==
unmapped_buf) ? GB_UNMAPPED : 0;
3249 if ((vp->v_type == VREG) &&
3250 (vp->v_mount != 0) &&
3251 (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
3252 size = vp->v_mount->mnt_stat.f_iosize;
3256 for (i = 1; i < maxcl; i++)
3258 bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0)
3261 for (j = 1; i + j <= maxcl && j <= lblkno; j++)
3263 bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0)
3279 bp->b_flags |= B_ASYNC;
3285 nwritten = bp->b_bufsize;
3300 if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_UNMAPPED) {
3305 maxsize = (maxsize + BKVAMASK) & ~BKVAMASK;
3307 if (maxsize != bp->b_kvasize &&
3330getnewbuf(
struct vnode *vp,
int slpflag,
int slptimeo,
int maxsize,
int gbflags)
3334 bool metadata, reserved;
3337 KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
3338 (
"GB_KVAALLOC only makes sense with GB_UNMAPPED"));
3339 if (!unmapped_buf_allowed)
3340 gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC);
3342 if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 ||
3350 bd = &
bdomain[vp->v_bufobj.bo_domain];
3355 if (reserved ==
false &&
3373 bp->b_flags |= B_INVAL;
3407 if (vp != NULL && target > 2)
3426 printf(
"bufdaemon wait error: %d\n", error);
3441 SHUTDOWN_PRI_LAST + 100);
3450 &
bdomain[i], curproc, NULL, 0, 0,
"bufspacedaemon-%d", i);
3452 panic(
"error %d spawning bufspace daemon", error);
3458 curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED;
3478 lodirty = bd->bd_numdirtybuffers / 2;
3481 while (bd->bd_numdirtybuffers > lodirty) {
3483 bd->bd_numdirtybuffers - lodirty) == 0)
3542 "Number of buffers flushed with dependencies that require rollbacks");
3549 struct buf *sentinel;
3561 sentinel =
malloc(
sizeof(
struct buf), M_TEMP, M_WAITOK | M_ZERO);
3564 TAILQ_INSERT_HEAD(&bq->bq_queue, sentinel, b_freelist);
3566 while (flushed != target) {
3569 bp = TAILQ_NEXT(sentinel, b_freelist);
3571 TAILQ_REMOVE(&bq->bq_queue, sentinel, b_freelist);
3572 TAILQ_INSERT_AFTER(&bq->bq_queue, bp, sentinel,
3590 error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL);
3599 if ((bp->b_vflags & BV_BKGRDINPROG) != 0 ||
3600 (bp->b_flags & B_DELWRI) == 0) {
3604 if (bp->b_flags & B_INVAL) {
3611 if (!LIST_EMPTY(&bp->b_dep) && buf_countdeps(bp, 0)) {
3612 if (flushdeps == 0) {
3636 error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
3638 ASSERT_VOP_LOCKED(vp,
"getbuf");
3640 error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 :
3641 vn_lock(vp, LK_TRYUPGRADE);
3644 CTR3(KTR_BUF,
"flushbufqueue(%p) vp %p flags %X",
3645 bp, bp->b_vp, bp->b_flags);
3672 TAILQ_REMOVE(&bq->bq_queue, sentinel, b_freelist);
3674 free(sentinel, M_TEMP);
3696 vm_offset_t toff, tinc, size;
3701 ASSERT_VOP_LOCKED(vp,
"inmem");
3703 if (
incore(&vp->v_bufobj, blkno))
3705 if (vp->v_mount == NULL)
3712 if (size > vp->v_mount->mnt_stat.f_iosize)
3713 size = vp->v_mount->mnt_stat.f_iosize;
3714 off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
3716 for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
3717 m = vm_page_lookup_unlocked(obj, OFF_TO_IDX(off + toff));
3723 if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
3724 tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
3729 valid = vm_page_is_valid(m,
3730 (vm_offset_t)((toff + off) & PAGE_MASK), tinc);
3731 n = vm_page_lookup_unlocked(obj, OFF_TO_IDX(off + toff));
3757 vm_ooffset_t foff, noff, eoff;
3761 if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0)
3764 foff = bp->b_offset;
3765 KASSERT(bp->b_offset != NOOFFSET,
3766 (
"vfs_clean_pages_dirty_buf: no buffer offset"));
3770 for (i = 0; i < bp->b_npages; i++) {
3771 noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
3773 if (eoff > bp->b_offset + bp->b_bufsize)
3774 eoff = bp->b_offset + bp->b_bufsize;
3786 vm_offset_t boffset;
3787 vm_offset_t eoffset;
3794 for (i = 0; i < bp->b_npages; i++)
3795 vm_page_test_dirty(bp->b_pages[i]);
3802 for (i = 0; i < bp->b_npages; i++) {
3803 if (bp->b_pages[i]->dirty)
3806 boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
3808 for (i = bp->b_npages - 1; i >= 0; --i) {
3809 if (bp->b_pages[i]->dirty) {
3813 eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
3819 if (eoffset > bp->b_bcount)
3820 eoffset = bp->b_bcount;
3827 if (boffset < eoffset) {
3828 if (bp->b_dirtyoff > boffset)
3829 bp->b_dirtyoff = boffset;
3830 if (bp->b_dirtyend < eoffset)
3831 bp->b_dirtyend = eoffset;
3843 int bsize, maxsize, need_mapping, need_kva;
3847 (gbflags & GB_UNMAPPED) == 0;
3850 (gbflags & GB_KVAALLOC) != 0;
3851 if (!need_mapping && !need_kva)
3869 bsize =
vn_isdisk(bp->b_vp) ? DEV_BSIZE : bp->b_bufobj->bo_bsize;
3870 KASSERT(bsize != 0, (
"bsize == 0, check bo->bo_bsize"));
3871 offset = blkno * bsize;
3872 maxsize = size + (offset & PAGE_MASK);
3873 maxsize = imax(maxsize, bsize);
3876 if ((gbflags & GB_NOWAIT_BD) != 0) {
3881 panic(
"GB_NOWAIT_BD and GB_UNMAPPED %p", bp);
3889 bp->b_data = bp->b_kvabase;
3896getblk(
struct vnode *vp, daddr_t blkno,
int size,
int slpflag,
int slptimeo,
3902 error =
getblkx(vp, blkno, blkno, size, slpflag, slptimeo,
flags, &bp);
3952getblkx(
struct vnode *vp, daddr_t blkno, daddr_t dblkno,
int size,
int slpflag,
3953 int slptimeo,
int flags,
struct buf **bpp)
3958 int bsize, error, maxsize, vmio;
3961 CTR3(KTR_BUF,
"getblk(%p, %ld, %d)", vp, (
long)blkno, size);
3962 KASSERT((
flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
3963 (
"GB_KVAALLOC only makes sense with GB_UNMAPPED"));
3964 if (vp->v_type != VCHR)
3965 ASSERT_VOP_LOCKED(vp,
"getblk");
3967 panic(
"getblk: size(%d) > maxbcachebuf(%d)\n", size,
3969 if (!unmapped_buf_allowed)
3970 flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
3982 if ((
flags & GB_NOCREAT) != 0)
3984 goto newbuf_unlocked;
3987 error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL,
"getblku", 0,
3993 if (bp->b_bufobj == bo && bp->b_lblkno == blkno)
3994 goto foundbuf_fastpath;
4009 lockflags = LK_EXCLUSIVE | LK_INTERLOCK |
4010 ((
flags & GB_LOCK_NOWAIT) != 0 ? LK_NOWAIT : LK_SLEEPFAIL);
4012 lockflags |= (
flags & GB_NOWITNESS) != 0 ? LK_NOWITNESS : 0;
4015 error = BUF_TIMELOCK(bp, lockflags,
4016 BO_LOCKPTR(bo),
"getblk", slpflag, slptimeo);
4022 if (error == ENOLCK)
4025 else if (error != 0)
4030 if (BUF_LOCKRECURSED(bp))
4039 if (bp->b_flags & B_INVAL)
4040 bp->b_flags &= ~B_CACHE;
4041 else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0)
4042 bp->b_flags |= B_CACHE;
4043 if (bp->b_flags & B_MANAGED)
4051 if (bp->b_bcount != size) {
4052 if ((bp->b_flags & B_VMIO) == 0 ||
4053 (size > bp->b_kvasize)) {
4054 if (bp->b_flags & B_DELWRI) {
4055 bp->b_flags |= B_NOCACHE;
4058 if (LIST_EMPTY(&bp->b_dep)) {
4059 bp->b_flags |= B_RELBUF;
4062 bp->b_flags |= B_NOCACHE;
4085 KASSERT(bp->b_offset != NOOFFSET,
4086 (
"getblk: no buffer offset"));
4115 if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) {
4116 bp->b_flags |= B_NOCACHE;
4120 bp->b_flags &= ~B_DONE;
4133 if (
flags & GB_NOCREAT)
4136 bsize =
vn_isdisk(vp) ? DEV_BSIZE : bo->bo_bsize;
4137 KASSERT(bsize != 0, (
"bsize == 0, check bo->bo_bsize"));
4138 offset = blkno * bsize;
4139 vmio = vp->v_object != NULL;
4141 maxsize = size + (offset & PAGE_MASK);
4145 flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
4147 maxsize = imax(maxsize, bsize);
4148 if ((
flags & GB_NOSPARSE) != 0 && vmio &&
4150 error = VOP_BMAP(vp, blkno, NULL, &d_blkno, 0, 0);
4151 KASSERT(error != EOPNOTSUPP,
4152 (
"GB_NOSPARSE from fs not supporting bmap, vp %p",
4157 return (EJUSTRETURN);
4162 if (slpflag || slptimeo)
4199 bp->b_flags |= B_INVAL;
4209 bp->b_lblkno = blkno;
4210 bp->b_blkno = d_blkno;
4211 bp->b_offset = offset;
4223 bp->b_flags |= B_VMIO;
4224 KASSERT(vp->v_object == bp->b_bufobj->bo_object,
4225 (
"ARGH! different b_bufobj->bo_object %p %p %p\n",
4226 bp, vp->v_object, bp->b_bufobj->bo_object));
4228 bp->b_flags &= ~B_VMIO;
4229 KASSERT(bp->b_bufobj->bo_object == NULL,
4230 (
"ARGH! has b_bufobj->bo_object %p %p\n",
4231 bp, bp->b_bufobj->bo_object));
4237 bp->b_flags &= ~B_DONE;
4239 CTR4(KTR_BUF,
"getblk(%p, %ld, %d) = %p", vp, (
long)blkno, size, bp);
4241 buf_track(bp, __func__);
4242 KASSERT(bp->b_bufobj == bo,
4243 (
"bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
4258 maxsize = (size + BKVAMASK) & ~BKVAMASK;
4260 if ((
flags & GB_NOWAIT_BD) &&
4261 (curthread->td_pflags & TDP_BUFNEED) != 0)
4266 bp->b_flags |= B_INVAL;
4277 if (bp->b_flags & B_MALLOC) {
4281 if (newbsize == 0) {
4283 free(bp->b_data, M_BIOBUF);
4284 bp->b_data = bp->b_kvabase;
4285 bp->b_flags &= ~B_MALLOC;
4312 if (bp->b_bufsize == 0 && newbsize <= PAGE_SIZE/2 &&
4314 bp->b_data =
malloc(newbsize, M_BIOBUF, M_WAITOK);
4315 bp->b_flags |= B_MALLOC;
4327 if (bp->b_flags & B_MALLOC) {
4328 origbuf = bp->b_data;
4329 origbufsize = bp->b_bufsize;
4330 bp->b_data = bp->b_kvabase;
4332 bp->b_flags &= ~B_MALLOC;
4333 newbsize = round_page(newbsize);
4336 (vm_offset_t) bp->b_data + newbsize);
4337 if (origbuf != NULL) {
4338 bcopy(origbuf, bp->b_data, origbufsize);
4339 free(origbuf, M_BIOBUF);
4363 if (bp->b_bcount == size)
4366 if (bp->b_kvasize != 0 && bp->b_kvasize < size)
4367 panic(
"allocbuf: buffer too small");
4369 newbsize = roundup2(size, DEV_BSIZE);
4370 if ((bp->b_flags & B_VMIO) == 0) {
4371 if ((bp->b_flags & B_MALLOC) == 0)
4372 newbsize = round_page(newbsize);
4377 if (newbsize < bp->b_bufsize)
4379 else if (newbsize > bp->b_bufsize)
4384 desiredpages = (size == 0) ? 0 :
4385 num_pages((bp->b_offset & PAGE_MASK) + newbsize);
4387 if (bp->b_flags & B_MALLOC)
4388 panic(
"allocbuf: VMIO buffer can't be malloced");
4393 if (size == 0 || bp->b_bufsize == 0)
4394 bp->b_flags |= B_CACHE;
4396 if (newbsize < bp->b_bufsize)
4399 else if (size > bp->b_bcount)
4403 bp->b_bcount = size;
4415 void (*done)(
struct bio *);
4416 vm_offset_t
start, end;
4418 biotrack(bp, __func__);
4426 if (__predict_false(
dumping && SCHEDULER_STOPPED())) {
4430 if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) {
4431 bp->bio_flags &= ~BIO_TRANSIENT_MAPPING;
4432 bp->bio_flags |= BIO_UNMAPPED;
4433 start = trunc_page((vm_offset_t)bp->bio_data);
4434 end = round_page((vm_offset_t)bp->bio_data + bp->bio_length);
4440 done = bp->bio_done;
4445 if (done == NULL || done ==
biodone) {
4448 bp->bio_flags |= BIO_DONE;
4465 while ((bp->bio_flags & BIO_DONE) == 0)
4466 msleep(bp, mtxp, PRIBIO, wmesg, 0);
4468 if (bp->bio_error != 0)
4469 return (bp->bio_error);
4470 if (!(bp->bio_flags & BIO_ERROR))
4480 bp->bio_error = error;
4481 bp->bio_flags |= BIO_ERROR;
4488#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
4490biotrack_buf(
struct bio *bp,
const char *location)
4493 buf_track(bp->bio_track_bp, location);
4507 if (bp->b_iocmd == BIO_READ)
4508 bwait(bp, PRIBIO,
"biord");
4510 bwait(bp, PRIBIO,
"biowr");
4511 if (bp->b_flags & B_EINTR) {
4512 bp->b_flags &= ~B_EINTR;
4515 if (bp->b_ioflags & BIO_ERROR) {
4516 return (bp->b_error ? bp->b_error : EIO);
4544 struct bufobj *dropobj;
4547 buf_track(bp, __func__);
4548 CTR3(KTR_BUF,
"bufdone(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
4551 KASSERT(!(bp->b_flags & B_DONE), (
"biodone: bp %p already done", bp));
4554 if (bp->b_iocmd == BIO_WRITE)
4555 dropobj = bp->b_bufobj;
4557 if (bp->b_iodone != NULL) {
4559 bp->b_iodone = NULL;
4565 if (bp->b_flags & B_VMIO) {
4571 if (bp->b_iocmd == BIO_READ &&
4572 !(bp->b_flags & (B_INVAL|B_NOCACHE)) &&
4573 !(bp->b_ioflags & BIO_ERROR))
4574 bp->b_flags |= B_CACHE;
4577 if (!LIST_EMPTY(&bp->b_dep))
4579 if ((bp->b_flags & B_CKHASH) != 0) {
4580 KASSERT(bp->b_iocmd == BIO_READ,
4581 (
"bufdone: b_iocmd %d not BIO_READ", bp->b_iocmd));
4582 KASSERT(buf_mapped(bp), (
"bufdone: bp %p not mapped", bp));
4583 (*bp->b_ckhashcalc)(bp);
4590 if (bp->b_flags & B_ASYNC) {
4591 if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) ||
4592 (bp->b_ioflags & BIO_ERROR))
4615 if (!(bp->b_flags & B_VMIO))
4618 obj = bp->b_bufobj->bo_object;
4619 for (i = 0; i < bp->b_npages; i++) {
4621 if (m == bogus_page) {
4622 m = vm_page_relookup(obj, OFF_TO_IDX(bp->b_offset) + i);
4624 panic(
"vfs_unbusy_pages: page missing\n");
4626 if (buf_mapped(bp)) {
4628 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
4629 bp->b_pages, bp->b_npages);
4635 vm_object_pip_wakeupn(obj, bp->b_npages);
4657 eoff = (off + PAGE_SIZE) & ~(vm_ooffset_t)PAGE_MASK;
4658 if (eoff > bp->b_offset + bp->b_bcount)
4659 eoff = bp->b_offset + bp->b_bcount;
4666 vm_page_set_valid_range(m, off & PAGE_MASK, eoff - off);
4678 vm_ooffset_t soff, eoff;
4687 eoff = (off + PAGE_SIZE) & ~(off_t)PAGE_MASK;
4688 if (eoff > bp->b_offset + bp->b_bcount)
4689 eoff = bp->b_offset + bp->b_bcount;
4696 vm_page_set_validclean(
4698 (vm_offset_t) (soff & PAGE_MASK),
4699 (vm_offset_t) (eoff - soff)
4712 for (i = 0; i < bp->b_npages; i++)
4713 vm_page_busy_acquire(bp->b_pages[i], VM_ALLOC_SBUSY);
4721 for (i = 0; i < bp->b_npages; i++)
4722 vm_page_sunbusy(bp->b_pages[i]);
4746 if (!(bp->b_flags & B_VMIO))
4749 obj = bp->b_bufobj->bo_object;
4750 foff = bp->b_offset;
4751 KASSERT(bp->b_offset != NOOFFSET,
4752 (
"vfs_busy_pages: no buffer offset"));
4753 if ((bp->b_flags & B_CLUSTER) == 0) {
4754 vm_object_pip_add(obj, bp->b_npages);
4757 if (bp->b_bufsize != 0)
4760 for (i = 0; i < bp->b_npages; i++) {
4762 vm_page_assert_sbusied(m);
4780 pmap_remove_write(m);
4782 }
else if (vm_page_all_valid(m) &&
4783 (bp->b_flags & B_CACHE) == 0) {
4784 bp->b_pages[i] = bogus_page;
4787 foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
4789 if (bogus && buf_mapped(bp)) {
4791 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
4792 bp->b_pages, bp->b_npages);
4810 if (!(bp->b_flags & B_VMIO))
4818 base += (bp->b_offset & PAGE_MASK);
4819 n = PAGE_SIZE - (base & PAGE_MASK);
4828 for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) {
4832 vm_page_set_valid_range(m, base & PAGE_MASK, n);
4855 int i, j,
mask, sa, ea, slide;
4857 if ((bp->b_flags & (B_VMIO | B_MALLOC)) != B_VMIO) {
4861 bp->b_flags &= ~B_INVAL;
4862 bp->b_ioflags &= ~BIO_ERROR;
4864 sa = bp->b_offset & PAGE_MASK;
4866 for (i = 0; i < bp->b_npages; i++, sa = 0) {
4867 slide = imin(slide + PAGE_SIZE, bp->b_offset + bp->b_bufsize);
4868 ea = slide & PAGE_MASK;
4871 if (bp->b_pages[i] == bogus_page)
4874 mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;
4875 if ((bp->b_pages[i]->valid &
mask) ==
mask)
4877 if ((bp->b_pages[i]->valid &
mask) == 0)
4878 pmap_zero_page_area(bp->b_pages[i], sa, ea - sa);
4880 for (; sa < ea; sa += DEV_BSIZE, j++) {
4881 if ((bp->b_pages[i]->valid & (1 << j)) == 0) {
4882 pmap_zero_page_area(bp->b_pages[i],
4887 vm_page_set_valid_range(bp->b_pages[i], j * DEV_BSIZE,
4888 roundup2(ea - sa, DEV_BSIZE));
4900 if (buf_mapped(bp)) {
4902 bzero(bp->b_data + base, size);
4905 n = PAGE_SIZE - (base & PAGE_MASK);
4906 for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) {
4910 pmap_zero_page_area(m, base & PAGE_MASK, n);
4928 KASSERT((ioflag & IO_NOREUSE) == 0 || (ioflag & IO_VMIO) != 0,
4929 (
"buf %p non-VMIO noreuse", bp));
4931 if ((ioflag & IO_DIRECT) != 0)
4932 bp->b_flags |= B_DIRECT;
4933 if ((ioflag & IO_EXT) != 0)
4934 bp->b_xflags |= BX_ALTDATA;
4935 if ((ioflag & (IO_VMIO | IO_DIRECT)) != 0 && LIST_EMPTY(&bp->b_dep)) {
4936 bp->b_flags |= B_RELBUF;
4937 if ((ioflag & IO_NOREUSE) != 0)
4938 bp->b_flags |= B_NOREUSE;
4973 to = round_page(to);
4974 from = round_page(from);
4975 index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
4976 MPASS((bp->b_flags & B_MAXPHYS) == 0);
4978 (
"vm_hold_load_pages too large %p %#jx %#jx %u",
4981 for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
4987 p = vm_page_alloc_noobj(VM_ALLOC_SYSTEM | VM_ALLOC_WIRED |
4988 VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT) | VM_ALLOC_WAITOK);
4989 pmap_qenter(pg, &p, 1);
4990 bp->b_pages[index] = p;
4992 bp->b_npages = index;
5001 int index, newnpages;
5005 from = round_page((vm_offset_t)bp->b_data + newbsize);
5006 newnpages = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
5007 if (bp->b_npages > newnpages)
5008 pmap_qremove(from, bp->b_npages - newnpages);
5009 for (index = newnpages; index < bp->b_npages; index++) {
5010 p = bp->b_pages[index];
5011 bp->b_pages[index] = NULL;
5012 vm_page_unwire_noq(p);
5015 bp->b_npages = newnpages;
5038 MPASS((bp->b_flags & B_MAXPHYS) != 0);
5039 prot = VM_PROT_READ;
5040 if (bp->b_iocmd == BIO_READ)
5041 prot |= VM_PROT_WRITE;
5042 pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
5043 (vm_offset_t)uaddr, len, prot, bp->b_pages, PBUF_PAGES);
5046 bp->b_bufsize = len;
5047 bp->b_npages = pidx;
5048 bp->b_offset = ((vm_offset_t)uaddr) & PAGE_MASK;
5049 if (mapbuf || !unmapped_buf_allowed) {
5050 pmap_qenter((vm_offset_t)bp->b_kvabase, bp->b_pages, pidx);
5051 bp->b_data = bp->b_kvabase + bp->b_offset;
5068 npages = bp->b_npages;
5070 pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages);
5071 vm_page_unhold_pages(bp->b_pages, npages);
5083 bp->b_flags |= B_DONE;
5095 while ((bp->b_flags & B_DONE) == 0)
5096 msleep(bp, mtxp, pri, wchan, 0);
5104 return (VOP_FSYNC(bo2vnode(bo), waitfor, curthread));
5114 KASSERT(vp == bo->bo_private, (
"Inconsistent vnode bufstrategy"));
5115 KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
5116 (
"Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp));
5117 i = VOP_STRATEGY(vp, bp);
5118 KASSERT(i == 0, (
"VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp));
5127 static volatile int bufobj_cleanq;
5130 atomic_fetchadd_int(&bufobj_cleanq, 1) %
buf_domains;
5131 rw_init(BO_LOCKPTR(bo),
"bufobj interlock");
5132 bo->bo_private =
private;
5133 TAILQ_INIT(&bo->bo_clean.bv_hd);
5134 TAILQ_INIT(&bo->bo_dirty.bv_hd);
5141 KASSERT(bo != NULL, (
"NULL bo in bufobj_wref"));
5142 ASSERT_BO_WLOCKED(bo);
5150 KASSERT(bo != NULL, (
"NULL bo in bufobj_wref"));
5160 KASSERT(bo != NULL, (
"NULL bo in bufobj_wdrop"));
5162 KASSERT(bo->bo_numoutput > 0, (
"bufobj_wdrop non-positive count"));
5163 if ((--bo->bo_numoutput == 0) && (bo->bo_flag & BO_WWAIT)) {
5164 bo->bo_flag &= ~BO_WWAIT;
5165 wakeup(&bo->bo_numoutput);
5175 KASSERT(bo != NULL, (
"NULL bo in bufobj_wwait"));
5176 ASSERT_BO_WLOCKED(bo);
5178 while (bo->bo_numoutput) {
5179 bo->bo_flag |= BO_WWAIT;
5180 error = msleep(&bo->bo_numoutput, BO_LOCKPTR(bo),
5181 slpflag | (PRIBIO + 1),
"bo_wwait", timeo);
5195 if (!buf_mapped(bp)) {
5196 KASSERT(unmapped_buf_allowed, (
"unmapped"));
5197 bip->bio_ma = bp->b_pages;
5198 bip->bio_ma_n = bp->b_npages;
5200 bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK;
5201 bip->bio_flags |= BIO_UNMAPPED;
5202 KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) /
5203 PAGE_SIZE == bp->b_npages,
5204 (
"Buffer %p too short: %d %lld %d", bp, bip->bio_ma_offset,
5205 (
long long)bip->bio_length, bip->bio_ma_n));
5207 bip->bio_data = bp->b_data;
5232 "Make buffer pager release buffers after reading");
5254 int *rbehind,
int *rahead, vbg_get_lblkno_t get_lblkno,
5255 vbg_get_blksize_t get_blksize)
5262 vm_ooffset_t la, lb, poff, poffe;
5264 int br_flags, error, i, pgsin, pgsin_a, pgsin_b;
5267 object = vp->v_object;
5270 la = IDX_TO_OFF(ma[
count - 1]->pindex);
5271 if (la >= object->un_pager.vnp.vnp_size)
5272 return (VM_PAGER_BAD);
5280 lpart = la >
object->un_pager.vnp.vnp_size;
5281 error = get_blksize(vp, get_lblkno(vp, IDX_TO_OFF(ma[0]->pindex)),
5284 return (VM_PAGER_ERROR);
5290 lb = IDX_TO_OFF(ma[0]->pindex);
5291 pgsin_b = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
5293 if (rbehind != NULL)
5295 pgsin_a = OFF_TO_IDX(roundup2(la, bo_bs) - la);
5296 if (la + IDX_TO_OFF(pgsin_a) >= object->un_pager.vnp.vnp_size)
5297 pgsin_a = OFF_TO_IDX(roundup2(object->un_pager.vnp.vnp_size,
5302 VM_CNT_INC(v_vnodein);
5303 VM_CNT_ADD(v_vnodepgsin, pgsin);
5305 br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
5306 != 0) ? GB_UNMAPPED : 0;
5308 for (i = 0; i <
count; i++) {
5309 if (ma[i] != bogus_page)
5310 vm_page_busy_downgrade(ma[i]);
5314 for (i = 0; i <
count; i++) {
5316 if (m == bogus_page)
5329 if (vm_page_all_valid(m))
5332 poff = IDX_TO_OFF(m->pindex);
5333 poffe = MIN(poff + PAGE_SIZE, object->un_pager.vnp.vnp_size);
5334 for (; poff < poffe; poff += bsize) {
5335 lbn = get_lblkno(vp, poff);
5340 error = get_blksize(vp, lbn, &bsize);
5342 error = bread_gb(vp, lbn, bsize,
5343 curthread->td_ucred, br_flags, &bp);
5346 if (bp->b_rcred == curthread->td_ucred) {
5348 bp->b_rcred = NOCRED;
5350 if (LIST_EMPTY(&bp->b_dep)) {
5364 !vm_page_all_valid(m))
5365 bp->b_flags |= B_RELBUF;
5367 bp->b_flags &= ~B_NOCACHE;
5374 vm_page_all_valid(m) || i ==
count - 1,
5375 (
"buf %d %p invalid", i, m));
5376 if (i ==
count - 1 && lpart) {
5377 if (!vm_page_none_valid(m) &&
5378 !vm_page_all_valid(m))
5379 vm_page_zero_invalid(m, TRUE);
5386 for (i = 0; i <
count; i++) {
5387 if (ma[i] == bogus_page)
5389 if (vm_page_busy_tryupgrade(ma[i]) == 0) {
5390 vm_page_sunbusy(ma[i]);
5391 ma[i] = vm_page_grab_unlocked(
object, ma[i]->pindex,
5408 if (!vm_page_all_valid(ma[i]))
5411 if (redo && error == 0)
5413 return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
5421DB_SHOW_COMMAND(buffer, db_show_buffer)
5425#ifdef FULL_BUF_TRACKING
5430 db_printf(
"usage: show buffer <addr>\n");
5434 db_printf(
"buf at %p\n", bp);
5435 db_printf(
"b_flags = 0x%b, b_xflags=0x%b\n",
5436 (u_int)bp->b_flags, PRINT_BUF_FLAGS,
5437 (u_int)bp->b_xflags, PRINT_BUF_XFLAGS);
5438 db_printf(
"b_vflags=0x%b b_ioflags0x%b\n",
5439 (u_int)bp->b_vflags, PRINT_BUF_VFLAGS,
5440 (u_int)bp->b_ioflags, PRINT_BIO_FLAGS);
5442 "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n"
5443 "b_bufobj = (%p), b_data = %p\n, b_blkno = %jd, b_lblkno = %jd, "
5444 "b_vp = %p, b_dep = %p\n",
5445 bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
5446 bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno,
5447 (intmax_t)bp->b_lblkno, bp->b_vp, bp->b_dep.lh_first);
5448 db_printf(
"b_kvabase = %p, b_kvasize = %d\n",
5449 bp->b_kvabase, bp->b_kvasize);
5452 db_printf(
"b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);
5453 for (i = 0; i < bp->b_npages; i++) {
5457 db_printf(
"(%p, 0x%lx, 0x%lx)", m->object,
5459 (u_long)VM_PAGE_TO_PHYS(m));
5461 db_printf(
"( ??? )");
5462 if ((i + 1) < bp->b_npages)
5467 BUF_LOCKPRINTINFO(bp);
5468#if defined(FULL_BUF_TRACKING)
5469 db_printf(
"b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt);
5471 i = bp->b_io_tcnt % BUF_TRACKING_SIZE;
5472 for (j = 1; j <= BUF_TRACKING_SIZE; j++) {
5473 if (bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)] == NULL)
5475 db_printf(
" %2u: %s\n", j,
5476 bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)]);
5478#elif defined(BUF_TRACKING)
5479 db_printf(
"b_io_tracking: %s\n", bp->b_io_tracking);
5484DB_SHOW_COMMAND(bufqueues, bufqueues)
5491 db_printf(
"bqempty: %d\n",
bqempty.bq_len);
5495 db_printf(
"Buf domain %d\n", i);
5496 db_printf(
"\tfreebufs\t%d\n", bd->bd_freebuffers);
5500 db_printf(
"\tbufspace\t%ld\n", bd->bd_bufspace);
5506 db_printf(
"\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers);
5512 TAILQ_FOREACH(bp, &bd->
bd_cleanq->bq_queue, b_freelist)
5513 total += bp->b_bufsize;
5514 db_printf(
"\tcleanq count\t%d (%ld)\n",
5517 TAILQ_FOREACH(bp, &bd->
bd_dirtyq.bq_queue, b_freelist)
5518 total += bp->b_bufsize;
5519 db_printf(
"\tdirtyq count\t%d (%ld)\n",
5521 db_printf(
"\twakeup\t\t%d\n", bd->
bd_wanted);
5522 db_printf(
"\tlim\t\t%d\n", bd->
bd_lim);
5523 db_printf(
"\tCPU ");
5525 db_printf(
"%d, ", bd->
bd_subq[j].bq_len);
5529 for (j = 0; j <
nbuf; j++) {
5531 if (bp->b_domain == i && BUF_ISLOCKED(bp)) {
5533 total += bp->b_bufsize;
5536 db_printf(
"\tLocked buffers: %d space %ld\n", cnt, total);
5539 for (j = 0; j <
nbuf; j++) {
5541 if (bp->b_domain == i) {
5543 total += bp->b_bufsize;
5546 db_printf(
"\tTotal buffers: %d space %ld\n", cnt, total);
5550DB_SHOW_COMMAND(lockedbufs, lockedbufs)
5555 for (i = 0; i <
nbuf; i++) {
5557 if (BUF_ISLOCKED(bp)) {
5558 db_show_buffer((uintptr_t)bp, 1, 0, NULL);
5566DB_SHOW_COMMAND(vnodebufs, db_show_vnodebufs)
5572 db_printf(
"usage: show vnodebufs <addr>\n");
5575 vp = (
struct vnode *)
addr;
5576 db_printf(
"Clean buffers:\n");
5577 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_clean.bv_hd, b_bobufs) {
5578 db_show_buffer((uintptr_t)bp, 1, 0, NULL);
5581 db_printf(
"Dirty buffers:\n");
5582 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
5583 db_show_buffer((uintptr_t)bp, 1, 0, NULL);
5588DB_COMMAND(countfreebufs, db_coundfreebufs)
5591 int i, used = 0, nfree = 0;
5594 db_printf(
"usage: countfreebufs\n");
5598 for (i = 0; i <
nbuf; i++) {
5606 db_printf(
"Counted %d free, %d used (%d tot)\n", nfree, used,
TAILQ_HEAD(note_info_list, note_info)
void kproc_start(const void *udata)
int kthread_add(void(*func)(void *), void *arg, struct proc *p, struct thread **newtdp, int flags, int pages, const char *fmt,...)
void *() malloc(size_t size, struct malloc_type *mtp, int flags)
void free(void *addr, struct malloc_type *mtp)
struct mtx_pool __read_mostly * mtxpool_sleep
struct mtx * mtx_pool_find(struct mtx_pool *pool, void *ptr)
struct ucred * crhold(struct ucred *cr)
void crfree(struct ucred *cr)
int __read_mostly dumping
void panic(const char *fmt,...)
void kern_yield(int prio)
void mi_switch(int flags)
void wakeup(const void *ident)
int sysctl_handle_long(SYSCTL_HANDLER_ARGS)
int sysctl_handle_int(SYSCTL_HANDLER_ARGS)
struct iommu_domain ** domain
struct mtx_padalign bd_run_lock
struct bufqueue * bd_cleanq
struct bufqueue bd_dirtyq
struct bufqueue bd_subq[MAXCPU+1]
struct mtx_padalign bq_lock
static bool kasan_enabled __read_mostly
counter_u64_t counter_u64_alloc(int flags)
void devstat_end_transaction_bio(struct devstat *ds, const struct bio *bp)
int printf(const char *fmt,...)
int vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
void vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
void vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
struct bufdomainset bdlodirty
#define BUF_CHECK_UNMAPPED(bp)
void biofinish(struct bio *bp, struct devstat *stat, int error)
static void vfs_vmio_iodone(struct buf *bp)
struct bufdomainset bdhidirty
static void bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
#define BQ_ASSERT_LOCKED(bq)
static int flushbufqtarget
int bufsync(struct bufobj *bo, int waitfor)
static struct bufqueue * bufqueue(struct buf *bp)
void runningbufwakeup(struct buf *bp)
static int sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS)
static void bd_init(struct bufdomain *bd)
static void bq_init(struct bufqueue *bq, int qindex, int cpu, const char *lockname)
void bufstrategy(struct bufobj *bo, struct buf *bp)
void bufdone(struct buf *bp)
static int bufkva_alloc(struct buf *bp, int maxsize, int gbflags)
static struct bufqueue * bufqueue_acquire(struct buf *bp)
bool inmem(struct vnode *vp, daddr_t blkno)
void bufobj_wrefl(struct bufobj *bo)
void bufbdflush(struct bufobj *bo, struct buf *bp)
#define BUF_CHECK_MAPPED(bp)
caddr_t __read_mostly unmapped_buf
static counter_u64_t numbufallocfails
static void bpmap_qenter(struct buf *bp)
void bremfree(struct buf *bp)
static int bufspace_reserve(struct bufdomain *bd, int size, bool metadata)
static int buf_import(void *, void **, int, int, int)
static void bd_set(struct bufdomain *bd)
int inflight_transient_maps
void biodone(struct bio *bp)
static int buf_recycle(struct bufdomain *, bool kva)
static void bd_clear(struct bufdomain *bd)
int bufwait(struct buf *bp)
static void bufspace_adjust(struct buf *bp, int bufsize)
static void buf_daemon_shutdown(void *arg __unused, int howto __unused)
int bbarrierwrite(struct buf *bp)
static void bufspace_daemon(void *arg)
int breadn_flags(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt, struct ucred *cred, int flags, void(*ckhashfunc)(struct buf *), struct buf **bpp)
static struct mtx_padalign __exclusive_cache_line rbreqlock
static int getnewbuf_kva(struct buf *bp, int gbflags, int maxsize)
static int lodirtybuffers
void bufobj_init(struct bufobj *bo, void *private)
static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m)
struct bufqueue bd_dirtyq
void waitrunningbufspace(void)
static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m)
static int buf_flush(struct vnode *vp, struct bufdomain *, int)
static struct mtx_padalign __exclusive_cache_line bdlock
static void bdirtysub(struct buf *bp)
static void vfs_vmio_invalidate(struct buf *bp)
void bufshutdown(int show_busybufs)
#define BD_RUN_UNLOCK(bd)
int bufwrite(struct buf *bp)
static struct buf * getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int maxsize, int gbflags)
struct bufqueue bd_subq[MAXCPU+1]
static int sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS)
static counter_u64_t getnewbufrestarts
int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo)
static void bufkva_reclaim(vmem_t *, int)
void bdone(struct buf *bp)
int vmapbuf(struct buf *bp, void *uaddr, size_t len, int mapbuf)
static int isbufbusy(struct buf *bp)
static int sysctl_runningspace(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_bufspace, "L", "Physical memory used for buffers")
static struct buf * nbufp(unsigned i)
void bdirty(struct buf *bp)
static void bdirtywakeup(void)
BITSET_DEFINE(bufdomainset, BUF_DOMAINS)
void vfs_bio_set_flags(struct buf *bp, int ioflag)
void babarrierwrite(struct buf *bp)
static void binsfree(struct buf *bp, int qindex)
void vfs_unbusy_pages(struct buf *bp)
struct proc * bufdaemonproc
static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS)
static counter_u64_t buffreekvacnt
void bufobj_wdrop(struct bufobj *bo)
static int buf_pager_relbuf
void bawrite(struct buf *bp)
static void vfs_setdirty_range(struct buf *bp)
static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to)
void vunmapbuf(struct buf *bp)
struct buf * incore(struct bufobj *bo, daddr_t blkno)
struct bufqueue __exclusive_cache_line bqempty
static int hidirtybuffers
void vfs_bio_bzero_buf(struct buf *bp, int base, int size)
static int bd_flushall(struct bufdomain *bd)
SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, "Use the VM system for directory writes")
static __inline void bd_wakeup(void)
void vfs_busy_pages_acquire(struct buf *bp)
int biowait(struct bio *bp, const char *wmesg)
int allocbuf(struct buf *bp, int size)
static __inline void vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m)
static void buf_free(struct buf *bp)
static int recursiveflushes
struct buf_ops buf_ops_bio
struct buf * getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags)
static void bd_flush(struct bufdomain *bd, struct bufqueue *bq)
static void vfs_vmio_truncate(struct buf *bp, int npages)
static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int, void(*)(struct buf *))
static void bq_remove(struct bufqueue *bq, struct buf *bp)
void bufobj_wref(struct bufobj *bo)
static void bufspace_release(struct bufdomain *bd, int size)
struct bufdomain __exclusive_cache_line bdomain[BUF_DOMAINS]
struct bufqueue __aligned(CACHE_LINE_SIZE)
static int __read_mostly buf_domains
void vfs_busy_pages(struct buf *bp, int clear_modify)
static void runningwakeup(void)
static void bufkva_free(struct buf *)
static struct mtx_padalign __exclusive_cache_line bdirtylock
void bdata2bio(struct buf *bp, struct bio *bip)
static void b_io_dismiss(struct buf *bp, int ioflag, bool release)
void vfs_bio_clrbuf(struct buf *bp)
void bundirty(struct buf *bp)
static long maxbufmallocspace
int getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo, int flags, struct buf **bpp)
static struct kproc_desc buf_kp
void bremfreef(struct buf *bp)
static long bufmallocspace
SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, "Amount of presently outstanding async buffer io")
static const char buf_wmesg[]
static counter_u64_t notbufdflushes
static long hirunningspace
static counter_u64_t getnewbufcalls
void bwait(struct buf *bp, u_char pri, const char *wchan)
static void bufmallocadjust(struct buf *bp, int bufsize)
static counter_u64_t mappingrestarts
struct buf * geteblk(int size, int flags)
static struct bufdomain * bufdomain(struct buf *)
static int flushbufqueues(struct vnode *, struct bufdomain *, int, int)
static counter_u64_t bufdefragcnt
void vfs_bio_set_valid(struct buf *bp, int base, int size)
static void buf_daemon(void)
static void buf_release(void *, void **, int)
void vfs_busy_pages_release(struct buf *bp)
static void maxbcachebuf_adjust(void)
static void bufspace_daemon_wakeup(struct bufdomain *bd)
static long lorunningspace
#define BD_RUN_LOCKPTR(bd)
caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
static void bufspace_daemon_shutdown(void *arg, int howto __unused)
static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock)
static struct bio_queue nondump_bios
void bqrelse(struct buf *bp)
void bdwrite(struct buf *bp)
void vfs_bio_brelse(struct buf *bp, int ioflag)
static void vfs_clean_pages_dirty_buf(struct buf *bp)
static void vfs_nonvmio_extend(struct buf *bp, int newbsize)
static void vfs_vmio_extend(struct buf *bp, int npages, int size)
int vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int *rahead, vbg_get_lblkno_t get_lblkno, vbg_get_blksize_t get_blksize)
static void bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags, int slpflag, int slptimeo)
static struct buf * buf_alloc(struct bufdomain *bd)
static int numfreebuffers
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp)
int vfs_bio_awrite(struct buf *bp)
static long barrierwrites
void brelse(struct buf *bp)
int buf_dirty_count_severe(void)
static counter_u64_t bufkvaspace
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, "Kernel virtual memory used for buffers")
static MALLOC_DEFINE(M_BIOBUF, "biobuf", "BIO buffer")
static int sysctl_bufspace(SYSCTL_HANDLER_ARGS)
static void vfs_nonvmio_truncate(struct buf *bp, int newbsize)
static void vm_hold_free_pages(struct buf *bp, int newbsize)
static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno)
static void bdirtyadd(struct buf *bp)
int cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, int gbflags)
struct buf * gbincore_unlocked(struct bufobj *bo, daddr_t lblkno)
void bgetvp(struct vnode *vp, struct buf *bp)
void vfs_unmountall(void)
void vn_printf(struct vnode *vp, const char *fmt,...)
void brelvp(struct buf *bp)
bool vn_isdisk(struct vnode *vp)
void reassignbuf(struct buf *bp)
struct buf * gbincore(struct bufobj *bo, daddr_t lblkno)
int kern_sync(struct thread *td)
int vn_start_write(struct vnode *vp, struct mount **mpp, int flags)
void vn_finished_write(struct mount *mp)