FreeBSD virtual memory subsystem code
vm_radix.c
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 EMC Corp.
5 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32/*
33 * Path-compressed radix trie implementation.
34 * The following code is not generalized into a general-purpose library
35 * because too many embedded parameters would have to be decided by the
36 * library consumers.  At the same time, consumers of this code must
37 * achieve the highest possible performance.
38 *
39 * The implementation takes into account the following rationale:
40 * - Size of the nodes should be as small as possible but still big enough
41 * to avoid a large maximum depth for the trie. This is a balance
42 * between the necessity to not wire too much physical memory for the nodes
43 * and the necessity to avoid too much cache pollution during the trie
44 * operations.
45 * - There is no strong bias toward the number of lookup operations over
46 * the number of insert and remove operations.  This implies that
47 * optimizations which help one operation at the expense of the other
48 * must be carefully evaluated.
49 * - On average not many nodes are expected to be fully populated, hence
50 * level compression may just complicate things.
51 */
52
53#include <sys/cdefs.h>
54__FBSDID("$FreeBSD$");
55
56#include "opt_ddb.h"
57
58#include <sys/param.h>
59#include <sys/systm.h>
60#include <sys/kernel.h>
61#include <sys/proc.h>
62#include <sys/vmmeter.h>
63#include <sys/smr.h>
64#include <sys/smr_types.h>
65
66#include <vm/uma.h>
67#include <vm/vm.h>
68#include <vm/vm_param.h>
69#include <vm/vm_object.h>
70#include <vm/vm_page.h>
71#include <vm/vm_radix.h>
72
73#ifdef DDB
74#include <ddb/ddb.h>
75#endif
76
77/*
78 * These widths should allow the pointers to a node's children to fit within
79 * a single cache line. The extra levels from a narrow width should not be
80 * a problem thanks to path compression.
81 */
82#ifdef __LP64__
83#define VM_RADIX_WIDTH 4
84#else
85#define VM_RADIX_WIDTH 3
86#endif
87
88#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
89#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
90#define VM_RADIX_LIMIT \
91 (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)
92
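/*
 * [Editor's illustration -- not part of vm_radix.c.]  Assuming vm_pindex_t
 * is 64 bits wide on all platforms, the macros above work out as follows:
 * on LP64, VM_RADIX_WIDTH is 4, so each node has VM_RADIX_COUNT == 16 child
 * slots and the highest possible node level is
 * VM_RADIX_LIMIT == howmany(64, 4) - 1 == 15; on 32-bit platforms the width
 * drops to 3, giving 8 child slots per node and a level limit of
 * howmany(64, 3) - 1 == 21.
 */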
93/* Flag bits stored in node pointers. */
94#define VM_RADIX_ISLEAF 0x1
95#define VM_RADIX_FLAGS 0x1
96#define VM_RADIX_PAD VM_RADIX_FLAGS
97
98/* Returns one unit associated with specified level. */
99#define VM_RADIX_UNITLEVEL(lev) \
100 ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
101
102enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };
103
104struct vm_radix_node;
105typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;
106
107struct vm_radix_node {
108 vm_pindex_t rn_owner; /* Owner of record. */
109 uint16_t rn_count; /* Valid children. */
110 uint8_t rn_clev; /* Current level. */
111 int8_t rn_last; /* zero last ptr. */
112 smrnode_t rn_child[VM_RADIX_COUNT]; /* Child nodes. */
113};
114
115static uma_zone_t vm_radix_node_zone;
116static smr_t vm_radix_smr;
117
118static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
119 enum vm_radix_access access);
120
121/*
122 * Allocate a radix node.
123 */
124static struct vm_radix_node *
125vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
126{
127 struct vm_radix_node *rnode;
128
129 rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
130 if (rnode == NULL)
131 return (NULL);
132
133 /*
134 * We want to clear the last child pointer after the final section
135 * has exited so lookup can not return false negatives. It is done
136 * here because it will be cache-cold in the dtor callback.
137 */
138 if (rnode->rn_last != 0) {
139 vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
140 NULL, UNSERIALIZED);
141 rnode->rn_last = 0;
142 }
143 rnode->rn_owner = owner;
144 rnode->rn_count = count;
145 rnode->rn_clev = clevel;
146 return (rnode);
147}
148
149/*
150 * Free radix node.
151 */
152static __inline void
153vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
154{
155#ifdef INVARIANTS
156 int slot;
157
158 KASSERT(rnode->rn_count == 0,
159 ("vm_radix_node_put: rnode %p has %d children", rnode,
160 rnode->rn_count));
161 for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
162 if (slot == last)
163 continue;
164 KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
165 NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
166 }
167#endif
168 /* Off by one so a freshly zero'd node is not assigned to. */
169 rnode->rn_last = last + 1;
170 uma_zfree_smr(vm_radix_node_zone, rnode);
171}
172
173/*
174 * Return the position in the array for a given level.
175 */
176static __inline int
177vm_radix_slot(vm_pindex_t index, uint16_t level)
178{
179
180 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
181}
182
183/* Trims the key after the specified level. */
184static __inline vm_pindex_t
185vm_radix_trimkey(vm_pindex_t index, uint16_t level)
186{
187 vm_pindex_t ret;
188
189 ret = index;
190 if (level > 0) {
191 ret >>= level * VM_RADIX_WIDTH;
192 ret <<= level * VM_RADIX_WIDTH;
193 }
194 return (ret);
195}
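/*
 * [Editor's illustration -- not part of vm_radix.c.]  A worked example of
 * the two helpers above with VM_RADIX_WIDTH == 4, i.e. one 4-bit digit of
 * the pindex consumed per level, level 0 being the least significant:
 *
 *	index = 0x1a2b3
 *	vm_radix_slot(index, 0) == 0x3		(bits  3..0)
 *	vm_radix_slot(index, 1) == 0xb		(bits  7..4)
 *	vm_radix_slot(index, 2) == 0x2		(bits 11..8)
 *	vm_radix_trimkey(index, 2) == 0x1a200	(levels 0 and 1 cleared)
 */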
196
197/*
198 * Fetch a node pointer from a slot in another node.
199 */
200static __inline struct vm_radix_node *
201vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
202{
203
204 switch (access) {
205 case UNSERIALIZED:
206 return (smr_unserialized_load(p, true));
207 case LOCKED:
208 return (smr_serialized_load(p, true));
209 case SMR:
210 return (smr_entered_load(p, vm_radix_smr));
211 }
212 __assert_unreachable();
213}
214
215static __inline void
216vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
217 enum vm_radix_access access)
218{
219
220 switch (access) {
221 case UNSERIALIZED:
222 smr_unserialized_store(p, v, true);
223 break;
224 case LOCKED:
225 smr_serialized_store(p, v, true);
226 break;
227 case SMR:
228 panic("vm_radix_node_store: Not supported in smr section.");
229 }
230}
231
232/*
233 * Get the root node for a radix tree.
234 */
235static __inline struct vm_radix_node *
236vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
237{
238
239 return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
240}
241
242/*
243 * Set the root node for a radix tree.
244 */
245static __inline void
246vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
247 enum vm_radix_access access)
248{
249
250 vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
251}
252
253/*
254 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
255 */
256static __inline boolean_t
257vm_radix_isleaf(struct vm_radix_node *rnode)
258{
259
260 return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
261}
262
263/*
264 * Returns the associated page extracted from rnode.
265 */
266static __inline vm_page_t
267vm_radix_topage(struct vm_radix_node *rnode)
268{
269
270 return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
271}
272
273/*
274 * Adds the page as a child of the provided node.
275 */
276static __inline void
277vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
278 vm_page_t page, enum vm_radix_access access)
279{
280 int slot;
281
282 slot = vm_radix_slot(index, clev);
283 vm_radix_node_store(&rnode->rn_child[slot],
284 (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access);
285}
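/*
 * [Editor's sketch -- not part of vm_radix.c.]  The leaf encoding used by
 * vm_radix_isleaf(), vm_radix_topage() and vm_radix_addpage() above is plain
 * pointer tagging: vm_page structures are aligned, so the low bit of a child
 * pointer is free to mark "this slot holds a page, not another node".  The
 * same idea, reduced to a self-contained userland program ("struct page" and
 * the constants are stand-ins for illustration, not the kernel's types):
 */
#include <stdint.h>
#include <stdio.h>

#define ISLEAF	0x1			/* mirrors VM_RADIX_ISLEAF */
#define FLAGS	0x1			/* mirrors VM_RADIX_FLAGS */

struct page { int dummy; };		/* stand-in for struct vm_page */

static uintptr_t
leaf_encode(struct page *p)
{
	/* Tag the pointer; alignment guarantees the low bit is clear. */
	return ((uintptr_t)p | ISLEAF);
}

static struct page *
leaf_decode(uintptr_t v)
{
	/* Only decode values that actually carry the leaf tag. */
	return ((v & ISLEAF) != 0 ? (struct page *)(v & ~(uintptr_t)FLAGS) : NULL);
}

int
main(void)
{
	struct page pg;
	uintptr_t v = leaf_encode(&pg);

	printf("tagged %p, decoded %p\n", (void *)v, (void *)leaf_decode(v));
	return (leaf_decode(v) == &pg ? 0 : 1);
}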
286
287/*
288 * Returns the slot where two keys differ.
289 * It cannot accept 2 equal keys.
290 */
291static __inline uint16_t
292vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
293{
294 uint16_t clev;
295
296 KASSERT(index1 != index2, ("%s: passing the same key value %jx",
297 __func__, (uintmax_t)index1));
298
299 index1 ^= index2;
300 for (clev = VM_RADIX_LIMIT;; clev--)
301 if (vm_radix_slot(index1, clev) != 0)
302 return (clev);
303}
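/*
 * [Editor's illustration -- not part of vm_radix.c.]  Example for
 * vm_radix_keydiff() with VM_RADIX_WIDTH == 4: for index1 == 0x12345 and
 * index2 == 0x1f345, index1 ^ index2 == 0x0d000, whose highest non-zero
 * 4-bit digit sits at level 3, so keydiff returns 3.  This is the level at
 * which the two keys first select different child slots, and it becomes the
 * rn_clev of the intermediate node created by vm_radix_insert().
 */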
304
305/*
306 * Returns TRUE if it can be determined that key does not belong to the
307 * specified rnode. Otherwise, returns FALSE.
308 */
309static __inline boolean_t
310vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
311{
312
313 if (rnode->rn_clev < VM_RADIX_LIMIT) {
314 idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
315 return (idx != rnode->rn_owner);
316 }
317 return (FALSE);
318}
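/*
 * [Editor's illustration -- not part of vm_radix.c.]  Example for
 * vm_radix_keybarr(): consider a node with rn_clev == 1 and
 * rn_owner == 0x4200 (the common prefix, with levels 0 and 1 cleared).
 * For idx == 0x42ab, vm_radix_trimkey(idx, 2) == 0x4200 matches the owner,
 * so keybarr returns FALSE and the search may descend.  For idx == 0x43ab
 * the trimmed key is 0x4300 != 0x4200, so keybarr returns TRUE: the key
 * cannot be anywhere below this node.
 */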
319
320/*
321 * Internal helper for vm_radix_reclaim_allnodes().
322 * This function is recursive.
323 */
324static void
325vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
326{
327 struct vm_radix_node *child;
328 int slot;
329
330 KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
331 ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
332 for (slot = 0; rnode->rn_count != 0; slot++) {
333 child = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
334 if (child == NULL)
335 continue;
336 if (!vm_radix_isleaf(child))
337 vm_radix_reclaim_allnodes_int(child);
338 vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
339 rnode->rn_count--;
340 }
341 vm_radix_node_put(rnode, -1);
342}
343
344#ifndef UMA_MD_SMALL_ALLOC
345void vm_radix_reserve_kva(void);
346/*
347 * Reserve the KVA necessary to satisfy the node allocation.
348 * This is mandatory on architectures that do not support a direct
349 * mapping, as they would otherwise need to carve into the kernel maps for
350 * every node allocation, resulting in deadlocks for consumers already
351 * working with kernel maps.
352 */
353void
354vm_radix_reserve_kva(void)
355{
356
357 /*
358 * Calculate the number of reserved nodes, discounting the pages that
359 * are needed to store them.
360 */
361 if (!uma_zone_reserve_kva(vm_radix_node_zone,
362 ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
363 sizeof(struct vm_radix_node))))
364 panic("%s: unable to reserve KVA", __func__);
365}
366#endif
367
368/*
369 * Initialize the UMA slab zone.
370 */
371void
372vm_radix_zinit(void)
373{
374
375 vm_radix_node_zone = uma_zcreate("RADIX NODE",
376 sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
377 VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
378 vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
379}
380
381/*
382 * Inserts the key-value pair into the trie.
383 * Panics if the key already exists.
384 */
385int
386vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
387{
388 vm_pindex_t index, newind;
389 struct vm_radix_node *rnode, *tmp;
390 smrnode_t *parentp;
391 vm_page_t m;
392 int slot;
393 uint16_t clev;
394
395 index = page->pindex;
396
397 /*
398 * The owner of record for root is not really important because it
399 * will never be used.
400 */
401 rnode = vm_radix_root_load(rtree, LOCKED);
402 if (rnode == NULL) {
403 rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
404 return (0);
405 }
406 parentp = (smrnode_t *)&rtree->rt_root;
407 for (;;) {
408 if (vm_radix_isleaf(rnode)) {
409 m = vm_radix_topage(rnode);
410 if (m->pindex == index)
411 panic("%s: key %jx is already present",
412 __func__, (uintmax_t)index);
413 clev = vm_radix_keydiff(m->pindex, index);
414 tmp = vm_radix_node_get(vm_radix_trimkey(index,
415 clev + 1), 2, clev);
416 if (tmp == NULL)
417 return (ENOMEM);
418 /* These writes are not yet visible due to ordering. */
419 vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
420 vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
421 /* Synchronize to make leaf visible. */
422 vm_radix_node_store(parentp, tmp, LOCKED);
423 return (0);
424 } else if (vm_radix_keybarr(rnode, index))
425 break;
426 slot = vm_radix_slot(index, rnode->rn_clev);
427 parentp = &rnode->rn_child[slot];
428 tmp = vm_radix_node_load(parentp, LOCKED);
429 if (tmp == NULL) {
430 rnode->rn_count++;
431 vm_radix_addpage(rnode, index, rnode->rn_clev, page,
432 LOCKED);
433 return (0);
434 }
435 rnode = tmp;
436 }
437
438 /*
439 * A new node is needed because the right insertion level is reached.
440 * Set up the new intermediate node and add the 2 children: the
441 * new object and the older edge.
442 */
443 newind = rnode->rn_owner;
444 clev = vm_radix_keydiff(newind, index);
445 tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
446 if (tmp == NULL)
447 return (ENOMEM);
448 slot = vm_radix_slot(newind, clev);
449 /* These writes are not yet visible due to ordering. */
450 vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
451 vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
452 /* Serializing write to make the above visible. */
453 vm_radix_node_store(parentp, tmp, LOCKED);
454
455 return (0);
456}
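/*
 * [Editor's illustration -- not part of vm_radix.c.]  A walk through the
 * leaf-splitting path of vm_radix_insert() with VM_RADIX_WIDTH == 4.
 * Suppose the trie holds only pindex 0x10, so the root is a tagged leaf,
 * and page 0x25 is inserted:
 *   - vm_radix_keydiff(0x10, 0x25) == 1 (the keys first differ in digit 1);
 *   - a node is allocated with rn_owner == vm_radix_trimkey(0x25, 2) == 0,
 *     rn_count == 2 and rn_clev == 1;
 *   - the new page goes into slot vm_radix_slot(0x25, 1) == 2 and the old
 *     leaf into slot vm_radix_slot(0x10, 1) == 1, both with unserialized
 *     stores;
 *   - finally the root pointer is switched to the new node with a
 *     serializing store, making both leaves visible to SMR readers at once.
 */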
457
458/*
459 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
460 * otherwise.
461 */
462boolean_t
463vm_radix_is_singleton(struct vm_radix *rtree)
464{
465 struct vm_radix_node *rnode;
466
467 rnode = vm_radix_root_load(rtree, LOCKED);
468 if (rnode == NULL)
469 return (FALSE);
470 return (vm_radix_isleaf(rnode));
471}
472
473/*
474 * Returns the value stored at the index. If the index is not present,
475 * NULL is returned.
476 */
477static __always_inline vm_page_t
478_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
479 enum vm_radix_access access)
480{
481 struct vm_radix_node *rnode;
482 vm_page_t m;
483 int slot;
484
485 rnode = vm_radix_root_load(rtree, access);
486 while (rnode != NULL) {
487 if (vm_radix_isleaf(rnode)) {
488 m = vm_radix_topage(rnode);
489 if (m->pindex == index)
490 return (m);
491 break;
492 }
493 if (vm_radix_keybarr(rnode, index))
494 break;
495 slot = vm_radix_slot(index, rnode->rn_clev);
496 rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
497 }
498 return (NULL);
499}
500
501/*
502 * Returns the value stored at the index assuming there is an external lock.
503 *
504 * If the index is not present, NULL is returned.
505 */
506vm_page_t
507vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
508{
509
510 return _vm_radix_lookup(rtree, index, LOCKED);
511}
512
513/*
514 * Returns the value stored at the index without requiring an external lock.
515 *
516 * If the index is not present, NULL is returned.
517 */
518vm_page_t
519vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
520{
521 vm_page_t m;
522
523 smr_enter(vm_radix_smr);
524 m = _vm_radix_lookup(rtree, index, SMR);
525 smr_exit(vm_radix_smr);
526
527 return (m);
528}
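/*
 * [Editor's sketch -- not part of vm_radix.c.]  How a consumer is expected
 * to pick between the two lookup flavors above.  The wrapper names and the
 * locking scheme are hypothetical; in the kernel the trie is embedded in
 * vm_object and vm_radix_lookup() is called with that object's lock held.
 */
static vm_page_t
example_lookup_locked(struct vm_radix *rtree, vm_pindex_t pindex)
{
	/* Caller holds the lock that serializes inserts and removes. */
	return (vm_radix_lookup(rtree, pindex));
}

static vm_page_t
example_lookup_lockless(struct vm_radix *rtree, vm_pindex_t pindex)
{
	/*
	 * No lock held: the SMR read section entered inside
	 * vm_radix_lookup_unlocked() only guarantees that the nodes and the
	 * returned page are not freed while the lookup runs; the caller must
	 * still revalidate the page (e.g. by busying it) before relying on it.
	 */
	return (vm_radix_lookup_unlocked(rtree, pindex));
}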
529
530/*
531 * Look up the nearest entry at a position greater than or equal to index.
532 */
533vm_page_t
534vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
535{
536 struct vm_radix_node *stack[VM_RADIX_LIMIT];
537 vm_pindex_t inc;
538 vm_page_t m;
539 struct vm_radix_node *child, *rnode;
540#ifdef INVARIANTS
541 int loops = 0;
542#endif
543 int slot, tos;
544
545 rnode = vm_radix_root_load(rtree, LOCKED);
546 if (rnode == NULL)
547 return (NULL);
548 else if (vm_radix_isleaf(rnode)) {
549 m = vm_radix_topage(rnode);
550 if (m->pindex >= index)
551 return (m);
552 else
553 return (NULL);
554 }
555 tos = 0;
556 for (;;) {
557 /*
558 * If the keys differ before the current bisection node,
559 * then the search key might roll back to the earliest
560 * available bisection node or to the smallest key
561 * in the current node (if the owner is greater than the
562 * search key).
563 */
564 if (vm_radix_keybarr(rnode, index)) {
565 if (index > rnode->rn_owner) {
566ascend:
567 KASSERT(++loops < 1000,
568 ("vm_radix_lookup_ge: too many loops"));
569
570 /*
571 * Pop nodes from the stack until either the
572 * stack is empty or a node that could have a
573 * matching descendant is found.
574 */
575 do {
576 if (tos == 0)
577 return (NULL);
578 rnode = stack[--tos];
579 } while (vm_radix_slot(index,
580 rnode->rn_clev) == (VM_RADIX_COUNT - 1));
581
582 /*
583 * The following computation cannot overflow
584 * because index's slot at the current level
585 * is less than VM_RADIX_COUNT - 1.
586 */
587 index = vm_radix_trimkey(index,
588 rnode->rn_clev);
589 index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
590 } else
591 index = rnode->rn_owner;
592 KASSERT(!vm_radix_keybarr(rnode, index),
593 ("vm_radix_lookup_ge: keybarr failed"));
594 }
595 slot = vm_radix_slot(index, rnode->rn_clev);
596 child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
597 if (vm_radix_isleaf(child)) {
598 m = vm_radix_topage(child);
599 if (m->pindex >= index)
600 return (m);
601 } else if (child != NULL)
602 goto descend;
603
604 /*
605 * Look for an available edge or page within the current
606 * bisection node.
607 */
608 if (slot < (VM_RADIX_COUNT - 1)) {
609 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
610 index = vm_radix_trimkey(index, rnode->rn_clev);
611 do {
612 index += inc;
613 slot++;
614 child = vm_radix_node_load(&rnode->rn_child[slot],
615 LOCKED);
616 if (vm_radix_isleaf(child)) {
617 m = vm_radix_topage(child);
618 if (m->pindex >= index)
619 return (m);
620 } else if (child != NULL)
621 goto descend;
622 } while (slot < (VM_RADIX_COUNT - 1));
623 }
624 KASSERT(child == NULL || vm_radix_isleaf(child),
625 ("vm_radix_lookup_ge: child is radix node"));
626
627 /*
628 * If a page or edge greater than the search slot is not found
629 * in the current node, ascend to the next higher-level node.
630 */
631 goto ascend;
632descend:
633 KASSERT(rnode->rn_clev > 0,
634 ("vm_radix_lookup_ge: pushing leaf's parent"));
635 KASSERT(tos < VM_RADIX_LIMIT,
636 ("vm_radix_lookup_ge: stack overflow"));
637 stack[tos++] = rnode;
638 rnode = child;
639 }
640}
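/*
 * [Editor's illustration -- not part of vm_radix.c.]  A worked example of
 * vm_radix_lookup_ge() with VM_RADIX_WIDTH == 4.  Let the trie hold only
 * pindices 0x10 and 0x25, so the root is a level-1 node with rn_owner == 0
 * and leaves in slots 1 (0x10) and 2 (0x25).  For
 * vm_radix_lookup_ge(rtree, 0x13):
 *   - vm_radix_slot(0x13, 1) == 1 selects the leaf 0x10, which is rejected
 *     because 0x10 < 0x13;
 *   - the forward scan trims the index to 0x10, advances it by
 *     VM_RADIX_UNITLEVEL(1) == 0x10 to 0x20 while moving to slot 2, and
 *     finds the leaf 0x25, which satisfies 0x25 >= 0x20 and is returned.
 */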
641
642/*
643 * Look up the nearest entry at a position less than or equal to index.
644 */
645vm_page_t
646vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
647{
648 struct vm_radix_node *stack[VM_RADIX_LIMIT];
649 vm_pindex_t inc;
650 vm_page_t m;
651 struct vm_radix_node *child, *rnode;
652#ifdef INVARIANTS
653 int loops = 0;
654#endif
655 int slot, tos;
656
657 rnode = vm_radix_root_load(rtree, LOCKED);
658 if (rnode == NULL)
659 return (NULL);
660 else if (vm_radix_isleaf(rnode)) {
661 m = vm_radix_topage(rnode);
662 if (m->pindex <= index)
663 return (m);
664 else
665 return (NULL);
666 }
667 tos = 0;
668 for (;;) {
669 /*
670 * If the keys differ before the current bisection node,
671 * then the search key might roll back to the earliest
672 * available bisection node or to the largest key
673 * in the current node (if the owner is smaller than the
674 * search key).
675 */
676 if (vm_radix_keybarr(rnode, index)) {
677 if (index > rnode->rn_owner) {
678 index = rnode->rn_owner + VM_RADIX_COUNT *
679 VM_RADIX_UNITLEVEL(rnode->rn_clev);
680 } else {
681ascend:
682 KASSERT(++loops < 1000,
683 ("vm_radix_lookup_le: too many loops"));
684
685 /*
686 * Pop nodes from the stack until either the
687 * stack is empty or a node that could have a
688 * matching descendant is found.
689 */
690 do {
691 if (tos == 0)
692 return (NULL);
693 rnode = stack[--tos];
694 } while (vm_radix_slot(index,
695 rnode->rn_clev) == 0);
696
697 /*
698 * The following computation cannot overflow
699 * because index's slot at the current level
700 * is greater than 0.
701 */
702 index = vm_radix_trimkey(index,
703 rnode->rn_clev);
704 }
705 index--;
706 KASSERT(!vm_radix_keybarr(rnode, index),
707 ("vm_radix_lookup_le: keybarr failed"));
708 }
709 slot = vm_radix_slot(index, rnode->rn_clev);
710 child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
711 if (vm_radix_isleaf(child)) {
712 m = vm_radix_topage(child);
713 if (m->pindex <= index)
714 return (m);
715 } else if (child != NULL)
716 goto descend;
717
718 /*
719 * Look for an available edge or page within the current
720 * bisection node.
721 */
722 if (slot > 0) {
723 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
724 index |= inc - 1;
725 do {
726 index -= inc;
727 slot--;
728 child = vm_radix_node_load(&rnode->rn_child[slot],
729 LOCKED);
730 if (vm_radix_isleaf(child)) {
731 m = vm_radix_topage(child);
732 if (m->pindex <= index)
733 return (m);
734 } else if (child != NULL)
735 goto descend;
736 } while (slot > 0);
737 }
738 KASSERT(child == NULL || vm_radix_isleaf(child),
739 ("vm_radix_lookup_le: child is radix node"));
740
741 /*
742 * If a page or edge smaller than the search slot is not found
743 * in the current node, ascend to the next higher-level node.
744 */
745 goto ascend;
746descend:
747 KASSERT(rnode->rn_clev > 0,
748 ("vm_radix_lookup_le: pushing leaf's parent"));
749 KASSERT(tos < VM_RADIX_LIMIT,
750 ("vm_radix_lookup_le: stack overflow"));
751 stack[tos++] = rnode;
752 rnode = child;
753 }
754}
755
756/*
757 * Remove the specified index from the trie, and return the value stored at
758 * that index. If the index is not present, return NULL.
759 */
760vm_page_t
761vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
762{
763 struct vm_radix_node *rnode, *parent, *tmp;
764 vm_page_t m;
765 int i, slot;
766
767 rnode = vm_radix_root_load(rtree, LOCKED);
768 if (vm_radix_isleaf(rnode)) {
769 m = vm_radix_topage(rnode);
770 if (m->pindex != index)
771 return (NULL);
772 vm_radix_root_store(rtree, NULL, LOCKED);
773 return (m);
774 }
775 parent = NULL;
776 for (;;) {
777 if (rnode == NULL)
778 return (NULL);
779 slot = vm_radix_slot(index, rnode->rn_clev);
780 tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
781 if (vm_radix_isleaf(tmp)) {
782 m = vm_radix_topage(tmp);
783 if (m->pindex != index)
784 return (NULL);
785 vm_radix_node_store(&rnode->rn_child[slot], NULL, LOCKED);
786 rnode->rn_count--;
787 if (rnode->rn_count > 1)
788 return (m);
789 for (i = 0; i < VM_RADIX_COUNT; i++)
790 if (vm_radix_node_load(&rnode->rn_child[i],
791 LOCKED) != NULL)
792 break;
793 KASSERT(i != VM_RADIX_COUNT,
794 ("%s: invalid node configuration", __func__));
795 tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
796 if (parent == NULL)
797 vm_radix_root_store(rtree, tmp, LOCKED);
798 else {
799 slot = vm_radix_slot(index, parent->rn_clev);
800 KASSERT(vm_radix_node_load(
801 &parent->rn_child[slot], LOCKED) == rnode,
802 ("%s: invalid child value", __func__));
803 vm_radix_node_store(&parent->rn_child[slot],
804 tmp, LOCKED);
805 }
806 /*
807 * The child is still valid and we can not zero the
808 * pointer until all smr references are gone.
809 */
810 rnode->rn_count--;
811 vm_radix_node_put(rnode, i);
812 return (m);
813 }
814 parent = rnode;
815 rnode = tmp;
816 }
817}
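/*
 * [Editor's illustration -- not part of vm_radix.c.]  Consider a level-1
 * node under the root holding leaves 0x10 (slot 1) and 0x25 (slot 2).
 * vm_radix_remove(rtree, 0x25) clears slot 2, sees that only one child is
 * left, re-points the parent (here the root) directly at the remaining leaf
 * 0x10, and frees the node with vm_radix_node_put(rnode, i).  The surviving
 * child slot is deliberately left intact; it is only zeroed when the node is
 * reallocated in vm_radix_node_get(), after the SMR grace period has passed.
 */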
818
819/*
820 * Remove and free all the nodes from the radix tree.
821 * This function is recursive, but the recursion is tightly bounded because
822 * the maximum depth of the tree is fixed.
823 */
824void
825vm_radix_reclaim_allnodes(struct vm_radix *rtree)
826{
827 struct vm_radix_node *root;
828
829 root = vm_radix_root_load(rtree, LOCKED);
830 if (root == NULL)
831 return;
832 vm_radix_root_store(rtree, NULL, UNSERIALIZED);
833 if (!vm_radix_isleaf(root))
834 vm_radix_reclaim_allnodes_int(root);
835}
836
837/*
838 * Replace an existing page in the trie with another one.
839 * Panics if there is not an old page in the trie at the new page's index.
840 */
841vm_page_t
842vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
843{
844 struct vm_radix_node *rnode, *tmp;
845 vm_page_t m;
846 vm_pindex_t index;
847 int slot;
848
849 index = newpage->pindex;
850 rnode = vm_radix_root_load(rtree, LOCKED);
851 if (rnode == NULL)
852 panic("%s: replacing page on an empty trie", __func__);
853 if (vm_radix_isleaf(rnode)) {
854 m = vm_radix_topage(rnode);
855 if (m->pindex != index)
856 panic("%s: original replacing root key not found",
857 __func__);
858 rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
859 return (m);
860 }
861 for (;;) {
862 slot = vm_radix_slot(index, rnode->rn_clev);
863 tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
864 if (vm_radix_isleaf(tmp)) {
865 m = vm_radix_topage(tmp);
866 if (m->pindex == index) {
867 vm_radix_node_store(&rnode->rn_child[slot],
868 (struct vm_radix_node *)((uintptr_t)newpage |
869 VM_RADIX_ISLEAF), LOCKED);
870 return (m);
871 } else
872 break;
873 } else if (tmp == NULL || vm_radix_keybarr(tmp, index))
874 break;
875 rnode = tmp;
876 }
877 panic("%s: original replacing page not found", __func__);
878}
879
880void
881vm_radix_wait(void)
882{
883 uma_zwait(vm_radix_node_zone);
884}
885
886#ifdef DDB
887/*
888 * Show details about the given radix node.
889 */
890DB_SHOW_COMMAND(radixnode, db_show_radixnode)
891{
892 struct vm_radix_node *rnode, *tmp;
893 int i;
894
895 if (!have_addr)
896 return;
897 rnode = (struct vm_radix_node *)addr;
898 db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
899 (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
900 rnode->rn_clev);
901 for (i = 0; i < VM_RADIX_COUNT; i++) {
902 tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
903 if (tmp != NULL)
904 db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
905 i, (void *)tmp,
906 vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
907 rnode->rn_clev);
908 }
909}
910#endif /* DDB */