38#include <sys/kernel.h>
43#include <sys/malloc.h>
48#include <sys/sysctl.h>
50#include <machine/cpu.h>
51#include <machine/smp.h>
58volatile cpuset_t stopped_cpus;
59volatile cpuset_t started_cpus;
60volatile cpuset_t suspended_cpus;
61cpuset_t hlt_cpus_mask;
62cpuset_t logical_cpus_mask;
64void (*cpustop_restartfunc)(void);
80 CTLFLAG_RD | CTLFLAG_CAPRD | CTLFLAG_MPSAFE, NULL,
87 0,
"Max number of CPUs that the system was compiled for.");
89SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD|CTLTYPE_INT|CTLFLAG_MPSAFE,
91 "Indicates system is running in SMP mode");
94SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
95 &
smp_disabled, 0,
"SMP has been disabled from the loader");
99 "Number of CPUs online");
102SYSCTL_INT(_kern_smp, OID_AUTO, threads_per_core, CTLFLAG_RD|CTLFLAG_CAPRD,
107 "Number of physical cores online");
111 "Topology override setting; 0 is default provided by hardware.");
115static int forward_signal_enabled = 1;
116SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
117 &forward_signal_enabled, 0,
118 "Forwarding of a signal to a process on a different CPU");
/*
 * State shared between smp_rendezvous_cpus() (the initiator) and
 * smp_rendezvous_action() (every participating CPU).  All fields are
 * volatile: they are written by the initiator and read concurrently by
 * the other CPUs, synchronized by the smp_rv_waiters[] barrier counters.
 */
static volatile int smp_rv_ncpus;		/* # of CPUs in the rendezvous */
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;		/* argument for the callbacks */
static volatile int smp_rv_waiters[4];		/* barrier stage counters */
134struct mtx smp_ipi_mtx;
140mp_setmaxid(
void *
dummy)
145 KASSERT(
mp_ncpus >= 1, (
"%s: CPU count < 1", __func__));
147 (
"%s: one CPU but mp_maxid is not zero", __func__));
149 (
"%s: counters out of sync: max %d, count %d", __func__,
152SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
161 mtx_init(&smp_ipi_mtx,
"smp rendezvous", NULL, MTX_SPIN);
167 CPU_SETOF(PCPU_GET(cpuid), &
all_cpus);
172 printf(
"FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
181SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
184forward_signal(
struct thread *td)
193 THREAD_LOCK_ASSERT(td, MA_OWNED);
194 KASSERT(TD_IS_RUNNING(td),
195 (
"forward_signal: thread is not TDS_RUNNING"));
197 CTR1(KTR_SMP,
"forward_signal(%p)", td->td_proc);
201 if (!forward_signal_enabled)
211 ipi_cpu(
id, IPI_AST);
229#if defined(__amd64__) || defined(__i386__)
235generic_stop_cpus(cpuset_t map, u_int
type)
238 char cpusetbuf[CPUSETBUFSIZ];
240 static volatile u_int stopping_cpu = NOCPU;
242 volatile cpuset_t *
cpus;
245 type == IPI_STOP ||
type == IPI_STOP_HARD
247 ||
type == IPI_SUSPEND
249 , (
"%s: invalid stop type", __func__));
254 CTR2(KTR_SMP,
"stop_cpus(%s) with %u type",
265 if (
type == IPI_SUSPEND)
266 mtx_lock_spin(&smp_ipi_mtx);
270 if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
272 if (stopping_cpu != PCPU_GET(cpuid))
273 while (atomic_cmpset_int(&stopping_cpu, NOCPU,
274 PCPU_GET(cpuid)) == 0)
275 while (stopping_cpu != NOCPU)
279 ipi_selected(map,
type);
285 if (
type == IPI_SUSPEND)
286 cpus = &suspended_cpus;
289 cpus = &stopped_cpus;
292 while (!CPU_SUBSET(
cpus, &map)) {
296 if (i == 100000000) {
297 printf(
"timeout stopping cpus\n");
303 if (
type == IPI_SUSPEND)
304 mtx_unlock_spin(&smp_ipi_mtx);
307 stopping_cpu = NOCPU;
312stop_cpus(cpuset_t map)
315 return (generic_stop_cpus(map, IPI_STOP));
319stop_cpus_hard(cpuset_t map)
322 return (generic_stop_cpus(map, IPI_STOP_HARD));
327suspend_cpus(cpuset_t map)
330 return (generic_stop_cpus(map, IPI_SUSPEND));
348generic_restart_cpus(cpuset_t map, u_int
type)
351 char cpusetbuf[CPUSETBUFSIZ];
353 volatile cpuset_t *
cpus;
356 KASSERT(
type == IPI_STOP ||
type == IPI_STOP_HARD
357 ||
type == IPI_SUSPEND, (
"%s: invalid stop type", __func__));
364 if (
type == IPI_SUSPEND)
365 cpus = &resuming_cpus;
367 cpus = &stopped_cpus;
370 if (
type == IPI_SUSPEND)
371 CPU_COPY_STORE_REL(&map, &toresume_cpus);
373 CPU_COPY_STORE_REL(&map, &started_cpus);
380 if (
type == IPI_STOP) {
381 struct monitorbuf *mb;
385 if (!CPU_ISSET(
id, &map))
389 atomic_store_int(&mb->stop_state,
390 MONITOR_STOPSTATE_RUNNING);
394 if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
396 while (CPU_OVERLAP(
cpus, &map))
400 KASSERT(
type == IPI_STOP ||
type == IPI_STOP_HARD,
401 (
"%s: invalid stop type", __func__));
408 cpus = &stopped_cpus;
411 CPU_COPY_STORE_REL(&map, &started_cpus);
414 while (CPU_OVERLAP(
cpus, &map))
421restart_cpus(cpuset_t map)
424 return (generic_restart_cpus(map, IPI_STOP));
429resume_cpus(cpuset_t map)
432 return (generic_restart_cpus(map, IPI_SUSPEND));
447smp_rendezvous_action(
void)
450 void *local_func_arg;
451 void (*local_setup_func)(
void*);
452 void (*local_action_func)(
void*);
453 void (*local_teardown_func)(
void*);
459 atomic_add_acq_int(&smp_rv_waiters[0], 1);
460 while (smp_rv_waiters[0] < smp_rv_ncpus)
464 local_func_arg = smp_rv_func_arg;
465 local_setup_func = smp_rv_setup_func;
466 local_action_func = smp_rv_action_func;
467 local_teardown_func = smp_rv_teardown_func;
494 owepreempt = td->td_owepreempt;
503 if (smp_rv_setup_func != NULL)
504 smp_rv_setup_func(smp_rv_func_arg);
505 atomic_add_int(&smp_rv_waiters[1], 1);
506 while (smp_rv_waiters[1] < smp_rv_ncpus)
510 if (local_action_func != NULL)
511 local_action_func(local_func_arg);
519 atomic_add_int(&smp_rv_waiters[2], 1);
520 while (smp_rv_waiters[2] < smp_rv_ncpus)
523 if (local_teardown_func != NULL)
524 local_teardown_func(local_func_arg);
538 atomic_add_rel_int(&smp_rv_waiters[3], 1);
541 KASSERT(owepreempt == td->td_owepreempt,
542 (
"rendezvous action changed td_owepreempt"));
547 void (* setup_func)(
void *),
548 void (* action_func)(
void *),
549 void (* teardown_func)(
void *),
552 int curcpumap, i, ncpus = 0;
557 if (setup_func != NULL)
559 if (action_func != NULL)
561 if (teardown_func != NULL)
571 MPASS(curthread->td_md.md_spinlock_count == 0);
574 if (CPU_ISSET(i, &map))
578 panic(
"ncpus is 0 with non-zero map");
580 mtx_lock_spin(&smp_ipi_mtx);
583 smp_rv_ncpus = ncpus;
584 smp_rv_setup_func = setup_func;
585 smp_rv_action_func = action_func;
586 smp_rv_teardown_func = teardown_func;
587 smp_rv_func_arg = arg;
588 smp_rv_waiters[1] = 0;
589 smp_rv_waiters[2] = 0;
590 smp_rv_waiters[3] = 0;
591 atomic_store_rel_int(&smp_rv_waiters[0], 0);
597 curcpumap = CPU_ISSET(curcpu, &map);
598 CPU_CLR(curcpu, &map);
599 ipi_selected(map, IPI_RENDEZVOUS);
603 smp_rendezvous_action();
616 while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
619 mtx_unlock_spin(&smp_ipi_mtx);
624 void (* action_func)(
void *),
625 void (* teardown_func)(
void *),
631static struct cpu_group group[MAXCPU * MAX_CACHE_LEVELS + 1];
634smp_topo_fill(
struct cpu_group *cg)
638 for (c = 0; c < cg->cg_children; c++)
639 smp_topo_fill(&cg->cg_child[c]);
640 cg->cg_first = CPU_FFS(&cg->cg_mask) - 1;
641 cg->cg_last = CPU_FLS(&cg->cg_mask) - 1;
647 char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
648 struct cpu_group *top;
656 top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
660 top = smp_topo_none();
664 top = smp_topo_1level(CG_SHARE_L2, 2, 0);
668 top = smp_topo_1level(CG_SHARE_L3, 4, 0);
672 top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
676 top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
680 top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
692 panic(
"Built bad topology at %p. CPU count %d != %d",
694 if (CPU_CMP(&top->cg_mask, &
all_cpus))
695 panic(
"Built bad topology at %p. CPU mask (%s) != (%s)",
703 while (top->cg_children == 1) {
704 top = &top->cg_child[0];
705 top->cg_parent = NULL;
712smp_topo_alloc(u_int
count)
719 return (&group[curr]);
725 struct cpu_group *top;
728 top->cg_parent = NULL;
729 top->cg_child = NULL;
732 top->cg_children = 0;
733 top->cg_level = CG_SHARE_NONE;
740smp_topo_addleaf(
struct cpu_group *
parent,
struct cpu_group *
child,
int share,
743 char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
751 child->cg_child = NULL;
752 child->cg_children = 0;
753 child->cg_level = share;
759 if (CPU_OVERLAP(&
parent->cg_mask, &
child->cg_mask))
760 panic(
"Duplicate children in %p. mask (%s) child (%s)",
772smp_topo_1level(
int share,
int count,
int flags)
774 struct cpu_group *
child;
775 struct cpu_group *top;
783 top->cg_child =
child = &group[1];
784 top->cg_level = CG_SHARE_NONE;
785 for (i = 0; i < packages; i++,
child++)
791smp_topo_2level(
int l2share,
int l2count,
int l1share,
int l1count,
794 struct cpu_group *top;
795 struct cpu_group *l1g;
796 struct cpu_group *l2g;
805 top->cg_level = CG_SHARE_NONE;
806 top->cg_children =
mp_ncpus / (l2count * l1count);
807 l1g = l2g + top->cg_children;
808 for (i = 0; i < top->cg_children; i++, l2g++) {
809 l2g->cg_parent = top;
811 l2g->cg_level = l2share;
812 for (j = 0; j < l2count; j++, l1g++)
813 cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
820smp_topo_find(
struct cpu_group *top,
int cpu)
822 struct cpu_group *cg;
827 CPU_SETOF(cpu, &
mask);
830 if (!CPU_OVERLAP(&cg->cg_mask, &
mask))
832 if (cg->cg_children == 0)
834 children = cg->cg_children;
835 for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
836 if (CPU_OVERLAP(&cg->cg_mask, &
mask))
845 void (*setup_func)(
void *),
846 void (*action_func)(
void *),
847 void (*teardown_func)(
void *),
855 if (setup_func != NULL)
857 if (action_func != NULL)
859 if (teardown_func != NULL)
866 void (*action_func)(
void *),
867 void (*teardown_func)(
void *),
886 KASSERT(PCPU_GET(cpuid) == 0, (
"UP must have a CPU ID of zero"));
888SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
896 KASSERT((!
smp_started),(
"smp_no_rendezvous called and smp is started"));
902 void (* setup_func)(
void *),
903 void (* action_func)(
void *),
904 void (* teardown_func)(
void *),
905 void (* wait_func)(
void *,
int),
906 struct smp_rendezvous_cpus_retry_arg *arg)
910 CPU_COPY(&map, &arg->cpus);
917 if (setup_func != NULL)
919 if (action_func != NULL)
921 if (teardown_func != NULL)
939 if (CPU_EMPTY(&arg->cpus))
943 if (!CPU_ISSET(cpu, &arg->cpus))
954 CPU_CLR_ATOMIC(curcpu, &arg->cpus);
975 if ((prio & PDROP) == 0) {
976 gen =
malloc(
sizeof(u_int) * MAXCPU, M_TEMP, M_WAITOK);
977 for (cpu = 0; cpu <=
mp_maxid; cpu++) {
978 if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
981 gen[cpu] = pcpu->pc_idlethread->td_generation;
984 for (cpu = 0; cpu <=
mp_maxid; cpu++) {
985 if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
988 thread_lock(curthread);
990 thread_unlock(curthread);
991 if ((prio & PDROP) != 0)
993 while (gen[cpu] == pcpu->pc_idlethread->td_generation) {
995 if (error != EWOULDBLOCK)
1001 thread_lock(curthread);
1003 thread_unlock(curthread);
1004 if ((prio & PDROP) == 0)
1026 struct thread *td, *newtd;
1030 MPASS(curthread->td_critnest == 0);
1033 pcpu = cpuid_to_pcpu[cpu];
1034 td = pcpu->pc_curthread;
1036 if (td->td_critnest == 0)
1039 newtd = (
struct thread *)
1040 atomic_load_acq_ptr((
void *)pcpu->pc_curthread);
1051 atomic_thread_fence_seq_cst();
1084 error = SYSCTL_OUT(req, &active,
sizeof(active));
1090topo_init_node(
struct topo_node *node)
1093 bzero(node,
sizeof(*node));
1094 TAILQ_INIT(&node->children);
1098topo_init_root(
struct topo_node *root)
1101 topo_init_node(root);
1102 root->type = TOPO_TYPE_SYSTEM;
1110topo_add_node_by_hwid(
struct topo_node *
parent,
int hwid,
1111 topo_node_type
type, uintptr_t subtype)
1113 struct topo_node *node;
1115 TAILQ_FOREACH_REVERSE(node, &
parent->children,
1116 topo_children, siblings) {
1117 if (node->hwid == hwid
1118 && node->type ==
type && node->subtype == subtype) {
1123 node =
malloc(
sizeof(*node), M_TOPO, M_WAITOK);
1124 topo_init_node(node);
1128 node->subtype = subtype;
1129 TAILQ_INSERT_TAIL(&
parent->children, node, siblings);
1139topo_find_node_by_hwid(
struct topo_node *
parent,
int hwid,
1140 topo_node_type
type, uintptr_t subtype)
1143 struct topo_node *node;
1145 TAILQ_FOREACH(node, &
parent->children, siblings) {
1146 if (node->hwid == hwid
1147 && node->type ==
type && node->subtype == subtype) {
1162topo_promote_child(
struct topo_node *
child)
1164 struct topo_node *next;
1165 struct topo_node *node;
1166 struct topo_node *
parent;
1169 next = TAILQ_NEXT(
child, siblings);
1171 TAILQ_INSERT_HEAD(&
parent->children,
child, siblings);
1173 while (next != NULL) {
1175 next = TAILQ_NEXT(node, siblings);
1176 TAILQ_REMOVE(&
parent->children, node, siblings);
1177 TAILQ_INSERT_AFTER(&
parent->children,
child, node, siblings);
1187topo_next_node(
struct topo_node *top,
struct topo_node *node)
1189 struct topo_node *next;
1191 if ((next = TAILQ_FIRST(&node->children)) != NULL)
1194 if ((next = TAILQ_NEXT(node, siblings)) != NULL)
1197 while (node != top && (node = node->parent) != top)
1198 if ((next = TAILQ_NEXT(node, siblings)) != NULL)
1209topo_next_nonchild_node(
struct topo_node *top,
struct topo_node *node)
1211 struct topo_node *next;
1213 if ((next = TAILQ_NEXT(node, siblings)) != NULL)
1216 while (node != top && (node = node->parent) != top)
1217 if ((next = TAILQ_NEXT(node, siblings)) != NULL)
1228topo_set_pu_id(
struct topo_node *node, cpuid_t
id)
1231 KASSERT(node->type == TOPO_TYPE_PU,
1232 (
"topo_set_pu_id: wrong node type: %u", node->type));
1233 KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0,
1234 (
"topo_set_pu_id: cpuset already not empty"));
1236 CPU_SET(
id, &node->cpuset);
1237 node->cpu_count = 1;
1240 while ((node = node->parent) != NULL) {
1241 KASSERT(!CPU_ISSET(
id, &node->cpuset),
1242 (
"logical ID %u is already set in node %p",
id, node));
1243 CPU_SET(
id, &node->cpuset);
1248static struct topology_spec {
1249 topo_node_type
type;
1252} topology_level_table[TOPO_LEVEL_COUNT] = {
1253 [TOPO_LEVEL_PKG] = { .type = TOPO_TYPE_PKG, },
1254 [TOPO_LEVEL_GROUP] = { .type = TOPO_TYPE_GROUP, },
1255 [TOPO_LEVEL_CACHEGROUP] = {
1256 .type = TOPO_TYPE_CACHE,
1257 .match_subtype =
true,
1258 .subtype = CG_SHARE_L3,
1260 [TOPO_LEVEL_CORE] = { .type = TOPO_TYPE_CORE, },
1261 [TOPO_LEVEL_THREAD] = { .type = TOPO_TYPE_PU, },
1265topo_analyze_table(
struct topo_node *root,
int all,
enum topo_level
level,
1266 struct topo_analysis *results)
1268 struct topology_spec *spec;
1269 struct topo_node *node;
1272 if (
level >= TOPO_LEVEL_COUNT)
1275 spec = &topology_level_table[
level];
1277 node = topo_next_node(root, root);
1279 while (node != NULL) {
1280 if (node->type != spec->type ||
1281 (spec->match_subtype && node->subtype != spec->subtype)) {
1282 node = topo_next_node(root, node);
1285 if (!all && CPU_EMPTY(&node->cpuset)) {
1286 node = topo_next_nonchild_node(root, node);
1292 if (!topo_analyze_table(node, all,
level + 1, results))
1295 node = topo_next_nonchild_node(root, node);
1302 if (!topo_analyze_table(root, all,
level + 1, results))
1306 if (results->entities[
level] == -1)
1323topo_analyze(
struct topo_node *topo_root,
int all,
1324 struct topo_analysis *results)
1327 results->entities[TOPO_LEVEL_PKG] = -1;
1328 results->entities[TOPO_LEVEL_CORE] = -1;
1329 results->entities[TOPO_LEVEL_THREAD] = -1;
1330 results->entities[TOPO_LEVEL_GROUP] = -1;
1331 results->entities[TOPO_LEVEL_CACHEGROUP] = -1;
1333 if (!topo_analyze_table(topo_root, all, TOPO_LEVEL_PKG, results))
1336 KASSERT(results->entities[TOPO_LEVEL_PKG] > 0,
1337 (
"bug in topology or analysis"));
const struct cf_level * level
device_property_type_t type
MALLOC_DEFINE(M_BINMISC, KMOD_NAME, "misc binary image activator")
char * cpusetobj_strprint(char *buf, const cpuset_t *set)
void *() malloc(size_t size, struct malloc_type *mtp, int flags)
void free(void *addr, struct malloc_type *mtp)
void panic(const char *fmt,...)
void sched_bind(struct thread *td, int cpu)
void sched_unbind(struct thread *td)
struct pcpu * pcpu_find(u_int cpuid)
int printf(const char *fmt,...)
void smp_no_rendezvous_barrier(void *dummy)
void smp_rendezvous_cpus(cpuset_t map, void(*setup_func)(void *), void(*action_func)(void *), void(*teardown_func)(void *), void *arg)
void smp_rendezvous_cpus_retry(cpuset_t map, void(*setup_func)(void *), void(*action_func)(void *), void(*teardown_func)(void *), void(*wait_func)(void *, int), struct smp_rendezvous_cpus_retry_arg *arg)
static void cpus_fence_seq_cst_issue(void *arg __unused)
SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD|CTLTYPE_INT|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode")
void smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *arg)
void cpus_fence_seq_cst(void)
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setvariables_for_up, NULL)
static void mp_setvariables_for_up(void *dummy)
int quiesce_cpus(cpuset_t map, const char *wmesg, int prio)
void smp_rendezvous(void(*setup_func)(void *), void(*action_func)(void *), void(*teardown_func)(void *), void *arg)
SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, "Max CPU ID.")
int quiesce_all_cpus(const char *wmesg, int prio)
void quiesce_all_critical(void)
static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, "Kernel SMP")
static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)