FreeBSD kernel kern code
kern_shutdown.c
Go to the documentation of this file.
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1986, 1988, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD$");
41
42#include "opt_ddb.h"
43#include "opt_ekcd.h"
44#include "opt_kdb.h"
45#include "opt_panic.h"
46#include "opt_printf.h"
47#include "opt_sched.h"
48#include "opt_watchdog.h"
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/bio.h>
53#include <sys/boottrace.h>
54#include <sys/buf.h>
55#include <sys/conf.h>
56#include <sys/compressor.h>
57#include <sys/cons.h>
58#include <sys/disk.h>
59#include <sys/eventhandler.h>
60#include <sys/filedesc.h>
61#include <sys/jail.h>
62#include <sys/kdb.h>
63#include <sys/kernel.h>
64#include <sys/kerneldump.h>
65#include <sys/kthread.h>
66#include <sys/ktr.h>
67#include <sys/malloc.h>
68#include <sys/mbuf.h>
69#include <sys/mount.h>
70#include <sys/priv.h>
71#include <sys/proc.h>
72#include <sys/reboot.h>
73#include <sys/resourcevar.h>
74#include <sys/rwlock.h>
75#include <sys/sbuf.h>
76#include <sys/sched.h>
77#include <sys/smp.h>
78#include <sys/sysctl.h>
79#include <sys/sysproto.h>
80#include <sys/taskqueue.h>
81#include <sys/vnode.h>
82#include <sys/watchdog.h>
83
84#include <crypto/chacha20/chacha.h>
85#include <crypto/rijndael/rijndael-api-fst.h>
86#include <crypto/sha2/sha256.h>
87
88#include <ddb/ddb.h>
89
90#include <machine/cpu.h>
91#include <machine/dump.h>
92#include <machine/pcb.h>
93#include <machine/smp.h>
94
95#include <security/mac/mac_framework.h>
96
97#include <vm/vm.h>
98#include <vm/vm_object.h>
99#include <vm/vm_page.h>
100#include <vm/vm_pager.h>
101#include <vm/swap_pager.h>
102
103#include <sys/signalvar.h>
104
105static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");
106
107#ifndef PANIC_REBOOT_WAIT_TIME
108#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
109#endif
111SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
113 "Seconds to wait before rebooting after a panic");
114
115/*
116 * Note that stdarg.h and the ANSI style va_start macro are used for both
117 * ANSI and traditional C compilers.
118 */
119#include <machine/stdarg.h>
120
121#ifdef KDB
122#ifdef KDB_UNATTENDED
123int debugger_on_panic = 0;
124#else
125int debugger_on_panic = 1;
126#endif
127SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
128 CTLFLAG_RWTUN | CTLFLAG_SECURE,
129 &debugger_on_panic, 0, "Run debugger on kernel panic");
130
131static bool debugger_on_recursive_panic = false;
132SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic,
133 CTLFLAG_RWTUN | CTLFLAG_SECURE,
134 &debugger_on_recursive_panic, 0, "Run debugger on recursive kernel panic");
135
136int debugger_on_trap = 0;
137SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
138 CTLFLAG_RWTUN | CTLFLAG_SECURE,
139 &debugger_on_trap, 0, "Run debugger on kernel trap before panic");
140
141#ifdef KDB_TRACE
142static int trace_on_panic = 1;
143static bool trace_all_panics = true;
144#else
145static int trace_on_panic = 0;
146static bool trace_all_panics = false;
147#endif
148SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
149 CTLFLAG_RWTUN | CTLFLAG_SECURE,
150 &trace_on_panic, 0, "Print stack trace on kernel panic");
151SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN,
152 &trace_all_panics, 0, "Print stack traces on secondary kernel panics");
153#endif /* KDB */
154
155static int sync_on_panic = 0;
156SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
157 &sync_on_panic, 0, "Do a sync before rebooting from a panic");
158
159static bool poweroff_on_panic = 0;
160SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN,
161 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic");
162
163static bool powercycle_on_panic = 0;
164SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
165 &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");
166
167static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
168 "Shutdown environment");
169
170#ifndef DIAGNOSTIC
171static int show_busybufs;
172#else
173static int show_busybufs = 1;
174#endif
175SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
176 &show_busybufs, 0,
177 "Show busy buffers during shutdown");
178
180SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
181 &suspend_blocked, 0, "Block suspend due to a pending shutdown");
182
183#ifdef EKCD
184FEATURE(ekcd, "Encrypted kernel crash dumps support");
185
186MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");
187
/*
 * Per-dumper state for encrypted kernel crash dumps (EKCD only).
 * Allocated by kerneldumpcrypto_create() with the kdc_dumpkey storage
 * appended to the structure.
 */
188struct kerneldumpcrypto {
	/* Cipher selector; the code in this file handles the AES-256-CBC and ChaCha20 values. */
189 uint8_t kdc_encryption;
	/* Current IV; re-derived by hashing in kerneldumpcrypto_init(). */
190 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE];
	/* Cipher context; which member is live depends on kdc_encryption. */
191 union {
192 struct {
193 keyInstance aes_ki;
194 cipherInstance aes_ci;
195 } u_aes;
196 struct chacha_ctx u_chacha;
197 } u;
/* Shorthand accessors for the union members above. */
198#define kdc_ki u.u_aes.aes_ki
199#define kdc_ci u.u_aes.aes_ci
200#define kdc_chacha u.u_chacha
	/* Size in bytes of the trailing kdc_dumpkey storage (block-size rounded). */
201 uint32_t kdc_dumpkeysize;
	/* Key header carrying the encryption type, IV and encrypted key. */
202 struct kerneldumpkey kdc_dumpkey[];
203};
204#endif
205
207 uint8_t kdc_format;
209 uint8_t *kdc_buf;
210 size_t kdc_resid;
211};
212
213static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
214 uint8_t compression);
215static void kerneldumpcomp_destroy(struct dumperinfo *di);
216static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);
217
218static int kerneldump_gzlevel = 6;
219SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
221 "Kernel crash dump compression level");
222
223/*
224 * Variable panicstr contains argument to first call to panic; used as flag
225 * to indicate that the kernel has already called panic.
226 */
227const char *panicstr;
228bool __read_frequently panicked;
229
230int __read_mostly dumping; /* system is dumping */
231int rebooting; /* system is rebooting */
232/*
233 * Used to serialize between sysctl kern.shutdown.dumpdevname and list
234 * modifications via ioctl.
235 */
236static struct mtx dumpconf_list_lk;
237MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF);
238
239/* Our selected dumper(s). */
240static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs =
241 TAILQ_HEAD_INITIALIZER(dumper_configs);
242
243/* Context information for dump-debuggers. */
244static struct pcb dumppcb; /* Registers. */
245lwpid_t dumptid; /* Thread ID. */
246
247static struct cdevsw reroot_cdevsw = {
248 .d_version = D_VERSION,
249 .d_name = "reroot",
250};
251
252static void poweroff_wait(void *, int);
253static void shutdown_halt(void *junk, int howto);
254static void shutdown_panic(void *junk, int howto);
255static void shutdown_reset(void *junk, int howto);
256static int kern_reroot(void);
257
258/* register various local shutdown events */
259static void
260shutdown_conf(void *unused)
261{
262
263 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
264 SHUTDOWN_PRI_FIRST);
265 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
266 SHUTDOWN_PRI_LAST + 100);
267 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
268 SHUTDOWN_PRI_LAST + 100);
269 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
270 SHUTDOWN_PRI_LAST + 200);
271}
272
273SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
274
275/*
276 * The only reason this exists is to create the /dev/reroot/ directory,
277 * used by reroot code in init(8) as a mountpoint for tmpfs.
278 */
279static void
280reroot_conf(void *unused)
281{
282 int error;
283 struct cdev *cdev;
284
285 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
286 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
287 if (error != 0) {
288 printf("%s: failed to create device node, error %d",
289 __func__, error);
290 }
291}
292
293SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
294
295/*
296 * The system call that results in a reboot.
297 */
298/* ARGSUSED */
299int
300sys_reboot(struct thread *td, struct reboot_args *uap)
301{
302 int error;
303
304 error = 0;
305#ifdef MAC
306 error = mac_system_check_reboot(td->td_ucred, uap->opt);
307#endif
308 if (error == 0)
309 error = priv_check(td, PRIV_REBOOT);
310 if (error == 0) {
311 if (uap->opt & RB_REROOT)
312 error = kern_reroot();
313 else
314 kern_reboot(uap->opt);
315 }
316 return (error);
317}
318
319static void
320shutdown_nice_task_fn(void *arg, int pending __unused)
321{
322 int howto;
323
324 howto = (uintptr_t)arg;
325 /* Send a signal to init(8) and have it shutdown the world. */
326 PROC_LOCK(initproc);
327 if ((howto & RB_POWEROFF) != 0) {
328 BOOTTRACE("SIGUSR2 to init(8)");
329 kern_psignal(initproc, SIGUSR2);
330 } else if ((howto & RB_POWERCYCLE) != 0) {
331 BOOTTRACE("SIGWINCH to init(8)");
332 kern_psignal(initproc, SIGWINCH);
333 } else if ((howto & RB_HALT) != 0) {
334 BOOTTRACE("SIGUSR1 to init(8)");
335 kern_psignal(initproc, SIGUSR1);
336 } else {
337 BOOTTRACE("SIGINT to init(8)");
338 kern_psignal(initproc, SIGINT);
339 }
340 PROC_UNLOCK(initproc);
341}
342
343static struct task shutdown_nice_task = TASK_INITIALIZER(0,
344 &shutdown_nice_task_fn, NULL);
345
346/*
347 * Called by events that want to shut down, e.g. <CTL><ALT><DEL> on a PC.
 *
 * When init(8) exists and the scheduler has not been stopped, howto is
 * stored as the context of shutdown_nice_task and the task is queued on
 * taskqueue_fast.  Otherwise kern_reboot() is called directly with
 * RB_NOSYNC added to howto.
 *
 * NOTE(review): the declarator line (function name and the howto
 * parameter) is missing from this listing; the numbering jumps from 349
 * to 351.  Restore it from the original file.
348 */
349void
351{
352
353 if (initproc != NULL && !SCHEDULER_STOPPED()) {
354 BOOTTRACE("shutdown initiated");
	/* Pass the flags to the task handler through its context pointer. */
355 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto;
356 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task);
357 } else {
358 /*
359 * No init(8) running, or scheduler would not allow it
360 * to run, so simply reboot.
361 */
362 kern_reboot(howto | RB_NOSYNC);
363 }
364}
365
/*
 * Print "Uptime: " followed by the value of ts.tv_sec broken down into
 * days, hours, minutes and seconds.  Leading units that are zero are
 * omitted; once a unit has been printed every smaller unit is printed too.
 *
 * NOTE(review): two lines are missing from this listing: the declarator
 * (between 366 and 368) and the statement that fills in ts (372).  As
 * shown, ts would be read uninitialized; restore both from the original.
 */
366static void
368{
	/* f is set once any unit has been printed. */
369 int f;
370 struct timespec ts;
371
373 printf("Uptime: ");
374 f = 0;
375 if (ts.tv_sec >= 86400) {
376 printf("%ldd", (long)ts.tv_sec / 86400);
377 ts.tv_sec %= 86400;
378 f = 1;
379 }
380 if (f || ts.tv_sec >= 3600) {
381 printf("%ldh", (long)ts.tv_sec / 3600);
382 ts.tv_sec %= 3600;
383 f = 1;
384 }
385 if (f || ts.tv_sec >= 60) {
386 printf("%ldm", (long)ts.tv_sec / 60);
387 ts.tv_sec %= 60;
388 f = 1;
389 }
	/* Seconds are always printed and end the line. */
390 printf("%lds\n", (long)ts.tv_sec);
391}
392
393int
394doadump(boolean_t textdump)
395{
396 boolean_t coredump;
397 int error;
398
399 error = 0;
400 if (dumping)
401 return (EBUSY);
402 if (TAILQ_EMPTY(&dumper_configs))
403 return (ENXIO);
404
405 savectx(&dumppcb);
406 dumptid = curthread->td_tid;
407 dumping++;
408
409 coredump = TRUE;
410#ifdef DDB
411 if (textdump && textdump_pending) {
412 coredump = FALSE;
413 textdump_dumpsys(TAILQ_FIRST(&dumper_configs));
414 }
415#endif
416 if (coredump) {
417 struct dumperinfo *di;
418
419 TAILQ_FOREACH(di, &dumper_configs, di_next) {
420 error = dumpsys(di);
421 if (error == 0)
422 break;
423 }
424 }
425
426 dumping--;
427 return (error);
428}
429
430/*
431 * Trace the shutdown reason.
432 */
433static void
434reboottrace(int howto)
435{
436 if ((howto & RB_DUMP) != 0) {
437 if ((howto & RB_HALT) != 0)
438 BOOTTRACE("system panic: halting...");
439 if ((howto & RB_POWEROFF) != 0)
440 BOOTTRACE("system panic: powering off...");
441 if ((howto & (RB_HALT|RB_POWEROFF)) == 0)
442 BOOTTRACE("system panic: rebooting...");
443 } else {
444 if ((howto & RB_HALT) != 0)
445 BOOTTRACE("system halting...");
446 if ((howto & RB_POWEROFF) != 0)
447 BOOTTRACE("system powering off...");
448 if ((howto & (RB_HALT|RB_POWEROFF)) == 0)
449 BOOTTRACE("system rebooting...");
450 }
451}
452
453/*
454 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or
455 * power off.
 *
 * howto is a mask of RB_* flags.  The visible sequence is: drop Giant,
 * bind to the first CPU (SMP), set rebooting, run the shutdown_pre_sync
 * handlers, sync filesystems (unless cold, RB_NOSYNC, or already tried),
 * print the uptime, run the shutdown_post_sync handlers, optionally dump,
 * and run the shutdown_final handlers.  The function ends in an infinite
 * loop and does not return.
 *
 * NOTE(review): listing lines 511 and 533 are missing here (the statement
 * between the two "bufshutdown" traces, and the body of the
 * "if (shutdown_trace)" test).  Restore them from the original file; as
 * shown, the shutdown_trace test would govern the EVENTHANDLER_INVOKE that
 * follows it.
456 */
457void
458kern_reboot(int howto)
459{
	/* Set when the filesystem sync step is entered, so it runs at most once. */
460 static int once = 0;
461
	/* A shutdown not driven by init itself is traced as "dirty". */
462 if (initproc != NULL && curproc != initproc)
463 BOOTTRACE("kernel shutdown (dirty) started");
464 else
465 BOOTTRACE("kernel shutdown (clean) started");
466
467 /*
468 * Normal paths here don't hold Giant, but we can wind up here
469 * unexpectedly with it held. Drop it now so we don't have to
470 * drop and pick it up elsewhere. The paths it is locking will
471 * never be returned to, and it is preferable to preclude
472 * deadlock than to lock against code that won't ever
473 * continue.
474 */
475 while (mtx_owned(&Giant))
476 mtx_unlock(&Giant);
477
478#if defined(SMP)
479 /*
480 * Bind us to the first CPU so that all shutdown code runs there. Some
481 * systems don't shutdown properly (i.e., ACPI power off) if we
482 * run on another processor.
483 */
484 if (!SCHEDULER_STOPPED()) {
485 thread_lock(curthread);
486 sched_bind(curthread, CPU_FIRST());
487 thread_unlock(curthread);
488 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(),
489 ("%s: not running on cpu 0", __func__));
490 }
491#endif
492 /* We're in the process of rebooting. */
493 rebooting = 1;
494 reboottrace(howto);
495
496 /* We are out of the debugger now. */
497 kdb_active = 0;
498
499 /*
500 * Do any callouts that should be done BEFORE syncing the filesystems.
501 */
502 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
503 BOOTTRACE("shutdown pre sync complete");
504
505 /*
506 * Now sync filesystems
507 */
508 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
509 once = 1;
510 BOOTTRACE("bufshutdown begin");
512 BOOTTRACE("bufshutdown end");
513 }
514
515 print_uptime();
516
517 cngrab();
518
519 /*
520 * Ok, now do things that assume all filesystem activity has
521 * been completed.
522 */
523 EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
524 BOOTTRACE("shutdown post sync complete");
525
	/* Dump only when RB_DUMP is set without RB_HALT, and not already dumping. */
526 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
527 doadump(TRUE);
528
529 /* Now that we're going to really halt the system... */
530 BOOTTRACE("shutdown final begin");
531
532 if (shutdown_trace)
534
535 EVENTHANDLER_INVOKE(shutdown_final, howto);
536
537 for(;;) ; /* safety against shutdown_reset not working */
538 /* NOTREACHED */
539}
540
541/*
542 * The system call that results in changing the rootfs.
 *
 * Only init may do this; any other caller gets EPERM.  The filesystem
 * holding the running executable is busied and, together with the mount
 * saved in rootdevmp, taken off the mount list; both are put back on the
 * list further down.  Returns 0 on success, the vn_lock() error, or
 * ENOENT when the filesystem cannot be busied or the text vnode has been
 * doomed.
 *
 * NOTE(review): several lines are missing from this listing (545, the
 * declarator; 591; 605; 614; 619).  The steps described by the comments
 * "Unmount everything...", and "Mount the new rootfs." have no visible
 * statements; restore them from the original file.
543 */
544static int
546{
547 struct vnode *oldrootvnode, *vp;
548 struct mount *mp, *devmp;
549 int error;
550
551 if (curproc != initproc)
552 return (EPERM);
553
554 /*
555 * Mark the filesystem containing currently-running executable
556 * (the temporary copy of init(8)) busy.
557 */
558 vp = curproc->p_textvp;
559 error = vn_lock(vp, LK_SHARED);
560 if (error != 0)
561 return (error);
562 mp = vp->v_mount;
563 error = vfs_busy(mp, MBF_NOWAIT);
564 if (error != 0) {
	/*
	 * The non-blocking attempt failed: hold a reference on the mount,
	 * drop the vnode lock, retry vfs_busy() without MBF_NOWAIT, then
	 * relock the vnode and check that it was not doomed meanwhile.
	 */
565 vfs_ref(mp);
566 VOP_UNLOCK(vp);
567 error = vfs_busy(mp, 0);
568 vn_lock(vp, LK_SHARED | LK_RETRY);
569 vfs_rel(mp);
570 if (error != 0) {
571 VOP_UNLOCK(vp);
572 return (ENOENT);
573 }
574 if (VN_IS_DOOMED(vp)) {
575 VOP_UNLOCK(vp);
576 vfs_unbusy(mp);
577 return (ENOENT);
578 }
579 }
580 VOP_UNLOCK(vp);
581
582 /*
583 * Remove the filesystem containing currently-running executable
584 * from the mount list, to prevent it from being unmounted
585 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
586 *
587 * Also preserve /dev - forcibly unmounting it could cause driver
588 * reinitialization.
589 */
590
592 devmp = rootdevmp;
593 rootdevmp = NULL;
594
595 mtx_lock(&mountlist_mtx);
596 TAILQ_REMOVE(&mountlist, mp, mnt_list);
597 TAILQ_REMOVE(&mountlist, devmp, mnt_list);
598 mtx_unlock(&mountlist_mtx);
599
	/* Remember the old root vnode so references to it can be updated below. */
600 oldrootvnode = rootvnode;
601
602 /*
603 * Unmount everything except for the two filesystems preserved above.
604 */
606
607 /*
608 * Add /dev back; vfs_mountroot() will move it into its new place.
609 */
610 mtx_lock(&mountlist_mtx);
611 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
612 mtx_unlock(&mountlist_mtx);
613 rootdevmp = devmp;
615
616 /*
617 * Mount the new rootfs.
618 */
620
621 /*
622 * Update all references to the old rootvnode.
623 */
624 mountcheckdirs(oldrootvnode, rootvnode);
625
626 /*
627 * Add the temporary filesystem back and unbusy it.
628 */
629 mtx_lock(&mountlist_mtx);
630 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
631 mtx_unlock(&mountlist_mtx);
632 vfs_unbusy(mp);
633
634 return (0);
635}
636
637/*
638 * If the shutdown was a clean halt, behave accordingly.
639 */
640static void
641shutdown_halt(void *junk, int howto)
642{
643
644 if (howto & RB_HALT) {
645 printf("\n");
646 printf("The operating system has halted.\n");
647 printf("Please press any key to reboot.\n\n");
648
649 wdog_kern_pat(WD_TO_NEVER);
650
651 switch (cngetc()) {
652 case -1: /* No console, just die */
653 cpu_halt();
654 /* NOTREACHED */
655 default:
656 break;
657 }
658 }
659}
660
661/*
662 * Check to see if the system panicked, pause and then reboot
663 * according to the specified delay.
 *
 * Acts only when RB_DUMP is set in howto.  panic_reboot_wait_time == 0
 * returns at once; -1 skips the countdown and goes straight to the
 * "press a key" prompt; any other value polls the console every 1/10th
 * of a second for that many seconds and returns when the countdown
 * expires without a key press.
 *
 * NOTE(review): listing line 675 (the argument for the "%d" in the first
 * printf) is missing from this listing; restore it from the original.
664 */
665static void
666shutdown_panic(void *junk, int howto)
667{
668 int loop;
669
670 if (howto & RB_DUMP) {
671 if (panic_reboot_wait_time != 0) {
672 if (panic_reboot_wait_time != -1) {
673 printf("Automatic reboot in %d seconds - "
674 "press a key on the console to abort\n",
676 for (loop = panic_reboot_wait_time * 10;
677 loop > 0; --loop) {
678 DELAY(1000 * 100); /* 1/10th second */
679 /* Did user type a key? */
680 if (cncheckc() != -1)
681 break;
682 }
	/* loop reached zero: nobody pressed a key, proceed with the reboot. */
683 if (!loop)
684 return;
685 }
686 } else { /* zero time specified - reboot NOW */
687 return;
688 }
689 printf("--> Press a key on the console to reboot,\n");
690 printf("--> or switch off the system now.\n");
691 cngetc();
692 }
693}
694
/*
 * Last shutdown_final handler: everything is done, reset the machine.
 */
static void
shutdown_reset(void *junk, int howto)
{

	printf("Rebooting...\n");
	/* Wait 1 sec for the printf output to complete and be read. */
	DELAY(1000000);

	/*
	 * Taking smp_ipi_mtx before the reset serves two purposes:
	 * - interrupts are disabled, so CPU0 cannot be preempted by fast
	 *   handlers (which would deadlock against the other CPUs);
	 * - it avoids deadlocks against smp_rendezvous() or, more
	 *   generally, against threads that busy-wait with this spinlock
	 *   held for responses from threads on other CPUs
	 *   (ie. smp_tlb_shootdown()).
	 *
	 * Without SMP only the first problem exists, so entering a
	 * spinlock section is enough.
	 */
#ifndef SMP
	spinlock_enter();
#else
	mtx_lock_spin(&smp_ipi_mtx);
#endif

	cpu_reset();
	/* NOTREACHED */ /* assuming reset worked */
}
725
726#if defined(WITNESS) || defined(INVARIANT_SUPPORT)
727static int kassert_warn_only = 0;
728#ifdef KDB
729static int kassert_do_kdb = 0;
730#endif
731#ifdef KTR
732static int kassert_do_ktr = 0;
733#endif
734static int kassert_do_log = 1;
735static int kassert_log_pps_limit = 4;
736static int kassert_log_mute_at = 0;
737static int kassert_log_panic_at = 0;
738static int kassert_suppress_in_panic = 0;
739static int kassert_warnings = 0;
740
741SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
742 "kassert options");
743
744#ifdef KASSERT_PANIC_OPTIONAL
745#define KASSERT_RWTUN CTLFLAG_RWTUN
746#else
747#define KASSERT_RWTUN CTLFLAG_RDTUN
748#endif
749
750SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN,
751 &kassert_warn_only, 0,
752 "KASSERT triggers a panic (0) or just a warning (1)");
753
754#ifdef KDB
755SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN,
756 &kassert_do_kdb, 0, "KASSERT will enter the debugger");
757#endif
758
759#ifdef KTR
760SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN,
761 &kassert_do_ktr, 0,
762 "KASSERT does a KTR, set this to the KTRMASK you want");
763#endif
764
765SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN,
766 &kassert_do_log, 0,
767 "If warn_only is enabled, log (1) or do not log (0) assertion violations");
768
769SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS,
770 &kassert_warnings, 0, "number of KASSERTs that have been triggered");
771
772SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN,
773 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");
774
775SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN,
776 &kassert_log_pps_limit, 0, "limit number of log messages per second");
777
778SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN,
779 &kassert_log_mute_at, 0, "max number of KASSERTS to log");
780
781SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN,
782 &kassert_suppress_in_panic, 0,
783 "KASSERTs will be suppressed while handling a panic");
784#undef KASSERT_RWTUN
785
786static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
787
788SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
789 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
790 kassert_sysctl_kassert, "I",
791 "set to trigger a test kassert");
792
793static int
794kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
795{
796 int error, i;
797
798 error = sysctl_wire_old_buffer(req, sizeof(int));
799 if (error == 0) {
800 i = 0;
801 error = sysctl_handle_int(oidp, &i, 0, req);
802 }
803 if (error != 0 || req->newptr == NULL)
804 return (error);
805 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
806 return (0);
807}
808
809#ifdef KASSERT_PANIC_OPTIONAL
810/*
811 * Called by KASSERT, this decides if we will panic
812 * or if we will log via printf and/or ktr.
 *
 * The message is formatted once into a static buffer.  Every call that
 * does not panic and is not suppressed increments kassert_warnings.
 *
 * NOTE(review): listing lines 833 and 865 are missing here (the body of
 * the "trace_all_panics && trace_on_panic" test, and a statement after
 * the rate-limited printf).  Restore them from the original file.
813 */
814void
815kassert_panic(const char *fmt, ...)
816{
817 static char buf[256];
818 va_list ap;
819
820 va_start(ap, fmt);
821 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
822 va_end(ap);
823
824 /*
825 * If we are suppressing secondary panics, log the warning but do not
826 * re-enter panic/kdb.
827 */
828 if (panicstr != NULL && kassert_suppress_in_panic) {
829 if (kassert_do_log) {
830 printf("KASSERT failed: %s\n", buf);
831#ifdef KDB
832 if (trace_all_panics && trace_on_panic)
834#endif
835 }
836 return;
837 }
838
839 /*
840 * panic if we're not just warning, or if we've exceeded
841 * kassert_log_panic_at warnings.
842 */
843 if (!kassert_warn_only ||
844 (kassert_log_panic_at > 0 &&
845 kassert_warnings >= kassert_log_panic_at)) {
	/* ap was ended above; restart it to hand the arguments to vpanic(). */
846 va_start(ap, fmt);
847 vpanic(fmt, ap);
848 /* NORETURN */
849 }
850#ifdef KTR
851 if (kassert_do_ktr)
852 CTR0(ktr_mask, buf);
853#endif /* KTR */
854 /*
855 * log if we've not yet met the mute limit.
856 */
857 if (kassert_do_log &&
858 (kassert_log_mute_at == 0 ||
859 kassert_warnings < kassert_log_mute_at)) {
860 static struct timeval lasterr;
861 static int curerr;
862
	/* Rate-limit the log output to kassert_log_pps_limit messages per second. */
863 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
864 printf("KASSERT failed: %s\n", buf);
866 }
867 }
868#ifdef KDB
869 if (kassert_do_kdb) {
870 kdb_enter(KDB_WHY_KASSERT, buf);
871 }
872#endif
873 atomic_add_int(&kassert_warnings, 1);
874}
875#endif /* KASSERT_PANIC_OPTIONAL */
876#endif
877
878/*
879 * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
880 * and then reboots. If we are called twice, then we avoid trying to sync
881 * the disks as this often leads to recursive panics.
882 */
883void
884panic(const char *fmt, ...)
885{
886 va_list ap;
887
888 va_start(ap, fmt);
889 vpanic(fmt, ap);
890}
891
/*
 * va_list form of panic().
 *
 * On the first panic (panicstr still NULL) the message is formatted into
 * a static buffer that panicstr is pointed at, panicked is set, and
 * RB_DUMP is requested.  On a later panic the message is only printed and
 * RB_NOSYNC is requested.  The function finishes by calling
 * kern_reboot() with the accumulated boot options.
 *
 * NOTE(review): listing lines 950, 961 and 963 are missing here (the body
 * of the trace_on_panic test, and the conditions in front of the
 * RB_POWEROFF and RB_POWERCYCLE assignments).  Restore them from the
 * original file; as shown, both flags would be set unconditionally.
 */
892void
893vpanic(const char *fmt, va_list ap)
894{
895#ifdef SMP
896 cpuset_t other_cpus;
897#endif
898 struct thread *td = curthread;
899 int bootopt, newpanic;
900 static char buf[256];
901
902 spinlock_enter();
903
904#ifdef SMP
905 /*
906 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
907 * concurrently entering panic. Only the winner will proceed
908 * further.
909 */
910 if (panicstr == NULL && !kdb_active) {
911 other_cpus = all_cpus;
912 CPU_CLR(PCPU_GET(cpuid), &other_cpus);
913 stop_cpus_hard(other_cpus);
914 }
915#endif
916
917 /*
918 * Ensure that the scheduler is stopped while panicking, even if panic
919 * has been entered from kdb.
920 */
921 td->td_stopsched = 1;
922
923 bootopt = RB_AUTOBOOT;
924 newpanic = 0;
925 if (panicstr)
926 bootopt |= RB_NOSYNC;
927 else {
928 bootopt |= RB_DUMP;
929 panicstr = fmt;
930 panicked = true;
931 newpanic = 1;
932 }
933
934 if (newpanic) {
	/* Replace the raw format string stored above with the formatted text. */
935 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
936 panicstr = buf;
937 cngrab();
938 printf("panic: %s\n", buf);
939 } else {
940 printf("panic: ");
941 vprintf(fmt, ap);
942 printf("\n");
943 }
944#ifdef SMP
945 printf("cpuid = %d\n", PCPU_GET(cpuid));
946#endif
947 printf("time = %jd\n", (intmax_t )time_second);
948#ifdef KDB
949 if ((newpanic || trace_all_panics) && trace_on_panic)
951 if (debugger_on_panic)
952 kdb_enter(KDB_WHY_PANIC, "panic");
953 else if (!newpanic && debugger_on_recursive_panic)
954 kdb_enter(KDB_WHY_PANIC, "re-panic");
955#endif
956 /*thread_lock(td); */
957 td->td_flags |= TDF_INPANIC;
958 /* thread_unlock(td); */
959 if (!sync_on_panic)
960 bootopt |= RB_NOSYNC;
962 bootopt |= RB_POWEROFF;
964 bootopt |= RB_POWERCYCLE;
965 kern_reboot(bootopt);
966}
967
968/*
969 * Support for poweroff delay.
970 *
971 * Please note that setting this delay too short might power off your machine
972 * before the write cache on your hard disk has been flushed, leading to
973 * soft-updates inconsistencies.
974 */
975#ifndef POWEROFF_DELAY
976# define POWEROFF_DELAY 5000
977#endif
979
980SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
981 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");
982
983static void
984poweroff_wait(void *junk, int howto)
985{
986
987 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0)
988 return;
989 DELAY(poweroff_delay * 1000);
990}
991
992/*
993 * Some system processes (e.g. syncer) need to be stopped at appropriate
994 * points in their main loops prior to a system shutdown, so that they
995 * won't interfere with the shutdown process (e.g. by holding a disk buf
996 * to cause sync to fail). For each of these system processes, register
997 * shutdown_kproc() as a handler for one of shutdown events.
998 */
999static int kproc_shutdown_wait = 60;
1000SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
1001 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");
1002
1003void
1004kproc_shutdown(void *arg, int howto)
1005{
1006 struct proc *p;
1007 int error;
1008
1009 if (panicstr)
1010 return;
1011
1012 p = (struct proc *)arg;
1013 printf("Waiting (max %d seconds) for system process `%s' to stop... ",
1014 kproc_shutdown_wait, p->p_comm);
1015 error = kproc_suspend(p, kproc_shutdown_wait * hz);
1016
1017 if (error == EWOULDBLOCK)
1018 printf("timed out\n");
1019 else
1020 printf("done\n");
1021}
1022
1023void
1024kthread_shutdown(void *arg, int howto)
1025{
1026 struct thread *td;
1027 int error;
1028
1029 if (panicstr)
1030 return;
1031
1032 td = (struct thread *)arg;
1033 printf("Waiting (max %d seconds) for system thread `%s' to stop... ",
1034 kproc_shutdown_wait, td->td_name);
1035 error = kthread_suspend(td, kproc_shutdown_wait * hz);
1036
1037 if (error == EWOULDBLOCK)
1038 printf("timed out\n");
1039 else
1040 printf("done\n");
1041}
1042
1043static int
1044dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS)
1045{
1046 char buf[256];
1047 struct dumperinfo *di;
1048 struct sbuf sb;
1049 int error;
1050
1051 error = sysctl_wire_old_buffer(req, 0);
1052 if (error != 0)
1053 return (error);
1054
1055 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req);
1056
1057 mtx_lock(&dumpconf_list_lk);
1058 TAILQ_FOREACH(di, &dumper_configs, di_next) {
1059 if (di != TAILQ_FIRST(&dumper_configs))
1060 sbuf_putc(&sb, ',');
1061 sbuf_cat(&sb, di->di_devname);
1062 }
1063 mtx_unlock(&dumpconf_list_lk);
1064
1065 error = sbuf_finish(&sb);
1066 sbuf_delete(&sb);
1067 return (error);
1068}
1069SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname,
1070 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0,
1072 "Device(s) for kernel dumps");
1073
1074static int _dump_append(struct dumperinfo *di, void *virtual,
1075 vm_offset_t physical, size_t length);
1076
1077#ifdef EKCD
1078static struct kerneldumpcrypto *
1079kerneldumpcrypto_create(size_t blocksize, uint8_t encryption,
1080 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey)
1081{
1082 struct kerneldumpcrypto *kdc;
1083 struct kerneldumpkey *kdk;
1084 uint32_t dumpkeysize;
1085
1086 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize);
1087 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO);
1088
1089 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0);
1090
1091 kdc->kdc_encryption = encryption;
1092 switch (kdc->kdc_encryption) {
1093 case KERNELDUMP_ENC_AES_256_CBC:
1094 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0)
1095 goto failed;
1096 break;
1097 case KERNELDUMP_ENC_CHACHA20:
1098 chacha_keysetup(&kdc->kdc_chacha, key, 256);
1099 break;
1100 default:
1101 goto failed;
1102 }
1103
1104 kdc->kdc_dumpkeysize = dumpkeysize;
1105 kdk = kdc->kdc_dumpkey;
1106 kdk->kdk_encryption = kdc->kdc_encryption;
1107 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
1108 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize);
1109 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize);
1110
1111 return (kdc);
1112failed:
1113 zfree(kdc, M_EKCD);
1114 return (NULL);
1115}
1116
1117static int
1118kerneldumpcrypto_init(struct kerneldumpcrypto *kdc)
1119{
1120 uint8_t hash[SHA256_DIGEST_LENGTH];
1121 SHA256_CTX ctx;
1122 struct kerneldumpkey *kdk;
1123 int error;
1124
1125 error = 0;
1126
1127 if (kdc == NULL)
1128 return (0);
1129
1130 /*
1131 * When a user enters ddb it can write a crash dump multiple times.
1132 * Each time it should be encrypted using a different IV.
1133 */
1134 SHA256_Init(&ctx);
1135 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv));
1136 SHA256_Final(hash, &ctx);
1137 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv));
1138
1139 switch (kdc->kdc_encryption) {
1140 case KERNELDUMP_ENC_AES_256_CBC:
1141 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
1142 kdc->kdc_iv) <= 0) {
1143 error = EINVAL;
1144 goto out;
1145 }
1146 break;
1147 case KERNELDUMP_ENC_CHACHA20:
1148 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL);
1149 break;
1150 default:
1151 error = EINVAL;
1152 goto out;
1153 }
1154
1155 kdk = kdc->kdc_dumpkey;
1156 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
1157out:
1158 explicit_bzero(hash, sizeof(hash));
1159 return (error);
1160}
1161
1162static uint32_t
1163kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc)
1164{
1165
1166 if (kdc == NULL)
1167 return (0);
1168 return (kdc->kdc_dumpkeysize);
1169}
1170#endif /* EKCD */
1171
/*
 * Allocate the compression state for a dumper.
 *
 * compression selects gzip or zstd; any other value returns NULL.  NULL
 * is also returned, after freeing the state, when kdc_stream is left NULL
 * by the stream setup.  On success a scratch buffer of di->maxiosize
 * bytes is attached as kdc_buf.
 *
 * NOTE(review): listing line 1191 (the start of the statement that
 * assigns kdc_stream) is missing from this listing; only its argument
 * continuation line survives.  Restore it from the original file.
 */
1172static struct kerneldumpcomp *
1173kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression)
1174{
1175 struct kerneldumpcomp *kdcomp;
1176 int format;
1177
	/* Map the kerneldump compression ID to the compressor's format ID. */
1178 switch (compression) {
1179 case KERNELDUMP_COMP_GZIP:
1180 format = COMPRESS_GZIP;
1181 break;
1182 case KERNELDUMP_COMP_ZSTD:
1183 format = COMPRESS_ZSTD;
1184 break;
1185 default:
1186 return (NULL);
1187 }
1188
1189 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO);
1190 kdcomp->kdc_format = compression;
1192 format, di->maxiosize, kerneldump_gzlevel, di);
1193 if (kdcomp->kdc_stream == NULL) {
1194 free(kdcomp, M_DUMPER);
1195 return (NULL);
1196 }
1197 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP);
1198 return (kdcomp);
1199}
1200
1201static void
1202kerneldumpcomp_destroy(struct dumperinfo *di)
1203{
1204 struct kerneldumpcomp *kdcomp;
1205
1206 kdcomp = di->kdcomp;
1207 if (kdcomp == NULL)
1208 return;
1209 compressor_fini(kdcomp->kdc_stream);
1210 zfree(kdcomp->kdc_buf, M_DUMPER);
1211 free(kdcomp, M_DUMPER);
1212}
1213
1214/*
1215 * Must not be present on global list.
1216 */
1217static void
1218free_single_dumper(struct dumperinfo *di)
1219{
1220
1221 if (di == NULL)
1222 return;
1223
1224 zfree(di->blockbuf, M_DUMPER);
1225
1227
1228#ifdef EKCD
1229 zfree(di->kdcrypto, M_EKCD);
1230#endif
1231 zfree(di, M_DUMPER);
1232}
1233
1234/* Registration of dumpers */
1235int
1236dumper_insert(const struct dumperinfo *di_template, const char *devname,
1237 const struct diocskerneldump_arg *kda)
1238{
1239 struct dumperinfo *newdi, *listdi;
1240 bool inserted;
1241 uint8_t index;
1242 int error;
1243
1244 index = kda->kda_index;
1245 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV &&
1246 index != KDA_REMOVE_ALL);
1247
1248 error = priv_check(curthread, PRIV_SETDUMPER);
1249 if (error != 0)
1250 return (error);
1251
1252 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK
1253 | M_ZERO);
1254 memcpy(newdi, di_template, sizeof(*newdi));
1255 newdi->blockbuf = NULL;
1256 newdi->kdcrypto = NULL;
1257 newdi->kdcomp = NULL;
1258 strcpy(newdi->di_devname, devname);
1259
1260 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
1261#ifdef EKCD
1262 newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize,
1263 kda->kda_encryption, kda->kda_key,
1264 kda->kda_encryptedkeysize, kda->kda_encryptedkey);
1265 if (newdi->kdcrypto == NULL) {
1266 error = EINVAL;
1267 goto cleanup;
1268 }
1269#else
1270 error = EOPNOTSUPP;
1271 goto cleanup;
1272#endif
1273 }
1274 if (kda->kda_compression != KERNELDUMP_COMP_NONE) {
1275#ifdef EKCD
1276 /*
1277 * We can't support simultaneous unpadded block cipher
1278 * encryption and compression because there is no guarantee the
1279 * length of the compressed result is exactly a multiple of the
1280 * cipher block size.
1281 */
1282 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) {
1283 error = EOPNOTSUPP;
1284 goto cleanup;
1285 }
1286#endif
1287 newdi->kdcomp = kerneldumpcomp_create(newdi,
1288 kda->kda_compression);
1289 if (newdi->kdcomp == NULL) {
1290 error = EINVAL;
1291 goto cleanup;
1292 }
1293 }
1294
1295 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO);
1296
1297 /* Add the new configuration to the queue */
1298 mtx_lock(&dumpconf_list_lk);
1299 inserted = false;
1300 TAILQ_FOREACH(listdi, &dumper_configs, di_next) {
1301 if (index == 0) {
1302 TAILQ_INSERT_BEFORE(listdi, newdi, di_next);
1303 inserted = true;
1304 break;
1305 }
1306 index--;
1307 }
1308 if (!inserted)
1309 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next);
1310 mtx_unlock(&dumpconf_list_lk);
1311
1312 return (0);
1313
1314cleanup:
1315 free_single_dumper(newdi);
1316 return (error);
1317}
1318
1319#ifdef DDB
1320void
1321dumper_ddb_insert(struct dumperinfo *newdi)
1322{
1323 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next);
1324}
1325
1326void
1327dumper_ddb_remove(struct dumperinfo *di)
1328{
1329 TAILQ_REMOVE(&dumper_configs, di, di_next);
1330}
1331#endif
1332
1333static bool
1334dumper_config_match(const struct dumperinfo *di, const char *devname,
1335 const struct diocskerneldump_arg *kda)
1336{
1337 if (kda->kda_index == KDA_REMOVE_ALL)
1338 return (true);
1339
1340 if (strcmp(di->di_devname, devname) != 0)
1341 return (false);
1342
1343 /*
1344 * Allow wildcard removal of configs matching a device on g_dev_orphan.
1345 */
1346 if (kda->kda_index == KDA_REMOVE_DEV)
1347 return (true);
1348
1349 if (di->kdcomp != NULL) {
1350 if (di->kdcomp->kdc_format != kda->kda_compression)
1351 return (false);
1352 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE)
1353 return (false);
1354#ifdef EKCD
1355 if (di->kdcrypto != NULL) {
1356 if (di->kdcrypto->kdc_encryption != kda->kda_encryption)
1357 return (false);
1358 /*
1359 * Do we care to verify keys match to delete? It seems weird
1360 * to expect multiple fallback dump configurations on the same
1361 * device that only differ in crypto key.
1362 */
1363 } else
1364#endif
1365 if (kda->kda_encryption != KERNELDUMP_ENC_NONE)
1366 return (false);
1367
1368 return (true);
1369}
1370
1371int
1372dumper_remove(const char *devname, const struct diocskerneldump_arg *kda)
1373{
1374 struct dumperinfo *di, *sdi;
1375 bool found;
1376 int error;
1377
1378 error = priv_check(curthread, PRIV_SETDUMPER);
1379 if (error != 0)
1380 return (error);
1381
1382 /*
1383 * Try to find a matching configuration, and kill it.
1384 *
1385 * NULL 'kda' indicates remove any configuration matching 'devname',
1386 * which may remove multiple configurations in atypical configurations.
1387 */
1388 found = false;
1389 mtx_lock(&dumpconf_list_lk);
1390 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) {
1391 if (dumper_config_match(di, devname, kda)) {
1392 found = true;
1393 TAILQ_REMOVE(&dumper_configs, di, di_next);
1395 }
1396 }
1397 mtx_unlock(&dumpconf_list_lk);
1398
1399 /* Only produce ENOENT if a more targeted match didn't match. */
1400 if (!found && kda->kda_index == KDA_REMOVE)
1401 return (ENOENT);
1402 return (0);
1403}
1404
1405static int
1406dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length)
1407{
1408
1409 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset ||
1410 offset - di->mediaoffset + length > di->mediasize)) {
1411 if (di->kdcomp != NULL && offset >= di->mediaoffset) {
1412 printf(
1413 "Compressed dump failed to fit in device boundaries.\n");
1414 return (E2BIG);
1415 }
1416
1417 printf("Attempt to write outside dump device boundaries.\n"
1418 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n",
1419 (intmax_t)offset, (intmax_t)di->mediaoffset,
1420 (uintmax_t)length, (intmax_t)di->mediasize);
1421 return (ENOSPC);
1422 }
1423 if (length % di->blocksize != 0) {
1424 printf("Attempt to write partial block of length %ju.\n",
1425 (uintmax_t)length);
1426 return (EINVAL);
1427 }
1428 if (offset % di->blocksize != 0) {
1429 printf("Attempt to write at unaligned offset %jd.\n",
1430 (intmax_t)offset);
1431 return (EINVAL);
1432 }
1433
1434 return (0);
1435}
1436
1437#ifdef EKCD
1438static int
1439dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size)
1440{
1441
1442 switch (kdc->kdc_encryption) {
1443 case KERNELDUMP_ENC_AES_256_CBC:
1444 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf,
1445 8 * size, buf) <= 0) {
1446 return (EIO);
1447 }
1448 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
1449 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) {
1450 return (EIO);
1451 }
1452 break;
1453 case KERNELDUMP_ENC_CHACHA20:
1454 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size);
1455 break;
1456 default:
1457 return (EINVAL);
1458 }
1459
1460 return (0);
1461}
1462
1463/* Encrypt data and call dumper. */
1464static int
1465dump_encrypted_write(struct dumperinfo *di, void *virtual,
1466 vm_offset_t physical, off_t offset, size_t length)
1467{
1468 static uint8_t buf[KERNELDUMP_BUFFER_SIZE];
1469 struct kerneldumpcrypto *kdc;
1470 int error;
1471 size_t nbytes;
1472
1473 kdc = di->kdcrypto;
1474
1475 while (length > 0) {
1476 nbytes = MIN(length, sizeof(buf));
1477 bcopy(virtual, buf, nbytes);
1478
1479 if (dump_encrypt(kdc, buf, nbytes) != 0)
1480 return (EIO);
1481
1482 error = dump_write(di, buf, physical, offset, nbytes);
1483 if (error != 0)
1484 return (error);
1485
1486 offset += nbytes;
1487 virtual = (void *)((uint8_t *)virtual + nbytes);
1488 length -= nbytes;
1489 }
1490
1491 return (0);
1492}
1493#endif /* EKCD */
1494
1495static int
1496kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg)
1497{
1498 struct dumperinfo *di;
1499 size_t resid, rlength;
1500 int error;
1501
1502 di = arg;
1503
1504 if (length % di->blocksize != 0) {
1505 /*
1506 * This must be the final write after flushing the compression
1507 * stream. Write as many full blocks as possible and stash the
1508 * residual data in the dumper's block buffer. It will be
1509 * padded and written in dump_finish().
1510 */
1511 rlength = rounddown(length, di->blocksize);
1512 if (rlength != 0) {
1513 error = _dump_append(di, base, 0, rlength);
1514 if (error != 0)
1515 return (error);
1516 }
1517 resid = length - rlength;
1518 memmove(di->blockbuf, (uint8_t *)base + rlength, resid);
1519 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid);
1520 di->kdcomp->kdc_resid = resid;
1521 return (EAGAIN);
1522 }
1523 return (_dump_append(di, base, 0, length));
1524}
1525
1526/*
1527 * Write kernel dump headers at the beginning and end of the dump extent.
1528 * Write the kernel dump encryption key after the leading header if we were
1529 * configured to do so.
1530 */
1531static int
1532dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
1533{
1534#ifdef EKCD
1535 struct kerneldumpcrypto *kdc;
1536#endif
1537 void *buf;
1538 size_t hdrsz;
1539 uint64_t extent;
1540 uint32_t keysize;
1541 int error;
1542
1543 hdrsz = sizeof(*kdh);
1544 if (hdrsz > di->blocksize)
1545 return (ENOMEM);
1546
1547#ifdef EKCD
1548 kdc = di->kdcrypto;
1549 keysize = kerneldumpcrypto_dumpkeysize(kdc);
1550#else
1551 keysize = 0;
1552#endif
1553
1554 /*
1555 * If the dump device has special handling for headers, let it take care
1556 * of writing them out.
1557 */
1558 if (di->dumper_hdr != NULL)
1559 return (di->dumper_hdr(di, kdh));
1560
1561 if (hdrsz == di->blocksize)
1562 buf = kdh;
1563 else {
1564 buf = di->blockbuf;
1565 memset(buf, 0, di->blocksize);
1566 memcpy(buf, kdh, hdrsz);
1567 }
1568
1569 extent = dtoh64(kdh->dumpextent);
1570#ifdef EKCD
1571 if (kdc != NULL) {
1572 error = dump_write(di, kdc->kdc_dumpkey, 0,
1573 di->mediaoffset + di->mediasize - di->blocksize - extent -
1574 keysize, keysize);
1575 if (error != 0)
1576 return (error);
1577 }
1578#endif
1579
1580 error = dump_write(di, buf, 0,
1581 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent -
1582 keysize, di->blocksize);
1583 if (error == 0)
1584 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize -
1585 di->blocksize, di->blocksize);
1586 return (error);
1587}
1588
1589/*
1590 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to
1591 * protect us from metadata and metadata from us.
1592 */
1593#define SIZEOF_METADATA (64 * 1024)
1594
1595/*
1596 * Do some preliminary setup for a kernel dump: initialize state for encryption,
1597 * if requested, and make sure that we have enough space on the dump device.
1598 *
1599 * We set things up so that the dump ends before the last sector of the dump
1600 * device, at which the trailing header is written.
1601 *
1602 * +-----------+------+-----+----------------------------+------+
1603 * | | lhdr | key | ... kernel dump ... | thdr |
1604 * +-----------+------+-----+----------------------------+------+
1605 * 1 blk opt <------- dump extent --------> 1 blk
1606 *
1607 * Dumps written using dump_append() start at the beginning of the extent.
1608 * Uncompressed dumps will use the entire extent, but compressed dumps typically
1609 * will not. The true length of the dump is recorded in the leading and trailing
1610 * headers once the dump has been completed.
1611 *
1612 * The dump device may provide a callback, in which case it will initialize
1613 * dumpoff and take care of laying out the headers.
1614 */
1615int
1616dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh)
1617{
1618#ifdef EKCD
1619 struct kerneldumpcrypto *kdc;
1620#endif
1621 void *key;
1622 uint64_t dumpextent, span;
1623 uint32_t keysize;
1624 int error;
1625
1626#ifdef EKCD
1627 /* Send the key before the dump so a partial dump is still usable. */
1628 kdc = di->kdcrypto;
1629 error = kerneldumpcrypto_init(kdc);
1630 if (error != 0)
1631 return (error);
1632 keysize = kerneldumpcrypto_dumpkeysize(kdc);
1633 key = keysize > 0 ? kdc->kdc_dumpkey : NULL;
1634#else
1635 error = 0;
1636 keysize = 0;
1637 key = NULL;
1638#endif
1639
1640 if (di->dumper_start != NULL) {
1641 error = di->dumper_start(di, key, keysize);
1642 } else {
1643 dumpextent = dtoh64(kdh->dumpextent);
1644 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize +
1645 keysize;
1646 if (di->mediasize < span) {
1647 if (di->kdcomp == NULL)
1648 return (E2BIG);
1649
1650 /*
1651 * We don't yet know how much space the compressed dump
1652 * will occupy, so try to use the whole swap partition
1653 * (minus the first 64KB) in the hope that the
1654 * compressed dump will fit. If that doesn't turn out to
1655 * be enough, the bounds checking in dump_write()
1656 * will catch us and cause the dump to fail.
1657 */
1658 dumpextent = di->mediasize - span + dumpextent;
1659 kdh->dumpextent = htod64(dumpextent);
1660 }
1661
1662 /*
1663 * The offset at which to begin writing the dump.
1664 */
1665 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize -
1666 dumpextent;
1667 }
1668 di->origdumpoff = di->dumpoff;
1669 return (error);
1670}
1671
1672static int
1673_dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical,
1674 size_t length)
1675{
1676 int error;
1677
1678#ifdef EKCD
1679 if (di->kdcrypto != NULL)
1680 error = dump_encrypted_write(di, virtual, physical, di->dumpoff,
1681 length);
1682 else
1683#endif
1684 error = dump_write(di, virtual, physical, di->dumpoff, length);
1685 if (error == 0)
1686 di->dumpoff += length;
1687 return (error);
1688}
1689
1690/*
1691 * Write to the dump device starting at dumpoff. When compression is enabled,
1692 * writes to the device will be performed using a callback that gets invoked
1693 * when the compression stream's output buffer is full.
1694 */
1695int
1696dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical,
1697 size_t length)
1698{
1699 void *buf;
1700
1701 if (di->kdcomp != NULL) {
1702 /* Bounce through a buffer to avoid CRC errors. */
1703 if (length > di->maxiosize)
1704 return (EINVAL);
1705 buf = di->kdcomp->kdc_buf;
1706 memmove(buf, virtual, length);
1707 return (compressor_write(di->kdcomp->kdc_stream, buf, length));
1708 }
1709 return (_dump_append(di, virtual, physical, length));
1710}
1711
1712/*
1713 * Write to the dump device at the specified offset.
1714 */
1715int
1716dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical,
1717 off_t offset, size_t length)
1718{
1719 int error;
1720
1721 error = dump_check_bounds(di, offset, length);
1722 if (error != 0)
1723 return (error);
1724 return (di->dumper(di->priv, virtual, physical, offset, length));
1725}
1726
1727/*
1728 * Perform kernel dump finalization: flush the compression stream, if necessary,
1729 * write the leading and trailing kernel dump headers now that we know the true
1730 * length of the dump, and optionally write the encryption key following the
1731 * leading header.
1732 */
1733int
1734dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh)
1735{
1736 int error;
1737
1738 if (di->kdcomp != NULL) {
1739 error = compressor_flush(di->kdcomp->kdc_stream);
1740 if (error == EAGAIN) {
1741 /* We have residual data in di->blockbuf. */
1742 error = _dump_append(di, di->blockbuf, 0, di->blocksize);
1743 if (error == 0)
1744 /* Compensate for _dump_append()'s adjustment. */
1745 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid;
1746 di->kdcomp->kdc_resid = 0;
1747 }
1748 if (error != 0)
1749 return (error);
1750
1751 /*
1752 * We now know the size of the compressed dump, so update the
1753 * header accordingly and recompute parity.
1754 */
1755 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff);
1756 kdh->parity = 0;
1757 kdh->parity = kerneldump_parity(kdh);
1758
1759 compressor_reset(di->kdcomp->kdc_stream);
1760 }
1761
1762 error = dump_write_headers(di, kdh);
1763 if (error != 0)
1764 return (error);
1765
1766 (void)dump_write(di, NULL, 0, 0, 0);
1767 return (0);
1768}
1769
1770void
1771dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh,
1772 const char *magic, uint32_t archver, uint64_t dumplen)
1773{
1774 size_t dstsize;
1775
1776 bzero(kdh, sizeof(*kdh));
1777 strlcpy(kdh->magic, magic, sizeof(kdh->magic));
1778 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
1779 kdh->version = htod32(KERNELDUMPVERSION);
1780 kdh->architectureversion = htod32(archver);
1781 kdh->dumplength = htod64(dumplen);
1782 kdh->dumpextent = kdh->dumplength;
1783 kdh->dumptime = htod64(time_second);
1784#ifdef EKCD
1785 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto));
1786#else
1787 kdh->dumpkeysize = 0;
1788#endif
1789 kdh->blocksize = htod32(di->blocksize);
1790 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
1791 dstsize = sizeof(kdh->versionstring);
1792 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize)
1793 kdh->versionstring[dstsize - 2] = '\n';
1794 if (panicstr != NULL)
1795 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
1796 if (di->kdcomp != NULL)
1797 kdh->compression = di->kdcomp->kdc_format;
1798 kdh->parity = kerneldump_parity(kdh);
1799}
1800
1801#ifdef DDB
1802DB_SHOW_COMMAND(panic, db_show_panic)
1803{
1804
1805 if (panicstr == NULL)
1806 db_printf("panicstr not set\n");
1807 else
1808 db_printf("panic: %s\n", panicstr);
1809}
1810#endif
struct timespec * ts
Definition: clock_if.m:39
METHOD int shutdown
Called during system shutdown.
Definition: device_if.m:263
int __elfN() coredump(struct thread *td, struct vnode *vp, off_t limit, int flags)
Definition: imgact_elf.c:1567
struct proc * initproc
Definition: init_main.c:112
void boottrace_dump_console(void)
bool shutdown_trace
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RWTUN, &idletick, 0, "Run periodic events when idle")
int make_dev_p(int flags, struct cdev **cdev, struct cdevsw *devsw, struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,...)
Definition: kern_conf.c:930
void cngrab()
Definition: kern_cons.c:376
int cngetc(void)
Definition: kern_cons.c:418
int cncheckc(void)
Definition: kern_cons.c:432
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
FEATURE(kdtrace_hooks, "Kernel DTrace hooks which are required to load DTrace kernel modules")
struct prison prison0
Definition: kern_jail.c:101
int kproc_suspend(struct proc *p, int timo)
Definition: kern_kthread.c:180
int kthread_suspend(struct thread *td, int timo)
Definition: kern_kthread.c:368
uint64_t ktr_mask
Definition: kern_ktr.c:97
void *() malloc(size_t size, struct malloc_type *mtp, int flags)
Definition: kern_malloc.c:632
void zfree(void *addr, struct malloc_type *mtp)
Definition: kern_malloc.c:947
void free(void *addr, struct malloc_type *mtp)
Definition: kern_malloc.c:907
struct mtx __exclusive_cache_line Giant
Definition: kern_mutex.c:181
int priv_check(struct thread *td, int priv)
Definition: kern_priv.c:271
static bool powercycle_on_panic
static bool dumper_config_match(const struct dumperinfo *di, const char *devname, const struct diocskerneldump_arg *kda)
static int sync_on_panic
static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer")
static int poweroff_delay
static struct kerneldumpcomp * kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression)
int suspend_blocked
int sys_reboot(struct thread *td, struct reboot_args *uap)
#define PANIC_REBOOT_WAIT_TIME
static void shutdown_conf(void *unused)
static int dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length)
static struct task shutdown_nice_task
int rebooting
SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, &dumper_configs, 0, dumpdevname_sysctl_handler, "A", "Device(s) for kernel dumps")
static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg)
static int kern_reroot(void)
static void reroot_conf(void *unused)
static void print_uptime(void)
static void shutdown_nice_task_fn(void *arg, int pending __unused)
bool __read_frequently panicked
int dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh)
static int dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
static bool poweroff_on_panic
void kthread_shutdown(void *arg, int howto)
int dumper_remove(const char *devname, const struct diocskerneldump_arg *kda)
int dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh)
static void reboottrace(int howto)
static void free_single_dumper(struct dumperinfo *di)
int dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length)
const char * panicstr
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "Shutdown environment")
int __read_mostly dumping
static void kerneldumpcomp_destroy(struct dumperinfo *di)
void dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, const char *magic, uint32_t archver, uint64_t dumplen)
void shutdown_nice(int howto)
#define POWEROFF_DELAY
static void shutdown_reset(void *junk, int howto)
__FBSDID("$FreeBSD$")
static struct mtx dumpconf_list_lk
void kproc_shutdown(void *arg, int howto)
static TAILQ_HEAD(dumpconflist, dumperinfo)
int doadump(boolean_t textdump)
int dumper_insert(const struct dumperinfo *di_template, const char *devname, const struct diocskerneldump_arg *kda)
static int show_busybufs
static int dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS)
static void shutdown_panic(void *junk, int howto)
SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, &panic_reboot_wait_time, 0, "Seconds to wait before rebooting after a panic")
void kern_reboot(int howto)
static int _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length)
SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic")
static int kproc_shutdown_wait
SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
void panic(const char *fmt,...)
int dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length)
void vpanic(const char *fmt, va_list ap)
static int panic_reboot_wait_time
static void poweroff_wait(void *, int)
MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF)
static void shutdown_halt(void *junk, int howto)
#define SIZEOF_METADATA
static int kerneldump_gzlevel
void kern_psignal(struct proc *p, int sig)
Definition: kern_sig.c:2117
int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
Definition: kern_sysctl.c:2136
int sysctl_handle_int(SYSCTL_HANDLER_ARGS)
Definition: kern_sysctl.c:1644
struct sbuf * sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, struct sysctl_req *req)
Definition: kern_sysctl.c:2503
void getnanouptime(struct timespec *tsp)
Definition: kern_tc.c:447
volatile time_t time_second
Definition: kern_tc.c:105
int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
Definition: kern_time.c:1118
void sched_bind(struct thread *td, int cpu)
Definition: sched_4bsd.c:1531
uint8_t kdc_format
uint8_t * kdc_buf
struct compressor * kdc_stream
static bool kasan_enabled __read_mostly
Definition: subr_asan.c:95
int compressor_flush(struct compressor *stream)
void compressor_reset(struct compressor *stream)
void compressor_fini(struct compressor *stream)
int compressor_write(struct compressor *stream, void *data, size_t len)
struct compressor * compressor_init(compressor_cb_t cb, int format, size_t maxiosize, int level, void *arg)
u_char __read_frequently kdb_active
Definition: subr_kdb.c:56
void kdb_enter(const char *why, const char *msg)
Definition: subr_kdb.c:498
void kdb_backtrace(void)
Definition: subr_kdb.c:429
int hz
Definition: subr_param.c:85
int vsnprintf(char *str, size_t size, const char *format, va_list ap)
Definition: subr_prf.c:565
int printf(const char *fmt,...)
Definition: subr_prf.c:397
int vprintf(const char *fmt, va_list ap)
Definition: subr_prf.c:410
int sbuf_finish(struct sbuf *s)
Definition: subr_sbuf.c:833
int sbuf_putc(struct sbuf *s, int c)
Definition: subr_sbuf.c:754
void sbuf_delete(struct sbuf *s)
Definition: subr_sbuf.c:898
int sbuf_cat(struct sbuf *s, const char *str)
Definition: subr_sbuf.c:566
cpuset_t all_cpus
Definition: subr_smp.c:70
int taskqueue_enqueue(struct taskqueue *queue, struct task *task)
struct mtx mtx
Definition: uipc_ktls.c:0
void bufshutdown(int show_busybufs)
Definition: vfs_bio.c:1373
size_t nbytes
Definition: vfs_extattr.c:718
struct mtx_padalign __exclusive_cache_line mountlist_mtx
Definition: vfs_mount.c:124
struct mntlist mountlist
Definition: vfs_mount.c:121
void vfs_rel(struct mount *mp)
Definition: vfs_mount.c:645
void vfs_ref(struct mount *mp)
Definition: vfs_mount.c:527
struct vnode * rootvnode
struct mount * rootdevmp
void vfs_mountroot(void)
void vfs_unmountall(void)
Definition: vfs_subr.c:4768
void vfs_unbusy(struct mount *mp)
Definition: vfs_subr.c:850
int vfs_busy(struct mount *mp, int flags)
Definition: vfs_subr.c:786
struct stat * buf