FreeBSD kernel kern code
kern_dump.c
Go to the documentation of this file.
1/*-
2 * Copyright (c) 2002 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/conf.h>
33#include <sys/cons.h>
34#include <sys/kdb.h>
35#include <sys/kernel.h>
36#include <sys/kerneldump.h>
37#include <sys/malloc.h>
38#include <sys/msgbuf.h>
39#include <sys/proc.h>
40#include <sys/watchdog.h>
41
42#include <vm/vm.h>
43#include <vm/vm_param.h>
44#include <vm/vm_page.h>
45#include <vm/vm_phys.h>
46#include <vm/vm_dumpset.h>
47#include <vm/pmap.h>
48
49#include <machine/dump.h>
50#include <machine/elf.h>
51#include <machine/md_var.h>
52#include <machine/pcb.h>
53
54CTASSERT(sizeof(struct kerneldumpheader) == 512);
55
56#define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE)
57
58/* Handle buffered writes. */
59static size_t fragsz;
60
61struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
62
63#if !defined(__powerpc__)
64void
66{
67 int n, idx;
68
69 bzero(dump_map, sizeof(dump_map));
70 for (n = 0; n < nitems(dump_map); n++) {
71 idx = n * 2;
72 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
73 break;
74 dump_map[n].pa_start = dump_avail[idx];
75 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
76 }
77}
78#endif
79
80struct dump_pa *
81dumpsys_gen_pa_next(struct dump_pa *mdp)
82{
83
84 if (mdp == NULL)
85 return (&dump_map[0]);
86
87 mdp++;
88 if (mdp->pa_size == 0)
89 mdp = NULL;
90 return (mdp);
91}
92
/*
 * Generic write-back/invalidate hook: intentionally does nothing.
 */
void
dumpsys_gen_wbinv_all(void)
{

}
98
99void
100dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
101 void *va __unused)
102{
103
104}
105
/*
 * Generic auxiliary-header hook: writes nothing and always reports
 * success (0).  The dumper argument is not used.
 */
int
dumpsys_gen_write_aux_headers(struct dumperinfo *di)
{

	/* No auxiliary headers in the generic implementation. */
	return (0);
}
112
113int
114dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
115{
116 static uint8_t buf[DEV_BSIZE];
117 size_t nbytes;
118 int error;
119
120 bzero(buf, sizeof(buf));
121
122 while (sz > 0) {
123 nbytes = MIN(sz, sizeof(buf));
124
125 error = dump_append(di, buf, 0, nbytes);
126 if (error)
127 return (error);
128 sz -= nbytes;
129 }
130
131 return (0);
132}
133
134int
135dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
136{
137 size_t len;
138 int error;
139
140 while (sz) {
141 len = di->blocksize - fragsz;
142 if (len > sz)
143 len = sz;
144 memcpy((char *)di->blockbuf + fragsz, ptr, len);
145 fragsz += len;
146 ptr += len;
147 sz -= len;
148 if (fragsz == di->blocksize) {
149 error = dump_append(di, di->blockbuf, 0, di->blocksize);
150 if (error)
151 return (error);
152 fragsz = 0;
153 }
154 }
155 return (0);
156}
157
158int
159dumpsys_buf_flush(struct dumperinfo *di)
160{
161 int error;
162
163 if (fragsz == 0)
164 return (0);
165
166 error = dump_append(di, di->blockbuf, 0, di->blocksize);
167 fragsz = 0;
168 return (error);
169}
170
171CTASSERT(PAGE_SHIFT < 20);
/*
 * Convert a page count to megabytes, rounding up.  The argument is
 * parenthesized so that expressions using low-precedence operators
 * (e.g. `a | b' or `c ? x : y') are converted as a whole.
 */
#define	PG2MB(pgs)							\
	(((pgs) + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
173
174int
175dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
176{
177 struct dumperinfo *di = (struct dumperinfo*)arg;
178 vm_paddr_t pa;
179 void *va;
180 uint64_t pgs;
181 size_t counter, sz, chunk;
182 int c, error;
183 u_int maxdumppgs;
184
185 error = 0; /* catch case in which chunk size is 0 */
186 counter = 0; /* Update twiddle every 16MB */
187 va = NULL;
188 pgs = mdp->pa_size / PAGE_SIZE;
189 pa = mdp->pa_start;
190 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
191 if (maxdumppgs == 0) /* seatbelt */
192 maxdumppgs = 1;
193
194 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
195 (uintmax_t)pgs);
196
197 dumpsys_wbinv_all();
198 while (pgs) {
199 chunk = pgs;
200 if (chunk > maxdumppgs)
201 chunk = maxdumppgs;
202 sz = chunk << PAGE_SHIFT;
203 counter += sz;
204 if (counter >> 24) {
205 printf(" %ju", (uintmax_t)PG2MB(pgs));
206 counter &= (1 << 24) - 1;
207 }
208
209 dumpsys_map_chunk(pa, chunk, &va);
210 wdog_kern_pat(WD_LASTVAL);
211
212 error = dump_append(di, va, 0, sz);
213 dumpsys_unmap_chunk(pa, chunk, va);
214 if (error)
215 break;
216 pgs -= chunk;
217 pa += sz;
218
219 /* Check for user abort. */
220 c = cncheckc();
221 if (c == 0x03)
222 return (ECANCELED);
223 if (c != -1)
224 printf(" (CTRL-C to abort) ");
225 }
226 printf(" ... %s\n", (error) ? "fail" : "ok");
227 return (error);
228}
229
230int
231dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
232{
233 struct dump_pa *mdp;
234 int error, seqnr;
235
236 seqnr = 0;
237 mdp = dumpsys_pa_next(NULL);
238 while (mdp != NULL) {
239 error = (*cb)(mdp, seqnr++, arg);
240 if (error)
241 return (-error);
242 mdp = dumpsys_pa_next(mdp);
243 }
244 return (seqnr);
245}
246
247static off_t fileofs;
248
249static int
250cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
251{
252 struct dumperinfo *di = (struct dumperinfo*)arg;
253 Elf_Phdr phdr;
254 uint64_t size;
255 int error;
256
257 size = mdp->pa_size;
258 bzero(&phdr, sizeof(phdr));
259 phdr.p_type = PT_LOAD;
260 phdr.p_flags = PF_R; /* XXX */
261 phdr.p_offset = fileofs;
262#ifdef __powerpc__
263 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
264 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
265#else
266 phdr.p_vaddr = mdp->pa_start;
267 phdr.p_paddr = mdp->pa_start;
268#endif
269 phdr.p_filesz = size;
270 phdr.p_memsz = size;
271 phdr.p_align = PAGE_SIZE;
272
273 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
274 fileofs += phdr.p_filesz;
275 return (error);
276}
277
278static int
279cb_size(struct dump_pa *mdp, int seqnr, void *arg)
280{
281 uint64_t *sz;
282
283 sz = (uint64_t *)arg;
284 *sz += (uint64_t)mdp->pa_size;
285 return (0);
286}
287
288int
289dumpsys_generic(struct dumperinfo *di)
290{
291 static struct kerneldumpheader kdh;
292 Elf_Ehdr ehdr;
293 uint64_t dumpsize;
294 off_t hdrgap;
295 size_t hdrsz;
296 int error;
297
298#if MINIDUMP_PAGE_TRACKING == 1
299 if (do_minidump)
300 return (minidumpsys(di, false));
301#endif
302
303 bzero(&ehdr, sizeof(ehdr));
304 ehdr.e_ident[EI_MAG0] = ELFMAG0;
305 ehdr.e_ident[EI_MAG1] = ELFMAG1;
306 ehdr.e_ident[EI_MAG2] = ELFMAG2;
307 ehdr.e_ident[EI_MAG3] = ELFMAG3;
308 ehdr.e_ident[EI_CLASS] = ELF_CLASS;
309#if BYTE_ORDER == LITTLE_ENDIAN
310 ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
311#else
312 ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
313#endif
314 ehdr.e_ident[EI_VERSION] = EV_CURRENT;
315 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */
316 ehdr.e_type = ET_CORE;
317 ehdr.e_machine = EM_VALUE;
318 ehdr.e_phoff = sizeof(ehdr);
319 ehdr.e_flags = 0;
320 ehdr.e_ehsize = sizeof(ehdr);
321 ehdr.e_phentsize = sizeof(Elf_Phdr);
322 ehdr.e_shentsize = sizeof(Elf_Shdr);
323
324 dumpsys_pa_init();
325
326 /* Calculate dump size. */
327 dumpsize = 0L;
328 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
329 DUMPSYS_NUM_AUX_HDRS;
330 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
331 fileofs = MD_ALIGN(hdrsz);
332 dumpsize += fileofs;
333 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
334
335 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
336 dumpsize);
337
338 error = dump_start(di, &kdh);
339 if (error != 0)
340 goto fail;
341
342 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
343 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
344
345 /* Dump ELF header */
346 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
347 if (error)
348 goto fail;
349
350 /* Dump program headers */
352 if (error < 0)
353 goto fail;
354 error = dumpsys_write_aux_headers(di);
355 if (error < 0)
356 goto fail;
358
359 /*
360 * All headers are written using blocked I/O, so we know the
361 * current offset is (still) block aligned. Skip the alignement
362 * in the file to have the segment contents aligned at page
363 * boundary.
364 */
365 error = dumpsys_buf_seek(di, (size_t)hdrgap);
366 if (error)
367 goto fail;
368
369 /* Dump memory chunks. */
371 if (error < 0)
372 goto fail;
373
374 error = dump_finish(di, &kdh);
375 if (error != 0)
376 goto fail;
377
378 printf("\nDump complete\n");
379 return (0);
380
381 fail:
382 if (error < 0)
383 error = -error;
384
385 if (error == ECANCELED)
386 printf("\nDump aborted\n");
387 else if (error == E2BIG || error == ENOSPC)
388 printf("\nDump failed. Partition too small.\n");
389 else
390 printf("\n** DUMP FAILED (ERROR %d) **\n", error);
391 return (error);
392}
393
394#if MINIDUMP_PAGE_TRACKING == 1
395
396/* Minidump progress bar */
397static struct {
398 const int min_per;
399 const int max_per;
400 bool visited;
401} progress_track[10] = {
402 { 0, 10, false},
403 { 10, 20, false},
404 { 20, 30, false},
405 { 30, 40, false},
406 { 40, 50, false},
407 { 50, 60, false},
408 { 60, 70, false},
409 { 70, 80, false},
410 { 80, 90, false},
411 { 90, 100, false}
412};
413
414static uint64_t dumpsys_pb_size;
415static uint64_t dumpsys_pb_remaining;
416static uint64_t dumpsys_pb_check;
417
418/* Reset the progress bar for a dump of dumpsize. */
419void
420dumpsys_pb_init(uint64_t dumpsize)
421{
422 int i;
423
424 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize;
425 dumpsys_pb_check = 0;
426
427 for (i = 0; i < nitems(progress_track); i++)
428 progress_track[i].visited = false;
429}
430
431/*
432 * Update the progress according to the delta bytes that were written out.
433 * Check and print the progress percentage.
434 */
435void
436dumpsys_pb_progress(size_t delta)
437{
438 int sofar, i;
439
440 dumpsys_pb_remaining -= delta;
441 dumpsys_pb_check += delta;
442
443 /*
444 * To save time while dumping, only loop through progress_track
445 * occasionally.
446 */
447 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0)
448 return;
449 else
450 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1;
451
452 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size);
453 for (i = 0; i < nitems(progress_track); i++) {
454 if (sofar < progress_track[i].min_per ||
455 sofar > progress_track[i].max_per)
456 continue;
457 if (!progress_track[i].visited) {
458 progress_track[i].visited = true;
459 printf("..%d%%", sofar);
460 }
461 break;
462 }
463}
464
465int
466minidumpsys(struct dumperinfo *di, bool livedump)
467{
468 struct minidumpstate state;
469 struct msgbuf mb_copy;
470 char *msg_ptr;
471 size_t sz;
472 int error;
473
474 if (livedump) {
475 KASSERT(!dumping, ("live dump invoked from incorrect context"));
476
477 /*
478 * Before invoking cpu_minidumpsys() on the live system, we
479 * must snapshot some required global state: the message
480 * buffer, and the page dump bitset. They may be modified at
481 * any moment, so for the sake of the live dump it is best to
482 * have an unchanging snapshot to work with. Both are included
483 * as part of the dump and consumed by userspace tools.
484 *
485 * Other global state important to the minidump code is the
486 * dump_avail array and the kernel's page tables, but snapshots
487 * are not taken of these. For one, dump_avail[] is expected
488 * not to change after boot. Snapshotting the kernel page
489 * tables would involve an additional walk, so this is avoided
490 * too.
491 *
492 * This means live dumps are best effort, and the result may or
493 * may not be usable; there are no guarantees about the
494 * consistency of the dump's contents. Any of the following
495 * (and likely more) may affect the live dump:
496 *
497 * - Data may be modified, freed, or remapped during the
498 * course of the dump, such that the contents written out
499 * are partially or entirely unrecognizable. This means
500 * valid references may point to destroyed/mangled objects,
501 * and vice versa.
502 *
503 * - The dumped context of any threads that ran during the
504 * dump process may be unreliable.
505 *
506 * - The set of kernel page tables included in the dump likely
507 * won't correspond exactly to the copy of the dump bitset.
508 * This means some pages will be dumped without any way to
509 * locate them, and some pages may not have been dumped
510 * despite appearing as if they should.
511 */
512 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
513 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
514 state.msgbufp = &mb_copy;
515
516 sz = BITSET_SIZE(vm_page_dump_pages);
517 state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK);
518 BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset);
519 } else {
520 KASSERT(dumping, ("minidump invoked outside of doadump()"));
521
522 /* Use the globals. */
523 state.msgbufp = msgbufp;
524 state.dump_bitset = vm_page_dump;
525 }
526
527 error = cpu_minidumpsys(di, &state);
528 if (livedump) {
529 free(msg_ptr, M_TEMP);
530 free(state.dump_bitset, M_TEMP);
531 }
532
533 return (error);
534}
535#endif /* MINIDUMP_PAGE_TRACKING == 1 */
int cncheckc(void)
Definition: kern_cons.c:432
void dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, void *va __unused)
Definition: kern_dump.c:100
int dumpsys_generic(struct dumperinfo *di)
Definition: kern_dump.c:289
int dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
Definition: kern_dump.c:135
static size_t fragsz
Definition: kern_dump.c:59
int dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
Definition: kern_dump.c:175
int dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
Definition: kern_dump.c:231
struct dump_pa * dumpsys_gen_pa_next(struct dump_pa *mdp)
Definition: kern_dump.c:81
struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]
Definition: kern_dump.c:61
static int cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
Definition: kern_dump.c:250
#define PG2MB(pgs)
Definition: kern_dump.c:172
int dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
Definition: kern_dump.c:114
int dumpsys_gen_write_aux_headers(struct dumperinfo *di)
Definition: kern_dump.c:107
__FBSDID("$FreeBSD$")
void dumpsys_gen_wbinv_all(void)
Definition: kern_dump.c:94
static off_t fileofs
Definition: kern_dump.c:247
CTASSERT(sizeof(struct kerneldumpheader)==512)
void dumpsys_gen_pa_init(void)
Definition: kern_dump.c:65
#define MD_ALIGN(x)
Definition: kern_dump.c:56
static int cb_size(struct dump_pa *mdp, int seqnr, void *arg)
Definition: kern_dump.c:279
int dumpsys_buf_flush(struct dumperinfo *di)
Definition: kern_dump.c:159
void *() malloc(size_t size, struct malloc_type *mtp, int flags)
Definition: kern_malloc.c:632
void free(void *addr, struct malloc_type *mtp)
Definition: kern_malloc.c:907
int dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh)
int dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh)
int __read_mostly dumping
void dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, const char *magic, uint32_t archver, uint64_t dumplen)
int dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length)
void msgbuf_duplicate(struct msgbuf *src, struct msgbuf *dst, char *dst_msgptr)
Definition: subr_msgbuf.c:418
int msgbufsize
Definition: subr_param.c:94
int printf(const char *fmt,...)
Definition: subr_prf.c:397
struct msgbuf * msgbufp
Definition: subr_prf.c:128
size_t nbytes
Definition: vfs_extattr.c:718
struct stat * buf