42#include "opt_ktrace.h"
43#include "opt_kstack_pages.h"
47#include <sys/bitstring.h>
48#include <sys/sysproto.h>
49#include <sys/eventhandler.h>
51#include <sys/filedesc.h>
53#include <sys/kernel.h>
54#include <sys/kthread.h>
55#include <sys/sysctl.h>
57#include <sys/malloc.h>
62#include <sys/procdesc.h>
63#include <sys/ptrace.h>
65#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/vmmeter.h>
72#include <sys/ktrace.h>
73#include <sys/unistd.h>
76#include <sys/sysent.h>
77#include <sys/signalvar.h>
79#include <security/audit/audit.h>
80#include <security/mac/mac_framework.h>
85#include <vm/vm_extern.h>
89#include <sys/dtrace_bsd.h>
90dtrace_fork_func_t dtrace_fasttrap_fork;
96#ifndef _SYS_SYSPROTO_H_
109 bzero(&fr,
sizeof(fr));
110 fr.fr_flags = RFFDG | RFPROC;
112 error =
fork1(td, &fr);
114 td->td_retval[0] = pid;
115 td->td_retval[1] = 0;
127 bzero(&fr,
sizeof(fr));
128 fr.fr_flags = RFFDG | RFPROC | RFPROCDESC;
131 fr.fr_pd_flags = uap->flags;
132 AUDIT_ARG_FFLAGS(uap->flags);
138 error =
fork1(td, &fr);
140 td->td_retval[0] = pid;
141 td->td_retval[1] = 0;
142 error = copyout(&
fd, uap->fdp,
sizeof(
fd));
154 bzero(&fr,
sizeof(fr));
155 fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
157 error =
fork1(td, &fr);
159 td->td_retval[0] = pid;
160 td->td_retval[1] = 0;
172 if ((uap->flags & RFKERNELONLY) != 0)
175 if ((uap->flags & RFSPAWN) != 0 && uap->flags != RFSPAWN)
178 AUDIT_ARG_FFLAGS(uap->flags);
179 bzero(&fr,
sizeof(fr));
180 if ((uap->flags & RFSPAWN) != 0) {
181 fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
182 fr.fr_flags2 = FR2_DROPSIG_CAUGHT;
184 fr.fr_flags = uap->flags;
187 error =
fork1(td, &fr);
189 td->td_retval[0] = pid;
190 td->td_retval[1] = 0;
221 if (error == 0 && req->newptr != NULL) {
227 else if (pid < 0 || pid >
pid_max - 100)
241 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
243 "Random PID modulus. Special values: 0: disable, 1: choose random value");
272 if (
flags & RFHIGHPID) {
284 KASSERT(trypid != 2, (
"unexpectedly ran out of IDs"));
298 if ((
flags & RFHIGHPID) == 0)
313 KASSERT((
flags & RFPROC) == 0,
314 (
"fork_norfproc called with RFPROC set"));
322 if ((p1->p_flag & (P_HADTHREADS | P_SYSTEM)) == P_HADTHREADS &&
323 ((
flags & (RFCFDG | RFFDG)) != 0 || (
flags & RFMEM) == 0)) {
332 error = vm_forkproc(td, NULL, NULL, NULL,
flags);
339 if (
flags & RFCFDG) {
340 struct filedesc *fdtmp;
341 struct pwddesc *pdtmp;
342 pdtmp =
pdinit(td->td_proc->p_pd,
false);
359 if ((p1->p_flag & (P_HADTHREADS | P_SYSTEM)) == P_HADTHREADS &&
360 ((
flags & (RFCFDG | RFFDG)) != 0 || (
flags & RFMEM) == 0)) {
369do_fork(
struct thread *td,
struct fork_req *fr,
struct proc *p2,
struct thread *td2,
370 struct vmspace *vm2,
struct file *fp_procdesc)
372 struct proc *p1, *pptr;
374 struct filedesc_to_leader *fdtol;
376 struct sigacts *newsigacts;
381 bcopy(&p1->p_startcopy, &p2->p_startcopy,
382 __rangeof(
struct proc, p_startcopy, p_endcopy));
386 bzero(&p2->p_startzero,
387 __rangeof(
struct proc, p_startzero, p_endzero));
392 p2->p_state = PRS_NEW;
394 AUDIT_ARG_PID(p2->p_pid);
395 TSFORK(p2->p_pid, p1->p_pid);
398 LIST_INSERT_HEAD(&
allproc, p2, p_list);
402 sx_xlock(PIDHASHLOCK(p2->p_pid));
403 LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
404 sx_xunlock(PIDHASHLOCK(p2->p_pid));
411 if (fr->fr_flags & RFSIGSHARE)
419 if (fr->fr_flags & RFCFDG) {
420 pd =
pdinit(p1->p_pd,
false);
423 }
else if (fr->fr_flags & RFFDG) {
424 if (fr->fr_flags2 & FR2_SHARE_PATHS)
431 if (fr->fr_flags2 & FR2_SHARE_PATHS)
436 if (p1->p_fdtol == NULL)
439 if ((fr->fr_flags & RFTHREAD) != 0) {
445 FILEDESC_XLOCK(p1->p_fd);
446 fdtol->fdl_refcount++;
447 FILEDESC_XUNLOCK(p1->p_fd);
466 bzero(&td2->td_startzero,
467 __rangeof(
struct thread, td_startzero, td_endzero));
469 bcopy(&td->td_startcopy, &td2->td_startcopy,
470 __rangeof(
struct thread, td_startcopy, td_endcopy));
472 bcopy(&p2->p_comm, &td2->td_name,
sizeof(td2->td_name));
473 td2->td_sigstk = td->td_sigstk;
474 td2->td_flags = TDF_INMEM;
475 td2->td_lend_user_pri = PRI_MAX;
479 td2->td_vnet_lpush = NULL;
491 if ((fr->fr_flags & RFPPWAIT) != 0)
492 td->td_flags |= TDF_ASTPENDING;
499 p2->p_flag = P_INMEM;
500 p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE |
501 P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC |
502 P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE | P2_TRAPCAP |
503 P2_STKGAP_DISABLE | P2_STKGAP_DISABLE_EXEC | P2_NO_NEW_PRIVS |
504 P2_WXORX_DISABLE | P2_WXORX_ENABLE_EXEC);
505 p2->p_swtick =
ticks;
506 if (p1->p_flag & P_PROFIL)
509 if (fr->fr_flags & RFSIGSHARE) {
513 p2->p_sigacts = newsigacts;
514 if ((fr->fr_flags2 & (FR2_DROPSIG_CAUGHT | FR2_KPROC)) != 0) {
515 mtx_lock(&p2->p_sigacts->ps_mtx);
516 if ((fr->fr_flags2 & FR2_DROPSIG_CAUGHT) != 0)
518 if ((fr->fr_flags2 & FR2_KPROC) != 0)
519 p2->p_sigacts->ps_flag |= PS_NOCLDWAIT;
520 mtx_unlock(&p2->p_sigacts->ps_mtx);
524 if (fr->fr_flags & RFTSIGZMB)
525 p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
526 else if (fr->fr_flags & RFLINUXTHPN)
527 p2->p_sigparent = SIGUSR1;
529 p2->p_sigparent = SIGCHLD;
531 if ((fr->fr_flags2 & FR2_KPROC) != 0) {
532 p2->p_flag |= P_SYSTEM | P_KPROC;
533 td2->td_pflags |= TDP_KTHREAD;
536 p2->p_textvp = p1->p_textvp;
537 p2->p_textdvp = p1->p_textdvp;
542 if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
543 p2->p_flag |= P_PROTECTED;
544 p2->p_flag2 |= P2_INHERIT_PROTECTED;
563 if (p2->p_textvp != NULL)
565 if (p2->p_textdvp != NULL)
567 p2->p_binname = p1->p_binname == NULL ? NULL :
568 strdup(p1->p_binname, M_PARGS);
573 if ((fr->fr_flags & RFTHREAD) != 0) {
575 p2->p_peers = p1->p_peers;
577 p2->p_leader = p1->p_leader;
579 PROC_LOCK(p1->p_leader);
580 if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
581 PROC_UNLOCK(p1->p_leader);
599 PROC_UNLOCK(p1->p_leader);
606 PGRP_LOCK(p1->p_pgrp);
614 p2->p_flag |= p1->p_flag & P_SUGID;
615 td2->td_pflags |= (td->td_pflags & (TDP_ALTSTACK | TDP_SIGFASTBLOCK));
616 SESS_LOCK(p1->p_session);
617 if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
618 p2->p_flag |= P_CONTROLT;
619 SESS_UNLOCK(p1->p_session);
620 if (fr->fr_flags & RFPPWAIT)
621 p2->p_flag |= P_PPWAIT;
623 p2->p_pgrp = p1->p_pgrp;
624 LIST_INSERT_AFTER(p1, p2, p_pglist);
625 PGRP_UNLOCK(p1->p_pgrp);
626 LIST_INIT(&p2->p_children);
627 LIST_INIT(&p2->p_orphans);
629 callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);
630 TAILQ_INIT(&p2->p_kqtim_stop);
646 if ((fr->fr_flags & RFNOWAIT) != 0) {
650 p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
655 p2->p_oppid = pptr->p_pid;
656 LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
657 LIST_INIT(&p2->p_reaplist);
658 LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
659 if (p2->p_reaper == p1 && p1 !=
initproc) {
660 p2->p_reapsubtree = p2->p_pid;
666 p2->p_acflag = AFORK;
677 vm_forkproc(td, p2, td2, vm2, fr->fr_flags);
679 if (fr->fr_flags == (RFFDG | RFPROC)) {
681 VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
682 p2->p_vmspace->vm_ssize);
683 }
else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
684 VM_CNT_INC(v_vforks);
685 VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
686 p2->p_vmspace->vm_ssize);
687 }
else if (p1 == &
proc0) {
688 VM_CNT_INC(v_kthreads);
689 VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
690 p2->p_vmspace->vm_ssize);
692 VM_CNT_INC(v_rforks);
693 VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
694 p2->p_vmspace->vm_ssize);
702 if (fr->fr_flags & RFPROCDESC)
709 EVENTHANDLER_DIRECT_INVOKE(process_fork, p1, p2, fr->fr_flags);
718 p2->p_state = PRS_NORMAL;
728 if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
729 dtrace_fasttrap_fork(p1, p2);
731 if (fr->fr_flags & RFPPWAIT) {
732 td->td_pflags |= TDP_RFPPWAIT;
733 td->td_rfppwait_p = p2;
734 td->td_dbgflags |= TDB_VFORK;
748 SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);
750 if (fr->fr_flags & RFPROCDESC) {
752 fdrop(fp_procdesc, td);
760 if ((p1->p_ptevents & PTRACE_FORK) != 0) {
769 if ((p1->p_ptevents & PTRACE_FORK) != 0) {
777 td->td_dbgflags |= TDB_FORK;
778 td->td_dbg_forked = p2->p_pid;
779 td2->td_dbgflags |= TDB_STOPATFORK;
782 "do_fork: attaching to new child pid %d: oppid %d",
783 p2->p_pid, p2->p_oppid);
790 racct_proc_fork_done(p2);
792 if ((fr->fr_flags & RFSTOPPED) == 0) {
793 if (fr->fr_pidp != NULL)
794 *fr->fr_pidp = p2->p_pid;
812 MPASS(td->td_pflags & TDP_RFPPWAIT);
825 td->td_pflags &= ~TDP_RFPPWAIT;
826 p2 = td->td_rfppwait_p;
829 while (p2->p_flag & P_PPWAIT) {
839 cv_timedwait(&p2->p_pwait, &p2->p_mtx,
hz);
843 if (td->td_dbgflags & TDB_VFORK) {
845 if (p->p_ptevents & PTRACE_VFORK)
847 td->td_dbgflags &= ~TDB_VFORK;
853fork1(
struct thread *td,
struct fork_req *fr)
855 struct proc *p1, *newproc;
859 struct file *fp_procdesc;
860 vm_ooffset_t mem_charged;
861 int error, nprocs_new;
863 static struct timeval lastfail;
866 flags = fr->fr_flags;
867 pages = fr->fr_pages;
869 if ((
flags & RFSTOPPED) != 0)
870 MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL);
872 MPASS(fr->fr_procp == NULL);
875 if ((
flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
879 if ((
flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (
flags & RFTSIGZMB) == 0)
883 if ((
flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
887 if ((
flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(
flags) > _SIG_MAXSIG)
890 if ((
flags & RFPROCDESC) != 0) {
892 if ((
flags & RFPROC) == 0)
896 if (fr->fr_pd_fd == NULL)
900 if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0)
910 if ((
flags & RFPROC) == 0) {
911 if (fr->fr_procp != NULL)
912 *fr->fr_procp = NULL;
913 else if (fr->fr_pidp != NULL)
933 nprocs_new = atomic_fetchadd_int(&
nprocs, 1) + 1;
934 if (nprocs_new >=
maxproc - 10) {
940 printf(
"maxproc limit exceeded by uid %u "
941 "(pid %d); see tuning(7) and "
943 td->td_ucred->cr_ruid, p1->p_pid);
955 if (
flags & RFPROCDESC) {
957 fr->fr_pd_flags, fr->fr_pd_fcaps);
960 AUDIT_ARG_FD(*fr->fr_pd_fd);
967 newproc = uma_zalloc(
proc_zone, M_WAITOK);
968 td2 = FIRST_THREAD_IN_PROC(newproc);
978 if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
979 if (td2->td_kstack != 0)
980 vm_thread_dispose(td2);
988 if ((
flags & RFMEM) == 0) {
989 vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
994 if (!swap_reserve(mem_charged)) {
1001 swap_reserve_force(mem_charged);
1017 error = racct_proc_fork(p1, newproc);
1024 mac_proc_init(newproc);
1027 STAILQ_INIT(&newproc->p_ktr);
1033 cred = td->td_ucred;
1040 do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
1045 mac_proc_destroy(newproc);
1047 racct_proc_exit(newproc);
1054 if ((
flags & RFPROCDESC) != 0 && fp_procdesc != NULL) {
1055 fdclose(td, fp_procdesc, *fr->fr_pd_fd);
1056 fdrop(fp_procdesc, td);
1058 atomic_add_int(&
nprocs, -1);
1059 pause(
"fork",
hz / 2);
1068fork_exit(
void (*callout)(
void *,
struct trapframe *),
void *arg,
1069 struct trapframe *frame)
1075 kmsan_mark(frame,
sizeof(*frame), KMSAN_STATE_INITED);
1079 KASSERT(p->p_state == PRS_NORMAL, (
"executing process is still new"));
1081 CTR4(KTR_PROC,
"fork_exit: new thread %p (td_sched %p, pid %d, %s)",
1082 td, td_get_sched(td), p->p_pid, td->td_name);
1090 if ((dtd = PCPU_GET(deadthread))) {
1091 PCPU_SET(deadthread, NULL);
1101 KASSERT(callout != NULL, (
"NULL callout in fork_exit"));
1102 callout(arg, frame);
1108 if (p->p_flag & P_KPROC) {
1109 printf(
"Kernel thread \"%s\" (pid %d) exited prematurely.\n",
1110 td->td_name, p->p_pid);
1113 mtx_assert(&
Giant, MA_NOTOWNED);
1115 if (p->p_sysent->sv_schedtail != NULL)
1116 (p->p_sysent->sv_schedtail)(td);
1131 if (td->td_dbgflags & TDB_STOPATFORK) {
1133 if ((p->p_flag & P_TRACED) != 0) {
1137 td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP;
1139 td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
1144 td->td_dbgflags &= ~TDB_STOPATFORK;
1147 }
else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) {
1153 td->td_dbgflags |= TDB_SCX;
1154 if ((p->p_ptevents & PTRACE_SCX) != 0 ||
1155 (td->td_dbgflags & TDB_BORN) != 0)
1157 td->td_dbgflags &= ~(TDB_SCX | TDB_BORN);
1165 exit1(td, 0, SIGKILL);
1170 if (KTRPOINT(td, KTR_SYSRET))
1171 ktrsysret(SYS_fork, 0, 0);
void startprofclock(struct proc *p)
void pdescfree(struct thread *td)
struct filedesc * fdshare(struct filedesc *fdp)
struct pwddesc * pdinit(struct pwddesc *pdp, bool keeplock)
struct pwddesc * pdshare(struct pwddesc *pdp)
struct filedesc * fdcopy(struct filedesc *fdp)
struct filedesc * fdinit(void)
void pdunshare(struct thread *td)
void fdunshare(struct thread *td)
void fdclose(struct thread *td, struct file *fp, int idx)
struct filedesc_to_leader * filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
struct pwddesc * pdcopy(struct pwddesc *pdp)
void fdescfree(struct thread *td)
void knote_fork(struct knlist *list, int pid)
struct knlist * knlist_alloc(struct mtx *lock)
void exit1(struct thread *td, int rval, int signo)
void proc_reparent(struct proc *child, struct proc *parent, bool set_oppid)
int fork1(struct thread *td, struct fork_req *fr)
void fork_rfppwait(struct thread *td)
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, "Last used PID")
int sys_pdfork(struct thread *td, struct pdfork_args *uap)
static int fork_findpid(int flags)
int sys_vfork(struct thread *td, struct vfork_args *uap)
static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
static void do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2, struct vmspace *vm2, struct file *fp_procdesc)
SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus. Special values: 0: disable, 1: choose random value")
void fork_return(struct thread *td, struct trapframe *frame)
int sys_fork(struct thread *td, struct fork_args *uap)
int __exclusive_cache_line nprocs
SDT_PROVIDER_DECLARE(proc)
SDT_PROBE_DEFINE3(proc,,, create, "struct proc *", "struct proc *", "int")
bitstr_t proc_id_grpidmap
static int fork_norfproc(struct thread *td, int flags)
bitstr_t proc_id_sessidmap
int sys_rfork(struct thread *td, struct rfork_args *uap)
void fork_exit(void(*callout)(void *, struct trapframe *), void *arg, struct trapframe *frame)
void prison_proc_hold(struct prison *pr)
bool prison_isalive(const struct prison *pr)
struct mtx __exclusive_cache_line Giant
int priv_check_cred(struct ucred *cred, int priv)
void pstats_fork(struct pstats *src, struct pstats *dst)
void pargs_hold(struct pargs *pa)
struct sx __exclusive_cache_line proctree_lock
struct mtx __exclusive_cache_line ppeers_lock
struct mtx __exclusive_cache_line procid_lock
void proc_id_set_cond(int type, pid_t id)
struct sx __exclusive_cache_line allproc_lock
void proc_unset_cred(struct proc *p)
void proc_set_cred_init(struct proc *p, struct ucred *newcred)
rlim_t() lim_cur(struct thread *td, int which)
void lim_fork(struct proc *p1, struct proc *p2)
int chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
struct sigacts * sigacts_alloc(void)
void sigacts_copy(struct sigacts *dest, struct sigacts *src)
struct sigacts * sigacts_hold(struct sigacts *ps)
void kern_psignal(struct proc *p, int sig)
void sig_drop_caught(struct proc *p)
int ptracestop(struct thread *td, int sig, ksiginfo_t *si)
int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
int sysctl_handle_int(SYSCTL_HANDLER_ARGS)
void microuptime(struct timeval *tvp)
void tidhash_add(struct thread *td)
void thread_stash(struct thread *td)
void thread_cow_get_proc(struct thread *newtd, struct proc *p)
void proc_linkup(struct proc *p, struct thread *td)
int thread_suspend_check(int return_instead)
void thread_single_end(struct proc *p, int mode)
struct thread * thread_alloc(int pages)
int thread_single(struct proc *p, int mode)
int thread_alloc_stack(struct thread *td, int pages)
bool thread_suspend_check_needed(void)
int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
void sched_fork(struct thread *td, struct thread *childtd)
void sched_fork_exit(struct thread *td)
void sched_add(struct thread *td, int flags)
void kmsan_thread_alloc(struct thread *td)
void kmsan_mark(const void *addr, size_t size, uint8_t c)
int printf(const char *fmt,...)
void userret(struct thread *td, struct trapframe *frame)
int procdesc_falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags, struct filecaps *fcaps)
void procdesc_new(struct proc *p, int flags)
void procdesc_finit(struct procdesc *pdp, struct file *fp)
void proc_set_traced(struct proc *p, bool stop)
void vrefact(struct vnode *vp)