版权声明:本文为博主原创文章,未经博主允许不得转载。
目录(?)[-]
应用层怎样使用fork and execve
fork的返回值怎样区分0/pid
fork系统调用的入口参数来自哪里
how to implement do_fork
copy_process
How to check the kernel stack correctness
How to set the new process entry
new process entry point
sys_execve
对elf 格式文件而言
应用层怎样使用fork and execve
/**************************************************************************/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Minimal shell-style use of fork() + execvp(): the parent forks, the child
 * replaces itself with the requested command, and the parent waits for it.
 *
 * NOTE(review): arglist (argv-style command vector) and exitstatus are not
 * declared in this excerpt; they are expected to come from the surrounding
 * shell program.
 */
int main(void)
{
	pid_t ret_from_fork, mypid;

	mypid = getpid();
	printf("before:my pid is %d\n", (int)mypid);

	/*
	 * fork() returns in both processes, which continue from this same
	 * point: the parent receives the child's pid, the child receives 0
	 * (and -1 is returned to the parent on failure). The return value is
	 * what lets the two processes take different paths below.
	 */
	ret_from_fork = fork();

	switch (ret_from_fork) {
	case -1:
		perror(" fork failed");
		exit(1);
	case 0:
		/*
		 * Child: execvp() discards the current process image and runs
		 * the program named by arglist[0]. It only returns on failure.
		 */
		execvp(arglist[0], arglist);
		perror("execvp failed");
		exit(1);
	default:
		/*
		 * Parent (the shell): block until this particular child has
		 * finished before accepting more input. Stop on a real wait()
		 * error instead of spinning forever; retry only on EINTR.
		 */
		for (;;) {
			pid_t done = wait(&exitstatus);

			if (done == ret_from_fork)
				break;
			if (done == -1 && errno != EINTR) {
				perror("wait failed");
				exit(1);
			}
		}
	}
	return 0;
}
fork的返回值怎样区分0/pid
/*
*用户空间调用fork函数时,子进程得到的返回值0并不是内核的do_fork返回的;do_fork只会返回新进程的pid。
*fork的0返回值是内核在ret_from_fork之后、进入用户空间之前,RESTORE_ALL时pop到eax中的
*(这是x86上的说法;在ARM上对应copy_thread中的childregs->ARM_r0 = 0),然后库实现的fork将eax作为返回值。
*实际上,fork出的子进程在进入用户空间前从来不经过do_fork这条路,可以看到它的thread的eip是ret_from_fork,
*也就是说只要子进程开始运行,就会在switch_to中执行ret_from_fork,而从ret_from_fork顺着往下看,
*一直到RESTORE_ALL,从而返回用户空间。
**/
fork系统调用的入口,参数来自哪里?
入口参数保存在当前的内核栈中:结构为struct pt_regs
系统调用的入口:
arch/arm/kernel/entry-common.S
sys_fork_wrapper:
add r0, sp, #S_OFF
b sys_fork
ENDPROC(sys_fork_wrapper)
crash> dis sys_fork_wrapper
0xc000e800 <sys_fork_wrapper>: add r0, sp, #8
0xc000e804 <sys_fork_wrapper+4>: b 0xc0011d28 <sys_fork>
arch/arm/kernel/sys_arm.c
/*
 * sys_fork - fork system call entry, reached through sys_fork_wrapper.
 * @regs: saved user registers of the caller on the kernel stack
 *
 * With an MMU the work is delegated to do_fork() using SIGCHLD as the clone
 * flags and the caller's user stack pointer; without one, fork is not
 * supported and -EINVAL is returned.
 */
asmlinkage int sys_fork(struct pt_regs *regs)
{
	int ret = -EINVAL;	/* result when built without CONFIG_MMU */

#ifdef CONFIG_MMU
	ret = do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
#endif
	return ret;
}
crash> dis sys_fork
0xc0011d28 <sys_fork>: mov r12, sp
0xc0011d2c <sys_fork+4>: push {r11, r12, lr, pc}
0xc0011d30 <sys_fork+8>: sub r11, r12, #4
0xc0011d34 <sys_fork+12>: sub sp, sp, #8
0xc0011d38 <sys_fork+16>: mov r12, #0
0xc0011d3c <sys_fork+20>: mov r1, r0
0xc0011d40 <sys_fork+24>: ldr r1, [r1, #52] ; 0x34
0xc0011d44 <sys_fork+28>: mov r2, r0
0xc0011d48 <sys_fork+32>: mov r3, r12
0xc0011d4c <sys_fork+36>: mov r0, #17
0xc0011d50 <sys_fork+40>: str r12, [sp]
0xc0011d54 <sys_fork+44>: str r12, [sp, #4]
0xc0011d58 <sys_fork+48>: bl 0xc0027550 <do_fork>
0xc0011d5c <sys_fork+52>: sub sp, r11, #12
0xc0011d60 <sys_fork+56>: ldm sp, {r11, sp, pc}
/**************************************************************/
arch/arm/kernel/entry-header.S
@
@ Most of the stack format comes from struct pt_regs, but with
@ the addition of 8 bytes for storing syscall args 5 and 6.
@ This _must_ remain a multiple of 8 for EABI.
@
#define S_OFF 8
/**************************************************************/
/arch/arm/include/asm/ptrace.h
/*
* This struct defines the way the registers are stored on the
* stack during a system call. Note that sizeof(struct pt_regs)
* has to be a multiple of 8.
*/
struct pt_regs {
unsigned long uregs[18];
};
#define ARM_cpsr uregs[16]
#define ARM_pc uregs[15]
#define ARM_lr uregs[14]
#define ARM_sp uregs[13]
#define ARM_ip uregs[12]/*?*/
#define ARM_fp uregs[11]/*frame point*/
#define ARM_r10 uregs[10]
#define ARM_r9 uregs[9]
#define ARM_r8 uregs[8]
#define ARM_r7 uregs[7]
#define ARM_r6 uregs[6]
#define ARM_r5 uregs[5]
#define ARM_r4 uregs[4]
#define ARM_r3 uregs[3]
#define ARM_r2 uregs[2]
#define ARM_r1 uregs[1]
#define ARM_r0 uregs[0]
#define ARM_ORIG_r0 uregs[17]
how to implement do_fork
/**************************************************************/
do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
/*
 * Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts it.
 *
 * Returns the new task's pid as reported by task_pid_vnr(), or the negative
 * errno encoded in the error pointer when copy_process() fails.
 *
 * NOTE(review): abridged excerpt - parent_tidptr is accepted but not used
 * here.
 */
long do_fork(unsigned long clone_flags,
	     unsigned long stack_start,
	     struct pt_regs *regs,
	     unsigned long stack_size,
	     int __user *parent_tidptr,
	     int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/* Propagate the failure instead of returning an uninitialized nr. */
	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	nr = task_pid_vnr(p);
	wake_up_new_task(p);
	return nr;
}
copy_process
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
*
* It copies the registers, and all the appropriate
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*
* NOTE(review): this is an abridged excerpt. The "----" line marks code the
* article left out; the declaration of retval, the bad_fork_cleanup_* labels
* and the final return statement are not shown here.
*/
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace)
{/* Allocate memory for the related structures and initialize them from the original task's values */
struct task_struct *p;
/* Duplicate the calling task's task_struct and thread_info. */
p = dup_task_struct(current);
----
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p);
retval = perf_event_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
retval = audit_alloc(p);
if (retval)
goto bad_fork_cleanup_policy;
/* copy all the process information */
/* Each step below jumps to a cleanup label when it reports a non-zero retval. */
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_audit;
retval = copy_files(clone_flags, p);
if (retval)
goto bad_fork_cleanup_semundo;
retval = copy_fs(clone_flags, p);
if (retval)
goto bad_fork_cleanup_files;
retval = copy_sighand(clone_flags, p);
if (retval)
goto bad_fork_cleanup_fs;
retval = copy_signal(clone_flags, p);
if (retval)
goto bad_fork_cleanup_sighand;
retval = copy_mm(clone_flags, p);
if (retval)
goto bad_fork_cleanup_signal;
retval = copy_namespaces(clone_flags, p);
if (retval)
goto bad_fork_cleanup_mm;
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
/* Set up the child's saved user registers and its kernel entry point. */
retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
}
/*
 * dup_task_struct - allocate a task_struct plus thread_info for a new task
 * and seed them from @orig.
 *
 * Returns the new task_struct, or NULL when an allocation or the
 * architecture copy hook fails (everything allocated so far is released).
 */
static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	int node = tsk_fork_get_node(orig);
	struct task_struct *tsk;
	struct thread_info *ti;
	unsigned long *stackend;

	/* Memory for the task_struct first, then for its thread_info. */
	tsk = alloc_task_struct_node(node);
	if (!tsk)
		return NULL;

	ti = alloc_thread_info_node(tsk, node);
	if (!ti)
		goto free_tsk;

	/*
	 * The arch_dup_task_struct() quoted by the article is a plain
	 * structure copy (*dst = *src) that returns 0.
	 */
	if (arch_dup_task_struct(tsk, orig))
		goto free_ti;

	tsk->stack = ti;
	setup_thread_stack(tsk, orig);
	clear_user_return_notifier(tsk);
	clear_tsk_need_resched(tsk);

	/* Plant the magic word used for overflow detection. */
	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;

	/*
	 * Two references: one for us, one for whoever does the
	 * "release_task()" (usually parent).
	 */
	atomic_set(&tsk->usage, 2);
	tsk->splice_pipe = NULL;
	account_kernel_stack(ti, 1);
	return tsk;

free_ti:
	free_thread_info(ti);
free_tsk:
	free_task_struct(tsk);
	return NULL;
}
How to check the kernel stack correctness
static inline unsigned long *end_of_stack(struct task_struct *p)
{
return (unsigned long *)(task_thread_info(p) + 1);
}
#define STACK_END_MAGIC 0x57AC6E9D
COMMAND: "dwc_otg"
TASK: ee1a3420 [THREAD_INFO: ee1c6000]
CPU: 0
STATE: TASK_INTERRUPTIBLE
crash> thread_info ee1c6000
struct thread_info {
flags = 0,
preempt_count = 1,
addr_limit = 0,
task = 0xee1a3420,
crash> struct task_struct.stack 0xee1a3420
stack = 0xee1c6000
crash> bt -r
PID: 760 TASK: ee1a3420 CPU: 0 COMMAND: "dwc_otg"
ee1c6000: 00000000 00000001 00000000 ee1a3420
ee1c6010: default_exec_domain 00000000 00000015 ee1a3420
ee1c6020: c0f88420 init_task ee1c6000 00000000
ee1c6030: 00000001 init_mm ee1c7f5c ee1c7f18
ee1c6040: __schedule+1412 00000000 00000000 00000000
ee1c6050: 00000000 00000000 00000000 00000000
ee1c6060: 00000000 00000000 00000000 00000000
ee1c6070: 00000000 00000000 00000000 00000000
ee1c6080: 00000000 00000000 00000000 00000000
ee1c6090: 00000000 00000000 00000000 00000000
ee1c60a0: 00000000 00000000 00000000 00000000
ee1c60b0: 00000000 00000000 00000000 00000000
ee1c60c0: 00000000 00000000 00000000 00000000
ee1c60d0: 00000000 00000000 00000000 00000000
ee1c60e0: 00000000 00000000 00000000 00000000
ee1c60f0: 00000000 00000000 00000000 00000000
ee1c6100: 00000000 00000000 00000000 00000000
ee1c6110: 00000000 00000000 00000000 00000000
ee1c6120: 00000000 00000000 00000000 00000000
ee1c6130: 00000000 00000000 00000000 00000000
ee1c6140: 00000000 00000000 00000000 00000000
ee1c6150: 00000000 00000000 00000000 00000000
ee1c6160: 00000000 00000000 00000000 00000000
ee1c6170: 00000000 00000000 00000000 00000000
ee1c6180: 00000000 00000000 00000000 00000000
ee1c6190: 00000000 00000000 00000000 00000000
ee1c61a0: 00000000 00000000 00000000 00000000
ee1c61b0: 00000000 00000000 00000000 00000000
ee1c61c0: 00000000 00000000 00000000 00000000
ee1c61d0: 00000000 00000000 00000000 00000000
ee1c61e0: 00000000 00000000 00000000 00000000
ee1c61f0: 00000000 00000000 00000000 00000000
ee1c6200: 00000000 00000000 00000000 00000000
ee1c6210: 00000000 00000000 00000000 00000000
ee1c6220: 00000000 00000000 00000000 00000000
ee1c6230: 00000000 00000000 00000000 00000000
ee1c6240: 00000000 00000000 00000000 00000000
ee1c6250: 00000000 00000000 00000000 00000000
ee1c6260: 00000000 00000000 00000000 00000000
ee1c6270: 00000000 00000000 00000000 00000000
ee1c6280: 00000000 00000000 00000000 00000000
ee1c6290: 00000000 00000000 00000000 00000000
ee1c62a0: 00000000 00000000 00000000 00000000
ee1c62b0: 00000000 00000000 00000000 00000000
ee1c62c0: 00000000 00000000 do_no_restart_syscall 00000000
ee1c62d0: 00000000 00000000 00000000 00000000
ee1c62e0: 00000000 00000000 00000000 00000000
ee1c62f0: 57ac6e9d/*STACK_END_MAGIC*/
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
How to set the new process entry
/*
 * copy_thread - prepare the register state of a freshly duplicated task.
 *
 * The child's pt_regs become a copy of the parent's @regs, except that r0 is
 * forced to 0 (the value fork() returns in the child) and sp is set to
 * @stack_start. The saved kernel context is cleared and pointed at
 * ret_from_fork with the child's pt_regs as its stack pointer.
 * Always returns 0.
 */
int
copy_thread(unsigned long clone_flags, unsigned long stack_start,
	    unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *child = task_pt_regs(p);
	struct thread_info *ti = task_thread_info(p);

	/* User-visible registers: same as the parent apart from r0 and sp. */
	*child = *regs;
	child->ARM_r0 = 0;
	child->ARM_sp = stack_start;

	/* Kernel-side context restored when the child is first switched to. */
	memset(&ti->cpu_context, 0, sizeof(ti->cpu_context));
	ti->cpu_context.pc = (unsigned long)ret_from_fork;
	ti->cpu_context.sp = (unsigned long)child;

	clear_ptrace_hw_breakpoint(p);

	/* With CLONE_SETTLS the TLS value is taken from the caller's r3. */
	if (clone_flags & CLONE_SETTLS)
		ti->tp_value = regs->ARM_r3;

	thread_notify(THREAD_NOTIFY_COPY, ti);
	return 0;
}
/*
* Per the article, a struct pt_regs sits at the very end of the 8K kernel
* stack. copy_thread() fills it in with the stack to use after returning to
* user space and the return address.
*
* task_pt_regs(p) evaluates to (THREAD_START_SP + task_stack_page(p)), cast
* to struct pt_regs *, minus one element.
*/
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
/*
* low level task data that entry.S needs immediate access to.
* __switch_to() assumes cpu_context follows immediately after cpu_domain.
*/
crash> struct thread_info -o
struct thread_info {
[0] unsigned long flags;
[4] int preempt_count;
[8] mm_segment_t addr_limit;
[12] struct task_struct *task;
[16] struct exec_domain *exec_domain;
[20] __u32 cpu;
[24] __u32 cpu_domain;
[28] struct cpu_context_save cpu_context;
[76] __u32 syscall;
[80] __u8 used_cp[16];
[96] unsigned long tp_value;
[100] struct crunch_state crunchstate;
[288] union fp_state fpstate;
[432] union vfp_state vfpstate;
[712] struct restart_block restart_block;
}
new process entry point
/*
* This is how we return from a fork.
*
* copy_thread() stores this symbol's address in the child's cpu_context.pc.
* After calling schedule_tail, it leaves through ret_slow_syscall, calling
* syscall_trace first when the _TIF_SYSCALL_WORK test on the thread flags
* is non-zero.
*/
ENTRY(ret_from_fork)
bl schedule_tail @ call schedule_tail before anything else
get_thread_info tsk @ presumably loads the current thread_info pointer into tsk - macro defined elsewhere
ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing
mov why, #1 @ NOTE(review): "why" is a register alias defined elsewhere; meaning of 1 not visible here
tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
beq ret_slow_syscall @ no flag set: go straight to the return path
mov r1, sp @ second argument: current stack pointer
mov r0, #1 @ trace exit [IP = 1]
bl syscall_trace
b ret_slow_syscall
ENDPROC(ret_from_fork)
sys_execve
/**************************************************************/
arch/arm/kernel/sys_arm.c
/*
 * sys_execve - execve system call entry, reached through a small wrapper.
 * @filenamei: user-space pointer to the program path
 * @argv:      user-space argument vector
 * @envp:      user-space environment vector
 * @regs:      saved user registers of the caller
 *
 * Copies the path in with getname(), hands it to do_execve() and releases
 * it again. Returns do_execve()'s result, or the error carried by the
 * getname() error pointer.
 */
asmlinkage int sys_execve(const char __user *filenamei,
			  const char __user *const __user *argv,
			  const char __user *const __user *envp, struct pt_regs *regs)
{
	char *path = getname(filenamei);
	int rc;

	if (IS_ERR(path))
		return PTR_ERR(path);

	rc = do_execve(path, argv, envp, regs);
	putname(path);
	return rc;
}
/*
 * do_execve - wrap the native user-space argv/envp pointers in
 * struct user_arg_ptr and forward everything to do_execve_common().
 */
int do_execve(const char *filename,
	      const char __user *const __user *__argv,
	      const char __user *const __user *__envp,
	      struct pt_regs *regs)
{
	struct user_arg_ptr native_argv = { .ptr.native = __argv };
	struct user_arg_ptr native_envp = { .ptr.native = __envp };

	return do_execve_common(filename, native_argv, native_envp, regs);
}
/**************************************************************/
/*
* sys_execve() executes a new program.
*/
static int do_execve_common(const char *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
file = open_exec(filename);
sched_exec();
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
bprm_mm_init(bprm);
bprm->argc = count(argv, MAX_ARG_STRINGS);
bprm->envc = count(envp, MAX_ARG_STRINGS);
prepare_binprm(bprm);
search_binary_handler(bprm,regs);
}
/*
* Create a new mm_struct and populate it with a temporary stack
* vm_area_struct. We don't have enough context at this point to set the stack
* flags, permissions, and offset, so we use temporary values. We'll update
* them later in setup_arg_pages().
*/
int bprm_mm_init(struct linux_binprm *bprm)
{
int err;
struct mm_struct *mm = NULL;
/*mm_struct*/
bprm->mm = mm = mm_alloc();
/*vma_struct*/
err = __bprm_mm_init(bprm);
return 0;
}
/*
* cycle the list of binary formats handler, until one recognizes the image
*/
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
struct linux_binfmt *fmt;
list_for_each_entry(fmt, &formats, lh)
int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
fn(bprm, regs);
}
对elf 格式文件而言
fs/binfmt_elf.c
/*
* Binary-format descriptor for ELF images. search_binary_handler() walks the
* list of formats and calls each entry's load_binary hook, which for this
* descriptor is load_elf_binary().
* NOTE(review): where this descriptor is registered is not shown in this
* excerpt.
*/
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary, /* invoked through fmt->load_binary */
.load_shlib = load_elf_library,
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
};
/*
* load_binary hook of elf_format.
*
* NOTE(review): heavily abridged. The "----" line marks omitted code, the
* kernel_read() call is shown without its arguments, elf_entry is not
* declared in this excerpt and no return statement is shown.
*/
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
----
kernel_read();
/* Rewrite the saved user registers: pc = elf_entry, sp = bprm->p (see start_thread). */
start_thread(regs, elf_entry, bprm->p);
}
/*
* start_thread(regs, pc, sp) - reset the saved user registers in regs.
*
* Zeroes every uregs[] slot, then:
* - sets cpsr to USR_MODE when current->personality has ADDR_LIMIT_32BIT,
*   otherwise USR26_MODE;
* - ORs in PSR_T_BIT when elf_hwcap has HWCAP_THUMB and bit 0 of pc is set;
* - ORs in PSR_ENDSTATE;
* - stores pc with bit 0 cleared, and sp;
* - loads r0, r1 and r2 from the first three words found at sp.
*
* NOTE(review): the arguments are expanded without parentheses and more than
* once, so callers should pass simple expressions without side effects.
*/
#define start_thread(regs,pc,sp) \
({ \
unsigned long *stack = (unsigned long *)sp; \
memset(regs->uregs, 0, sizeof(regs->uregs)); \
if (current->personality & ADDR_LIMIT_32BIT) \
regs->ARM_cpsr = USR_MODE; \
else \
regs->ARM_cpsr = USR26_MODE; \
if (elf_hwcap & HWCAP_THUMB && pc & 1) \
regs->ARM_cpsr |= PSR_T_BIT; \
regs->ARM_cpsr |= PSR_ENDSTATE; \
regs->ARM_pc = pc & ~1; /* pc */ \
regs->ARM_sp = sp; /* sp */ \
regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
})
总结:调用execve时,运行的已经是fork新创建的进程;并不是先在旧(父)进程中加载程序、再去运行新进程。
版权声明:本文为博主原创文章,未经博主允许不得转载。
- How to check the kernel stack correctness
- How to set the new process entry
- new process entry point
/**************************************************************************/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Minimal shell-style use of fork() + execvp(): the parent forks, the child
 * replaces itself with the requested command, and the parent waits for it.
 *
 * NOTE(review): arglist (argv-style command vector) and exitstatus are not
 * declared in this excerpt; they are expected to come from the surrounding
 * shell program.
 */
int main(void)
{
	pid_t ret_from_fork, mypid;

	mypid = getpid();
	printf("before:my pid is %d\n", (int)mypid);

	/*
	 * fork() returns in both processes, which continue from this same
	 * point: the parent receives the child's pid, the child receives 0
	 * (and -1 is returned to the parent on failure). The return value is
	 * what lets the two processes take different paths below.
	 */
	ret_from_fork = fork();

	switch (ret_from_fork) {
	case -1:
		perror(" fork failed");
		exit(1);
	case 0:
		/*
		 * Child: execvp() discards the current process image and runs
		 * the program named by arglist[0]. It only returns on failure.
		 */
		execvp(arglist[0], arglist);
		perror("execvp failed");
		exit(1);
	default:
		/*
		 * Parent (the shell): block until this particular child has
		 * finished before accepting more input. Stop on a real wait()
		 * error instead of spinning forever; retry only on EINTR.
		 */
		for (;;) {
			pid_t done = wait(&exitstatus);

			if (done == ret_from_fork)
				break;
			if (done == -1 && errno != EINTR) {
				perror("wait failed");
				exit(1);
			}
		}
	}
	return 0;
}
fork的返回值怎样区分0/pid
/*
*用户空间调用fork函数时,子进程得到的返回值0并不是内核的do_fork返回的;do_fork只会返回新进程的pid。
*fork的0返回值是内核在ret_from_fork之后、进入用户空间之前,RESTORE_ALL时pop到eax中的
*(这是x86上的说法;在ARM上对应copy_thread中的childregs->ARM_r0 = 0),然后库实现的fork将eax作为返回值。
*实际上,fork出的子进程在进入用户空间前从来不经过do_fork这条路,可以看到它的thread的eip是ret_from_fork,
*也就是说只要子进程开始运行,就会在switch_to中执行ret_from_fork,而从ret_from_fork顺着往下看,
*一直到RESTORE_ALL,从而返回用户空间。
**/
入口参数保存在当前的内核栈中:结构为struct pt_regs
系统调用的入口:
arch/arm/kernel/entry-common.S
sys_fork_wrapper:
add r0, sp, #S_OFF
b sys_fork
ENDPROC(sys_fork_wrapper)
crash> dis sys_fork_wrapper
0xc000e800 <sys_fork_wrapper>: add r0, sp, #8
0xc000e804 <sys_fork_wrapper+4>: b 0xc0011d28 <sys_fork>
arch/arm/kernel/sys_arm.c
/*
 * sys_fork - fork system call entry, reached through sys_fork_wrapper.
 * @regs: saved user registers of the caller on the kernel stack
 *
 * With an MMU the work is delegated to do_fork() using SIGCHLD as the clone
 * flags and the caller's user stack pointer; without one, fork is not
 * supported and -EINVAL is returned.
 */
asmlinkage int sys_fork(struct pt_regs *regs)
{
	int ret = -EINVAL;	/* result when built without CONFIG_MMU */

#ifdef CONFIG_MMU
	ret = do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
#endif
	return ret;
}
crash> dis sys_fork
0xc0011d28 <sys_fork>: mov r12, sp
0xc0011d2c <sys_fork+4>: push {r11, r12, lr, pc}
0xc0011d30 <sys_fork+8>: sub r11, r12, #4
0xc0011d34 <sys_fork+12>: sub sp, sp, #8
0xc0011d38 <sys_fork+16>: mov r12, #0
0xc0011d3c <sys_fork+20>: mov r1, r0
0xc0011d40 <sys_fork+24>: ldr r1, [r1, #52] ; 0x34
0xc0011d44 <sys_fork+28>: mov r2, r0
0xc0011d48 <sys_fork+32>: mov r3, r12
0xc0011d4c <sys_fork+36>: mov r0, #17
0xc0011d50 <sys_fork+40>: str r12, [sp]
0xc0011d54 <sys_fork+44>: str r12, [sp, #4]
0xc0011d58 <sys_fork+48>: bl 0xc0027550 <do_fork>
0xc0011d5c <sys_fork+52>: sub sp, r11, #12
0xc0011d60 <sys_fork+56>: ldm sp, {r11, sp, pc}
/**************************************************************/
arch/arm/kernel/entry-header.S
@
@ Most of the stack format comes from struct pt_regs, but with
@ the addition of 8 bytes for storing syscall args 5 and 6.
@ This _must_ remain a multiple of 8 for EABI.
@
#define S_OFF 8
/**************************************************************/
/arch/arm/include/asm/ptrace.h
/*
* This struct defines the way the registers are stored on the
* stack during a system call. Note that sizeof(struct pt_regs)
* has to be a multiple of 8.
*/
struct pt_regs {
unsigned long uregs[18];
};
#define ARM_cpsr uregs[16]
#define ARM_pc uregs[15]
#define ARM_lr uregs[14]
#define ARM_sp uregs[13]
#define ARM_ip uregs[12]/*?*/
#define ARM_fp uregs[11]/*frame point*/
#define ARM_r10 uregs[10]
#define ARM_r9 uregs[9]
#define ARM_r8 uregs[8]
#define ARM_r7 uregs[7]
#define ARM_r6 uregs[6]
#define ARM_r5 uregs[5]
#define ARM_r4 uregs[4]
#define ARM_r3 uregs[3]
#define ARM_r2 uregs[2]
#define ARM_r1 uregs[1]
#define ARM_r0 uregs[0]
#define ARM_ORIG_r0 uregs[17]
how to implement do_fork
/**************************************************************/
do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
/*
 * Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts it.
 *
 * Returns the new task's pid as reported by task_pid_vnr(), or the negative
 * errno encoded in the error pointer when copy_process() fails.
 *
 * NOTE(review): abridged excerpt - parent_tidptr is accepted but not used
 * here.
 */
long do_fork(unsigned long clone_flags,
	     unsigned long stack_start,
	     struct pt_regs *regs,
	     unsigned long stack_size,
	     int __user *parent_tidptr,
	     int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/* Propagate the failure instead of returning an uninitialized nr. */
	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	nr = task_pid_vnr(p);
	wake_up_new_task(p);
	return nr;
}
copy_process
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
*
* It copies the registers, and all the appropriate
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*
* NOTE(review): this is an abridged excerpt. The "----" line marks code the
* article left out; the declaration of retval, the bad_fork_cleanup_* labels
* and the final return statement are not shown here.
*/
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace)
{/* Allocate memory for the related structures and initialize them from the original task's values */
struct task_struct *p;
/* Duplicate the calling task's task_struct and thread_info. */
p = dup_task_struct(current);
----
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p);
retval = perf_event_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
retval = audit_alloc(p);
if (retval)
goto bad_fork_cleanup_policy;
/* copy all the process information */
/* Each step below jumps to a cleanup label when it reports a non-zero retval. */
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_audit;
retval = copy_files(clone_flags, p);
if (retval)
goto bad_fork_cleanup_semundo;
retval = copy_fs(clone_flags, p);
if (retval)
goto bad_fork_cleanup_files;
retval = copy_sighand(clone_flags, p);
if (retval)
goto bad_fork_cleanup_fs;
retval = copy_signal(clone_flags, p);
if (retval)
goto bad_fork_cleanup_sighand;
retval = copy_mm(clone_flags, p);
if (retval)
goto bad_fork_cleanup_signal;
retval = copy_namespaces(clone_flags, p);
if (retval)
goto bad_fork_cleanup_mm;
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
/* Set up the child's saved user registers and its kernel entry point. */
retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
}
/*
 * dup_task_struct - allocate a task_struct plus thread_info for a new task
 * and seed them from @orig.
 *
 * Returns the new task_struct, or NULL when an allocation or the
 * architecture copy hook fails (everything allocated so far is released).
 */
static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	int node = tsk_fork_get_node(orig);
	struct task_struct *tsk;
	struct thread_info *ti;
	unsigned long *stackend;

	/* Memory for the task_struct first, then for its thread_info. */
	tsk = alloc_task_struct_node(node);
	if (!tsk)
		return NULL;

	ti = alloc_thread_info_node(tsk, node);
	if (!ti)
		goto free_tsk;

	/*
	 * The arch_dup_task_struct() quoted by the article is a plain
	 * structure copy (*dst = *src) that returns 0.
	 */
	if (arch_dup_task_struct(tsk, orig))
		goto free_ti;

	tsk->stack = ti;
	setup_thread_stack(tsk, orig);
	clear_user_return_notifier(tsk);
	clear_tsk_need_resched(tsk);

	/* Plant the magic word used for overflow detection. */
	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;

	/*
	 * Two references: one for us, one for whoever does the
	 * "release_task()" (usually parent).
	 */
	atomic_set(&tsk->usage, 2);
	tsk->splice_pipe = NULL;
	account_kernel_stack(ti, 1);
	return tsk;

free_ti:
	free_thread_info(ti);
free_tsk:
	free_task_struct(tsk);
	return NULL;
}
How to check the kernel stack correctness
static inline unsigned long *end_of_stack(struct task_struct *p)
{
return (unsigned long *)(task_thread_info(p) + 1);
}
#define STACK_END_MAGIC 0x57AC6E9D
COMMAND: "dwc_otg"
TASK: ee1a3420 [THREAD_INFO: ee1c6000]
CPU: 0
STATE: TASK_INTERRUPTIBLE
crash> thread_info ee1c6000
struct thread_info {
flags = 0,
preempt_count = 1,
addr_limit = 0,
task = 0xee1a3420,
crash> struct task_struct.stack 0xee1a3420
stack = 0xee1c6000
crash> bt -r
PID: 760 TASK: ee1a3420 CPU: 0 COMMAND: "dwc_otg"
ee1c6000: 00000000 00000001 00000000 ee1a3420
ee1c6010: default_exec_domain 00000000 00000015 ee1a3420
ee1c6020: c0f88420 init_task ee1c6000 00000000
ee1c6030: 00000001 init_mm ee1c7f5c ee1c7f18
ee1c6040: __schedule+1412 00000000 00000000 00000000
ee1c6050: 00000000 00000000 00000000 00000000
ee1c6060: 00000000 00000000 00000000 00000000
ee1c6070: 00000000 00000000 00000000 00000000
ee1c6080: 00000000 00000000 00000000 00000000
ee1c6090: 00000000 00000000 00000000 00000000
ee1c60a0: 00000000 00000000 00000000 00000000
ee1c60b0: 00000000 00000000 00000000 00000000
ee1c60c0: 00000000 00000000 00000000 00000000
ee1c60d0: 00000000 00000000 00000000 00000000
ee1c60e0: 00000000 00000000 00000000 00000000
ee1c60f0: 00000000 00000000 00000000 00000000
ee1c6100: 00000000 00000000 00000000 00000000
ee1c6110: 00000000 00000000 00000000 00000000
ee1c6120: 00000000 00000000 00000000 00000000
ee1c6130: 00000000 00000000 00000000 00000000
ee1c6140: 00000000 00000000 00000000 00000000
ee1c6150: 00000000 00000000 00000000 00000000
ee1c6160: 00000000 00000000 00000000 00000000
ee1c6170: 00000000 00000000 00000000 00000000
ee1c6180: 00000000 00000000 00000000 00000000
ee1c6190: 00000000 00000000 00000000 00000000
ee1c61a0: 00000000 00000000 00000000 00000000
ee1c61b0: 00000000 00000000 00000000 00000000
ee1c61c0: 00000000 00000000 00000000 00000000
ee1c61d0: 00000000 00000000 00000000 00000000
ee1c61e0: 00000000 00000000 00000000 00000000
ee1c61f0: 00000000 00000000 00000000 00000000
ee1c6200: 00000000 00000000 00000000 00000000
ee1c6210: 00000000 00000000 00000000 00000000
ee1c6220: 00000000 00000000 00000000 00000000
ee1c6230: 00000000 00000000 00000000 00000000
ee1c6240: 00000000 00000000 00000000 00000000
ee1c6250: 00000000 00000000 00000000 00000000
ee1c6260: 00000000 00000000 00000000 00000000
ee1c6270: 00000000 00000000 00000000 00000000
ee1c6280: 00000000 00000000 00000000 00000000
ee1c6290: 00000000 00000000 00000000 00000000
ee1c62a0: 00000000 00000000 00000000 00000000
ee1c62b0: 00000000 00000000 00000000 00000000
ee1c62c0: 00000000 00000000 do_no_restart_syscall 00000000
ee1c62d0: 00000000 00000000 00000000 00000000
ee1c62e0: 00000000 00000000 00000000 00000000
ee1c62f0: 57ac6e9d/*STACK_END_MAGIC*/
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
How to set the new process entry
/*
 * copy_thread - prepare the register state of a freshly duplicated task.
 *
 * The child's pt_regs become a copy of the parent's @regs, except that r0 is
 * forced to 0 (the value fork() returns in the child) and sp is set to
 * @stack_start. The saved kernel context is cleared and pointed at
 * ret_from_fork with the child's pt_regs as its stack pointer.
 * Always returns 0.
 */
int
copy_thread(unsigned long clone_flags, unsigned long stack_start,
	    unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *child = task_pt_regs(p);
	struct thread_info *ti = task_thread_info(p);

	/* User-visible registers: same as the parent apart from r0 and sp. */
	*child = *regs;
	child->ARM_r0 = 0;
	child->ARM_sp = stack_start;

	/* Kernel-side context restored when the child is first switched to. */
	memset(&ti->cpu_context, 0, sizeof(ti->cpu_context));
	ti->cpu_context.pc = (unsigned long)ret_from_fork;
	ti->cpu_context.sp = (unsigned long)child;

	clear_ptrace_hw_breakpoint(p);

	/* With CLONE_SETTLS the TLS value is taken from the caller's r3. */
	if (clone_flags & CLONE_SETTLS)
		ti->tp_value = regs->ARM_r3;

	thread_notify(THREAD_NOTIFY_COPY, ti);
	return 0;
}
/*
* Per the article, a struct pt_regs sits at the very end of the 8K kernel
* stack. copy_thread() fills it in with the stack to use after returning to
* user space and the return address.
*
* task_pt_regs(p) evaluates to (THREAD_START_SP + task_stack_page(p)), cast
* to struct pt_regs *, minus one element.
*/
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
/*
* low level task data that entry.S needs immediate access to.
* __switch_to() assumes cpu_context follows immediately after cpu_domain.
*/
crash> struct thread_info -o
struct thread_info {
[0] unsigned long flags;
[4] int preempt_count;
[8] mm_segment_t addr_limit;
[12] struct task_struct *task;
[16] struct exec_domain *exec_domain;
[20] __u32 cpu;
[24] __u32 cpu_domain;
[28] struct cpu_context_save cpu_context;
[76] __u32 syscall;
[80] __u8 used_cp[16];
[96] unsigned long tp_value;
[100] struct crunch_state crunchstate;
[288] union fp_state fpstate;
[432] union vfp_state vfpstate;
[712] struct restart_block restart_block;
}
new process entry point
/*
* This is how we return from a fork.
*
* copy_thread() stores this symbol's address in the child's cpu_context.pc.
* After calling schedule_tail, it leaves through ret_slow_syscall, calling
* syscall_trace first when the _TIF_SYSCALL_WORK test on the thread flags
* is non-zero.
*/
ENTRY(ret_from_fork)
bl schedule_tail @ call schedule_tail before anything else
get_thread_info tsk @ presumably loads the current thread_info pointer into tsk - macro defined elsewhere
ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing
mov why, #1 @ NOTE(review): "why" is a register alias defined elsewhere; meaning of 1 not visible here
tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
beq ret_slow_syscall @ no flag set: go straight to the return path
mov r1, sp @ second argument: current stack pointer
mov r0, #1 @ trace exit [IP = 1]
bl syscall_trace
b ret_slow_syscall
ENDPROC(ret_from_fork)
sys_execve
/**************************************************************/
arch/arm/kernel/sys_arm.c
/*
 * sys_execve - execve system call entry, reached through a small wrapper.
 * @filenamei: user-space pointer to the program path
 * @argv:      user-space argument vector
 * @envp:      user-space environment vector
 * @regs:      saved user registers of the caller
 *
 * Copies the path in with getname(), hands it to do_execve() and releases
 * it again. Returns do_execve()'s result, or the error carried by the
 * getname() error pointer.
 */
asmlinkage int sys_execve(const char __user *filenamei,
			  const char __user *const __user *argv,
			  const char __user *const __user *envp, struct pt_regs *regs)
{
	char *path = getname(filenamei);
	int rc;

	if (IS_ERR(path))
		return PTR_ERR(path);

	rc = do_execve(path, argv, envp, regs);
	putname(path);
	return rc;
}
/*
 * do_execve - wrap the native user-space argv/envp pointers in
 * struct user_arg_ptr and forward everything to do_execve_common().
 */
int do_execve(const char *filename,
	      const char __user *const __user *__argv,
	      const char __user *const __user *__envp,
	      struct pt_regs *regs)
{
	struct user_arg_ptr native_argv = { .ptr.native = __argv };
	struct user_arg_ptr native_envp = { .ptr.native = __envp };

	return do_execve_common(filename, native_argv, native_envp, regs);
}
/**************************************************************/
/*
* sys_execve() executes a new program.
*/
static int do_execve_common(const char *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
file = open_exec(filename);
sched_exec();
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
bprm_mm_init(bprm);
bprm->argc = count(argv, MAX_ARG_STRINGS);
bprm->envc = count(envp, MAX_ARG_STRINGS);
prepare_binprm(bprm);
search_binary_handler(bprm,regs);
}
/*
* Create a new mm_struct and populate it with a temporary stack
* vm_area_struct. We don't have enough context at this point to set the stack
* flags, permissions, and offset, so we use temporary values. We'll update
* them later in setup_arg_pages().
*/
int bprm_mm_init(struct linux_binprm *bprm)
{
int err;
struct mm_struct *mm = NULL;
/*mm_struct*/
bprm->mm = mm = mm_alloc();
/*vma_struct*/
err = __bprm_mm_init(bprm);
return 0;
}
/*
* cycle the list of binary formats handler, until one recognizes the image
*/
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
struct linux_binfmt *fmt;
list_for_each_entry(fmt, &formats, lh)
int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
fn(bprm, regs);
}
对elf 格式文件而言
fs/binfmt_elf.c
/*
* Binary-format descriptor for ELF images. search_binary_handler() walks the
* list of formats and calls each entry's load_binary hook, which for this
* descriptor is load_elf_binary().
* NOTE(review): where this descriptor is registered is not shown in this
* excerpt.
*/
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary, /* invoked through fmt->load_binary */
.load_shlib = load_elf_library,
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
};
/*
* load_binary hook of elf_format.
*
* NOTE(review): heavily abridged. The "----" line marks omitted code, the
* kernel_read() call is shown without its arguments, elf_entry is not
* declared in this excerpt and no return statement is shown.
*/
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
----
kernel_read();
/* Rewrite the saved user registers: pc = elf_entry, sp = bprm->p (see start_thread). */
start_thread(regs, elf_entry, bprm->p);
}
/*
* start_thread(regs, pc, sp) - reset the saved user registers in regs.
*
* Zeroes every uregs[] slot, then:
* - sets cpsr to USR_MODE when current->personality has ADDR_LIMIT_32BIT,
*   otherwise USR26_MODE;
* - ORs in PSR_T_BIT when elf_hwcap has HWCAP_THUMB and bit 0 of pc is set;
* - ORs in PSR_ENDSTATE;
* - stores pc with bit 0 cleared, and sp;
* - loads r0, r1 and r2 from the first three words found at sp.
*
* NOTE(review): the arguments are expanded without parentheses and more than
* once, so callers should pass simple expressions without side effects.
*/
#define start_thread(regs,pc,sp) \
({ \
unsigned long *stack = (unsigned long *)sp; \
memset(regs->uregs, 0, sizeof(regs->uregs)); \
if (current->personality & ADDR_LIMIT_32BIT) \
regs->ARM_cpsr = USR_MODE; \
else \
regs->ARM_cpsr = USR26_MODE; \
if (elf_hwcap & HWCAP_THUMB && pc & 1) \
regs->ARM_cpsr |= PSR_T_BIT; \
regs->ARM_cpsr |= PSR_ENDSTATE; \
regs->ARM_pc = pc & ~1; /* pc */ \
regs->ARM_sp = sp; /* sp */ \
regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
})
总结:调用execve时,运行的已经是fork新创建的进程;并不是先在旧(父)进程中加载程序、再去运行新进程。