今天想看下dd命令启动后调用的系统调用。于是就用strace跟踪了一把,考虑到dd命令是需要带参数的,所以每次写一个字节,共写10次,所在文件系统为xfs,执行如下。

  输出如下,注释直接加在输出的内容中了:

# strace dd if=/dev/zero of=test.log bs=1 count=10

execve("/usr/bin/dd", ["dd", "if=/dev/zero", "of=test.log", "bs=1", "count=10"], [/* 23 vars */]) = 0 //调用execve,开始执行dd

brk(0)                                  = 0xf34000 //通过brk来设置段大小,因为参数为0,所以用来取得当前结束地址。

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd90780f000//调用mmap,来映射内存,第一个参数为NULL,所以由内核来选择要映射的地址,长度为4096即4K一页,可读写,私有,匿名映射不涉及文件和偏移。

access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)//检查/etc/ld.so.preload文件是否可读,该文件用于列出需要预先加载的动态库(作用类似环境变量LD_PRELOAD,但它本身是文件而不是环境变量),这里该文件不存在

open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3//打开/etc/ld.so.cache,其中缓存了各个动态库的路径信息,返回的文件描述符为3.

fstat(3, {st_mode=S_IFREG|0644, st_size=88433, ...}) = 0//查看该文件的状态

mmap(NULL, 88433, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd9077f9000//通过mmap映射,应该用于读取

close(3)                                = 0//完事后,就关闭/etc/ld.so.cache文件。

open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3//打开c库

read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\34\2\0\0\0\0\0"..., 832) = 832//读取c库内容

fstat(3, {st_mode=S_IFREG|0755, st_size=2118128, ...}) = 0//查看该文件状态

mmap(NULL, 3932672, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fd907230000//映射该文件到内存,这个应该是代码段

mprotect(0x7fd9073e6000, 2097152, PROT_NONE) = 0//设置内存区域的保护,这个区域应该是映射的c 库文件中的一部分

mmap(0x7fd9075e6000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7fd9075e6000//映射到内存,可读写,且来自c库文件偏移0x1b6000处,这个应该是c库的数据段(不是栈)

mmap(0x7fd9075ec000, 16896, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fd9075ec000//又是一个匿名映射

close(3)                                = 0//映射完毕后,就可以关闭句柄了

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd9077f8000//又是一个匿名映射

mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd9077f6000//又是一个匿名映射

arch_prctl(ARCH_SET_FS, 0x7fd9077f6740) = 0

mprotect(0x7fd9075e6000, 16384, PROT_READ) = 0//把前面映射的c库数据段(0x7fd9075e6000)的前16K改为只读,应该是重定位完成后的只读保护(RELRO),并不是代码段。

mprotect(0x610000, 4096, PROT_READ)     = 0

mprotect(0x7fd907810000, 4096, PROT_READ) = 0

munmap(0x7fd9077f9000, 88433)           = 0//取消/etc/ld.so.cache文件映射

rt_sigaction(SIGUSR1, NULL, {SIG_DFL, [], 0}, 8) = 0//第二个参数为NULL,所以这里只是查询SIGUSR1当前的处理方式(结果为默认的SIG_DFL),并不做设置;真正设置处理函数的是下面第二个参数带处理函数地址的rt_sigaction调用

rt_sigaction(SIGINT, NULL, {SIG_DFL, [], 0}, 8) = 0

rt_sigaction(SIGUSR1, {0x403cd0, [INT USR1], SA_RESTORER, 0x7fd907265250}, NULL, 8) = 0

rt_sigaction(SIGINT, {0x403cc0, [INT USR1], SA_RESTORER|SA_NODEFER|SA_RESETHAND, 0x7fd907265250}, NULL, 8) = 0

brk(0)                                  = 0xf34000//获取进程当前结束地址

brk(0xf55000)                           = 0xf55000//设置当前结束地址

brk(0)                                  = 0xf55000//获取进程当前结束地址

open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3//打开文件locale-archive,这个应该是系统安装的locales.

fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0//查看locale-archive状态

mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd900d07000//映射locale-archive到内存

close(3)                                = 0//关闭locale-archive

open("/dev/zero", O_RDONLY)             = 3//打开/dev/zero,这个才是我们取数据的源

dup2(3, 0)                              = 0//复制该文件句柄为0

close(3)                                = 0//关闭/dev/zero

lseek(0, 0, SEEK_CUR)                   = 0//获取文件描述符0当前的文件偏移(偏移量为0加SEEK_CUR表示不移动位置,只返回当前偏移)

open("test.log", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 3//打开test.log,我们的目的文件

dup2(3, 1)                              = 1//复制test.log的句柄为1

close(3)                                = 0//关闭test.log

read(0, "\0", 1)                        = 1//调用系统调用read,从/dev/zero中读一个字节的0

write(1, "\0", 1)                       = 1//调用系统调用write,写一个字节的0到test.log

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

read(0, "\0", 1)                        = 1

write(1, "\0", 1)                       = 1

close(0)                                = 0//关闭/dev/zero复制出来的句柄

close(1)                                = 0//关闭test.log复制出来的句柄,后面部分不详述了。

open("/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 0

fstat(0, {st_mode=S_IFREG|0644, st_size=2502, ...}) = 0

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd90780e000

read(0, "# Locale name alias data base.\n#"..., 4096) = 2502

read(0, "", 4096)                       = 0

close(0)                                = 0

munmap(0x7fd90780e000, 4096)            = 0

open("/usr/share/locale/zh_CN.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)

open("/usr/share/locale/zh_CN.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)

open("/usr/share/locale/zh_CN/LC_MESSAGES/coreutils.mo", O_RDONLY) = 0

fstat(0, {st_mode=S_IFREG|0644, st_size=190751, ...}) = 0

mmap(NULL, 190751, PROT_READ, MAP_PRIVATE, 0, 0) = 0x7fd9077c7000

close(0)                                = 0

open("/usr/lib64/gconv/gconv-modules.cache", O_RDONLY) = 0

fstat(0, {st_mode=S_IFREG|0644, st_size=26254, ...}) = 0

mmap(NULL, 26254, PROT_READ, MAP_SHARED, 0, 0) = 0x7fd907808000

close(0)                                = 0

write(2, "\350\256\260\345\275\225\344\272\20610+0 \347\232\204\350\257\273\345\205\245\n\350\256\260\345\275\225\344\272"..., 48记录了10+0 的读入

记录了10+0 的写出

) = 48

write(2, "10\345\255\227\350\212\202(10 B)\345\267\262\345\244\215\345\210\266", 2310字节(10 B)已复制) = 23

write(2, "\357\274\2140.00188268 \347\247\222\357\274\2145.3 kB/\347\247\222\n", 31,0.00188268 秒,5.3 kB/秒

) = 31

close(2)                                = 0

exit_group(0)                           = ?

+++ exited with 0 +++



            这里可以看到的是,dd命令会调用read和write系统调用.

另外在使用dd命令的过程中,进行监控发现,如果目的是无文件系统的块设备,那么该盘是时刻有io;如果目的为文件系统上的文件,那么io是间歇性才有的,也就是说有使用缓存,并没有绕开pagecache。从而说明,dd命令在执行过程中的调用路径,是取决于其源和目的的。

往一个无文件系统的块设备文件dd的时候,其块层的调用栈如下,发现都是kernel函数,并无文件系统相关函数:

0xffffffff812ee1a0 : submit_bio+0x0/0x150 [kernel]

 0xffffffff812327b3 : _submit_bh+0x143/0x210 [kernel]

 0xffffffff81235392 : __block_write_full_page+0x162/0x370 [kernel]

 0xffffffff81235757 : block_write_full_page+0xd7/0xf0 [kernel]

 0xffffffff812395e8 : blkdev_writepage+0x18/0x20 [kernel]

 0xffffffff8118af83 : __writepage+0x13/0x50 [kernel]

 0xffffffff8118baa1 : write_cache_pages+0x251/0x4d0 [kernel]

 0xffffffff8118bd6d : generic_writepages+0x4d/0x80 [kernel]

 0xffffffff812395a5 : blkdev_writepages+0x35/0x40 [kernel]

 0xffffffff8118ce1e : do_writepages+0x1e/0x40 [kernel]

 0xffffffff81181a05 : __filemap_fdatawrite_range+0x65/0x80 [kernel]

 0xffffffff81181a8d : filemap_write_and_wait+0x3d/0x80 [kernel]

 0xffffffff8123a39f : __sync_blockdev+0x1f/0x40 [kernel]

 0xffffffff8123a6ec : __blkdev_put+0x5c/0x1a0 [kernel]

 0xffffffff8123b18e : blkdev_put+0x4e/0x140 [kernel]

 0xffffffff8123b335 : blkdev_close+0x25/0x30 [kernel]

 0xffffffff811ffad9 : __fput+0xe9/0x260 [kernel]

 0xffffffff811ffd8e : ____fput+0xe/0x10 [kernel]

 0xffffffff810accc7 : task_work_run+0xa7/0xe0 [kernel]

0xffffffff8102ab22 : do_notify_resume+0x92/0xb0 [kernel]



            而往一个文件系统dd时候,其块层的调用栈如下,存在文件系统的相关函数,验证了我们的猜想:

0xffffffff812ee1a0 : submit_bio+0x0/0x150 [kernel]

 0xffffffffa01e7d23 : xfs_submit_ioend_bio.isra.16+0x33/0x40 [xfs]

 0xffffffffa01e7dfa : xfs_submit_ioend+0xca/0x130 [xfs]

 0xffffffffa01e9012 : xfs_vm_writepage+0x2a2/0x5d0 [xfs]

 0xffffffff8118af83 : __writepage+0x13/0x50 [kernel]

 0xffffffff8118baa1 : write_cache_pages+0x251/0x4d0 [kernel]

 0xffffffff8118bd6d : generic_writepages+0x4d/0x80 [kernel]

 0xffffffffa01e8063 : xfs_vm_writepages+0x53/0x90 [xfs]

 0xffffffff8118ce1e : do_writepages+0x1e/0x40 [kernel]

 0xffffffff81181a05 : __filemap_fdatawrite_range+0x65/0x80 [kernel]

 0xffffffff81181aec : filemap_flush+0x1c/0x20 [kernel]

 0xffffffffa0203117 : xfs_release+0x137/0x170 [xfs]

 0xffffffffa01f3275 : xfs_file_release+0x15/0x20 [xfs]

 0xffffffff811ffad9 : __fput+0xe9/0x260 [kernel]

 0xffffffff811ffd8e : ____fput+0xe/0x10 [kernel]

 0xffffffff810accc7 : task_work_run+0xa7/0xe0 [kernel]

 0xffffffff8102ab22 : do_notify_resume+0x92/0xb0 [kernel]

 0xffffffff816962bd : int_signal+0x12/0x17 [kernel]