linux各种模式下io路径跟踪

原创
2019/06/13 21:45
阅读数 1.6K

       接上一篇:https://my.oschina.net/fileoptions/blog/3061997 ,linux的io路径比较复杂,通常我们在阅读内核源码的时候,有时候也很难完整地跟踪整个路径。其实,我们可以使用工具跟踪代码的io路径,看一下一次完整的open、write、read都调用了哪些内核函数,这里我使用ftrace工具。假设要跟踪的代码编译成iotrace ,则可以使用trace-cmd 对iotrace的io路径进行跟踪(命令如下)。 

 sudo trace-cmd record -p function -F ./iotrace

      trace-cmd 会启动iotrace程序,然后开始跟踪它的内核函数调用(并不是所有内核函数都会被跟踪到,只有ftrace支持的才可以),   由于代码很少,因此这里trace会很快结束,然后使用trace-cmd report命令将产生的trace文件格式化为人类可读的文本,就可以清晰的看到io的调用路径和层次关系。    

     由于我不想在虚拟机Linux上做这些测试(总感觉虚拟机会带来很多没必要的虚拟机相关的代码调用,增加复杂性),而我又没有运行Linux的物理机,怎么办呢?幸好在我玩硬件的时候入手了一块树莓派3B+,上面运行着完整的linux系统,而且我还把内核升级到5.x版本,更重要的是,在树莓派上可以尽情的编写、加载内核模块和修改内核,而不用担心把内核搞core dump,大不了快速重启或者重新烧写内核。

 

       iotrace的源码如下,测试了7种io场景,主要为了测试普通模式、带O_DIRECT、O_SYNC标志、mmap模式、fsync和msync的调用路径等。

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define BUF_SIZE 1024

/* Report a failed libc/system call through perror() (which reads errno) and exit(1). */
static void die(const char *what)
{
    perror(what);
    exit(1);
}

/*
 * Compare the first n bytes read back against what was written and exit(1)
 * on a mismatch.  A mismatch is not an errno condition, so it is reported
 * with a plain message instead of perror().
 */
static void expect_same(const char *scenario, const char *got,
                        const char *want, size_t n)
{
    if (strncmp(got, want, n) != 0) {
        fprintf(stderr, "%s: data read back does not match data written\n",
                scenario);
        exit(1);
    }
}

/*
 * Allocate a zero-filled, 512-byte-aligned buffer of BUF_SIZE bytes for the
 * O_DIRECT scenarios.  posix_memalign() returns the error number instead of
 * setting errno, so the message is built with strerror().  The buffer is
 * zeroed because the O_DIRECT scenarios write all BUF_SIZE bytes to the file.
 * The caller owns the buffer and releases it with free().
 */
static char *alloc_aligned_buf(void)
{
    void *buf = NULL;
    int rc = posix_memalign(&buf, 512, BUF_SIZE);

    if (rc != 0) {
        fprintf(stderr, "posix_memalign: %s\n", strerror(rc));
        exit(1);
    }
    memset(buf, 0, BUF_SIZE);
    return buf;
}

/* Create/truncate "hello.c" read-write with the given extra open flags. */
static int open_test_file(int extra_flags)
{
    int fd = open("hello.c", O_CREAT | O_TRUNC | O_RDWR | extra_flags, 0666);

    if (fd < 0) {
        die("open:");
    }
    return fd;
}

/* write() n bytes from buf; exit on error. */
static void write_or_die(int fd, const void *buf, size_t n)
{
    if (write(fd, buf, n) < 0) {
        die("write:");
    }
}

/* read() up to n bytes into buf; exit on error. */
static void read_or_die(int fd, void *buf, size_t n)
{
    if (read(fd, buf, n) < 0) {
        die("read:");
    }
}

/* Move the file offset back to the start of the file; exit on error. */
static void rewind_or_die(int fd)
{
    if (lseek(fd, 0, SEEK_SET) < 0) {
        die("lseek:");
    }
}

/* fsync() the descriptor; exit on error. */
static void fsync_or_die(int fd)
{
    if (fsync(fd) < 0) {
        die("fsync:");
    }
}

/* close() the descriptor; exit on error. */
static void close_or_die(int fd)
{
    if (close(fd) < 0) {
        die("close:");
    }
}

/* Map the first BUF_SIZE bytes of fd as a shared read/write mapping. */
static char *map_or_die(int fd)
{
    void *p = mmap(0, BUF_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    if (p == MAP_FAILED) {
        die("mmap:");
    }
    return p;
}

/* Remove a mapping created by map_or_die(); exit on error. */
static void unmap_or_die(char *map)
{
    if (munmap(map, BUF_SIZE) < 0) {
        die("munmap:");
    }
}

/*
 * Allocate BUF_SIZE bytes of file space with fallocate().  The file was
 * just truncated by open(), so this has to happen before the mapping is
 * written through.
 */
static void reserve_or_die(int fd)
{
    if (fallocate(fd, 0, 0, BUF_SIZE) < 0) {
        die("fallocate:");
    }
}

/*
 * Drive seven I/O scenarios against "hello.c" so that the kernel call path
 * of each system call can be recorded with ftrace / trace-cmd:
 *   1. buffered I/O            2. O_DIRECT
 *   3. O_SYNC                  4. O_DIRECT | O_SYNC
 *   5. mmap flushed by msync   6. mmap flushed by fsync
 *   7. mmap on a descriptor opened with O_DIRECT
 *
 * The order of system calls inside each scenario is deliberate: it is what
 * the recorded traces correspond to.  Returns 0 on success; any failing call
 * or read-back mismatch prints a diagnostic and exits with status 1.
 */
int main(void)
{
    int fd;
    size_t len;
    char *file_map;

    /* 1. Plain buffered I/O. */
    const char *normal_write_buf = "normal io";
    char normal_read_buf[100] = {0};
    len = strlen(normal_write_buf);

    /* getpid() is only issued as a marker for the start of a scenario. */
    (void)getpid();

    fd = open_test_file(0);

    /* First write: the file is expected not to be in the page cache yet. */
    write_or_die(fd, normal_write_buf, len);

    /* Second write: the file should now be in the page cache, so the call
     * path is expected to be shorter than for the first write. */
    write_or_die(fd, normal_write_buf, len);

    rewind_or_die(fd);
    read_or_die(fd, normal_read_buf, len);
    expect_same("normal io", normal_read_buf, normal_write_buf, len);

    /* Force the data out to storage. */
    fsync_or_die(fd);
    close_or_die(fd);

    /* 2. O_DIRECT. */
    char *direct_write_buf = alloc_aligned_buf();
    char *direct_read_buf = alloc_aligned_buf();

    strcpy(direct_write_buf, "direct mode");
    len = strlen("direct mode");

    (void)getpid();

    fd = open_test_file(O_DIRECT);
    write_or_die(fd, direct_write_buf, BUF_SIZE);

    /* O_DIRECT only bypasses the page cache; fsync is still required for
     * the data to be persistent. */
    fsync_or_die(fd);

    rewind_or_die(fd);
    read_or_die(fd, direct_read_buf, BUF_SIZE);
    expect_same("direct io", direct_read_buf, direct_write_buf, len);

    close_or_die(fd);
    free(direct_write_buf);
    free(direct_read_buf);

    /* 3. O_SYNC. */
    const char *o_sync_write_buf = "o_sync io";
    char o_sync_read_buf[100] = {0};
    len = strlen(o_sync_write_buf);

    (void)getpid();

    fd = open_test_file(O_SYNC);
    write_or_die(fd, o_sync_write_buf, len);
    write_or_die(fd, o_sync_write_buf, len);

    /* What does an extra fsync do once O_SYNC is already in effect? */
    fsync_or_die(fd);

    rewind_or_die(fd);
    read_or_die(fd, o_sync_read_buf, len);
    expect_same("o_sync io", o_sync_read_buf, o_sync_write_buf, len);

    close_or_die(fd);

    /* 4. O_DIRECT combined with O_SYNC. */
    char *o_sync_direct_write_buf = alloc_aligned_buf();
    char *o_sync_direct_read_buf = alloc_aligned_buf();

    /* The payload must go into this scenario's own write buffer. */
    strcpy(o_sync_direct_write_buf, "o_sync o_direct mode");
    len = strlen("o_sync o_direct mode");

    (void)getpid();

    fd = open_test_file(O_DIRECT | O_SYNC);
    write_or_die(fd, o_sync_direct_write_buf, BUF_SIZE);
    fsync_or_die(fd);

    rewind_or_die(fd);
    read_or_die(fd, o_sync_direct_read_buf, BUF_SIZE);
    expect_same("o_sync o_direct io", o_sync_direct_read_buf,
                o_sync_direct_write_buf, len);

    close_or_die(fd);
    free(o_sync_direct_write_buf);
    free(o_sync_direct_read_buf);

    /* 5. mmap I/O flushed with msync. */
    const char *mmap_write_buf = "mmap io";
    char mmap_read_buf[100] = {0};
    len = strlen(mmap_write_buf);

    (void)getpid();

    fd = open_test_file(0);
    file_map = map_or_die(fd);
    reserve_or_die(fd);

    memcpy(file_map, mmap_write_buf, len);

    /* No msync is needed before read(): both go through the page cache. */
    read_or_die(fd, mmap_read_buf, len);
    expect_same("mmap io (msync)", mmap_read_buf, mmap_write_buf, len);

    /* How does msync differ from fsync? */
    if (msync(file_map, len, MS_SYNC) < 0) {
        die("msync:");
    }

    close_or_die(fd);
    unmap_or_die(file_map);

    /* 6. mmap I/O flushed with fsync. */
    const char *mmap_write_buf2 = "mmap io 2";
    char mmap_read_buf2[100] = {0};
    len = strlen(mmap_write_buf2);

    (void)getpid();

    fd = open_test_file(0);
    file_map = map_or_die(fd);
    reserve_or_die(fd);

    memcpy(file_map, mmap_write_buf2, len);

    /* No msync is needed before read(): both go through the page cache. */
    read_or_die(fd, mmap_read_buf2, len);

    /* Compare the full length of this scenario's string. */
    expect_same("mmap io (fsync)", mmap_read_buf2, mmap_write_buf2, len);

    /* How does msync differ from fsync? */
    fsync_or_die(fd);

    close_or_die(fd);
    unmap_or_die(file_map);

    /* 7. mmap I/O on a descriptor opened with O_DIRECT. */
    const char *mmap_direct_write_buf = "mmap direct mode";
    char *mmap_direct_read_buf = alloc_aligned_buf();
    len = strlen(mmap_direct_write_buf);

    (void)getpid();

    fd = open_test_file(O_DIRECT);
    file_map = map_or_die(fd);
    reserve_or_die(fd);

    memcpy(file_map, mmap_direct_write_buf, len);

    /* With O_DIRECT and no preceding msync, this read bypasses the page
     * cache: does it still return the correct data? */
    read_or_die(fd, mmap_direct_read_buf, BUF_SIZE);
    expect_same("mmap direct io", mmap_direct_read_buf,
                mmap_direct_write_buf, len);

    /* How does msync differ from fsync?  Flush this scenario's length. */
    if (msync(file_map, len, MS_SYNC) < 0) {
        die("msync:");
    }

    close_or_die(fd);
    unmap_or_die(file_map);
    free(mmap_direct_read_buf);

    return 0;
}

 

1.普通io模式

  •  open

        https://github.com/chenyang8094/iotrace/blob/master/1.open

  • 第一次write

       https://github.com/chenyang8094/iotrace/blob/master/1.write1

  • 第二次write

       https://github.com/chenyang8094/iotrace/blob/master/1.write2

     可以通过http://tool.chinaz.com/tools/diff/ 这个工具比较第一次和第二次write路径的区别,如下图所示,左边是第一次write,右边是第二次write。由于第一次write的时候,文件没有在page cache中,因此调用路径会比第二次长,会有读磁盘数据并添加到page cache的动作。

 

 

  • read

       https://github.com/chenyang8094/iotrace/blob/master/1.read

  • fsync

       https://github.com/chenyang8094/iotrace/blob/master/1.fsync

2.O_DIRECT模式

  •  open

         https://github.com/chenyang8094/iotrace/blob/master/2.open

        使用O_DIRECT打开之后,和上一种open的差异可以看下图:

  • write

        https://github.com/chenyang8094/iotrace/blob/master/2.write        

  • read

        https://github.com/chenyang8094/iotrace/blob/master/2.read      

  • fsync

        https://github.com/chenyang8094/iotrace/blob/master/2.fsync   

3.O_SYNC模式

  •  open

         https://github.com/chenyang8094/iotrace/blob/master/3.open

  • 第一次write

        https://github.com/chenyang8094/iotrace/blob/master/3.write1

  • 第二次write

        https://github.com/chenyang8094/iotrace/blob/master/3.write2

  • read

        https://github.com/chenyang8094/iotrace/blob/master/3.read      

  • fsync

        https://github.com/chenyang8094/iotrace/blob/master/3.fsync      

4.O_DIRECT+O_SYNC模式

  •  open

         https://github.com/chenyang8094/iotrace/blob/master/4.open

  • write

        https://github.com/chenyang8094/iotrace/blob/master/4.write

  • fsync

        https://github.com/chenyang8094/iotrace/blob/master/4.sync

  • read

        https://github.com/chenyang8094/iotrace/blob/master/4.read

5.mmap使用msync刷新

  •  open

        https://github.com/chenyang8094/iotrace/blob/master/5.open         

  •  mmap

        https://github.com/chenyang8094/iotrace/blob/master/5.mmap         

  • read

        https://github.com/chenyang8094/iotrace/blob/master/5.read      

  • msync

        https://github.com/chenyang8094/iotrace/blob/master/5.msync

6.mmap使用fsync刷新

  •  open

         https://github.com/chenyang8094/iotrace/blob/master/6.open   

  • read

         https://github.com/chenyang8094/iotrace/blob/master/6.read

  • fsync

         https://github.com/chenyang8094/iotrace/blob/master/6.fsync      

7.mmap使用O_DIRECT方式open

  •  open

         https://github.com/chenyang8094/iotrace/blob/master/7.open

  • read

         https://github.com/chenyang8094/iotrace/blob/master/7.read

  • msync

         https://github.com/chenyang8094/iotrace/blob/master/7.msync

      

      

 

展开阅读全文
加载中
点击引领话题📣 发布并加入讨论🔥
0 评论
7 收藏
1
分享
返回顶部
顶部