1. 程式人生 > >Linux pipe 源代碼分析

Linux pipe 源代碼分析

unlock 更新 內核 block opera private tracking 引用 LV

Linux pipe 源代碼分析


管道(pipe)作為Unix中歷史最悠久的IPC機制,存在於各個版本的Unix中,主要用於父子進程之間的通信(使用fork之後,子進程會獲得父進程的打開文件表)。pipe()系統調用的底層實現相當於一個特殊的文件系統:每次調用時創建一個inode,並關聯兩個file,一個用於讀,一個用於寫,從而實現數據的單向流動。



用戶層API:

 #include <unistd.h>

       int pipe(int pipefd[2]);

       #define _GNU_SOURCE             /* See feature_test_macros(7) */
       #include <unistd.h>

       int pipe2(int pipefd[2], int flags);


內核源代碼路徑如下:
/*
 * pipe(2) system-call entry point (sys_pipe).
 *
 * Simply forwards to sys_pipe2() with a flags value of 0.
 *
 * @fildes: user-space pointer handed unchanged to sys_pipe2().
 *
 * Returns whatever sys_pipe2() returns.
 */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
     return sys_pipe2(fildes, 0);
}

/*
 * pipe2(2) system-call entry point.
 *
 * @fildes: user-space array that receives the two descriptor numbers.
 * @flags:  flags passed through to __do_pipe_flags().
 *
 * Returns 0 on success, the error from __do_pipe_flags() if setting up the
 * pipe fails, or -EFAULT if the descriptors cannot be copied to user space.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
     struct file *files[2];
     int fd[2];
     /* The real work happens in __do_pipe_flags(). */
     int error = __do_pipe_flags(fd, files, flags);

     if (error)
          return error;

     /* Everything is set up: hand the two descriptor numbers to user space. */
     if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
          /* The copy failed: drop both files and release both reserved fds. */
          fput(files[0]);
          fput(files[1]);
          put_unused_fd(fd[0]);
          put_unused_fd(fd[1]);
          return -EFAULT;
     }

     /*
      * Record the fd -> file mapping in the calling process's file
      * descriptor table (fdtable).
      */
     fd_install(fd[0], files[0]);
     fd_install(fd[1], files[1]);
     return 0;
}

/*
 * Create the two struct file instances of a pipe and reserve one file
 * descriptor number for each of them.
 *
 * @fd:    output; fd[0] receives the first reserved descriptor and fd[1]
 *         the second one.
 * @files: output; filled in by create_pipe_files().
 * @flags: only O_CLOEXEC, O_NONBLOCK and O_DIRECT are accepted.
 *
 * The descriptors are only reserved here; nothing in this function installs
 * them.
 *
 * Returns 0 on success, -EINVAL for an unsupported flag, or the error
 * reported by create_pipe_files() / get_unused_fd_flags().
 */
static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
     int read_fd, write_fd;
     int ret;

     /* Any bit outside the supported set is an error. */
     if ((flags & (O_CLOEXEC | O_NONBLOCK | O_DIRECT)) != flags)
          return -EINVAL;

     /* Create the two struct file instances for this pipe. */
     ret = create_pipe_files(files, flags);
     if (ret != 0)
          return ret;

     /* Reserve two usable file descriptor numbers. */
     read_fd = get_unused_fd_flags(flags);
     if (read_fd < 0) {
          ret = read_fd;
          goto drop_files;
     }

     write_fd = get_unused_fd_flags(flags);
     if (write_fd < 0) {
          ret = write_fd;
          goto drop_read_fd;
     }

     audit_fd_pair(read_fd, write_fd);
     fd[0] = read_fd;
     fd[1] = write_fd;
     return 0;

drop_read_fd:
     /* The second reservation failed: give the first descriptor back. */
     put_unused_fd(read_fd);
drop_files:
     fput(files[0]);
     fput(files[1]);
     return ret;
}


/*
 * Create the two struct file instances that make up a pipe.
 *
 * @res:   output array; on success res[0] is the file opened with FMODE_READ
 *         and res[1] is the file opened with FMODE_WRITE.
 * @flags: O_NONBLOCK is applied to both files, O_DIRECT only to the
 *         write side; other bits are ignored here.
 *
 * Returns 0 on success or a negative errno:
 *   -ENFILE if get_pipe_inode() returns no inode,
 *   -ENOMEM if the dentry cannot be allocated,
 *   the error encoded in alloc_file()'s return value if a struct file
 *   cannot be allocated (previously this was always reported as -ENFILE,
 *   hiding the real cause).
 */
int create_pipe_files(struct file **res, int flags)
{
     int err;
     /* Create the inode backing the pipe; get_pipe_inode() initialises it. */
     struct inode *inode = get_pipe_inode();
     struct file *f;
     struct path path;
     static struct qstr name = { .name = "" }; /* empty name for the dentry */

     if (!inode)
          return -ENFILE;

     err = -ENOMEM;
     /* Allocate a directory entry for the pipe. */
     path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
     if (!path.dentry)
          goto err_inode;
     path.mnt = mntget(pipe_mnt);  /* take a reference on the pipe mount */

     d_instantiate(path.dentry, inode);

     /* Write side. */
     f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
     if (IS_ERR(f)) {
          /* Report the actual failure reason instead of a fixed -ENFILE. */
          err = PTR_ERR(f);
          goto err_dentry;
     }

     f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
     f->private_data = inode->i_pipe;

     /* Read side: this is why fd[0] is the read end and fd[1] the write end. */
     res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
     if (IS_ERR(res[0])) {
          err = PTR_ERR(res[0]);
          goto err_file;
     }

     /*
      * Both files share the same path; take an extra reference for the
      * second one (presumably each struct file owns one path reference --
      * confirm against alloc_file()).
      */
     path_get(&path);
     res[0]->private_data = inode->i_pipe;
     res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
     res[1] = f;
     return 0;

err_file:
     put_filp(f);
err_dentry:
     free_pipe_info(inode->i_pipe);
     path_put(&path);
     return err;

err_inode:
     /* No dentry was attached yet, so the inode is dropped directly. */
     free_pipe_info(inode->i_pipe);
     iput(inode);
     return err;
}


/*
 * Allocate an inode on the pipe pseudo filesystem and attach a freshly
 * allocated pipe_inode_info to it.
 *
 * Returns the initialised inode, or NULL if either the inode or the
 * pipe_inode_info could not be allocated.
 */
static struct inode * get_pipe_inode(void)
{
     struct pipe_inode_info *info;
     struct inode *ino = new_inode_pseudo(pipe_mnt->mnt_sb);

     if (!ino)
          return NULL;

     /* Assign an inode number. */
     ino->i_ino = get_next_ino();

     /* Allocate the kernel-side pipe object. */
     info = alloc_pipe_info();
     if (!info) {
          iput(ino);
          return NULL;
     }

     /* Initial counts: two files, one reader and one writer. */
     info->files = 2;
     info->readers = 1;
     info->writers = 1;
     ino->i_pipe = info;
     ino->i_fop = &pipefifo_fops;

     /*
      * Start out with the inode flagged dirty: mark_inode_dirty() will
      * then believe it is already on the dirty list and will never
      * move it there.
      */
     ino->i_state = I_DIRTY;
     ino->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
     ino->i_uid = current_fsuid();
     ino->i_gid = current_fsgid();
     /* One evaluation of CURRENT_TIME shared by all three timestamps. */
     ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;

     return ino;
}


/*
 * File operations table used for pipes: create_pipe_files() passes it to
 * alloc_file() for both files and get_pipe_inode() stores it in i_fop.
 */
const struct file_operations pipefifo_fops = {
     /* open / close */
     .open           = fifo_open,
     .release        = pipe_release,

     /* data transfer */
     .read           = new_sync_read,
     .write          = new_sync_write,
     .read_iter      = pipe_read,
     .write_iter     = pipe_write,

     /* seeking is routed to no_llseek */
     .llseek         = no_llseek,

     /* readiness, control and async notification */
     .poll           = pipe_poll,
     .unlocked_ioctl = pipe_ioctl,
     .fasync         = pipe_fasync,
};



總體的邏輯圖如下:(技術分享圖片)

TODO:詳細讀寫的實現細節new_sync_read/write()有待分析。

參考:(1)Linux kernel 3.18 source code (2)Linux man page (3)Linux內核源代碼情景分析



Linux pipe 源代碼分析