1. 程式人生 > >Linux VFS相關結構體

Linux VFS相關結構體

1. 概述

Linux 虛擬檔案系統是建立在具體檔案系統之上,其包括幾種主要的物件,分別是超級塊物件,目錄項物件,索引節點物件,與程序相關的檔案物件,安裝點物件,檔案系統型別物件。在VFS中,多個檔案系統可以被安裝在同一個目錄,例如/dev/sda和/dev/sdb先後被安裝在/project目錄下,則/dev/sdb會隱藏/dev/sda檔案系統。同一個檔案系統也可以安裝在不同的目錄下,此時只有一個超級塊物件。因此,超級塊物件代表了檔案系統。目錄項物件主要用於路徑查詢過程,例如,將檔案系統/dev/sda安裝在/project目錄下,首先需要找到/project/目錄的目錄項物件,另外,目錄項物件和目錄不同,每個目錄和檔案都具有目錄項物件,存在於記憶體。索引節點物件描述了檔案或目錄的元資料,包括檔案的訪問時間等,在建立索引節點的時候,將檔案的相關操作指標賦給索引節點物件的i_fop欄位。每個開啟的檔案物件對應一個檔案描述符,常用於檔案的open操作。每個檔案系統都有對應的型別物件,如ext3,ext2,ext4和fuse檔案系統,而這個結構體儲存了相關的檔案型別。安裝點物件表示檔案系統的掛載位置以及不同檔案系統之間的關係,新掛載的檔案系統都需要加入mount tree.

2. 物件

(1) superblock物件<linux/fs.h>

struct super_block{

Type                  Field            Description

 struct list_head      s_list                            /*Pointers for superblock list所有的superblock連結串列*/

 dev_t                 s_dev                                     /*Device identifier超級塊對應的裝置型別*/
 unsigned long         s_blocksize                  /*Block size in bytes每個塊的位元組數*/
unsigned long         s_old_blocksize          /*Block size in bytes as reported by the underlying block device driver*/
unsigned char  s_blocksize_bits              /*Block size in number of bits每個塊佔多少位元*/
unsigned char         s_dirt                          /*Modified (dirty) flag超級塊為髒,需要和磁碟上的超級塊同步*/
unsigned long long    s_maxbytes             /*Maximum size of the files檔案的最大長度*/
struct file_system_type * s_type         /*Filesystem type所屬的檔案系統型別如ext3*/
struct super_operations *  s_op           /*Superblock methods超級塊的相關操作,物件包括屬性和操作*/
struct dquot_operations * dq_op          /*Disk quota handling methods*/
struct quotactl_ops * s_qcop                /*Disk quota administration methods*/
struct export_operations * s_export_op    /*Export operations used by network filesystems網路檔案系統相關操作*/
unsigned long         s_flags                        /*Mount flags掛載標誌*/
unsigned long         s_magic                        /*Filesystem magic number檔案系統魔數*/
struct dentry *       s_root                      /*Dentry object of the filesystem's root directory檔案系統的根目錄項物件*/
struct rw_semaphore   s_umount          /*Semaphore used for unmounting umount時使用*/
struct semaphore      s_lock                  /*Superblock semaphore*/
int                   s_count                             /*Reference counter計數器,一個檔案系統可能被安裝到多個目錄*/
int                  s_syncing                          /*Flag indicating that inodes of the superblock are being synchronized */
int                  s_need_sync_fs               /*Flag used when synchronizing the superblock's mounted filesystem*/                                
atomic_t             s_active                      /*Secondary reference counter*/                                     
void *               s_security                      /* Pointer to superblock security structure*/
struct xattr_handler ** s_xattr          /*Pointer to superblock extended attribute structure*/
struct list_head     s_inodes                 /*List of all inodes所有的inode節點*/
struct list_head     s_dirty                   /*List of modified inodes修改的inode節點連結串列*/
struct list_head     s_io                        /*List of inodes waiting to be written to disk等待寫到磁碟上的inode節點列表*/
struct hlist_head    s_anon                   /*List of anonymous dentries for handling remote network filesystems*/
struct list_head     s_files                   /*List of file objects檔案物件連結串列*/                   
struct block_device *s_bdev                /*Pointer to the block device driver descriptor塊裝置描述符*/
struct list_head     s_instances          /*Pointers for a list of superblock objects of a given filesystem type屬於指定檔案系統型別的連結串列指標*/
struct quota_info    s_dquot                 /*Descriptor for disk quota*/
int                  s_frozen                             /*Flag used when freezing the filesystem (forcing it to a consistent state)*/
wait_queue_head_t    s_wait_unfrozen  /*Wait queue where processes sleep until the filesystem is unfrozen*/
char[]               s_id                                      /*Name of the block device containing the superblock*/
void *               s_fs_info                             /*Pointer to superblock information of a specific filesystem具體檔案系統的超級塊資訊*/
struct semaphore     s_vfs_rename_sem   /*Semaphore used by VFS when renaming files across directories*/
u32                  s_time_gran                           /*Timestamp's granularity (in nanoseconds)*/
}

/*
 * struct super_operations - table of superblock methods.
 *
 * Function pointers that a filesystem supplies for operating on its
 * superblock and on the inodes that belong to it.
 *
 * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
 * without the big kernel lock held in all filesystems.
 */
struct super_operations {
    /* Allocate space for an inode. */
    struct inode *(*alloc_inode)(struct super_block *sb);
    /* Destroy an inode object. */
    void (*destroy_inode)(struct inode *);
    /* Read data from disk, located by the inode number i_ino, to fill in the inode. */
    void (*read_inode)(struct inode *);
    /* Called when the inode is marked dirty, i.e. it has been modified. */
    void (*dirty_inode)(struct inode *);
    /* Update the filesystem's inode; flag says whether synchronization is needed. */
    int (*write_inode)(struct inode *, int flag);
    /* Called when the inode is released. */
    void (*put_inode)(struct inode *);
    /* Called when the inode is about to be destroyed. */
    void (*drop_inode)(struct inode *);
    /*
     * Called when the inode is destroyed: deletes the VFS inode together
     * with the file's data and metadata on disk.
     */
    void (*delete_inode)(struct inode *);
    /* Release the superblock object because its filesystem has been unmounted. */
    void (*put_super)(struct super_block *);
    /* Update the filesystem's superblock. */
    void (*write_super)(struct super_block *);
    /* Synchronize the filesystem. */
    int (*sync_fs)(struct super_block *sb, int wait);
    void (*write_super_lockfs)(struct super_block *);
    void (*unlockfs)(struct super_block *);
    /* Return the requested information in buf. */
    int (*statfs)(struct dentry *, struct kstatfs *buf);
    /* Remount the filesystem. */
    int (*remount_fs)(struct super_block *, int *, char *);
    /* Called while the inode is being destroyed. */
    void (*clear_inode)(struct inode *);
    /*
     * Abort a mount operation because the corresponding umount is starting;
     * used only by network filesystems.
     */
    void (*umount_begin)(struct vfsmount *, int);
    int (*show_options)(struct seq_file *, struct vfsmount *);
    int (*show_stats)(struct seq_file *, struct vfsmount *);
#ifdef CONFIG_QUOTA
    ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
    ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif

};

(2)inode節點物件,每個檔案擁有一個inode節點物件,包含了檔案的元資料資訊,在<linux/fs.h>標頭檔案

/*
 * In-memory inode object. Every file has one; it holds the file's metadata.
 */
struct inode {
    struct hlist_node    i_hash;                     /* Pointers for the hash list */
    struct list_head    i_list;                       /* Pointers for the list that describes the inode's current state: unused, in use, or dirty */
    struct list_head    i_sb_list;                /* Pointers for the superblock's list */
    struct list_head    i_dentry;                 /* Dentry objects associated with this inode */
    unsigned long        i_ino;                         /* Inode number */
    atomic_t        i_count;                            /* Usage count of the inode */
    unsigned int        i_nlink;                      /* Number of hard links; hard links share one inode but have different dentry objects (in different directories) */
    uid_t            i_uid;                                  /* Owner identifier */
    gid_t            i_gid;                                 /* Group identifier */
    dev_t            i_rdev;                              /* Real device identifier */
    unsigned long        i_version;             /* Version number, automatically increased after each use */
    loff_t            i_size;                           /* File length */
#ifdef __NEED_I_SIZE_ORDERED
    seqcount_t        i_size_seqcount;
#endif
    struct timespec        i_atime;           /* Time of last access to the file */
    struct timespec        i_mtime;          /* Time of last modification of the file */
    struct timespec        i_ctime;          /* Time of last modification of the inode */
    unsigned int        i_blkbits;            /* Block size (presumably expressed as a number of bits, going by the name - TODO confirm) */
    blkcnt_t        i_blocks;                   /* Number of blocks of the file */
    unsigned short          i_bytes;        /* Number of bytes in the last block of the file */
    umode_t            i_mode;                   /* File type and access permissions */
    spinlock_t        i_lock;    /* i_blocks, i_bytes, maybe i_size */
    struct mutex        i_mutex;
    struct rw_semaphore    i_alloc_sem;
    struct inode_operations    *i_op;        /* Inode operations */
    const struct file_operations    *i_fop;    /* former ->i_op->default_file_ops; set when the inode is created and handed to the file object when the file is opened */
    struct super_block    *i_sb;                       /* Pointer to the superblock object */
    struct file_lock    *i_flock;
    struct address_space    *i_mapping;        /* Pointer to the address space object */
    struct address_space    i_data;               /* Address space object */
#ifdef CONFIG_QUOTA
    struct dquot        *i_dquot[MAXQUOTAS];
#endif
    struct list_head    i_devices;
    union {
        struct pipe_inode_info    *i_pipe;
        struct block_device    *i_bdev;
        struct cdev        *i_cdev;
    };
    int            i_cindex;

    __u32            i_generation;

#ifdef CONFIG_DNOTIFY
    unsigned long        i_dnotify_mask; /* Directory notify events */
    struct dnotify_struct    *i_dnotify; /* for directory notifications */
#endif

#ifdef CONFIG_INOTIFY
    struct list_head    inotify_watches; /* watches on this inode */
    struct mutex        inotify_mutex;    /* protects the watches list */
#endif

    unsigned long        i_state;                 /* Inode state, e.g. whether it is dirty */
    unsigned long        dirtied_when;    /* jiffies of first dirtying */

    unsigned int        i_flags;            /* Filesystem mount flags */

    atomic_t        i_writecount;
#ifdef CONFIG_SECURITY
    void            *i_security;
#endif
    void            *i_private; /* fs or device private pointer */
};


/*
 * Table of inode operations, referenced from an inode through i_op.
 */
struct inode_operations {
    int (*create) (struct inode *,struct dentry *,int, struct nameidata *);                       /* Create an on-disk inode associated with a dentry object */
    struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);       /* Look up the dentry object related to an inode; NOTE(review): the original note says the search is done in the dentry cache - confirm */
    int (*link) (struct dentry *old_dentry,struct inode *,struct dentry *new_dentry);  /* Create a hard link, associating a new dentry object */
    int (*unlink) (struct inode *,struct dentry *);                                                                    /* Remove a hard link */
    int (*symlink) (struct inode *,struct dentry *,const char *);                                           /* Create a symbolic link */  
    int (*mkdir) (struct inode *,struct dentry *,int);                                                              /* Create a new inode (presumably for a directory, going by the name - TODO confirm) */
    int (*rmdir) (struct inode *,struct dentry *);
    int (*mknod) (struct inode *,struct dentry *,int,dev_t);
    int (*rename) (struct inode *, struct dentry *,
     struct inode *, struct dentry *);
    int (*readlink) (struct dentry *, char __user *,int);
    void * (*follow_link) (struct dentry *, struct nameidata *);                                       /* Translate (resolve) a symbolic link */
    void (*put_link) (struct dentry *, struct nameidata *, void *);
    void (*truncate) (struct inode *);
    int (*permission) (struct inode *, int, struct nameidata *);
    int (*setattr) (struct dentry *, struct iattr *);
    int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
    int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
    ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
    ssize_t (*listxattr) (struct dentry *, char *, size_t);
    int (*removexattr) (struct dentry *, const char *);
    void (*truncate_range)(struct inode *, loff_t, loff_t);
};


(3)檔案物件struct file,當程序開啟檔案時,在記憶體建立一個檔案物件,在磁碟上沒有相對應的映像,所以沒有dirty標誌,<linux/fs.h>

/*
 * File object: created in memory when a process opens a file. It has no
 * corresponding image on disk.
 */
struct file {
    /*
     * fu_list becomes invalid after file_free is called and queued via
     * fu_rcuhead for RCU freeing
     */
    union {
        struct list_head    fu_list;                /* List pointers linking file objects */
        struct rcu_head     fu_rcuhead;    
    } f_u;
    struct path        f_path;
#define f_dentry    f_path.dentry              /* Dentry object associated with the file object */
#define f_vfsmnt    f_path.mnt                   /* Mount point object containing the file object */
    const struct file_operations    *f_op;       /* Pointer to the file operations table */
    atomic_t        f_count;                                 /* Reference count of the file object; a file can be opened by several processes at once, each with its own struct file */
    unsigned int         f_flags;                          /* Flags specified when the file was opened */
    mode_t            f_mode;                                 /* Open mode */
    loff_t            f_pos;                                   /* Current position of the file pointer */
    struct fown_struct    f_owner;
    unsigned int        f_uid, f_gid;                /* User ID, group ID */
    struct file_ra_state    f_ra;                /* File read-ahead state */

    unsigned long        f_version;
#ifdef CONFIG_SECURITY
    void            *f_security;
#endif
    /* needed for tty driver, and maybe others */
    void            *private_data;

#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct list_head    f_ep_links;
    spinlock_t        f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
    struct address_space    *f_mapping;              /* Pointer to the address space */
};


/*
 * Table of file operations, referenced from a file object through f_op and
 * from an inode through i_fop.
 *
 * NOTE:
 * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl
 * can be called without the big kernel lock held in all filesystems.
 */
struct file_operations {
    struct module *owner;
    loff_t (*llseek) (struct file *, loff_t, int);
    ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
    ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
    ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
    ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
    int (*readdir) (struct file *, void *, filldir_t);
    unsigned int (*poll) (struct file *, struct poll_table_struct *);
    int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
    long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
    long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
    int (*mmap) (struct file *, struct vm_area_struct *);
    int (*open) (struct inode *, struct file *);
    int (*flush) (struct file *, fl_owner_t id);
    int (*release) (struct inode *, struct file *);
    int (*fsync) (struct file *, struct dentry *, int datasync);
    int (*aio_fsync) (struct kiocb *, int datasync);
    int (*fasync) (int, struct file *, int);
    int (*lock) (struct file *, int, struct file_lock *);
    ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
    ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
    unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
    int (*check_flags)(int);
    int (*dir_notify)(struct file *filp, unsigned long arg);
    int (*flock) (struct file *, int, struct file_lock *);
    ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
    ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
};


(4)目錄項物件,核心為路徑的每一部分建立目錄項物件,如/tmp/test,會為/,tmp和test分別建立目錄項物件,用於定位檔案或目錄。為了加快查詢,目錄項物件通常存放在目錄項快取記憶體。目錄項物件在磁碟上沒有相對應的映像。當目錄項物件不在dentry_cache中,就需要從磁碟讀取相關資訊,在記憶體建立一個目錄項物件和相關聯的索引節點物件。在<linux/dcache.h>

/*
 * Dentry (directory entry) object. The kernel creates one for every
 * component of a path; it is used to locate files and directories and has
 * no corresponding image on disk.
 */
struct dentry {
    atomic_t d_count;                 /* Usage count */
    unsigned int d_flags;        /* protected by d_lock; dentry cache flags */
    spinlock_t d_lock;        /* per dentry lock */
    struct inode *d_inode;        /* Where the name belongs to - NULL is
                     * negative; the inode associated with the file name */
    /*
     * The next three fields are touched by __d_lookup.  Place them here
     * so they all fit in a cache line.
     */
    struct hlist_node d_hash;    /* lookup hash list */
    struct dentry *d_parent;    /* parent directory (parent dentry object) */
    struct qstr d_name;           /* File name structure: name length, name and hash value */

    struct list_head d_lru;        /* LRU list */
    /*
     * d_child and d_rcu can share memory
     */
    union {
        struct list_head d_child;    /* child of parent list (list of child dentries) */
         struct rcu_head d_rcu;
    } d_u;
    struct list_head d_subdirs;    /* our children */
    struct list_head d_alias;    /* inode alias list */
    unsigned long d_time;        /* used by d_revalidate */
    struct dentry_operations *d_op;  /* Dentry operation methods */
    struct super_block *d_sb;    /* The root of the dentry tree */
    void *d_fsdata;            /* fs-specific data */
#ifdef CONFIG_PROFILING
    struct dcookie_struct *d_cookie; /* cookie, if any */
#endif
    int d_mounted;                                /* Number of filesystems mounted here; several filesystems can be mounted on the same directory */
    unsigned char d_iname[DNAME_INLINE_LEN_MIN];    /* small names */
};

/*
 * Table of dentry operations, referenced from a dentry through d_op.
 */
struct dentry_operations {
    int (*d_revalidate)(struct dentry *, struct nameidata *);                     /* Determine whether the dentry is still valid */
    int (*d_hash) (struct dentry *, struct qstr *);                                         /* Hash value (presumably computes the hash for a name - TODO confirm) */
    int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);         /* Compare file names */
    int (*d_delete)(struct dentry *);                                                                /* Called when d_count reaches 0 */
    void (*d_release)(struct dentry *);
    void (*d_iput)(struct dentry *, struct inode *);
};


(5)程序相關結構體:每個程序擁有自己的當前工作目錄和根目錄,通過fs_struct結構體維護<linux/fs_struct.h>

/*
 * Per-process filesystem context: each process has its own root directory
 * and current working directory, which are kept here.
 */
struct fs_struct {
    atomic_t count;  
    rwlock_t lock;
    int umask;                                                       /* Used when setting file permissions */
    struct dentry * root, * pwd, * altroot;      /* Root directory dentry and current working directory dentry; altroot is usually NULL */
    struct vfsmount * rootmnt, * pwdmnt, * altrootmnt;   /* Mount objects of the filesystems mounted at the root directory and at the current working directory */
};


(6)與程序關聯的files_struct物件,包括指向struct file物件的陣列指標fd,一個程序開啟的最多檔案數max_fds等,在<linux/file.h>

/*
 * Open file table structure, associated with a process.
 */
struct files_struct {
  /*
   * read mostly part
   */
    atomic_t count;
    struct fdtable *fdt;   /* Pointer to the file table */
    struct fdtable fdtab;
  /*
   * written part on a separate cache line in SMP
   */
    spinlock_t file_lock ____cacheline_aligned_in_smp;
    int next_fd;                                                              /* Next file descriptor */
    struct embedded_fd_set close_on_exec_init;
    struct embedded_fd_set open_fds_init;
    struct file * fd_array[NR_OPEN_DEFAULT];      /* Array of pointers to file objects */
};

/*
 * File descriptor table, referenced from struct files_struct through fdt.
 */
struct fdtable {
    unsigned int max_fds;      /* Current maximum number of file descriptors */
    struct file ** fd;      /* current fd array */
    fd_set *close_on_exec;
    fd_set *open_fds;             /* Set of file descriptors that are already open */
    struct rcu_head rcu;
    struct fdtable *next;
};


(7)檔案系統型別file_system_type ,每個檔案系統型別對應多個具體的檔案系統,在<linux/fs.h>

/* Description of a filesystem type. */
struct file_system_type {
    const char *name;/* Filesystem name */
    int fs_flags;/* Filesystem type flags */
    int (*get_sb) (struct file_system_type *, int,
    const char *, void *, struct vfsmount *);/* Read the superblock */
    void (*kill_sb) (struct super_block *);/* Remove the superblock */
    struct module *owner;/* Module that implements the filesystem */
    struct file_system_type * next;/* Next entry in the list of filesystem types */
    struct list_head fs_supers;/* Head of the list of superblocks of this filesystem type */
    struct lock_class_key s_lock_key;
    struct lock_class_key s_umount_key;
};
其中,

(1) file_systems是所有的檔案系統型別連結串列頭,如ext2,ext3,ext4. 而next指標指向下一個檔案系統型別。
(2)fs_supers是同類檔案系統的連結串列頭,下一個由超級塊物件的s_instances指標指向.

(3)get_sb和kill_sb分配一個超級塊物件和銷燬一個超級塊物件。


(8)mount point 物件,為了在記憶體中儲存掛載點,掛載標誌,以及和其它檔案系統之間的關係,如父子關係。必須維護一個vfsmount物件,在<linux/mount.h>

/*
 * Mounted-filesystem descriptor: records the mount point, the mount flags
 * and the relationship (such as parent/child) to other mounted filesystems.
 */
struct vfsmount {
    struct list_head mnt_hash; /* Pointers for the hash table */
    struct vfsmount *mnt_parent;    /* fs we are mounted on (parent mount) */
    struct dentry *mnt_mountpoint;    /* dentry of mountpoint */
    struct dentry *mnt_root;    /* root of the mounted tree */
    struct super_block *mnt_sb;    /* pointer to superblock */
    struct list_head mnt_mounts;    /* list of children, anchored here */
    struct list_head mnt_child;    /* and going through their mnt_child */
    atomic_t mnt_count;/* Usage count */
    int mnt_flags;/* Mount flags */
    int mnt_expiry_mark;        /* true if marked for expiry */
    char *mnt_devname;        /* Name of device e.g. /dev/dsk/hda1 */
    struct list_head mnt_list;    /* Links to the next mount object in the namespace; the list head is kept in the namespace's list field */
    struct list_head mnt_expire;    /* link in fs-specific expiry list */
    struct list_head mnt_share;    /* circular list of shared mounts */
    struct list_head mnt_slave_list;/* list of slave mounts */
    struct list_head mnt_slave;    /* slave list entry */
    struct vfsmount *mnt_master;    /* slave is on master->mnt_slave_list */
    struct mnt_namespace *mnt_ns;    /* containing namespace */
    int mnt_pinned;
};

處理檔案系統掛載的相關函式:

alloc_vfsmnt(name)

Allocates and initializes a mounted filesystem descriptor


free_vfsmnt(mnt)

Frees a mounted filesystem descriptor pointed by mnt


lookup_mnt(mnt,dentry)

Looks up a descriptor in the hash table and returns its address


3. 總結

super_block-關聯每個具體的檔案系統 

inode-對應檔案元資料資訊

dentry-目錄項物件,記憶體表示形式,檔案和目錄都擁有目錄項物件

file-開啟檔案在記憶體的表示形式

fs_struct-程序相關的根目錄,工作目錄,根安裝點和當前目錄安裝點

file_system_type-在啟動時註冊檔案系統型別,如EXT3,或者是載入模組時註冊

vfsmount-維護已掛載的檔案系統之間的關係,如父子關係