1. 程式人生 > >Linux open系統呼叫流程(2)

Linux open系統呼叫流程(2)

1. 書接上文,繼續分析do_filp_open函式,其傳入4個引數:

dfd:相對目錄

tmp:檔案路徑名(即下面函式原型中的filename引數),例如要開啟/usr/src/kernels/linux-2.6.30

flags:開啟標誌

mode:開啟模式

/*
 * do_filp_open - resolve @filename and return the opened struct file.
 *
 * Returns the file object on success, or an ERR_PTR()-encoded error when
 * the name lookup fails.
 *
 * Note that while the flag value (low two bits) for sys_open means:
 *	00 - read-only
 *	01 - write-only
 *	10 - read-write
 *	11 - special
 * it is changed into
 *	00 - no permissions needed
 *	01 - read-permission
 *	10 - write-permission
 *	11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 * used by symlinks.
 */
static struct file *do_filp_open(int dfd, const char *filename, int flags,
				 int mode)
{
	/* Filled in by open_namei() and consumed by nameidata_to_filp(). */
	struct nameidata nd;
	int internal_flags = flags;
	int err;

	/* Re-encode the access-mode bits into the internal form shown above. */
	if ((internal_flags + 1) & O_ACCMODE)
		internal_flags++;

	/* Walk the path; only the lookup sees the re-encoded flags. */
	err = open_namei(dfd, filename, internal_flags, mode, &nd);
	if (err)
		return ERR_PTR(err);

	/* Turn the lookup result into a struct file using the caller's flags. */
	return nameidata_to_filp(&nd, flags);
}

初看起來,寥寥幾行程式碼,貌似簡單。其實不然,一會就知道了。此函式呼叫了open_namei和nameidata_to_filp. 後一個函式通過名字就可以猜出來,是將nameidata結構轉化為filp,也就是利用nd結構賦值給檔案指標file,然後返回這個檔案指標。而open_namei肯定是填充nd結構體,具體功能可表述為: 根據上級目錄項物件,查詢下一級的目錄項物件,如果在目錄項快取找到下一級的目錄項物件,則直接返回,並填充nd的掛載點物件和目錄項物件。否則,構建一個子目錄項物件,並利用iget函式分配一個新的inode結構,將子目錄項物件和inode結構相關聯。這樣,一直迴圈到最後一個分量。最後返回的是最後一個分量的目錄項物件和掛載點物件。可以看到,在這兩個函式中,都利用了nameidata結構,具體看一下這個神奇的結構:

/*
 * Open-intent data carried along with a path lookup.
 *
 * Defined ahead of struct nameidata because that structure embeds it by
 * value, which requires the type to be complete at that point.
 */
struct open_intent {
	int	flags;		/* open flags */
	int	create_mode;	/* creation mode supplied by the caller */
	struct file *file;	/* file object being opened */
};

/* Working state of a path-name lookup. */
struct nameidata {
	struct dentry	*dentry;	/* current dentry */
	struct vfsmount *mnt;		/* mounted filesystem the dentry is on */
	struct qstr	last;		/* last component of the path name */
	unsigned int	flags;		/* lookup flags (LOOKUP_*) */
	int		last_type;	/* type of the last component (LAST_*) */
	unsigned	depth;		/* current symlink nesting depth */
	/* path names associated with nested symlinks, one slot per level */
	char *saved_names[MAX_NESTED_LINKS + 1];

	/* Intent data */
	union {
		struct open_intent open;	/* intent data for an open */
	} intent;
};
open_intent結構中的file成員(nd->intent.open.file)就是最後返回的檔案物件。

由於nameidata_to_filp比較簡單,先看一下:

/**
 * nameidata_to_filp - convert a nameidata to an open filp.
 * @nd: pointer to nameidata
 * @flags: open flags
 *
 * Hands back the struct file stored in the open intent of @nd.
 *
 * Note that this function destroys the original nameidata
 */
struct file *nameidata_to_filp(struct nameidata *nd, int flags)
{
	/* The file object travels inside the open intent. */
	struct file *file = nd->intent.open.file;

	/*
	 * A dentry already attached to the file means the filesystem has
	 * initialised it for us; all that is left is releasing nd.
	 */
	if (file->f_path.dentry != NULL) {
		path_release(nd);
		return file;
	}

	/* Not initialised yet: set the file up from nd's dentry and mount. */
	return __dentry_open(nd->dentry, nd->mnt, flags, file, NULL);
}
首先取得檔案物件指標,然後判斷檔案物件是否已經初始化,如果沒有初始化,就呼叫__dentry_open函式,對檔案物件進行初始化。

/*
 * __dentry_open - fill in struct file @f for @dentry on @mnt and open it.
 *
 * @dentry: dentry of the object being opened
 * @mnt:    mount the dentry lives on
 * @flags:  open flags; stored in f->f_flags and used to derive f->f_mode
 * @f:      file object to initialise
 * @open:   open method to call, or NULL to use f->f_op->open
 *
 * Returns @f on success or an ERR_PTR()-encoded error.  On the cleanup
 * paths the file is released with put_filp() and the @dentry / @mnt
 * references are dropped with dput() / mntput().
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
					int flags, struct file *f,
					int (*open)(struct inode *, struct file *))
{
	struct inode *inode;
	int error;
	/* Record the open flags and derive the file mode from them. */
	f->f_flags = flags;
	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
				FMODE_PREAD | FMODE_PWRITE;
	/* The inode behind the dentry. */
	inode = dentry->d_inode;
	/* Opening for write requires write access to the inode. */
	if (f->f_mode & FMODE_WRITE) {
		error = get_write_access(inode);
		if (error)
			goto cleanup_file;
	}
	/* Address space object, taken from the inode. */
	f->f_mapping = inode->i_mapping;
	/* Dentry object. */
	f->f_path.dentry = dentry;
	/* Mount object. */
	f->f_path.mnt = mnt;
	/* Start at file position 0. */
	f->f_pos = 0;
	/* The file operations come from the inode's i_fop. */
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);
	/* No explicit open method was passed in: fall back to f_op->open. */
	if (!open && f->f_op)/* open is NULL */
		open = f->f_op->open;
	/* Call the open method only if there is one (e.g. for device files). */
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}
	
	/* These flags are cleared from f_flags once the open method has run. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
	/* Initialise the read-ahead state. */
	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/* NB: we're sure to have correct a_ops only after f_op->open */
	/* O_DIRECT is rejected with -EINVAL unless a_ops has direct_IO or get_xip_page. */
	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops ||
		    ((!f->f_mapping->a_ops->direct_IO) &&
		    (!f->f_mapping->a_ops->get_xip_page))) {
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}

	return f;

	/* The open method failed: undo everything set up above. */
cleanup_all:
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE)
		put_write_access(inode);
	file_kill(f);
	f->f_path.dentry = NULL;
	f->f_path.mnt = NULL;
	/* Shared tail: release the file and the dentry / mount references. */
cleanup_file:
	put_filp(f);
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}

首先,設定檔案開啟標誌f->f_flags. 然後初始化地址空間物件,目錄項物件,掛載點物件,檔案指標位置,檔案相關操作。需要說明兩點:

(1)地址空間物件和索引節點相關聯,在構建索引節點時已經賦值了。它涉及到具體的磁碟塊操作,在後面的章節將會解釋。

   (2)f_op這個非常重要,也是在構建索引節點時,將具體檔案系統的檔案操作函式集的指標賦給索引節點的i_fop域。對於開啟檔案,目錄,符號連結,對應的操作函式集是不相同的。

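接下來是「檔案open操作」部分(從 if (!open && f->f_op) 到 if (open) { ... } 這一段):如果呼叫者沒有傳入open函式,就取f->f_op->open;只有當open不為空時才會真正呼叫它。如果是普通檔案,可能不需要開啟。如果是裝置檔案,就需要開啟操作。例如SCSI裝置的sg_open函式。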

最後,對檔案預讀進行初始化。

在說完nameidata_to_filp函式之後,需要解釋open_namei函式:

/*
 *	open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 *
 * Returns 0 with @nd filled in on success, or a negative errno.
 */
int open_namei(int dfd, const char *pathname, int flag,
		int mode, struct nameidata *nd)
{
	int acc_mode, error;
	/* A path pairs a mount with a dentry. */
	struct path path;
	struct dentry *dir;
	/* Number of trailing symlinks followed so far (see do_link). */
	int count = 0;

	acc_mode = ACC_MODE(flag);

	/* O_TRUNC implies we need access checks for write permissions */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/* Allow the LSM permission hook to distinguish append 
	   access from general write access. */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/*
	 * The simplest case - just a plain lookup.
	 * Nothing has to be created: look the path up with open intent,
	 * which fills in nd, then go straight to the permission check.
	 */
	if (!(flag & O_CREAT)) {
		error = path_lookup_open(dfd, pathname, lookup_flags(flag),
					 nd, flag);
		if (error)
			return error;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 * The file may not exist yet, so LOOKUP_PARENT is passed to ask
	 * the lookup for the parent of the last component.
	 */
	error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;
	/*
	 * In the create case nd holds the parent directory at this point;
	 * in the plain-open case above it holds the last component itself.
	 */
	dir = nd->dentry;
	nd->flags &= ~LOOKUP_PARENT;
	mutex_lock(&dir->d_inode->i_mutex);
	/* Look the last component up in the parent; path gets dentry + mount. */
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;

do_last:
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}

	if (IS_ERR(nd->intent.open.file)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		error = PTR_ERR(nd->intent.open.file);
		goto exit_dput;
	}

	/* Negative dentry, just create the file */
	if (!path.dentry->d_inode) {
		/*
		 * No inode yet, i.e. the file does not exist: create it.
		 * NOTE(review): dir's i_mutex is not unlocked on this path,
		 * so open_namei_create() presumably releases it - confirm.
		 */
		error = open_namei_create(nd, &path, flag, mode);
		if (error)
			goto exit;
		return 0;
	}

	/*
	 * It already exists.
	 */
	mutex_unlock(&dir->d_inode->i_mutex);
	audit_inode_update(path.dentry->d_inode);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	if (__follow_mount(&path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
	}

	error = -ENOENT;
	if (!path.dentry->d_inode)
		goto exit_dput;
	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
		goto do_link;
	/* Copy path's dentry and mount into nd. */
	path_to_nameidata(&path, nd);
	error = -EISDIR;
	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
		goto exit;
ok:
	error = may_open(nd, acc_mode, flag);
	if (error)
		goto exit;
	return 0;

exit_dput:
	dput_path(&path, nd);
exit:
	if (!IS_ERR(nd->intent.open.file))
		release_open_intent(nd);
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	nd->flags |= LOOKUP_PARENT;
	error = security_inode_follow_link(path.dentry, nd);
	if (error)
		goto exit_dput;
	error = __do_follow_link(&path, nd);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
		release_open_intent(nd);
		return error;
	}
	nd->flags &= ~LOOKUP_PARENT;
	if (nd->last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	if (nd->last.name[nd->last.len]) {
		__putname(nd->last.name);
		goto exit;
	}
	/* Give up with -ELOOP once 32 trailing symlinks have been followed. */
	error = -ELOOP;
	if (count++==32) {
		__putname(nd->last.name);
		goto exit;
	}
	/* Same state as before do_last: lock the new parent and look up again. */
	dir = nd->dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;
	__putname(nd->last.name);
	goto do_last;
}

首先根據flag計算訪問模式acc_mode。如果沒有設定O_CREAT(即單純的開啟操作),則呼叫path_lookup_open函式,然後跳到ok標籤做許可權檢查。如果設定了O_CREAT,則呼叫path_lookup_create函式,先找到父目錄。之後在do_last標籤處,如果最後一個分量的目錄項還沒有對應的索引節點(即檔案不存在),就需要建立磁碟上的索引節點,即呼叫open_namei_create函式。我們逐一解釋:

首先path_lookup_open函式:

/**
 * path_lookup_open - lookup a file path with open intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * Thin wrapper around __path_lookup_intent_open() with a create mode of 0;
 * its return value is passed through unchanged.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
		struct nameidata *nd, int open_flags)
{
	/* A plain open carries no creation mode. */
	const int no_create_mode = 0;

	return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
			open_flags, no_create_mode);
}
封裝了__path_lookup_intent_open函式。

path_lookup_create函式:

/**
 * path_lookup_create - lookup a file path with open + create intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 * @create_mode: create intent flags
 *
 * Same as path_lookup_open(), except that LOOKUP_CREATE is added to the
 * lookup flags and @create_mode is forwarded.
 */
static int path_lookup_create(int dfd, const char *name,
			      unsigned int lookup_flags, struct nameidata *nd,
			      int open_flags, int create_mode)
{
	/* Mark the lookup as one that may create the last component. */
	unsigned int flags_with_create = lookup_flags | LOOKUP_CREATE;

	return __path_lookup_intent_open(dfd, name, flags_with_create, nd,
			open_flags, create_mode);
}
也封裝了__path_lookup_intent_open函式,只是增加了建立標誌LOOKUP_CREATE, 在create操作的lookup_flags設定了LOOKUP_PARENT,接下來,將看到這個標誌的作用。

繼續跟蹤__path_lookup_intent_open函式:

/*
 * Common helper for path_lookup_open() / path_lookup_create(): allocate an
 * empty struct file, store it with the open flags and create mode in
 * nd->intent.open, then run the path lookup with LOOKUP_OPEN set.
 *
 * Returns 0 on success, -ENFILE if no file object could be allocated, or
 * the error from the lookup / the error stored in the intent's file slot.
 */
static int __path_lookup_intent_open(int dfd, const char *name,
		unsigned int lookup_flags, struct nameidata *nd,
		int open_flags, int create_mode)
{
	struct file *new_file;
	int ret;

	/* Grab an unused file object up front. */
	new_file = get_empty_filp();
	if (!new_file)
		return -ENFILE;

	/* Describe the intended open: target file, flags and create mode. */
	nd->intent.open.file = new_file;
	nd->intent.open.flags = open_flags;
	nd->intent.open.create_mode = create_mode;

	/* Resolve the path, flagging the lookup as being for an open. */
	ret = do_path_lookup(dfd, name, lookup_flags | LOOKUP_OPEN, nd);

	/* File slot still valid: on lookup failure just drop the intent. */
	if (!IS_ERR(nd->intent.open.file)) {
		if (ret != 0)
			release_open_intent(nd);
		return ret;
	}

	/*
	 * The file slot now holds an error pointer.  If the lookup itself
	 * reported success, report that error instead and release nd.
	 */
	if (ret == 0) {
		ret = PTR_ERR(nd->intent.open.file);
		path_release(nd);
	}
	return ret;
}

首先呼叫get_empty_filp函式分配一個空閒的檔案物件filp, 設定intent.open的相關域,包括“想要開啟的檔案”,開啟標誌和建立模式。最後,呼叫do_path_lookup對檔案路徑進行解析,並填充nd。

/*
 * do_path_lookup - choose the starting point for the lookup of @name,
 * then walk the path with link_path_walk().
 *
 * The start is the root (or alternate root) for an absolute name, the
 * current working directory when @dfd is AT_FDCWD, and otherwise the
 * directory that @dfd refers to.
 */
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */

static int fastcall do_path_lookup(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	int retval = 0;
	int fput_needed;
	struct file *file;
	struct fs_struct *fs = current->fs;
	/* Default the last-component type to LAST_ROOT. */
	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags;
	nd->depth = 0;
	/* Absolute path: start from the root directory. */
	if (*name=='/') {
		read_lock(&fs->lock);
		if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
			/* Try the alternate root first: its mount ... */
			nd->mnt = mntget(fs->altrootmnt);
			/* ... and its dentry become the starting point. */
			nd->dentry = dget(fs->altroot);
			read_unlock(&fs->lock);
			if (__emul_lookup_dentry(name,nd))
				goto out; /* found in altroot */
			read_lock(&fs->lock);
		}
		/* Take a reference on the root mount. */
		nd->mnt = mntget(fs->rootmnt);
		/* Take a reference on the root dentry. */
		nd->dentry = dget(fs->root);
		read_unlock(&fs->lock);
	/* Relative path based at the current working directory. */
	} else if (dfd == AT_FDCWD) {
		read_lock(&fs->lock);
		/* Mount of the current directory, from the process's fs_struct. */
		nd->mnt = mntget(fs->pwdmnt);
		/* Dentry of the current directory, from the process's fs_struct. */
		nd->dentry = dget(fs->pwd);
		read_unlock(&fs->lock);
	} else {/* dfd != AT_FDCWD: relative path based at the directory dfd refers to */
		struct dentry *dentry;
		/* Look up the file object behind dfd. */
		file = fget_light(dfd, &fput_needed);
		retval = -EBADF;
		if (!file)
			goto out_fail;
		/* Its dentry must be a directory ... */
		dentry = file->f_path.dentry;
		retval = -ENOTDIR;
		if (!S_ISDIR(dentry->d_inode->i_mode))
			goto fput_fail;

		/* ... and must pass the MAY_EXEC permission check. */
		retval = file_permission(file, MAY_EXEC);
		if (retval)
			goto fput_fail;
		/* Start from the file's mount ... */
		nd->mnt = mntget(file->f_path.mnt);
		/* ... and the file's dentry. */
		nd->dentry = dget(dentry);
		fput_light(file, fput_needed);
	}
	/* Reset the per-task count of links followed before walking. */
	current->total_link_count = 0;
	/* Walk the path components starting from nd. */
	retval = link_path_walk(name, nd);
out:
	if (likely(retval == 0)) {
		if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
				nd->dentry->d_inode))
		audit_inode(name, nd->dentry->d_inode);
	}
out_fail:
	return retval;

	/* Error after fget_light() succeeded: drop the file before returning. */
fput_fail:
	fput_light(file, fput_needed);
	goto out_fail;
}

函式開頭先初始化nd->last_type, flags和depth. 其中depth表示符號連結的深度。由於符號連結可以連結自己,因此需要限制連結的深度。

在 if (*name=='/') 分支中,如果第一個字元為/,表示從根目錄開始解析,設定nd->mnt為根掛載點物件,nd->dentry為根目錄項物件,然後增加引用計數。

在 else if (dfd == AT_FDCWD) 分支中,如果是從當前目錄開始,將nd->mnt設定為當前目錄的掛載點物件,nd->dentry設定為當前目錄的目錄項物件。

在最後的 else 分支中,將nd->mnt和nd->dentry分別設定為dfd對應檔案物件的f_path.mnt和f_path.dentry.

接下來,將current->total_link_count清零以初始化符號連結總數,然後呼叫路徑分解函式link_path_walk.

/*
 * link_path_walk - resolve @name starting from the position held in @nd.
 *
 * Wraps __link_path_walk(): if the first attempt returns -ESTALE, the walk
 * is repeated once from the saved starting point with LOOKUP_REVAL set.
 * Returns the result of the last walk performed.
 */
int fastcall link_path_walk(const char *name, struct nameidata *nd)
{
	struct nameidata saved = *nd;
	int ret;

	/* make sure the stuff we saved doesn't go away */
	dget(saved.dentry);
	mntget(saved.mnt);

	/* First attempt at the real name resolution. */
	ret = __link_path_walk(name, nd);
	if (ret != -ESTALE)
		goto drop_saved;

	/*
	 * Stale result: rewind nd to the saved start, take fresh references
	 * for it, and retry with LOOKUP_REVAL added to the flags.
	 */
	*nd = saved;
	dget(nd->dentry);
	mntget(nd->mnt);
	nd->flags |= LOOKUP_REVAL;
	ret = __link_path_walk(name, nd);

drop_saved:
	/* Drop the references that kept the saved copy alive. */
	dput(saved.dentry);
	mntput(saved.mnt);
	return ret;
}

首先,備份掛載點物件和目錄項物件,然後呼叫__link_path_walk解析.

這個函式也比較複雜,在下一節中繼續分析!