1. 程式人生 > >Linux 網絡卡驅動sk_buff核心原始碼隨筆

Linux 網絡卡驅動sk_buff核心原始碼隨筆

          這幾天在除錯有關網絡卡驅動的東西,有很多地方不清楚。而且網絡卡驅動主要有兩個很重要的結構體:struct net_device 和struct sk_buff。 驅動都是圍繞這兩個東西進行操作的,為了搞清楚該如何按協議棧處理資料包,週末閒來無事就看看核心程式碼去了解下這部分東西。哎,不得不說今天的天氣是真好,讓我蠢蠢欲動。

/*
 *sk_buff->h		:傳輸層頭	 :udp頭和tcp頭
 *sk_buff->nh		:網路層頭	 :ip頭
 *sk_buff->mac	    :資料鏈路層頭 :mac頭
 *
 *
 *sk_buff->head	:指向資料緩衝區頭部
 *sk_buff->data	:指向實際資料的頭部
 *sk_buff->tail	:指向實際資料的尾部
 *sk_buff->end	:指向資料緩衝區尾部
 *
 *
 *sk_buff控制區	:struct sk_buff所在的區域
 *線性資料區		:資料緩衝區域 : sk_buff->head~sk_buff->end 之間的區域
 *非線性資料區	:資料緩衝區域的補充區域 :skb_shared_info區域
 *
 *
 *sk_buff->truesize:線性資料區+非線性資料區+sizeof(struct sk_buff)
 *sk_buff->len     :線性資料區+非線性資料區
 *sk_buff->data_len:非線性資料區
 *
 *
 *一個完整的網路幀(sk_buff)包括:
 *        線性資料區 + 非線性資料區 + sk_buff控制區
 *
 * skb_clone() :只複製sk_buff控制區,其中新分配的sk_buff和原來的sk_buff共享線性資料區和非線性資料區
 *
 * pskb_copy() :複製sk_buff控制區 + 線性資料區,共享非線性資料區
 *
 * skb_copy()  :複製sk_buff控制區 + 線性資料區 + 非線性資料區
 *
 *
 *
 *
 */

原始碼附上: 

struct sk_buff { /* Holds the header/control information of a packet being received or sent; its members are modified as the packet is handed from one layer to the next. */
	/* These two members must be first. */
	struct sk_buff		*next;
	struct sk_buff		*prev;

	struct sk_buff_head	*list;
	struct sock			*sk;
	struct timeval			stamp;
	struct net_device		*dev;
	struct net_device		*input_dev;
	struct net_device		*real_dev;

	union {
		struct tcphdr		*th;
		struct udphdr		*uh;
		struct icmphdr	*icmph;
		struct igmphdr	*igmph;
		struct iphdr		*ipiph;
		struct ipv6hdr	*ipv6h;
		unsigned char		*raw;
	} h;								/* transport layer header */

	union {
		struct iphdr		*iph;
		struct ipv6hdr	*ipv6h;
		struct arphdr		*arph;
		unsigned char		*raw;
	} nh;							/* network layer header */

	union {
	  	unsigned char 	*raw;
	} mac;							/* link layer header */

	struct  dst_entry		*dst;	/* routing information for reaching the destination, plus some other network characteristics */
	struct	sec_path		*sp;

	/*
	 * This is the control buffer. It is free to use for every
	 * layer. Please put your private variables there. If you
	 * want to keep them across layers you have to do a skb_clone()
	 * first. This is owned by whoever has the skb queued ATM.
	 */
	char					 cb[40];

	
	/*
 	 *
	 * struct sk_buff itself carries no packet payload, only control
	 * information; the payload lives in a separate memory block reached
	 * through the data pointer declared further down. That block holds the
	 * linear and the non-linear data, so
	 * len = length(linear data) + length(non-linear data).
	 * NOTE(review): the original annotation also calls this "the length
	 * allocated by alloc" - confirm against alloc_skb() before relying on it.
	 *
	 */
	unsigned int		len,	/* len: length of the whole data area. An skb is made of the sk_buff control structure + linear data + non-linear data (skb_shared_info). */
					data_len, /* data_len: length of the non-linear data only */
					mac_len,
					csum;
	unsigned char		local_df,
					cloned:1,		
					nohdr:1,		/* only the data area is referenced */
					pkt_type,
					ip_summed;
	__u32			priority;
	unsigned short	protocol,
					security;

	void				(*destructor)(struct sk_buff *skb);
	
#ifdef CONFIG_NETFILTER
        unsigned long		nfmark;			/* nfmark: used for communication between netfilter hooks */
	__u32				nfcache;
	__u32				nfctinfo;
	struct nf_conntrack	*nfct;
	
#ifdef CONFIG_NETFILTER_DEBUG
        unsigned int			nf_debug;
#endif

#ifdef CONFIG_BRIDGE_NETFILTER
	struct nf_bridge_info	*nf_bridge;
#endif
#endif /* CONFIG_NETFILTER */
#if defined(CONFIG_HIPPI)
	union {
		__u32			ifield;
	} private;
#endif
#ifdef CONFIG_NET_SCHED
       __u32			tc_index;        /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
	__u32          		 tc_verd;               /* traffic control verdict */
	__u32           		tc_classid;            /* traffic control classid */
#endif

#endif

	/*
	 *	Important!
	 *
	 * These elements must be at the end, see alloc_skb() for details. 
	 *
	 */
	unsigned int		truesize;
	atomic_t			users;
	unsigned char		*head,	/* start address of the allocated linear data memory */
					*data,	/* start of the stored packet data */
					*tail,	/* end of the packet data */
					*end;	/* end of the allocated memory block */
};
​
/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Produces a complete backup: the &sk_buff control structure and all of
 *	its data are duplicated, so this is the routine to use when the caller
 *	intends to modify the packet data. Returns %NULL when the allocation
 *	fails, otherwise a pointer to the new buffer, which has a reference
 *	count of 1.
 *
 *	As a by-product a non-linear &sk_buff is converted into a linear one,
 *	making the result completely private; the caller may modify every byte
 *	of the returned buffer. That also makes this function a poor choice
 *	when only the header is going to be modified - use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *copy;
	int headroom = skb->data - skb->head;

	/* Size of the new buffer: whole linear area plus the non-linear data. */
	copy = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if (copy == NULL)
		return NULL;

	/* Set the data pointer: leave the same headroom as the source. */
	skb_reserve(copy, headroom);
	/* Set the tail pointer and length. */
	skb_put(copy, skb->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/*
	 * Copy starting from the beginning of the source headroom (negative
	 * offset relative to skb->data) into the start of the new buffer.
	 */
	if (skb_copy_bits(skb, -headroom, copy->head, headroom + skb->len) != 0)
		BUG();

	copy_skb_header(copy, skb);

	return copy;
}

​
/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Duplicates the &sk_buff control structure and the linear (header)
 *	part of its data; fragmented data remain shared with the original.
 *	Intended for callers that only need to modify the header of an
 *	&sk_buff and therefore need a private copy of just that part.
 *	Returns %NULL on failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *copy;
	int frag;

	/* Allocate a buffer as large as the source's linear area. */
	copy = alloc_skb(skb->end - skb->head, gfp_mask);
	if (copy == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(copy, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(copy, skb_headlen(skb));
	/* Copy the linear bytes */
	memcpy(copy->data, skb->data, copy->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/* The lengths describe the whole packet, including the shared part. */
	copy->data_len = skb->data_len;
	copy->len = skb->len;

	/* Copy each fragment descriptor and call get_page() on its page. */
	frag = 0;
	while (frag < skb_shinfo(skb)->nr_frags) {
		skb_shinfo(copy)->frags[frag] = skb_shinfo(skb)->frags[frag];
		get_page(skb_shinfo(copy)->frags[frag].page);
		frag++;
	}
	/* Only write the fragment count when there was at least one fragment. */
	if (frag != 0)
		skb_shinfo(copy)->nr_frags = frag;

	if (skb_shinfo(skb)->frag_list != NULL) {
		skb_shinfo(copy)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(copy);
	}

	copy_skb_header(copy, skb);

	return copy;
}
/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. Only the control structure is copied; the
 *	pointer members of the new buffer hold the same values as those of
 *	the original. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt gfp_mask() must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);

	if (!n) 
		return NULL;

/* Only the control structure is copied, so a member-copy helper keeps it short. */
#define C(x) n->x = skb->x

	/* The clone is not on any list and not owned by any socket. */
	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	C(protocol);
	C(security);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
	nf_conntrack_get(skb->nfct);
	C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	C(nf_bridge);
	nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	/*
	 * Derive the clone's verdict in three steps. Each step must operate on
	 * the result of the previous one (n->tc_verd); feeding skb->tc_verd to
	 * every macro would let the last assignment overwrite the earlier two.
	 */
	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
	C(input_dev);
	C(tc_classid);
#endif

#endif
	C(truesize);
	atomic_set(&n->users, 1);
	C(head);
	C(data);
	C(tail);
	C(end);

/* Keep the one-letter helper from leaking into the rest of the file. */
#undef C

	/* The data area now has one more user; flag the original as cloned too. */
	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
}