Linux 網絡卡驅動sk_buff核心原始碼隨筆
阿新 • • 發佈:2018-12-17
這幾天在除錯有關網絡卡驅動的東西,有很多地方不清楚。而且網絡卡驅動主要有兩個很重要的結構體:struct net_device 和 struct sk_buff。驅動都是圍繞這兩個東西進行操作的,為了搞清楚該如何按協議棧處理資料包,週末閒來無事就看看核心程式碼去了解下這部分東西。哎,不得不說今天的天氣是真好,讓我蠢蠢欲動。
/*
 * sk_buff->h        :傳輸層頭     :udp頭和tcp頭
 * sk_buff->nh       :網路層頭     :ip頭
 * sk_buff->mac      :資料鏈路層頭 :mac頭
 *
 * sk_buff->head     :指向資料緩衝區頭部
 * sk_buff->data     :指向實際資料的頭部
 * sk_buff->tail     :指向實際資料的尾部
 * sk_buff->end      :指向資料緩衝區尾部
 *
 * sk_buff控制區     :struct sk_buff所在的區域
 * 線性資料區        :資料緩衝區域           :sk_buff->head ~ sk_buff->end 之間的區域
 * 非線性資料區      :資料緩衝區域的補充區域 :skb_shared_info區域
 *
 * sk_buff->truesize :線性資料區 + 非線性資料區 + sizeof(struct sk_buff)
 * sk_buff->len      :線性資料區 + 非線性資料區
 * sk_buff->data_len :非線性資料區
 *
 * 一個完整的網路幀(sk_buff)包括:
 *     線性資料區 + 非線性資料區 + sk_buff控制區
 *
 * skb_clone() :只複製sk_buff控制區,其中新分配的sk_buff和原來的sk_buff共享線性資料區和非線性資料區
 *
 * pskb_copy() :複製sk_buff控制區 + 線性資料區,共享非線性資料區
 *
 * skb_copy()  :複製sk_buff控制區 + 線性資料區 + 非線性資料區
 */
原始碼附上:
struct sk_buff { /*表示接收或傳送資料包的包頭資訊,其成員變數在從一層向另一層傳遞時會發生修改*/ /* These two members must be first. */ struct sk_buff *next; struct sk_buff *prev; struct sk_buff_head *list; struct sock *sk; struct timeval stamp; struct net_device *dev; struct net_device *input_dev; struct net_device *real_dev; union { struct tcphdr *th; struct udphdr *uh; struct icmphdr *icmph; struct igmphdr *igmph; struct iphdr *ipiph; struct ipv6hdr *ipv6h; unsigned char *raw; } h; /*傳輸層*/ union { struct iphdr *iph; struct ipv6hdr *ipv6h; struct arphdr *arph; unsigned char *raw; } nh; /*網路層*/ union { unsigned char *raw; } mac; /*鏈路層*/ struct dst_entry *dst; /*記錄了到達目的地的路由資訊,以及其他的一些網路特徵資訊*/ struct sec_path *sp; /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. */ char cb[40]; /* * *在sk_buff這個裡面沒有實際的資料,這裡僅僅是控制資訊,資料是通過後面的data指標指向其他記憶體塊的! *那個記憶體塊中是線性資料和非線性資料!那麼len 就是length(線性資料) + length(非線性資料),alloc分配的長度 * */ unsigned int len, /* len : 代表整個資料區域的長度!skb的組成是有sk_buff控制 + 線性資料 + 非線性資料(skb_shared_info) 組成!*/ data_len, /*data_len: 指的是length(非線性資料)*/ mac_len, csum; unsigned char local_df, cloned:1, nohdr:1, /*僅僅引用資料區域*/ pkt_type, ip_summed; __u32 priority; unsigned short protocol, security; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER unsigned long nfmark; /*nfmark,用於鉤子之間通訊*/ __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; #ifdef CONFIG_NETFILTER_DEBUG unsigned int nf_debug; #endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif #endif /* CONFIG_NETFILTER */ #if defined(CONFIG_HIPPI) union { __u32 ifield; } private; #endif #ifdef CONFIG_NET_SCHED __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u32 tc_verd; /* traffic control verdict */ __u32 tc_classid; /* traffic control classid */ #endif #endif /* * 劃重點! 
* * These elements must be at the end, see alloc_skb() for details. * */ unsigned int truesize; atomic_t users; unsigned char *head, /*指向分配給的線性資料記憶體首地址*/ *data, /*指向儲存資料內容的首地址*/ *tail, /*指向資料的結尾*/ *end; /*指向分配的記憶體塊的結尾*/ };
/** * skb_copy - create private copy of an sk_buff 如果要修改資料,使用該函式。不僅複製sk_buff控制區,也複製資料區。是一個完整的備份 * @skb: buffer to copy * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data. This is used when the * caller wishes to modify the data and needs a private copy of the * data to alter. Returns %NULL on failure or the pointer to the buffer * on success. The returned buffer has a reference count of 1. * * As by-product this function converts non-linear &sk_buff to linear * one, so that &sk_buff becomes completely private and caller is allowed * to modify all the data of returned buffer. This means that this * function is not recommended for use in circumstances when only * header is going to be modified. Use pskb_copy() instead. */ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) { int headerlen = skb->data - skb->head; /* * Allocate the copy buffer */ struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, /*分配的空間大小為:sizeof(線性資料區 + 非線性資料區)*/ gfp_mask); if (!n) return NULL; /* Set the data pointer */ skb_reserve(n, headerlen); /*skb_reserve 分配headerlen大小的headroom 空間*/ /* Set the tail pointer and length */ skb_put(n, skb->len); n->csum = skb->csum; n->ip_summed = skb->ip_summed; if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) BUG(); copy_skb_header(n, skb); return n; }
/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Duplicate the &sk_buff control block and its linear (header) data;
 *	the non-linear data (page fragments and fragment list) stays shared
 *	with @skb. Intended for callers that only need to modify the header
 *	of an &sk_buff and therefore want a private copy of just that part.
 *
 *	Returns %NULL on failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */
struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	/* The new linear buffer is as large as the original one. */
	struct sk_buff *copy = alloc_skb(skb->end - skb->head, gfp_mask);

	if (copy == NULL)
		return NULL;

	/* Same headroom as the original, then make room for the linear bytes. */
	skb_reserve(copy, skb->data - skb->head);
	skb_put(copy, skb_headlen(skb));

	/* copy->len still describes only the linear part at this point. */
	memcpy(copy->data, skb->data, copy->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/* From here on the lengths also account for the shared non-linear data. */
	copy->data_len = skb->data_len;
	copy->len = skb->len;

	/* Share the page fragments: copy each descriptor and pin its page. */
	if (skb_shinfo(skb)->nr_frags) {
		int idx = 0;

		while (idx < skb_shinfo(skb)->nr_frags) {
			skb_shinfo(copy)->frags[idx] = skb_shinfo(skb)->frags[idx];
			get_page(skb_shinfo(copy)->frags[idx].page);
			idx++;
		}
		skb_shinfo(copy)->nr_frags = idx;
	}

	/* Share the fragment list as well. */
	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(copy)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(copy);
	}

	copy_skb_header(copy, skb);
	return copy;
}
/**
* skb_clone - duplicate an sk_buff :只複製一個和skb_buff,該skb_buff的指標的值與原來的skb值相同
* @skb: buffer to clone
* @gfp_mask: allocation priority
*
* Duplicate an &sk_buff. The new one is not owned by a socket. Both
* copies share the same packet data but not structure. The new
* buffer has a reference count of 1. If the allocation fails the
* function returns %NULL otherwise the new buffer is returned.
*
* If this function is called from an interrupt gfp_mask() must be
* %GFP_ATOMIC.
*/
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
if (!n)
return NULL;
#define C(x) n->x = skb->x /*只複製控制區,因此定義一個巨集函式方便複製操作*/
n->next = n->prev = NULL;
n->list = NULL;
n->sk = NULL;
C(stamp);
C(dev);
C(real_dev);
C(h);
C(nh);
C(mac);
C(dst);
dst_clone(skb->dst);
C(sp);
#ifdef CONFIG_INET
secpath_get(skb->sp);
#endif
memcpy(n->cb, skb->cb, sizeof(skb->cb));
C(len);
C(data_len);
C(csum);
C(local_df);
n->cloned = 1;
n->nohdr = 0;
C(pkt_type);
C(ip_summed);
C(priority);
C(protocol);
C(security);
n->destructor = NULL;
#ifdef CONFIG_NETFILTER
C(nfmark);
C(nfcache);
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
C(nf_bridge);
nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
C(private);
#endif
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
C(input_dev);
C(tc_classid);
#endif
#endif
C(truesize);
atomic_set(&n->users, 1);
C(head);
C(data);
C(tail);
C(end);
atomic_inc(&(skb_shinfo(skb)->dataref));
skb->cloned = 1;
return n;
}