1. 程式人生 > >Tcpdump抓包核心程式碼分析

Tcpdump抓包核心程式碼分析

註冊pf_packet協議

   .create函式在建立PF_PACKET型別的socket時被呼叫,並在呼叫過程中註冊鉤子函式,具體請看packet_create函式的實現。

static const struct net_proto_familypacket_family_ops = {

         .family=   PF_PACKET,

         .create=  packet_create,

         .owner     =       THIS_MODULE,

};

/*
 * Initialization routine (marked __init) for the packet socket code.
 *
 * This is an excerpt: the statements before and after the sock_register()
 * call, including the function's return statement, were elided by the
 * article and are not reproduced here.
 */
static int __init packet_init(void)
{
	/* ... elided in the original excerpt ... */

	/* Register the PF_PACKET family operations table. */
	sock_register(&packet_family_ops);

	/* ... elided in the original excerpt ... */
}

建立SOCK_PACKET sock時註冊回撥函式

/*

 *     Create a packet of type SOCK_PACKET.

 */

static int packet_create(struct net *net,struct socket *sock, int protocol,

                             int kern)

{

         structsock *sk;

         structpacket_sock *po;

         __be16proto = (__force __be16)protocol; /* weird, but documented */

         interr;

         if(!ns_capable(net->user_ns, CAP_NET_RAW))

                   return-EPERM;

         if(sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&

             sock->type != SOCK_PACKET)

                   return-ESOCKTNOSUPPORT;

         sock->state= SS_UNCONNECTED;

         err= -ENOBUFS;

         sk= sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);

         if(sk == NULL)

                   gotoout;

         sock->ops= &packet_ops;

         if(sock->type == SOCK_PACKET)

                   sock->ops= &packet_ops_spkt;

         sock_init_data(sock,sk);

         po= pkt_sk(sk);

         sk->sk_family= PF_PACKET;

         po->num= proto;

         err= packet_alloc_pending(po);

         if(err)

                   gotoout2;

         packet_cached_dev_reset(po);

         sk->sk_destruct= packet_sock_destruct;

         sk_refcnt_debug_inc(sk);

         /*

          *     Attacha protocol block

          */

         spin_lock_init(&po->bind_lock);

         mutex_init(&po->pg_vec_lock);

         po->prot_hook.func= packet_rcv;

         //註冊處理函式

         if (sock->type == SOCK_PACKET)

                   po->prot_hook.func =packet_rcv_spkt;

         po->prot_hook.af_packet_priv= sk;

         if (proto) {

                   po->prot_hook.type =proto;

                   將這個socket掛載到ptype_all連結串列上

                   register_prot_hook(sk);

         }

         mutex_lock(&net->packet.sklist_lock);

         sk_add_node_rcu(sk,&net->packet.sklist);

         mutex_unlock(&net->packet.sklist_lock);

         preempt_disable();

         sock_prot_inuse_add(net,&packet_proto, 1);

         preempt_enable();

         return0;

out2:

         sk_free(sk);

out:

         returnerr;

}

接收方向核心抓包函式

    有兩個呼叫場景:一個是網卡啟用NAPI,在輪詢流程中呼叫process_backlog;另一個是非NAPI場景,直接由netif_receive_skb接收資料報文,再遞交給網路層。

/*
 * Core receive-path dispatch for one skb.
 *
 * @skb:        the received buffer
 * @pfmemalloc: when true, the ptype_all taps are skipped and the skb is
 *              dropped unless skb_pfmemalloc_protocol() accepts it
 *
 * The function walks ptype_all (the list the article identifies as holding
 * the tcpdump hooks), then handles VLAN tags and the device's rx_handler,
 * and finally walks the ptype_base bucket matching skb->protocol.  Delivery
 * is deferred by one entry through pt_prev: every matching handler except
 * the last is invoked via deliver_skb(), and the last one is called
 * directly through pt_prev->func().
 *
 * Returns the value produced by the last handler invoked, NET_RX_SUCCESS
 * when the rx_handler consumed the skb, or NET_RX_DROP when nothing took it.
 */
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	net_timestamp_check(!netdev_tstamp_prequeue, skb);

	trace_netif_receive_skb(skb);

	/* Remember the original device; skb->dev may change on later rounds. */
	orig_dev = skb->dev;

	skb_reset_network_header(skb);
	if (!skb_transport_header_was_set(skb))
		skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

another_round:
	skb->skb_iif = skb->dev->ifindex;

	__this_cpu_inc(softnet_data.processed);

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	if (pfmemalloc)
		goto skip_taps;

	/* Walk the hooks that were attached when the tcpdump socket was created. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			/*
			 * Hand the packet to the previously matched tap.  The
			 * article describes this step as copying the packet;
			 * deliver_skb() itself is not shown here.
			 */
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
		goto drop;

	if (skb_vlan_tag_present(skb)) {
		/* Flush the pending handler before VLAN processing. */
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		if (vlan_do_receive(&skb))
			goto another_round;
		else if (unlikely(!skb))
			goto out;
	}

	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		/* Flush the pending handler before the rx_handler runs. */
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			ret = NET_RX_SUCCESS;
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* fall through */
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}

	if (unlikely(skb_vlan_tag_present(skb))) {
		if (skb_vlan_tag_get_id(skb))
			skb->pkt_type = PACKET_OTHERHOST;
		/* Note: we might in the future use prio bits
		 * and set skb->priority like in vlan_do_receive()
		 * For the time being, just ignore Priority Code Point
		 */
		skb->vlan_tci = 0;
	}

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;

	/*
	 * Regular protocol handling.  Per the article, for IP traffic this is
	 * where ip_rcv ends up being invoked.
	 */
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* The last matching handler is called directly. */
		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
			goto drop;
		else
			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
drop:
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	return ret;
}

傳送方向核心抓包函式

         資料傳送也存在兩個分支:一個是沒有配置qdisc時,呼叫dev_queue_xmit直接將資料遞交到網卡;另一個是配置了qdisc時,dev_queue_xmit流程檢查是否配置了佇列(queue),如果配置了,就呼叫__dev_xmit_skb函式將資料放入qdisc佇列,然後等待傳送中斷處理函式net_tx_action輪詢呼叫,進而觸發拷貝呼叫流程。

/*

 *     Support routine. Sends outgoing frames toany network

 *     taps currently in use.

 */

static void dev_queue_xmit_nit(structsk_buff *skb, struct net_device *dev)

{

         structpacket_type *ptype;

         structsk_buff *skb2 = NULL;

         structpacket_type *pt_prev = NULL;

         rcu_read_lock();

        //遍歷tcpdumpsocket建立時掛載的鉤子

         list_for_each_entry_rcu(ptype,&ptype_all, list) {

                   /* Never send packets back tothe socket

                    * they originated from - MvS([email protected])

                    */

                   if ((ptype->dev == dev ||!ptype->dev) &&

                       (!skb_loop_sk(ptype, skb))) {

                            if (pt_prev) {

                                     //拷貝資料報文

                                     deliver_skb(skb2,pt_prev, skb->dev);

                                     pt_prev =ptype;

                                     continue;

                            }

                            skb2 =skb_clone(skb, GFP_ATOMIC);

                            if (!skb2)

                                     break;

                            net_timestamp_set(skb2);

                            /* skb->nh shouldbe correctly

                               set by sender, so that the second statementis

                               just protection against buggy protocols.

                             */

                            skb_reset_mac_header(skb2);

                            if(skb_network_header(skb2) < skb2->data ||

                                skb_network_header(skb2) >skb_tail_pointer(skb2)) {

                                     net_crit_ratelimited("protocol%04x is buggy, dev %s\n",

                                                             ntohs(skb2->protocol),

                                                             dev->name);

                                     skb_reset_network_header(skb2);

                            }

                            skb2->transport_header= skb2->network_header;

                            skb2->pkt_type =PACKET_OUTGOING;

                            pt_prev = ptype;

                   }

         }

         if(pt_prev)

                   pt_prev->func(skb2,skb->dev, pt_prev, skb->dev);

         rcu_read_unlock();

}

銷燬SOCK_PACKET sock時摘除回撥

當SOCK_PACKET型別的socket關閉時會呼叫release函式,這時候會摘掉之前註冊的鉤子函式。

static int packet_release(struct socket*sock)

{

         structsock *sk = sock->sk;

         structpacket_sock *po;

         structnet *net;

         uniontpacket_req_u req_u;

         if(!sk)

                   return0;

         net= sock_net(sk);

         po= pkt_sk(sk);

         mutex_lock(&net->packet.sklist_lock);

         sk_del_node_init_rcu(sk);

         mutex_unlock(&net->packet.sklist_lock);

         preempt_disable();

         sock_prot_inuse_add(net,sk->sk_prot, -1);

         preempt_enable();

         spin_lock(&po->bind_lock);

         //從ptype_all函式中摘掉註冊的鉤子函式

         unregister_prot_hook(sk, false);

         packet_cached_dev_reset(po);

         if(po->prot_hook.dev) {

                   dev_put(po->prot_hook.dev);

                   po->prot_hook.dev= NULL;

         }

         spin_unlock(&po->bind_lock);

         packet_flush_mclist(sk);

         if(po->rx_ring.pg_vec) {

                   memset(&req_u,0, sizeof(req_u));

                   packet_set_ring(sk,&req_u, 1, 0);

         }

         if(po->tx_ring.pg_vec) {

                   memset(&req_u,0, sizeof(req_u));

                   packet_set_ring(sk,&req_u, 1, 1);

         }

         fanout_release(sk);

         synchronize_net();

         /*

          *     Nowthe socket is dead. No more input will appear.

          */

         sock_orphan(sk);

         sock->sk= NULL;

         /*Purge queues */

         skb_queue_purge(&sk->sk_receive_queue);

         packet_free_pending(po);

         sk_refcnt_debug_release(sk);

         sock_put(sk);

         return0;

}

總結

        Tcpdump抓包時建立SOCK_PACKET型別的socket,並且在socket建立流程中呼叫了packet_family_ops的.create函式(packet_create),進而將抓包的鉤子函式註冊到ptype_all連結串列;在資料接收方向,__netif_receive_skb_core函式呼叫註冊的鉤子函式,將資料報文拷貝到af_packet.c檔案的具體處理流程函式中;同樣,在傳送方向,dev_queue_xmit_nit函式呼叫鉤子函式實現資料報文拷貝。