1. 程式人生 > >學習Linux-4.12核心網路協議棧(1.6)——協議棧的初始化(inet_init實現過程)

學習Linux-4.12核心網路協議棧(1.6)——協議棧的初始化(inet_init實現過程)

這篇文章主要分析inet_init()函式的實現過程:

/*
 * inet_init - boot-time initialisation of the IPv4 (AF_INET) stack.
 *
 * Hooked into the boot sequence by the fs_initcall() at the end of this
 * listing. The work is done in this order:
 *   1. register the tcp/udp/raw/ping struct proto objects (proto_register);
 *   2. register inet_family_ops with the socket layer (sock_register);
 *   3. add the ICMP/UDP/TCP (and, with CONFIG_IP_MULTICAST, IGMP)
 *      protocol handlers (inet_add_protocol);
 *   4. initialise the inetsw[] list heads and register every entry of
 *      inetsw_array (inet_register_protosw);
 *   5. call the per-subsystem init routines (arp, ip, tcp, udp, udplite,
 *      ping, icmp, optional mroute, pernet ops, mibs, proc, ipfrag),
 *      add ip_packet_type with dev_add_pack() and call
 *      ip_tunnel_core_init().
 *
 * Only a proto_register() failure makes this function return an error;
 * later failures are either logged with pr_crit() and ignored, or (for
 * icmp_init()) escalate to panic().
 *
 * Return: 0 on success, otherwise the non-zero code returned by the
 * failing proto_register() call.
 */
1796 static int __init inet_init(void)
1797 {
1798     struct inet_protosw *q;
1799     struct list_head *r;
         /* The -EINVAL initialiser is overwritten by the first proto_register(). */
1800     int rc = -EINVAL;
1801
1802     sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
1803
         /*
          * Each failure jumps to a label that unregisters only the protos
          * registered before it (see the out_unregister_* labels below).
          */
1804     rc = proto_register(&tcp_prot, 1);
1805     if (rc)
1806         goto out;
1807
1808     rc = proto_register(&udp_prot, 1);
1809     if (rc)
1810         goto out_unregister_tcp_proto;
1811
1812     rc = proto_register(&raw_prot, 1);
1813     if (rc)
1814         goto out_unregister_udp_proto;
1815
1816     rc = proto_register(&ping_prot, 1);
1817     if (rc)
1818         goto out_unregister_raw_proto;
1820     /*
1821      *  Tell SOCKET that we are alive...
1822      */
1823
         /* Return value is deliberately discarded by the (void) cast. */
1824     (void)sock_register(&inet_family_ops);
1825
1826 #ifdef CONFIG_SYSCTL
1827     ip_static_sysctl_init();
1828 #endif
1829
1830     /*
1831      *  Add all the base protocols.
1832      */
1833
         /* Failures here are only logged; initialisation continues. */
1834     if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
1835         pr_crit("%s: Cannot add ICMP protocol\n", __func__);
1836     if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
1837         pr_crit("%s: Cannot add UDP protocol\n", __func__);
1838     if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
1839         pr_crit("%s: Cannot add TCP protocol\n", __func__);
1840 #ifdef CONFIG_IP_MULTICAST
1841     if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
1842         pr_crit("%s: Cannot add IGMP protocol\n", __func__);
1843 #endif
1844
1845     /* Register the socket-side information for inet_create. */
1846     for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
1847         INIT_LIST_HEAD(r);
1848
1849     for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
1850         inet_register_protosw(q);
1852     /*
1853      *  Set the ARP module up
1854      */
1855
1856     arp_init();
1857
1858     /*
1859      *  Set the IP module up
1860      */
1861
1862     ip_init();
1863
1864     /* Setup TCP slab cache for open requests. */
1865     tcp_init();
1866
1867     /* Setup UDP memory threshold */
1868     udp_init();
1869
1870     /* Add UDP-Lite (RFC 3828) */
1871     udplite4_register();
1872
1873     ping_init();
1874
1875     /*
1876      *  Set the ICMP layer up
1877      */
1878
         /* The one init step here whose failure is fatal (panic). */
1879     if (icmp_init() < 0)
1880         panic("Failed to create the ICMP control socket.\n");
1881
1882     /*
1883      *  Initialise the multicast router
1884      */
1885 #if defined(CONFIG_IP_MROUTE)
1886     if (ip_mr_init())
1887         pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
1888 #endif
1889
1890     if (init_inet_pernet_ops())
1891         pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
1892     /*
1893      *  Initialise per-cpu ipv4 mibs
1894      */
1895
1896     if (init_ipv4_mibs())
1897         pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
1898
1899     ipv4_proc_init();
1900
1901     ipfrag_init();
1902
1903     dev_add_pack(&ip_packet_type);
1904
1905     ip_tunnel_core_init();
1906
1907     rc = 0;
1908 out:
1909     return rc;
         /*
          * Error unwinding: the labels fall through one another, so entering
          * at a given label unregisters that proto and every one registered
          * before it, then jumps back to "out" with rc still holding the error.
          */
1910 out_unregister_raw_proto:
1911     proto_unregister(&raw_prot);
1912 out_unregister_udp_proto:
1913     proto_unregister(&udp_prot);
1914 out_unregister_tcp_proto:
1915     proto_unregister(&tcp_prot);
1916     goto out;
1917 }
1918
1919 fs_initcall(inet_init);

其他函式在前面的都介紹過了,這篇文章我們主要介紹下面幾個函式:

1856     arp_init();
1857
1858     /*
1859      *  Set the IP module up
1860      */
1861
1862     ip_init();
1863
1864     /* Setup TCP slab cache for open requests. */
1865     tcp_init();
1866
1867     /* Setup UDP memory threshold */
1868     udp_init();
1869
1870     /* Add UDP-Lite (RFC 3828) */
1871     udplite4_register();
1872
1873     ping_init();
1874
1875     /*
1876      *  Set the ICMP layer up
1877      */
1878
1879     if (icmp_init() < 0)
1880         panic("Failed to create the ICMP control socket.\n");
1881
1882     /*
1883      *  Initialise the multicast router
1884      */
1885 #if defined(CONFIG_IP_MROUTE)
1886     if (ip_mr_init())
1887         pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
1888 #endif
1889
1890     if (init_inet_pernet_ops())
1891         pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
1892     /*
1893      *  Initialise per-cpu ipv4 mibs
1894      */
1895
1896     if (init_ipv4_mibs())
1897         pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
1898
1899     ipv4_proc_init();
1900
1901     ipfrag_init();
1902
1903     dev_add_pack(&ip_packet_type);
1904
1905     ip_tunnel_core_init();

1. arp_init()

/*
 * arp_init - one-time ARP setup, called from inet_init().
 *
 * Initialises the ARP neighbour table (arp_tbl) under the NEIGH_ARP_TABLE
 * index, adds arp_packet_type with dev_add_pack(), calls arp_proc_init(),
 * registers the neighbour sysctls for arp_tbl.parms when CONFIG_SYSCTL is
 * set, and finally registers arp_netdev_notifier as a netdevice notifier.
 * None of the calls' results are checked here.
 */
1281 void __init arp_init(void)
1282 {
1283     neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
1284
1285     dev_add_pack(&arp_packet_type);
1286     arp_proc_init();
1287 #ifdef CONFIG_SYSCTL
1288     neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
1289 #endif
1290     register_netdevice_notifier(&arp_netdev_notifier);
1291 }

這個函式比較簡單:先用neigh_table_init()初始化ARP的鄰居表arp_tbl,再通過dev_add_pack()將arp的協議型別新增到ptype_base[]裡面,這樣當鏈路層收到arp包的時候(ARP包是按乙太網協議型別直接分發的,並不經過IP層),就會呼叫arp_rcv()函式進行處理。

同時也完成/proc/net/arp檔案的初始化。

2. ip_init()

// IP subsystem initialisation.
// Call path: inet_init -> ip_init
// Covers initialisation of the routing subsystem and of the inet_peer cache,
// plus igmp_mc_init() when CONFIG_IP_MULTICAST is defined.
void __init ip_init(void)  
{   
    ip_rt_init(); // routing subsystem initialisation
    inet_initpeers();// inet_peer cache
#if defined(CONFIG_IP_MULTICAST)
   igmp_mc_init();
#endif


}  
  
//呼叫路徑ip_init->inet_initpeers  
//inet_peer快取初始化  
//  1.每一個與linux通過inet協議交換過資料的主機,都會被認為是一個ip peer,linux為每個peer分配一個inet_peer結構。  
//  2.inet_peer的主要目的,使不同ip使用不同的ip id生成器。  
//  3.系統中所有的inet_peer例項組織成一課avl樹,方便查詢。  
1.2 void __init inet_initpeers(void)  
{  
    struct sysinfo si;  
  
    //獲取系統記憶體資訊  
    si_meminfo(&si);  
    //系統中inet_peer數量的閥值  
    if (si.totalram <= (32768*1024)/PAGE_SIZE)  
        inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */  
    if (si.totalram <= (16384*1024)/PAGE_SIZE)  
        inet_peer_threshold >>= 1; /* about 512KB */  
    if (si.totalram <= (8192*1024)/PAGE_SIZE)  
        inet_peer_threshold >>= 2; /* about 128KB */  
    //inet_peer SLAB cache  
    peer_cachep = kmem_cache_create("inet_peer_cache",  
            sizeof(struct inet_peer),  
            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,  
            NULL);  
    //垃圾回收  
    peer_periodic_timer.expires = jiffies  
        + net_random() % inet_peer_gc_maxtime  
        + inet_peer_gc_maxtime;  
    add_timer(&peer_periodic_timer);  
}  

3. tcp_init()

tcp的初始化內容比較多,後面單獨講

4. udp_init()

/*
 * udp_init - boot-time UDP setup, called from inet_init().
 *
 * Initialises the global udp_table (labelled "UDP"), derives the three
 * sysctl_udp_mem[] values from the number of free buffer pages, sets the
 * minimum receive/send buffer sysctls to SK_MEM_QUANTUM, and allocates and
 * initialises the udp_busylocks spinlock array. Panics if that allocation
 * fails.
 */
2653 void __init udp_init(void)
2654 {
2655     unsigned long limit;
2656     unsigned int i;
2657
2658     udp_table_init(&udp_table, "UDP");
         /* limit = 1/8 of the free buffer pages, but never below 128. */
2659     limit = nr_free_buffer_pages() / 8;
2660     limit = max(limit, 128UL);
         /* [0] = 3/4 of limit (integer arithmetic), [1] = limit, [2] = 2 * [0]. */
2661     sysctl_udp_mem[0] = limit / 4 * 3;
2662     sysctl_udp_mem[1] = limit;
2663     sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
2664
2665     sysctl_udp_rmem_min = SK_MEM_QUANTUM;
2666     sysctl_udp_wmem_min = SK_MEM_QUANTUM;
2667
2668     /* 16 spinlocks per cpu */
         /* The "+ 4" in the log2 count is the factor of 16 (2^4). */
2669     udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
2670     udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
2671                 GFP_KERNEL);
2672     if (!udp_busylocks)
2673         panic("UDP: failed to alloc udp_busylocks\n");
         /* Initialise all 2^udp_busylocks_log locks. */
2674     for (i = 0; i < (1U << udp_busylocks_log); i++)
2675         spin_lock_init(udp_busylocks + i);
2676 }
這裡主要完成udp_table的初始化:
 67 /**
 68  *  struct udp_table - UDP table
 69  *
 70  *  @hash:  hash table, sockets are hashed on (local port)
 71  *  @hash2: hash table, sockets are hashed on (local port, local address)
 72  *  @mask:  number of slots in hash tables, minus 1
 73  *  @log:   log2(number of slots in hash table)
 74  */ 
    /*
     * NOTE(review): the global udp_table that udp_init() passes to
     * udp_table_init() is presumably an instance of this struct; its
     * declaration is not part of this listing.
     */
 75 struct udp_table {
 76     struct udp_hslot    *hash;
 77     struct udp_hslot    *hash2;
 78     unsigned int        mask;
 79     unsigned int        log;
 80 };
udp_table_init()主要建立了兩個hash表來組織本地的UDP套接字:hash以本地埠號為鍵,hash2以(本地埠號, 本地地址)為鍵。

5. icmp_init()

/*
 * icmp_init - register the ICMP per-network-namespace operations.
 *
 * Simply hands icmp_sk_ops to register_pernet_subsys().
 *
 * Return: whatever register_pernet_subsys() returns; inet_init() treats a
 * negative value as fatal and panics.
 */
1252 int __init icmp_init(void)
1253 {
1254     return register_pernet_subsys(&icmp_sk_ops);
1255 }

register_pernet_subsys()的主要作用是將一個網路協議模組新增到每一個網路名稱空間中,然後再執行其ops->init程式進行初始化,一般其ops->init會在其對應的proc目錄下,生成一個網路協議模組對應的proc檔案或proc目錄,並執行一些協議初始化相關的函式。
/*
 * Per-network-namespace hooks for ICMP, registered by icmp_init() through
 * register_pernet_subsys(): icmp_sk_init is the .init hook and
 * icmp_sk_exit (not shown in this listing) is the .exit hook.
 */
1247 static struct pernet_operations __net_initdata icmp_sk_ops = {
1248        .init = icmp_sk_init,
1249        .exit = icmp_sk_exit,
1250 };

/*
 * icmp_sk_init - .init hook of icmp_sk_ops for one network namespace.
 * @net: the network namespace being initialised
 *
 * Allocates a per-cpu array of socket pointers in net->ipv4.icmp_sk,
 * creates one PF_INET/SOCK_RAW/IPPROTO_ICMP control socket for every
 * possible CPU, and then sets this namespace's ICMP sysctl defaults.
 *
 * Return: 0 on success, -ENOMEM if the per-cpu array cannot be allocated,
 * or the negative error from inet_ctl_sock_create() (after destroying the
 * sockets and freeing the array).
 */
1185 static int __net_init icmp_sk_init(struct net *net)
1186 {
1187     int i, err;
1188
1189     net->ipv4.icmp_sk = alloc_percpu(struct sock *);
1190     if (!net->ipv4.icmp_sk)
1191         return -ENOMEM;
1192
1193     for_each_possible_cpu(i) {
1194         struct sock *sk;
1195
1196         err = inet_ctl_sock_create(&sk, PF_INET,
1197                        SOCK_RAW, IPPROTO_ICMP, net);
1198         if (err < 0)
1199             goto fail;
1200
             /* Remember this CPU's control socket. */
1201         *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
1202
1203         /* Enough space for 2 64K ICMP packets, including
1204          * sk_buff/skb_shared_info struct overhead.
1205          */
1206         sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1207
1208         /*
1209          * Speedup sock_wfree()
1210          */
1211         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1212         inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1213     }
1214
1215     /* Control parameters for ECHO replies. */
1216     net->ipv4.sysctl_icmp_echo_ignore_all = 0;
1217     net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
1218
1219     /* Control parameter - ignore bogus broadcast responses? */
1220     net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
1221
1222     /*
1223      *  Configurable global rate limit.
1224      *
1225      *  ratelimit defines tokens/packet consumed for dst->rate_token
1226      *  bucket ratemask defines which icmp types are ratelimited by
1227      *  setting it's bit position.
1228      *
1229      *  default:
1230      *  dest unreachable (3), source quench (4),
1231      *  time exceeded (11), parameter problem (12)
1232      */
1233
1234     net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
         /* 0x1818 has exactly bits 3, 4, 11 and 12 set, matching the list above. */
1235     net->ipv4.sysctl_icmp_ratemask = 0x1818;
1236     net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
1237
1238     return 0;
1239
1240 fail:
         /*
          * Walks every possible CPU, including slots never filled in above.
          * NOTE(review): presumably safe because alloc_percpu() zeroes the
          * array and inet_ctl_sock_destroy() accepts NULL - confirm.
          */
1241     for_each_possible_cpu(i)
1242         inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
1243     free_percpu(net->ipv4.icmp_sk);
1244     return err;
1245 }

http://blog.csdn.net/lickylin/article/details/24208463

6.init_inet_pernet_ops()

它最後呼叫inet_init_net(struct net *net)完成網路名稱空間的初始化。需要注意的是,網路名稱空間並不是每個網路裝置各有一個:一個網路名稱空間由一個struct net表示,而每個網路裝置(比如eth1, ath0等)在任一時刻都歸屬於某一個網路名稱空間,可以檢視這裡瞭解。

7. init_ipv4_mibs()

每一個網路介面都歸屬於某一個網路名稱空間(而不是每個介面各有一個);每個網路名稱空間由一個"struct net"物件表示,這些物件串聯在一個連結串列中。該函式為每個網路名稱空間完成資料統計(MIB)相關資訊的初始化。

static __net_init int ipv4_mib_init_net(struct net *net)
{
        int i;

        net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
        if (!net->mib.tcp_statistics)
                goto err_tcp_mib;
        net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
        if (!net->mib.ip_statistics)
                goto err_ip_mib;

        for_each_possible_cpu(i) {
                struct ipstats_mib *af_inet_stats;
                af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
                u64_stats_init(&af_inet_stats->syncp);
        }

        net->mib.net_statistics = alloc_percpu(struct linux_mib);
        if (!net->mib.net_statistics)
                goto err_net_mib;
        net->mib.udp_statistics = alloc_percpu(struct udp_mib);
        if (!net->mib.udp_statistics)
                goto err_udp_mib;
        net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
        if (!net->mib.udplite_statistics)
                goto err_udplite_mib;
        net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
        if (!net->mib.icmp_statistics)
                goto err_icmp_mib;
        net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
                                              GFP_KERNEL);
        if (!net->mib.icmpmsg_statistics)
                goto err_icmpmsg_mib;

        tcp_mib_init(net);

8. ipfrag_init()

IP分片的初始化,其中指定了對應動作所呼叫的函式

/*
 * ipfrag_init - set up IPv4 fragment handling, called from inet_init().
 *
 * Calls ip4_frags_ctl_register(), registers ip4_frags_ops as a
 * per-network-namespace subsystem (the return value is not checked here),
 * fills in the ip4_frags callbacks, entry size and cache name, and then
 * calls inet_frags_init(). Panics if inet_frags_init() reports failure.
 */
void __init ipfrag_init(void)
{
        ip4_frags_ctl_register();
        register_pernet_subsys(&ip4_frags_ops);
        /* Callbacks and sizing for ip4_frags; each entry is a struct ipq. */
        ip4_frags.hashfn = ip4_hashfn;
        ip4_frags.constructor = ip4_frag_init;
        ip4_frags.destructor = ip4_frag_free;
        ip4_frags.qsize = sizeof(struct ipq);
        ip4_frags.match = ip4_frag_match;
        ip4_frags.frag_expire = ip_expire;
        ip4_frags.frags_cache_name = ip_frag_cache_name;
        if (inet_frags_init(&ip4_frags))
                panic("IP: failed to allocate ip4_frags cache\n");
}

9. ipv4_proc_init()

在/proc/net目錄下,會根據不同的協議建立不同的檔案(例如下面輸出中的tcp)

static int __init ipv4_proc_init(void)
{
        int rc = 0;

        if (raw_proc_init())
                goto out_raw;
        if (tcp4_proc_init())
                goto out_tcp;
        if (udp4_proc_init())
                goto out_udp;
        if (ping_proc_init())
                goto out_ping;
        if (ip_misc_proc_init())
                goto out_misc;


root:/proc/213/net# cat tcp
  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode                                                     
   0: 00000000:8240 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 18057 1 d9aa0000 100 0 0 10 0                             
   1: FA01A8C0:C000 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 10027 1 d9828000 100 0 0 10 0                             
   2: 00000000:4E25 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 18624 1 d86b8000 100 0 0 10 0                             
   3: 00000000:0D05 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 3566 1 de690500 100 0 0 10 0                              
   4: 0100007F:1E6B 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 16883 1 d9828500 100 0 0 10 0                             
   5: 00000000:0050 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 3548 1 de690000 100 0 0 10 0                              
   6: 00000000:15B3 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 10551287 1 d86bc600 100 0 0 10 0                          
   7: 00000000:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 18632 1 d86b8500 100 0 0 10 0                             
   8: 0100007F:DAEA 0100007F:0050 06 00000000:00000000 03:00000640 00000000     0        0 0 3 d86c7d80                                              
   9: FA01A8C0:A668 0A01A8C0:0B35 06 00000000:00000000 03:000011B2 00000000     0        0 0 3 de52c000                                              
  10: FA01A8C0:A664 0A01A8C0:0B35 06 00000000:00000000 03:0000107F 00000000     0        0 0 3 de52ca80                                              
  11: FA01A8C0:A669 0A01A8C0:0B35 06 00000000:00000000 03:000011B1 00000000     0        0 0 3 de52c180                                              
  12: FA01A8C0:A66A 0A01A8C0:0B35 06 00000000:00000000 03:000011B1 00000000     0        0 0 3 de52c480                                              
  13: FA01A8C0:A670 0A01A8C0:0B35 06 00000000:00000000 03:000014B9 00000000     0        0 0 3 d9fb3cc0                                              
  14: FA01A8C0:A64B 0A01A8C0:0B35 06 00000000:00000000 03:00000216 00000000     0        0 0 3 d9fb3e40 

也許你會問路徑裡面的213是什麼:它是行程(process)的PID,/proc/<pid>/net下顯示的是該行程所在網路名稱空間的資訊,所以處於不同name space的行程看到的內容可能不同。