學習Linux-4.12核心網路協議棧(1.6)——協議棧的初始化(inet_init實現過程)
這篇文章主要分析inet_init()函式的實現過程:
1796 static int __init inet_init(void) 1797 { 1798 struct inet_protosw *q; 1799 struct list_head *r; 1800 int rc = -EINVAL; 1801 1802 sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); 1803 1804 rc = proto_register(&tcp_prot, 1); 1805 if (rc) 1806 goto out; 1807 1808 rc = proto_register(&udp_prot, 1); 1809 if (rc) 1810 goto out_unregister_tcp_proto; 1811 1812 rc = proto_register(&raw_prot, 1); 1813 if (rc) 1814 goto out_unregister_udp_proto; 1815 1816 rc = proto_register(&ping_prot, 1); 1817 if (rc) 1818 goto out_unregister_raw_proto; 1820 /* 1821 * Tell SOCKET that we are alive... 1822 */ 1823 1824 (void)sock_register(&inet_family_ops); 1825 1826 #ifdef CONFIG_SYSCTL 1827 ip_static_sysctl_init(); 1828 #endif 1829 1830 /* 1831 * Add all the base protocols. 1832 */ 1833 1834 if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) 1835 pr_crit("%s: Cannot add ICMP protocol\n", __func__); 1836 if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) 1837 pr_crit("%s: Cannot add UDP protocol\n", __func__); 1838 if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) 1839 pr_crit("%s: Cannot add TCP protocol\n", __func__); 1840 #ifdef CONFIG_IP_MULTICAST 1841 if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) 1842 pr_crit("%s: Cannot add IGMP protocol\n", __func__); 1843 #endif 1844 1845 /* Register the socket-side information for inet_create. */ 1846 for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) 1847 INIT_LIST_HEAD(r); 1848 1849 for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) 1850 inet_register_protosw(q); 1852 /* 1853 * Set the ARP module up 1854 */ 1855 1856 arp_init(); 1857 1858 /* 1859 * Set the IP module up 1860 */ 1861 1862 ip_init(); 1863 1864 /* Setup TCP slab cache for open requests. 
*/ 1865 tcp_init(); 1866 1867 /* Setup UDP memory threshold */ 1868 udp_init(); 1869 1870 /* Add UDP-Lite (RFC 3828) */ 1871 udplite4_register(); 1872 1873 ping_init(); 1874 1875 /* 1876 * Set the ICMP layer up 1877 */ 1878 1879 if (icmp_init() < 0) 1880 panic("Failed to create the ICMP control socket.\n"); 1881 1882 /* 1883 * Initialise the multicast router 1884 */ 1885 #if defined(CONFIG_IP_MROUTE) 1886 if (ip_mr_init()) 1887 pr_crit("%s: Cannot init ipv4 mroute\n", __func__); 1888 #endif 1889 1890 if (init_inet_pernet_ops()) 1891 pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); 1892 /* 1893 * Initialise per-cpu ipv4 mibs 1894 */ 1895 1896 if (init_ipv4_mibs()) 1897 pr_crit("%s: Cannot init ipv4 mibs\n", __func__); 1898 1899 ipv4_proc_init(); 1900 1901 ipfrag_init(); 1902 1903 dev_add_pack(&ip_packet_type); 1904 1905 ip_tunnel_core_init(); 1906 1907 rc = 0; 1908 out: 1909 return rc; 1910 out_unregister_raw_proto: 1911 proto_unregister(&raw_prot); 1912 out_unregister_udp_proto: 1913 proto_unregister(&udp_prot); 1914 out_unregister_tcp_proto: 1915 proto_unregister(&tcp_prot); 1916 goto out; 1917 } 1918 1919 fs_initcall(inet_init);
其他函式在前面的都介紹過了,這篇文章我們主要介紹下面幾個函式:
1856 arp_init(); 1857 1858 /* 1859 * Set the IP module up 1860 */ 1861 1862 ip_init(); 1863 1864 /* Setup TCP slab cache for open requests. */ 1865 tcp_init(); 1866 1867 /* Setup UDP memory threshold */ 1868 udp_init(); 1869 1870 /* Add UDP-Lite (RFC 3828) */ 1871 udplite4_register(); 1872 1873 ping_init(); 1874 1875 /* 1876 * Set the ICMP layer up 1877 */ 1878 1879 if (icmp_init() < 0) 1880 panic("Failed to create the ICMP control socket.\n"); 1881 1882 /* 1883 * Initialise the multicast router 1884 */ 1885 #if defined(CONFIG_IP_MROUTE) 1886 if (ip_mr_init()) 1887 pr_crit("%s: Cannot init ipv4 mroute\n", __func__); 1888 #endif 1889 1890 if (init_inet_pernet_ops()) 1891 pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); 1892 /* 1893 * Initialise per-cpu ipv4 mibs 1894 */ 1895 1896 if (init_ipv4_mibs()) 1897 pr_crit("%s: Cannot init ipv4 mibs\n", __func__); 1898 1899 ipv4_proc_init(); 1900 1901 ipfrag_init(); 1902 1903 dev_add_pack(&ip_packet_type); 1904 1905 ip_tunnel_core_init();
1. arp_init()
1281 void __init arp_init(void) 1282 { 1283 neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl); 1284 1285 dev_add_pack(&arp_packet_type); 1286 arp_proc_init(); 1287 #ifdef CONFIG_SYSCTL 1288 neigh_sysctl_register(NULL, &arp_tbl.parms, NULL); 1289 #endif 1290 register_netdevice_notifier(&arp_netdev_notifier); 1291 }
這個函式比較簡單:先用neigh_table_init()初始化ARP鄰居表arp_tbl,再透過dev_add_pack()將arp的協議型別新增到ptype_base[]裡面。這樣當裝置接收路徑(鏈路層)收到arp包的時候,就會按協議型別分發並呼叫arp_rcv()函式進行處理(ARP包並不經過IP層)。
同時也完成/proc/net/arp檔案的初始化。
2. ip_init()
//ip子系統初始化
//呼叫路徑inet_init->ip_init
//包括路由子系統的初始化,inet_peer快取的初始化
/*
 * ip_init() - set up the IP module; called from inet_init().
 *
 * Initialises the routing subsystem and the inet_peer cache, and calls
 * igmp_mc_init() when CONFIG_IP_MULTICAST is defined.
 */
void __init ip_init(void)
{
ip_rt_init(); // initialise the routing subsystem
inet_initpeers();// initialise the inet_peer cache
#if defined(CONFIG_IP_MULTICAST)
igmp_mc_init();
#endif
}
//呼叫路徑ip_init->inet_initpeers
//inet_peer快取初始化
// 1.每一個與linux通過inet協議交換過資料的主機,都會被認為是一個ip peer,linux為每個peer分配一個inet_peer結構。
// 2.inet_peer的主要目的,使不同ip使用不同的ip id生成器。
// 3.系統中所有的inet_peer例項組織成一棵AVL樹,方便查詢。
1.2 void __init inet_initpeers(void)
{
struct sysinfo si;
//獲取系統記憶體資訊
si_meminfo(&si);
//系統中inet_peer數量的閥值
if (si.totalram <= (32768*1024)/PAGE_SIZE)
inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
if (si.totalram <= (16384*1024)/PAGE_SIZE)
inet_peer_threshold >>= 1; /* about 512KB */
if (si.totalram <= (8192*1024)/PAGE_SIZE)
inet_peer_threshold >>= 2; /* about 128KB */
//inet_peer SLAB cache
peer_cachep = kmem_cache_create("inet_peer_cache",
sizeof(struct inet_peer),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
//垃圾回收
peer_periodic_timer.expires = jiffies
+ net_random() % inet_peer_gc_maxtime
+ inet_peer_gc_maxtime;
add_timer(&peer_periodic_timer);
}
3. tcp_init()
tcp的初始化內容比較多,後面單獨講
4. udp_init()
/*
 * udp_init() - boot-time setup for UDP.
 *
 * Initialises the global udp_table, derives the three sysctl_udp_mem
 * thresholds from the number of free buffer pages, sets the minimum
 * rmem/wmem values to SK_MEM_QUANTUM, and allocates and initialises the
 * udp_busylocks spinlock array. Panics if that array cannot be allocated.
 */
2653 void __init udp_init(void)
2654 {
2655 unsigned long limit;
2656 unsigned int i;
2657
2658 udp_table_init(&udp_table, "UDP");
2659 limit = nr_free_buffer_pages() / 8; /* one eighth of the free buffer pages */
2660 limit = max(limit, 128UL); /* but never less than 128 */
2661 sysctl_udp_mem[0] = limit / 4 * 3; /* 3/4 of limit */
2662 sysctl_udp_mem[1] = limit;
2663 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; /* twice entry [0], i.e. about 1.5 * limit */
2664
2665 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
2666 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
2667
2668 /* 16 spinlocks per cpu */
2669 udp_busylocks_log = ilog2(nr_cpu_ids) + 4; /* +4 in the exponent multiplies the count by 16 */
2670 udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
2671 GFP_KERNEL);
2672 if (!udp_busylocks)
2673 panic("UDP: failed to alloc udp_busylocks\n");
2674 for (i = 0; i < (1U << udp_busylocks_log); i++)
2675 spin_lock_init(udp_busylocks + i);
2676 }
這裡主要完成udp_table的初始化:
67 /**
68 * struct udp_table - UDP table
69 *
70 * @hash: hash table, sockets are hashed on (local port)
71 * @hash2: hash table, sockets are hashed on (local port, local address)
72 * @mask: number of slots in hash tables, minus 1
73 * @log: log2(number of slots in hash table)
74 */
75 struct udp_table {
76 struct udp_hslot *hash; /* slots keyed on local port */
77 struct udp_hslot *hash2; /* slots keyed on (local port, local address) */
78 unsigned int mask; /* slot count - 1 */
79 unsigned int log; /* log2 of the slot count */
80 };
這個函式主要建立了兩個hash表:hash以本地埠號為鍵,hash2以(本地埠號, 本地地址)為鍵,用於把本地的UDP套接字串聯起來,方便查詢。
5. icmp_init()
/*
 * icmp_init() - register the ICMP per-network-namespace operations.
 *
 * Return: the result of register_pernet_subsys(&icmp_sk_ops). The caller
 * shown in inet_init() panics when this is negative.
 */
1252 int __init icmp_init(void)
1253 {
1254 return register_pernet_subsys(&icmp_sk_ops);
1255 }
該函式的主要作用是將一個網路協議模組新增到每一個網路名稱空間中,然後再執行其ops->init函式進行初始化,一般其ops->init會在其對應的proc目錄下,生成一個網路協議模組對應的proc檔案或proc目錄,並執行一些協議初始化相關的函式。
/*
 * Per-network-namespace operations for ICMP, registered by icmp_init():
 * icmp_sk_init is the .init hook and icmp_sk_exit the .exit hook.
 */
1247 static struct pernet_operations __net_initdata icmp_sk_ops = {
1248 .init = icmp_sk_init,
1249 .exit = icmp_sk_exit,
1250 };
/*
 * icmp_sk_init() - .init hook of icmp_sk_ops for one network namespace.
 *
 * Allocates a per-CPU array of socket pointers in net->ipv4.icmp_sk,
 * creates one raw IPPROTO_ICMP control socket for every possible CPU and
 * tunes it, then sets the namespace's ICMP sysctl defaults.
 *
 * Return: 0 on success, -ENOMEM if the per-CPU array cannot be allocated,
 * or the negative error from inet_ctl_sock_create(); in the latter case
 * the sockets and the per-CPU array are released first.
 */
1185 static int __net_init icmp_sk_init(struct net *net)
1186 {
1187 int i, err;
1188
1189 net->ipv4.icmp_sk = alloc_percpu(struct sock *);
1190 if (!net->ipv4.icmp_sk)
1191 return -ENOMEM;
1192
1193 for_each_possible_cpu(i) {
1194 struct sock *sk;
1195
1196 err = inet_ctl_sock_create(&sk, PF_INET,
1197 SOCK_RAW, IPPROTO_ICMP, net);
1198 if (err < 0)
1199 goto fail;
1200
1201 *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
1202
1203 /* Enough space for 2 64K ICMP packets, including
1204 * sk_buff/skb_shared_info struct overhead.
1205 */
1206 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1207
1208 /*
1209 * Speedup sock_wfree()
1210 */
1211 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1212 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1213 }
1214
1215 /* Control parameters for ECHO replies. */
1216 net->ipv4.sysctl_icmp_echo_ignore_all = 0;
1217 net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
1218
1219 /* Control parameter - ignore bogus broadcast responses? */
1220 net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
1221
1222 /*
1223 * Configurable global rate limit.
1224 *
1225 * ratelimit defines tokens/packet consumed for dst->rate_token
1226 * bucket ratemask defines which icmp types are ratelimited by
1227 * setting it's bit position.
1228 *
1229 * default:
1230 * dest unreachable (3), source quench (4),
1231 * time exceeded (11), parameter problem (12)
1232 */
1233
1234 net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
1235 net->ipv4.sysctl_icmp_ratemask = 0x1818; /* bits 3, 4, 11 and 12 set: the four types listed above */
1236 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
1237
1238 return 0;
1239
1240 fail:
/*
 * The cleanup walks every possible CPU, including ones whose socket was
 * never created. NOTE(review): this presumably relies on alloc_percpu()
 * zero-filling the slots and inet_ctl_sock_destroy() tolerating NULL --
 * confirm.
 */
1241 for_each_possible_cpu(i)
1242 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
1243 free_percpu(net->ipv4.icmp_sk);
1244 return err;
1245 }
http://blog.csdn.net/lickylin/article/details/24208463
6. init_inet_pernet_ops()
它最後呼叫inet_init_net(struct net *net)完成每個網路名稱空間的初始化。需要注意的是,並不是每一個網路裝置都有一個自己的網路名稱空間,而是每一個網路裝置(比如eth1, ath0等)都歸屬於某一個網路名稱空間,一個名稱空間裡可以有多個裝置,可以檢視這裡瞭解。
7. init_ipv4_mibs()
前面說了,每一個網路介面都歸屬於某一個網路名稱空間,每個網路名稱空間由一個"struct net"物件表示,這些物件串聯在一起。該函式為每個名稱空間分配並初始化per-cpu的統計資料(MIB),包括tcp、ip、net、udp、udplite、icmp等統計資訊。
static __net_init int ipv4_mib_init_net(struct net *net)
{
int i;
net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
if (!net->mib.tcp_statistics)
goto err_tcp_mib;
net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
if (!net->mib.ip_statistics)
goto err_ip_mib;
for_each_possible_cpu(i) {
struct ipstats_mib *af_inet_stats;
af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
u64_stats_init(&af_inet_stats->syncp);
}
net->mib.net_statistics = alloc_percpu(struct linux_mib);
if (!net->mib.net_statistics)
goto err_net_mib;
net->mib.udp_statistics = alloc_percpu(struct udp_mib);
if (!net->mib.udp_statistics)
goto err_udp_mib;
net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
if (!net->mib.udplite_statistics)
goto err_udplite_mib;
net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
if (!net->mib.icmp_statistics)
goto err_icmp_mib;
net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
GFP_KERNEL);
if (!net->mib.icmpmsg_statistics)
goto err_icmpmsg_mib;
tcp_mib_init(net);
8. ipfrag_init()
IP分片的初始化,其中指定了對應動作所呼叫的函式
/*
 * ipfrag_init() - set up the IPv4 fragment (reassembly) bookkeeping.
 *
 * Calls ip4_frags_ctl_register(), registers ip4_frags_ops as a pernet
 * subsystem (return value not checked), fills in the callbacks and
 * parameters of the ip4_frags descriptor, and then initialises it with
 * inet_frags_init(). Panics if that initialisation fails.
 */
void __init ipfrag_init(void)
{
ip4_frags_ctl_register();
register_pernet_subsys(&ip4_frags_ops);
/* Callbacks and parameters for the ip4_frags descriptor. */
ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq); /* size of one queue object */
ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
}
9. ipv4_proc_init()
在/proc/net目錄下,會根據不同的協議(raw、tcp、udp、ping等)建立不同的檔案,例如下面的tcp檔案:
static int __init ipv4_proc_init(void)
{
int rc = 0;
if (raw_proc_init())
goto out_raw;
if (tcp4_proc_init())
goto out_tcp;
if (udp4_proc_init())
goto out_udp;
if (ping_proc_init())
goto out_ping;
if (ip_misc_proc_init())
goto out_misc;
root:/proc/213/net# cat tcp
sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
0: 00000000:8240 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 18057 1 d9aa0000 100 0 0 10 0
1: FA01A8C0:C000 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 10027 1 d9828000 100 0 0 10 0
2: 00000000:4E25 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 18624 1 d86b8000 100 0 0 10 0
3: 00000000:0D05 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 3566 1 de690500 100 0 0 10 0
4: 0100007F:1E6B 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 16883 1 d9828500 100 0 0 10 0
5: 00000000:0050 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 3548 1 de690000 100 0 0 10 0
6: 00000000:15B3 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 10551287 1 d86bc600 100 0 0 10 0
7: 00000000:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 18632 1 d86b8500 100 0 0 10 0
8: 0100007F:DAEA 0100007F:0050 06 00000000:00000000 03:00000640 00000000 0 0 0 3 d86c7d80
9: FA01A8C0:A668 0A01A8C0:0B35 06 00000000:00000000 03:000011B2 00000000 0 0 0 3 de52c000
10: FA01A8C0:A664 0A01A8C0:0B35 06 00000000:00000000 03:0000107F 00000000 0 0 0 3 de52ca80
11: FA01A8C0:A669 0A01A8C0:0B35 06 00000000:00000000 03:000011B1 00000000 0 0 0 3 de52c180
12: FA01A8C0:A66A 0A01A8C0:0B35 06 00000000:00000000 03:000011B1 00000000 0 0 0 3 de52c480
13: FA01A8C0:A670 0A01A8C0:0B35 06 00000000:00000000 03:000014B9 00000000 0 0 0 3 d9fb3cc0
14: FA01A8C0:A64B 0A01A8C0:0B35 06 00000000:00000000 03:00000216 00000000 0 0 0 3 d9fb3e40
也許你會問路徑裡面的213是什麼:它是程序號(PID)。/proc/<pid>/net顯示的是該程序所在網路名稱空間的資訊,所以處於不同name space的程序看到的內容可能不同。