1. 程式人生 > >kernel網絡協議棧(1) 初始化

kernel網絡協議棧(1) 初始化

c語言 fun 模型 做了 _array down ram www lag

今天打算開始寫 Linux 內核網絡協議棧相關的文章。不能保證一定會更新完畢,有時間有興趣的時候,會寫一寫。 2019.4.26 / fluray / 內核版本 5.0.9 原創文章,轉載請註明出處。 https://www.cnblogs.com/fluray/p/10773637.html 首先要知道 C 語言實現的最基本的 TCP C/S 模型/代碼。 當我們在 Linux 下使用下列函數創建套接字的時候,發生了什麽?
int socket(int af, int type, int protocol);
我們看socket這個syscall的調用棧:
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
    return __sys_socket(family, type, protocol);
}
//__sys_socket
    sock_create(family, type, protocol, &sock);
        __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
            //讀 net_families[family]
            pf = rcu_dereference(net_families[family]);
            ...
            pf->create(net, sock, protocol, kern);
                inet_create
                    //在inet_create中有下列代碼
                    if (sk->sk_prot->init) {
                        err = sk->sk_prot->init(sk);
                            tcp_v4_init_sock
這裏有2個問題(實質都是何時被初始化的問題) 1.pf = rcu_dereference(net_families[family])中的net_families數組何時被初始化? 換句話說 為什麽pf->create 是 inet_create ? 2.sk->sk_prot->init 為何是 tcp_v4_init_sock
首先了解一下linux kernel初始化時,tcp初始化相關的部分:
/*
 * Excerpt of inet_init() showing only the registration steps discussed in
 * this article. A line consisting of "..." marks source the article left out.
 * The numbered markers (1)-(5) are referred to by the surrounding text.
 */
static int __init inet_init(void)
{
    struct inet_protosw *q;
    ...
    rc = proto_register(&tcp_prot, 1);// (1) register the TCP struct proto
    ...
    rc = proto_register(&udp_prot, 1);
    ...
    (void)sock_register(&inet_family_ops);// (2) register the PF_INET family ops

    if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
        pr_crit("%s: Cannot add ICMP protocol\n", __func__);
    if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
        pr_crit("%s: Cannot add UDP protocol\n", __func__);
    if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)// (3) add tcp_protocol under IPPROTO_TCP
        pr_crit("%s: Cannot add TCP protocol\n", __func__);
    ...
    /* Register the socket-side information for inet_create. */
    /* NOTE(review): r and rc are not declared in this excerpt; presumably in the elided lines. */
    for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)// (4) initialise each inetsw[] list head
        INIT_LIST_HEAD(r);

    for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)// (5) walk inetsw_array
        inet_register_protosw(q); // this call is what fills in inetsw
    ...
}
這裏面有幾個關鍵的結構體,我做了標號,我們先看一下,知道大體上有哪些成員即可。
第一個:
/*
 * TCP's struct proto (marker 1): the object inet_init() passes to
 * proto_register(). "..." marks members the article left out.
 */
struct proto tcp_prot = {
    .name            = "TCP",
    .owner            = THIS_MODULE,
    .close            = tcp_close,
    .pre_connect    = tcp_v4_pre_connect,
    .connect        = tcp_v4_connect,
    .disconnect        = tcp_disconnect,
    .accept            = inet_csk_accept,
    .ioctl            = tcp_ioctl,
    .init            = tcp_v4_init_sock,// init hook; inet_create() calls it as sk->sk_prot->init(sk)
    .destroy        = tcp_v4_destroy_sock,
    .shutdown        = tcp_shutdown,
    .setsockopt        = tcp_setsockopt,
    .getsockopt        = tcp_getsockopt,
    .keepalive        = tcp_set_keepalive,
    .recvmsg        = tcp_recvmsg,
    .sendmsg        = tcp_sendmsg,
    .sendpage        = tcp_sendpage,
    ...
};
EXPORT_SYMBOL(tcp_prot);
第二個:
/* PF_INET address-family descriptor (marker 2 in inet_init()). */
static const struct net_proto_family inet_family_ops = {
    .family = PF_INET,
    .create = inet_create,// the function reached through pf->create() for this family
    .owner    = THIS_MODULE,
};
// What happens when this object is registered?
(void)sock_register(&inet_family_ops);
// sock_register() stores it into net_families[family]:
        rcu_assign_pointer(net_families[ops->family], ops);

第三個:
/*
 * Marker 3: the object inet_init() passes to inet_add_protocol() for
 * IPPROTO_TCP. Only the handler member is shown; "..." marks elided members.
 */
static struct net_protocol tcp_protocol = {
    ...
    .handler    =    tcp_v4_rcv,
    ...
};
第五個(第四個 inetsw 在後面說明):
/*
 * Marker 5: the table inet_init() walks, calling inet_register_protosw() on
 * each entry. Every entry pairs a socket type and protocol number with a
 * .prot and an .ops object. "..." marks entries the article left out.
 */
static struct inet_protosw inetsw_array[] =
{
    {
        .type =       SOCK_STREAM,
        .protocol =   IPPROTO_TCP,
        .prot =       &tcp_prot,// note: this is the struct proto tcp_prot (not a function); its .init is tcp_v4_init_sock
        .ops =        &inet_stream_ops,
        .flags =      INET_PROTOSW_PERMANENT |
                  INET_PROTOSW_ICSK,
    },

    {
        .type =       SOCK_DGRAM,
        .protocol =   IPPROTO_UDP,
        .prot =       &udp_prot,
        .ops =        &inet_dgram_ops,
        .flags =      INET_PROTOSW_PERMANENT,
       },
    ...
}
那麽第四個 struct list_head inetsw[SOCK_MAX]; 這個數組何時被寫入有效數據? inetsw 的每個元素都是一個鏈表頭(按 socket 類型索引);inet_register_protosw 這個函數根據 inetsw_array 中的信息來初始化 inetsw,即把 inetsw_array 中的每一項掛到其 type 對應的 inetsw[type] 鏈表上。
寫入有效數據以後,以後在用的時候,就可以這麽用
    /*
     * Usage pattern once inetsw holds valid data: walk the list for this
     * socket type and compare each entry's protocol. The walk is bracketed
     * by rcu_read_lock()/rcu_read_unlock(). "..." marks elided code.
     */
    struct inet_protosw *answer;
    ...
    rcu_read_lock();
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
        if (protocol == answer->protocol) {
            ...
        }
        ...
    }
    ...
    rcu_read_unlock();

//註:rcu_read_lock rcu_read_unlock 語義上可以認為是讀寫鎖中的讀鎖
問題1的解答:
// Where is inet_create registered?
static const struct net_proto_family inet_family_ops = {
    .family = PF_INET,
    .create = inet_create,
    .owner    = THIS_MODULE,
};
// inet_init() registers the object above:
inet_init
    (void)sock_register(&inet_family_ops);
// which writes net_families[family]:
        rcu_assign_pointer(net_families[ops->family], ops);

問題2的解答:

/*
 * Excerpt of inet_create(), the PF_INET .create callback. It looks for the
 * inet_protosw entry matching (sock->type, protocol), copies that entry's
 * ops and prot into the new socket/sock, and calls the prot's init hook if
 * one is set. "..." marks source the article left out, so braces do not
 * balance in this excerpt.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,
               int kern)
{
    struct sock *sk;
    struct inet_protosw *answer;
    struct inet_sock *inet;
    struct proto *answer_prot;
    ...
lookup_protocol:
    err = -ESOCKTNOSUPPORT;
    rcu_read_lock();
// Walk the list hanging off inetsw[sock->type] (one list, not the whole array), matching on protocol.
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

        err = 0;
        /* Check the non-wild match. */
        if (protocol == answer->protocol) {
            if (protocol != IPPROTO_IP)
                break;
    ...
// sock->ops is set here;
// for TCP that is inet_stream_ops
    sock->ops = answer->ops;  
    answer_prot = answer->prot;
    ...
// sk->sk_prot is set as part of sk_alloc();
// for TCP that is tcp_prot
    sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
        // NOTE(review): the next line looks quoted from inside sk_alloc(), where prot would be the answer_prot argument -- confirm
        sk->sk_prot = sk->sk_prot_creator = prot;

/*
For reference, the matching table entry:
static struct inet_protosw inetsw_array[] =
{
    {
        .type =       SOCK_STREAM,
        .protocol =   IPPROTO_TCP,
        .prot =       &tcp_prot,// note: a struct proto (not a function); it supplies .init
        .ops =        &inet_stream_ops,
        .flags =      INET_PROTOSW_PERMANENT |
                  INET_PROTOSW_ICSK,
    },
*/
    if (sk->sk_prot->init) {
// so for a TCP socket this call reaches tcp_v4_init_sock
        err = sk->sk_prot->init(sk);
    ...
    }
}

這裏要記住:

struct socket 持有的是 ops(對 TCP 而言是 inet_stream_ops);struct sock 持有的是 sk_prot(對 TCP 而言是 tcp_prot)。以後的文章中會用到。

kernel網絡協議棧(1) 初始化