TCP協議和套接字、IP層之間的介面
1、TCP和套接字層之間的介面
TCP和套接字層之間的介面資料結構是struct proto,這個結構體的成員是一系列函式指標,其中從tcp_close到tcp_shutdown的函式是TCP連線管理處理函式,TCP資料的接收則由tcp_recvmsg和tcp_v4_do_rcv函式實現。struct proto tcp_prot定義如下:
struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, .close = tcp_close, //套接字關閉 .connect = tcp_v4_connect, //練級處理 .disconnect = tcp_disconnect, //斷開處理 .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v4_init_sock, //初始化套接字 .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, //立即關閉套接字 .setsockopt = tcp_setsockopt, //設定選項 .getsockopt = tcp_getsockopt, //獲取選項 .recvmsg = tcp_recvmsg, //套接字層接受資料包函式 .backlog_rcv = tcp_v4_do_rcv, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif };
TCP和套接字層的介面資料結構tcp_prot,是在AF_INET協議族的初始化函式inet_init中通過呼叫proto_register函式註冊的:
/* Excerpt of inet_init(): only the step that registers tcp_prot is shown. */
static int __init inet_init(void)
{
...
// Register the TCP protocol instance (tcp_prot) via proto_register().
rc = proto_register(&tcp_prot, 1);
...
}
2、TCP和IP層之間的介面
2.1、TCP和IP層之間的接收介面
TCP協議和IP層之間的介面資料結構是struct net_protocol,TCP的例項tcp_protocol定義如下:
static const struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, //接受IP層資料包處理函式 .err_handler = tcp_v4_err, //icmp錯誤處理函式 .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .gro_receive = tcp4_gro_receive, .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, };
IP層通過ip_local_deliver_finish函式處理後將資料包上傳到傳輸層:它根據協議號proto在全域性陣列inet_protos中找到傳輸層的接收函式。TCP和IP層之間的介面資料結構struct net_protocol tcp_protocol,是在inet_init函式中通過呼叫inet_add_protocol註冊並儲存到全域性陣列inet_protos中的。
/*
 * Excerpt of inet_init(): only the step that registers tcp_protocol is
 * shown. Laid out over several lines so that the "//" comment no longer
 * swallows the statement that follows it.
 */
static int __init inet_init(void)
{
	...
	// Register the transport-layer handlers in the global inet_protos array.
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
	...
}
2.2、TCP和IP層之間的傳送介面
TCP和IP層之間的傳送介面資料結構是struct inet_connection_sock_af_ops。TCP在IPv4下的例項是ipv4_specific,它包含了一組AF_INET地址族中TCP協議例項的操作函式。引入這個結構的目的,是讓IPv4和IPv6可以共用同一套TCP與網路層之間的介面。
struct inet_connection_sock_af_ops:
/*
 * struct inet_connection_sock_af_ops - address-family specific operations
 * used by a connection-oriented socket to talk to the network layer.
 * The IPv4 instance shown in this document is ipv4_specific.
 */
struct inet_connection_sock_af_ops {
int (*queue_xmit)(struct sk_buff *skb); // hand a packet to the network layer for sending
void (*send_check)(struct sock *sk, struct sk_buff *skb); // checksum of an outgoing segment
int (*rebuild_header)(struct sock *sk); // rebuild the protocol header
// Handle an incoming connection request.
int (*conn_request)(struct sock *sk, struct sk_buff *skb);
// Create the child socket for a connection request.
struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
// Record a timestamp for the peer.
int (*remember_stamp)(struct sock *sk);
// Size of the network-layer header.
u16 net_header_len;
// Size of the address family's sockaddr structure.
u16 sockaddr_len;
// Network-layer socket option setter/getter.
int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
// Compat variants, only present when CONFIG_COMPAT is defined.
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
int level, int optname,
char __user *optval, unsigned int optlen);
int (*compat_getsockopt)(struct sock *sk,
int level, int optname,
char __user *optval, int __user *optlen);
#endif
// Fill a generic sockaddr from the socket.
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
// Check a socket against a bind bucket for a conflict.
int (*bind_conflict)(const struct sock *sk,
const struct inet_bind_bucket *tb);
};
tcp協議struct inet_connection_sock_af_ops資料結構的例項是ipv4_specific
ipv4_specific:
/*
 * ipv4_specific - IPv4 instance of struct inet_connection_sock_af_ops.
 *
 * Members are listed in the same order as the struct declaration; because
 * every member uses a designated initializer the resulting object is the
 * same regardless of the order they are written in.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
	/* Transmit path */
	.queue_xmit = ip_queue_xmit,			/* send to the IPv4 network layer */
	.send_check = tcp_v4_send_check,		/* checksum of an outgoing segment */
	.rebuild_header = inet_sk_rebuild_header,	/* rebuild the header */

	/* Connection set-up */
	.conn_request = tcp_v4_conn_request,		/* handle a connection-request segment */
	.syn_recv_sock = tcp_v4_syn_recv_sock,		/* create the new child socket */
	.remember_stamp = tcp_v4_remember_stamp,	/* save the peer's last timestamp */

	/* Sizes: IPv4 header and sockaddr_in */
	.net_header_len = sizeof(struct iphdr),
	.sockaddr_len = sizeof(struct sockaddr_in),

	/* IPv4 network-layer socket options */
	.setsockopt = ip_setsockopt,
	.getsockopt = ip_getsockopt,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif

	/* Addressing */
	.addr2sockaddr = inet_csk_addr2sockaddr,	/* produce a sockaddr_in style address */
	.bind_conflict = inet_csk_bind_conflict,
};
ipv4_specific是在tcp_v4_init_sock函式中註冊的,即把它的地址儲存到套接字的icsk_af_ops欄位:
/*
 * Excerpt of tcp_v4_init_sock(): only the line that installs the
 * ipv4_specific operations table is shown.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
...
// Point the socket's icsk_af_ops at the IPv4 operations table.
icsk->icsk_af_ops = &ipv4_specific;
...
}
3、TCP、套接字、IP層之間介面函式
TCP、套接字層和IP層之間的介面函式關係如下圖所示,通過這張圖就能清晰地瞭解資料包在TCP、套接字層、IP層之間的傳送和接收流程。
4、TCP套接字初始化
當應用層開啟一個套接字後就會呼叫tcp_v4_init_sock函式初始化套接字,主要初始化TCP套接字結構,程式碼如下:
/*
 * tcp_v4_init_sock - set up the TCP-specific state of a socket.
 * @sk: socket being initialised
 *
 * Initialises the queues and timers, seeds the RTO and congestion-control
 * fields, installs the ipv4_specific operations table, optionally
 * allocates cookie state, applies the default buffer sizes and increments
 * the tcp_sockets_allocated counter.
 *
 * A failed cookie allocation is tolerated: cookie_values is left NULL and
 * initialisation continues.
 *
 * Return: always 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
// Connection-socket and TCP-socket views of sk.
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
// Initialise the list head of out_of_order_queue.
skb_queue_head_init(&tp->out_of_order_queue);
// Initialise the transmit timers.
tcp_init_xmit_timers(sk);
// Initialise the input queue prequeue.
tcp_prequeue_init(tp);
// Seed the retransmission timeout icsk_rto and the deviation mdev with
// TCP_TIMEOUT_INIT (3 seconds according to the original note; the
// constant's definition is not shown here).
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
tp->snd_cwnd = 2;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
*/
// Start with snd_ssthresh at TCP_INFINITE_SSTHRESH.
// NOTE(review): the original note says this "disables slow start";
// presumably it means the threshold starts out unbounded - confirm.
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
// Upper clamp for the congestion window: all bits set.
// NOTE(review): the original note says "16 bit"; the field width is
// not visible here - confirm.
tp->snd_cwnd_clamp = ~0;
// Cached MSS starts at TCP_MSS_DEFAULT (536 according to the original note).
tp->mss_cache = TCP_MSS_DEFAULT;
// Initialise the reordering field from the sysctl value.
tp->reordering = sysctl_tcp_reordering;
// Install the initial congestion-control operations.
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
// The socket is still in the CLOSE state at this point.
sk->sk_state = TCP_CLOSE;
// Write-space callback; per the original note it is invoked when the
// socket's write buffer becomes available.
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
// Install the IPv4 operations table ipv4_specific.
icsk->icsk_af_ops = &ipv4_specific;
icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv4_specific;
#endif
/* TCP Cookie Transactions */
if (sysctl_tcp_cookie_size > 0) {
/* Default, cookies without s_data_payload. */
tp->cookie_values =
kzalloc(sizeof(*tp->cookie_values),
sk->sk_allocation);
// Only initialise the reference count if the allocation succeeded.
if (tp->cookie_values != NULL)
kref_init(&tp->cookie_values->kref);
}
/* Presumed zeroed, in order of appearance:
* cookie_in_always, cookie_out_never,
* s_data_constant, s_data_in, s_data_out
*/
// Default send/receive buffer sizes come from element [1] of
// sysctl_tcp_wmem / sysctl_tcp_rmem; per the original note,
// applications can change them with setsockopt.
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
// tcp_sockets_allocated is a global counter of sockets (per the
// original note); add one for this socket.
percpu_counter_inc(&tcp_sockets_allocated);
local_bh_enable();
return 0;
}