1. 程式人生 > >netfilter連線跟蹤初始化

netfilter連線跟蹤初始化

連線跟蹤的初始化主要有三個地方

(1)連線跟蹤本身初始化。

(2)在對應的Hook上註冊連線跟蹤的處理函式。

(3)初始化連線跟蹤和三層協議、四層協議相關的函式。

1、連線跟蹤本身初始化

連線跟蹤本身初始化函式是nf_conntrack_net_init,主要做兩件事情:為連線跟蹤分配slab緩衝、初始化proc檔案系統(之後還會呼叫nf_conntrack_standalone_init_sysctl註冊sysctl)。

1.1、nf_conntrack_net_init

nf_conntrack_net_init是連線跟蹤初始化的入口函式,主要呼叫nf_conntrack_init函式分配slab緩衝,呼叫nf_conntrack_standalone_init_proc初始化proc檔案系統。

/*
 * Per-namespace entry point for connection tracking setup.
 *
 * Runs three steps in order: nf_conntrack_init(), the proc setup and the
 * sysctl setup.  If a step returns a negative value, the steps that already
 * succeeded are undone in reverse order and that value is returned.
 * Returns 0 when all three steps succeed.
 */
static int nf_conntrack_net_init(struct net *net)
{
	int err;

	/* core conntrack initialisation */
	err = nf_conntrack_init(net);
	if (err < 0)
		return err;

	/* proc file system entries */
	err = nf_conntrack_standalone_init_proc(net);
	if (err < 0) {
		nf_conntrack_cleanup(net);
		return err;
	}

	/* defaults written before the sysctl setup runs */
	net->ct.sysctl_checksum = 1;
	net->ct.sysctl_log_invalid = 0;

	err = nf_conntrack_standalone_init_sysctl(net);
	if (err < 0) {
		nf_conntrack_standalone_fini_proc(net);
		nf_conntrack_cleanup(net);
		return err;
	}

	return 0;
}

1.2、nf_conntrack_init

nf_conntrack_init主要呼叫nf_conntrack_init_init_net(只在net為init_net時呼叫),之後再呼叫nf_conntrack_init_net對傳入的net進行初始化。

/*
 * Initialise connection tracking for @net.
 *
 * When @net is init_net, nf_conntrack_init_init_net() is run first and, after
 * nf_conntrack_init_net() has succeeded, the ip_ct_attach / nf_ct_destroy /
 * nf_ct_nat_offset pointers are published.  For any other namespace only
 * nf_conntrack_init_net() is called.
 *
 * Returns 0 on success, otherwise the negative value of the failing call.
 */
int nf_conntrack_init(struct net *net)
{
	int err;

	if (net_eq(net, &init_net)) {
		err = nf_conntrack_init_init_net();
		if (err < 0)
			return err;
	}

	err = nf_conntrack_init_net(net);
	if (err < 0) {
		/* undo the init_net-only setup done above */
		if (net_eq(net, &init_net))
			nf_conntrack_cleanup_init_net();
		return err;
	}

	if (!net_eq(net, &init_net))
		return 0;

	/* For use by REJECT target */
	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

	/* Howto get NAT offsets */
	rcu_assign_pointer(nf_ct_nat_offset, NULL);

	return 0;
}

1.3、nf_conntrack_init_init_net

/*
 * Global conntrack setup, called from nf_conntrack_init() for init_net.
 *
 * Picks a hash table size when none was configured, derives
 * nf_conntrack_max from it, initialises the protocol and helper code
 * (plus the zone extension when CONFIG_NF_CONNTRACK_ZONES is set) and
 * prepares the nf_conntrack_untracked entry.
 *
 * Returns 0 on success.  If one of the init calls returns a negative value,
 * whatever was initialised before it is torn down again and that value is
 * returned.
 */
static int nf_conntrack_init_init_net(void)
{
	int factor;
	int err;

	if (nf_conntrack_htable_size) {
		/* A table size was given: use the old value of 8 to avoid
		 * reducing the max. entries. */
		factor = 8;
	} else {
		/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
		 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
		nf_conntrack_htable_size
			= (((totalram_pages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		/* more than 1GB worth of pages: fixed at 16384 buckets */
		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		/* never go below 32 buckets */
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. */
		factor = 4;
	}
	nf_conntrack_max = factor * nf_conntrack_htable_size;

	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	/* protocol tracker setup (the article says this fills nf_ct_l3protos;
	 * the callee is not shown here) */
	err = nf_conntrack_proto_init();
	if (err < 0)
		return err;

	err = nf_conntrack_helper_init();
	if (err < 0) {
		nf_conntrack_proto_fini();
		return err;
	}

#ifdef CONFIG_NF_CONNTRACK_ZONES
	err = nf_ct_extend_register(&nf_ct_zone_extend);
	if (err < 0) {
		nf_conntrack_helper_fini();
		nf_conntrack_proto_fini();
		return err;
	}
#endif

	/* Set up fake conntrack: to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
	nf_conntrack_untracked.ct_net = &init_net;
#endif
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/* ... and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return 0;
}

2、註冊連線跟蹤的hook函式

2.1、ipv4_defrag_ops

在PREROUTING和OUT鏈上註冊ipv4_conntrack_defrag,這個函式主要是對分片的資料包進行重組(defrag)操作。PREROUTING和OUT鏈是netfilter框架的兩個入口,一個是接受外界資料的入口,一個是本機產生資料包的入口。

/*
 * Hook descriptors that attach ipv4_conntrack_defrag to the PRE_ROUTING and
 * LOCAL_OUT hook points of PF_INET, both at NF_IP_PRI_CONNTRACK_DEFRAG
 * priority.
 */
static struct nf_hook_ops ipv4_defrag_ops[] = {
	{
		/* PRE_ROUTING entry; the handler body is not shown here —
		 * judging by its name it defragments (reassembles) packets,
		 * TODO confirm */
		.hook		= ipv4_conntrack_defrag,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
	},
	{
		/* same handler and priority on the LOCAL_OUT hook point */
		.hook           = ipv4_conntrack_defrag,
		.owner          = THIS_MODULE,
		.pf             = PF_INET,
		.hooknum        = NF_INET_LOCAL_OUT,
		.priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
	},
};

2.2、ipv4_conntrack_ops

netfilter框架有兩個入口(PREROUTING鏈、OUT鏈),兩個出口(LOCAL_IN鏈、POSTROUTING鏈),PREROUTING是接受外界資料包進入的第一個鏈,OUT鏈是本機產生資料包進入的第一個鏈。LOCAL_IN是本機接受資料包的最後一個鏈,POSTROUTING是資料包發出前經過的最後一個鏈。所以在PREROUTING鏈上註冊ipv4_conntrack_in、在OUT鏈上註冊ipv4_conntrack_local來建立連線跟蹤,在LOCAL_IN和POSTROUTING鏈上註冊ipv4_confirm確認一條連線跟蹤。

/*
 * Connection tracking hook descriptors for NFPROTO_IPV4: one handler on
 * each of PRE_ROUTING and LOCAL_OUT at NF_IP_PRI_CONNTRACK priority, and
 * ipv4_confirm on POST_ROUTING and LOCAL_IN at
 * NF_IP_PRI_CONNTRACK_CONFIRM priority.
 */
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
	{
		/* PRE_ROUTING: per the article, the connection tracking entry
		 * is created here for packets just entering netfilter */
		.hook		= ipv4_conntrack_in,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* LOCAL_OUT: per the article, the connection tracking entry
		 * is created here for locally generated packets */
		.hook		= ipv4_conntrack_local,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* POST_ROUTING: per the article, the connection is confirmed
		 * here for packets on their way out */
		.hook		= ipv4_confirm,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		/* LOCAL_IN: per the article, the connection is confirmed here
		 * for packets delivered to the local host */
		.hook		= ipv4_confirm,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};

2.3、註冊hook

呼叫nf_register_hooks註冊連線跟蹤的hook函式

/* Excerpt of the module init function: only the hook registration step is shown. */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{

    ...
      /* register every entry of ipv4_conntrack_ops as a netfilter hook */
    ret = nf_register_hooks(ipv4_conntrack_ops,
				ARRAY_SIZE(ipv4_conntrack_ops));
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
		goto cleanup_ipv4;
	}
    ...
}

nf_register_hooks函式

/*
 * Register the first @n entries of the @reg array, one by one, through
 * nf_register_hook().
 *
 * Stops at the first entry whose registration returns non-zero; the entries
 * registered before it are removed again with nf_unregister_hooks() and that
 * value is returned.  Returns 0 when every entry was registered (or @n is 0).
 */
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
	unsigned int idx = 0;
	int rc = 0;

	while (idx < n) {
		rc = nf_register_hook(&reg[idx]);
		if (rc) {
			/* roll back the idx hooks that did get registered */
			if (idx > 0)
				nf_unregister_hooks(reg, idx);
			break;
		}
		idx++;
	}

	return rc;
}

nf_register_hook函式

/*
 * Link @reg into the hook list selected by its protocol family and hook
 * number, i.e. nf_hooks[reg->pf][reg->hooknum].
 *
 * The list is walked under nf_hook_mutex and @reg is inserted in front of
 * the first entry whose priority is greater than @reg's; if there is no
 * such entry it goes to the end of the list.
 *
 * Returns 0 on success, or the negative value returned by
 * mutex_lock_interruptible() when the mutex could not be taken.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *pos;
	int rc;

	rc = mutex_lock_interruptible(&nf_hook_mutex);
	if (rc < 0)
		return rc;

	/* nf_hooks is two-dimensional: first index is the protocol family,
	 * second index is the hook point (chain) */
	list_for_each_entry(pos, &nf_hooks[reg->pf][reg->hooknum], list) {
		/* stop at the first entry that must come after @reg */
		if (pos->priority > reg->priority)
			break;
	}
	list_add_rcu(&reg->list, pos->list.prev);

	mutex_unlock(&nf_hook_mutex);
	return 0;
}

nf_hooks是一個二維陣列連結串列

/* Hook lists, indexed as nf_hooks[pf][hooknum]; nf_register_hook() links each nf_hook_ops into the matching list. */
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];

3、註冊三層、四層相關的處理函式

ip、icmp、udp協議都一樣這裡就舉例tcp協議

呼叫nf_conntrack_l4proto_register函式註冊nf_conntrack_l4proto_tcp4到全域性陣列nf_ct_protos中

/* Excerpt of the module init function: only the protocol registration steps are shown. */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
...

	/* register the TCP layer-4 tracker (stored in nf_ct_protos) */
	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register tcp.\n");
		goto cleanup_sockopt;
	}
	/* register the UDP layer-4 tracker (stored in nf_ct_protos) */
	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register udp.\n");
		goto cleanup_tcp;
	}
	/* register the ICMP layer-4 tracker (stored in nf_ct_protos) */
	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register icmp.\n");
		goto cleanup_udp;
	}
	/* register the IPv4 layer-3 tracker; the table it is stored in is
	 * not visible in this excerpt (the article says nf_ct_protos,
	 * presumably a separate layer-3 table — TODO confirm) */
	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register ipv4\n");
		goto cleanup_icmp;
	}
...

}

nf_conntrack_l4proto_register

/* Excerpt: only the final publication step and the unlock path are shown. */
int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
...
    /* publish the tracker in the global nf_ct_protos table, indexed by
     * its layer-3 and layer-4 protocol numbers */
	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
			   l4proto);

out_unlock:
	mutex_unlock(&nf_ct_proto_mutex);
	return ret;
}

nf_conntrack_l4proto_tcp4

/*
 * Layer-4 tracker description for TCP over PF_INET: the callbacks and
 * tables that connection tracking uses for this protocol.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	/* read the source and destination port from the TCP header in the skb
	 * into a tuple */
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	/* build the reply tuple: the original source/destination ports become
	 * the reply destination/source ports */
	.invert_tuple 		= tcp_invert_tuple,
	/* per the article: prints the tuple's source and destination port */
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* per the article: converts the tuple's ports to nfnetlink attribute
	 * format */
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	/* per the article: converts nfnetlink-format ports back into a tuple */
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= tcp_compat_sysctl_table,
#endif
#endif
};

tcp_pkt_to_tuple從資料包skb中獲取源埠、目的埠

static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
			     struct nf_conntrack_tuple *tuple)
{
	const struct tcphdr *hp;
	struct tcphdr _hdr;

	/* Actually only need first 8 bytes. */
	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
	if (hp == NULL)
		return false;
	/*獲取源埠*/
	tuple->src.u.tcp.port = hp->source;
	/*獲取目的埠*/
	tuple->dst.u.tcp.port = hp->dest;

	return true;
}

tcp_invert_tuple將orig方向的源埠、目的埠對調後賦值給reply方向

/*
 * Build the reply-direction TCP ports in @tuple from @orig by swapping them:
 * the reply source port is the original destination port and vice versa.
 * Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	/* copy the original direction's ports into the reply direction, swapped */
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
	return true;
}