a. active模式下,client首先通过控制通道发送PORT消息给server,
此消息中包含了client监听的数据通道端口,server获取此端口,主动
向client发起tcp三次握手建立数据通道来传输数据(server端口
为20,client端口可获取)。
b. passive模式下,client首先通过控制通道发送PASV消息给
server,server进行回应,其中包含了server监听的数据端口,client
获取此端口,向server发起tcp三次握手建立数据通道来传输数据
(server端口可获取,但是client端口未知)。
由此可知,active和passive均是从server的角度来说的:active是server主动发起数据连接,passive是server被动接受数据连接。
netfilter的用法通常是在filter表上设置INPUT,FORWARD和OUTPUT的默认policy为DROP,再根据不同的需求设置相应的规则放行数据包。对于ftp来说,控制通道的双向数据包可以通过设置下面规则:
-A INPUT -p tcp --dport 21 -j ACCEPT
-A INPUT -m state --state ESTABLISHED -j ACCEPT
只有上面两条规则,数据通道的数据包是会被drop的,所以也需要针对数据通道设置规则。
对于active模式,因为server端的源端口20是确定的,也可以设置类似的规则放行。
但是对于passive模式,client的端口是不确定的,没办法提前设置规则。其实也有解决办法,参考这个链接,可以限制passive端口范围,再设置规则即可,但是实际用不到这么多端口,而且有安全风险。
pasv_enable=yes
pasv_max_port=23010
pasv_min_port=23000
iptables -A INPUT -p tcp -m tcp --dport 23000:23010 -j ACCEPT
综合上述,对于数据通道数据来说,需要根据不同的模式设置不同的规则,是比较麻烦的。所以需要helper来创建期望连接,即在控制通道的ct中关联helper,在helper处理函数中,获取client发送的PORT消息和server回复的PASV消息中携带的端口号,创建相应的期望连接。数据通道在ct创建阶段查找期望连接,如果可以查找成功,则将数据通道数据流的ct状态设置为related,这样就可以添加如下一条规则来放行数据通道的数据包。
-A INPUT -m state --state RELATED -j ACCEPT
基于以上问题,连接跟踪模块提出了helper函数和期望连接两个概念。
其中期望连接就是解决第一个问题,当创建一个连接跟踪项时,会根据reply方向的tuple结构查找helpers链表,并与该连接跟踪项进行绑定,接着在调用函数ipv4_help时就调用该连接跟踪项的helper函数,注册的helper函数如果有期望连接的概念,就会创建一个期望连接,并添加到期望连接表中。
而helper函数的另一个功能就是用来解决第二个问题的,但是要想完整的解决第二个问题,还需要一个nat转换函数,用于对应用层中携带的ip地址进行NAT转换操作。
每个helper使用nf_conntrack_helper结构表示,调用nf_conntrack_helper_register将helper注册到nf_ct_helper_hash的一个hash桶中。
/* Descriptor for one conntrack helper; entries are linked into nf_ct_helper_hash via hnode. */
struct nf_conntrack_helper {
struct hlist_node hnode; /* Internal use. */
// Helper name. When the CT target specifies a helper, the lookup is done by this name.
char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */
struct module *me; /* pointer to self */
const struct nf_conntrack_expect_policy *expect_policy;
/* length of internal data, ie. sizeof(struct nf_ct_*_master) */
size_t data_len;
/* Tuple of things we will help (compared against server response) */
// Filled in when the helper is registered. In legacy mode a helper is
// selected by matching a connection's tuple against this one.
struct nf_conntrack_tuple tuple;
/* Function to call when data passes; return verdict, or -1 to
invalidate. */
// This is the callback invoked from ipv4_helper().
int (*help)(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info conntrackinfo);
void (*destroy)(struct nf_conn *ct);
int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct);
int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct);
unsigned int expect_class_max;
unsigned int flags;
unsigned int queue_num; /* For user-space helpers. */
};
/*
 * Register a conntrack helper.
 *
 * The bucket is chosen by helper_hash() over the helper's tuple. If that
 * bucket already holds a helper with the same name, l3num and protonum,
 * nothing is added and -EEXIST is returned; otherwise the helper is linked
 * at the head of the bucket and nf_ct_helper_count is bumped.
 *
 * Returns 0 on success or -EEXIST on a duplicate.
 */
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
	struct nf_conntrack_helper *entry;
	unsigned int bucket = helper_hash(&me->tuple);
	int err = 0;

	mutex_lock(&nf_ct_helper_mutex);

	/* Scan the bucket for an already registered twin. */
	hlist_for_each_entry(entry, &nf_ct_helper_hash[bucket], hnode) {
		if (entry->tuple.src.l3num != me->tuple.src.l3num)
			continue;
		if (entry->tuple.dst.protonum != me->tuple.dst.protonum)
			continue;
		if (strncmp(entry->name, me->name, NF_CT_HELPER_NAME_LEN) != 0)
			continue;
		err = -EEXIST;
		break;
	}

	if (err == 0) {
		/* Publish the helper in the global nf_ct_helper_hash table. */
		hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[bucket]);
		nf_ct_helper_count++;
	}

	mutex_unlock(&nf_ct_helper_mutex);
	return err;
}
helper一般通过module形式加载,比如ftp helper,在加载nf_conntrack_ftp时注册helper,同时nf_conntrack_ftp还提供了一个参数ports:array,用来设置需要匹配的port,这也是legacy使用helper的方法。缺点是如果匹配的port需要改变,必须重新加载此module。
modprobe nf_conntrack_ftp ports=2121
modprobe nf_conntrack_ftp ports=2022,2023,2024
可以指定一个或者多个port(最多8个)。如果没有指定参数ports,则使用默认的port(#define FTP_PORT 21)。
// The global table `ftp` is filled in first and each entry is then registered
// into nf_ct_helper_hash.
// First index: one slot per configured port, at most MAX_PORTS (8).
// Second index: the two address families PF_INET and PF_INET6 -- note that it
// does NOT stand for the original/reply directions.
#define MAX_PORTS 8
static struct nf_conntrack_helper ftp[MAX_PORTS][2]__read_mostly;
// Expectation policy shared by every ftp helper entry.
static const struct nf_conntrack_expect_policy ftp_exp_policy = {
.max_expected = 1,
.timeout = 5 * 60,
};
// Ports to match and how many of them are in use (ports_c).
static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
// Scratch buffer; allocated in nf_conntrack_ftp_init().
static char *ftp_buffer;
/*
 * Module init: register one FTP helper per (port, address family) pair.
 *
 * Allocates the shared 64KiB ftp_buffer, falls back to the default port
 * (FTP_PORT) when no port was configured, then fills in and registers
 * ftp[i][0] (PF_INET) and ftp[i][1] (PF_INET6) for every configured port.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
 * error reported by nf_conntrack_helper_register(). On a registration
 * failure every helper registered so far is unregistered again and the
 * buffer is freed, so a failed load leaves nothing behind.
 */
static int __init nf_conntrack_ftp_init(void)
{
	int i, j, ret;

	ftp_buffer = kmalloc(65536, GFP_KERNEL);
	if (!ftp_buffer)
		return -ENOMEM;

	/* No ports given when the module was loaded: use the default port 21. */
	if (ports_c == 0)
		ports[ports_c++] = FTP_PORT;

	/* FIXME should be configurable whether IPv4 and IPv6 FTP connections
		 are tracked or not - YK */
	for (i = 0; i < ports_c; i++) {
		ftp[i][0].tuple.src.l3num = PF_INET;
		ftp[i][1].tuple.src.l3num = PF_INET6;
		for (j = 0; j < 2; j++) {
			ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master);
			ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
			ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
			ftp[i][j].expect_policy = &ftp_exp_policy;
			ftp[i][j].me = THIS_MODULE;
			/* The per-packet helper callback. */
			ftp[i][j].help = help;
			ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr;
			/*
			 * The helper is named "ftp" for the default port and
			 * "ftp-<port>" for any other port. Bounded by the
			 * size of the name field.
			 */
			if (ports[i] == FTP_PORT)
				snprintf(ftp[i][j].name, sizeof(ftp[i][j].name),
					 "ftp");
			else
				snprintf(ftp[i][j].name, sizeof(ftp[i][j].name),
					 "ftp-%d", ports[i]);

			pr_debug("nf_ct_ftp: registering helper for pf: %d "
				 "port: %d\n",
				 ftp[i][j].tuple.src.l3num, ports[i]);

			/* Register the helper; do not ignore a failure. */
			ret = nf_conntrack_helper_register(&ftp[i][j]);
			if (ret) {
				pr_err("nf_ct_ftp: failed to register helper for pf: %d port: %d\n",
				       ftp[i][j].tuple.src.l3num, ports[i]);
				goto err_unregister;
			}
		}
	}
	return 0;

err_unregister:
	/*
	 * ftp[i][j] is the entry that failed. Walk backwards over everything
	 * that was registered before it: the lower families of port i, then
	 * both families of every earlier port.
	 */
	for (; i >= 0; i--, j = 2)
		while (--j >= 0)
			nf_conntrack_helper_unregister(&ftp[i][j]);
	kfree(ftp_buffer);
	return ret;
}

/* Placed after the definition so the function is declared before it is named. */
module_init(nf_conntrack_ftp_init);
注册helper后,如何使用它呢?有两种方法,下面分别介绍
根据tuple中源ip,源port,l3num和protonum匹配helper
// Abbreviated excerpt: find the registered helper whose tuple matches `tuple`
// under nf_ct_tuple_src_mask_cmp(). The original note lists source ip,
// source port, l3num and protonum as the inputs; the definition of `mask`
// is not shown in this excerpt.
// The hash is computed first, so only that single hash bucket is searched.
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
h = helper_hash(tuple);
hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) {
if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask))
return helper;
}
对于每条新连接,在conntrack创建阶段
init_conntrack->__nf_ct_try_assign_helper->
helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)根据reply方向的tuple寻找是否有注册的helper(因为client主动连接server,目的端口为21,而ftp注册时,source port为21,所以查找时得使用reply方向的)。因为ftp已经注册过,所以只要数据包source port是21就可以找到helper,将此helper添加到ct的扩展模块中: help = nf_ct_helper_ext_add(ct, helper, flags);
比如客户端ip为1.1.1.2,ftp服务器ip为1.1.1.3,
client访问server时: original:1.1.1.2: 2000->1.1.1.3:21
server响应client时: reply: 1.1.1.3:21 -> 1.1.1.2:2000
在original方向的第一个数据包经过conntrack时,使用reply方向的信息查找helper,查找成功后,将helper关联到ct。
// Abbreviated excerpt: find a helper by name, l3num and protonum.
// Every hash bucket is walked here, whereas the legacy tuple-based lookup
// only walks one bucket.
__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
if (!strcmp(h->name, name) &&
h->tuple.src.l3num == l3num &&
h->tuple.dst.protonum == protonum)
return h;
}
}
legacy的方法不灵活,如果匹配端口变化还得重新加载驱动。所以可以使用target CT 的--helper进行关联,此种方法需要在raw表上设置规则。假如ftp服务器监听端口为2121,可使用下面规则将此条流关联到ftp helper。注意此时仍然需要加载nf_conntrack_ftp,但是参数ports指定的端口号仅仅只是个名字而已,不会再被用作匹配信息。
//先注册ftp helper,name为ftp
modprobe nf_conntrack_ftp
//--helper的参数ftp必须是注册的helper的name
iptables -A PREROUTING -t raw -p tcp --dport 2121 -d 1.1.1.3 -j CT --helper ftp
//验证如果不是注册的helper的name,则会报错
root@master:~# iptables -A PREROUTING -t raw -p tcp --dport 2121 -d 1.1.1.3 -j CT --helper ftp1
iptables: No chain/target/match by that name.
//如果加载nf_conntrack_ftp时,添加了参数,则helper的name就会
//变成 "ftp-2121"。但是如果参数为21,即和默认参数一样,name还是ftp
root@master:~# modprobe nf_conntrack_ftp ports=2121
//所以添加规则时,--helper的参数必须为ftp-2121
root@master:~# iptables -A PREROUTING -t raw -p tcp --dport 2121 -d 1.1.1.3 -j CT --helper ftp
iptables: No chain/target/match by that name.
root@master:~# iptables -A PREROUTING -t raw -p tcp --dport 2121 -d 1.1.1.3 -j CT --helper ftp-2121
在添加这个规则时,会调用target提供的checkentry做校验.
check_target -> xt_check_target -> checkentry(par);
对于CT target来说,checkentry为xt_ct_tg_check_v0,在此函数中调用nf_conntrack_alloc分配ct,根据--helper的参数ftp调用xt_ct_set_helper->__nf_conntrack_helper_find寻找是否有匹配的helper,此时是通过helper->name进行匹配,而不是根据source port。将ftp的helper添加到ct的扩展模块。调用nf_conntrack_tmpl_insert将此ct添加到per cpu的tmp链表中pcpu->tmpl,同时将ct->status的IPS_TEMPLATE_BIT和IPS_CONFIRMED_BIT位置1。最终将ct保存到CT target的私有数据struct xt_ct_target_info->ct中,后面执行CT target时取出ct赋给skb。
static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
{
struct xt_ct_target_info *info = par->targinfo;
struct xt_ct_target_info_v1 info_v1 = {
.flags = info->flags,
.zone = info->zone,
.ct_events = info->ct_events,
.exp_events = info->exp_events,
};
int ret;
if (info->flags & ~XT_CT_NOTRACK)
return -EINVAL;
//保存设置的helper名字ftp
memcpy(info_v1.helper, info->helper, sizeof(info->helper));
ret = xt_ct_tg_check(par, &info_v1);
if (ret < 0)
return ret;
//将ct保存到info->ct
info->ct = info_v1.ct;
return ret;
}
/*
 * Common checkentry for the CT target.
 *
 * With XT_CT_NOTRACK set no conntrack is allocated and info->ct is set to
 * NULL. Otherwise a conntrack is allocated, optionally extended with event
 * masks, a helper (looked up by info->helper) and a timeout policy, inserted
 * into the template list and stored in info->ct.
 *
 * Returns 0 on success or a negative errno. On failure everything acquired
 * so far is released in reverse order: the conntrack is freed and the
 * l3proto module reference is dropped.
 */
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
			  struct xt_ct_target_info_v1 *info)
{
	struct nf_conntrack_tuple t;
	struct nf_conn *ct;
	int ret = -EOPNOTSUPP;

	/* "-j CT --notrack": no conntrack needs to be allocated, just succeed. */
	if (info->flags & XT_CT_NOTRACK) {
		ct = NULL;
		goto out;
	}

#ifndef CONFIG_NF_CONNTRACK_ZONES
	if (info->zone)
		goto err1;
#endif

	ret = nf_ct_l3proto_try_module_get(par->family);
	if (ret < 0)
		goto err1;

	memset(&t, 0, sizeof(t));
	/* Allocate the conntrack; the result must be checked before use. */
	ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
	ret = PTR_ERR(ct);
	if (IS_ERR(ct))
		goto err2;

	ret = 0;
	if ((info->ct_events || info->exp_events) &&
	    !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
				  GFP_KERNEL)) {
		ret = -EINVAL;
		goto err3;
	}

	/*
	 * A helper was named on the rule: xt_ct_set_helper() looks it up by
	 * name and, if found, stores it in the conntrack's extension area.
	 */
	if (info->helper[0]) {
		ret = xt_ct_set_helper(ct, info->helper, par);
		if (ret < 0)
			goto err3;
	}

	if (info->timeout[0]) {
		ret = xt_ct_set_timeout(ct, par, info->timeout);
		if (ret < 0)
			goto err3;
	}

	/*
	 * Link the conntrack into the net->ct.pcpu_lists->tmpl list; this also
	 * sets IPS_TEMPLATE_BIT and IPS_CONFIRMED_BIT in ct->status.
	 */
	nf_conntrack_tmpl_insert(par->net, ct);
out:
	info->ct = ct;
	return 0;

err3:
	nf_conntrack_free(ct);
err2:
	nf_ct_l3proto_module_put(par->family);
err1:
	return ret;
}
将tmpl插入percpu net->ct.pcpu_lists->tmpl链表中
/*
 * Mark a conntrack as a confirmed template and link it into the tmpl list of
 * the current CPU.
 */
/* deletion from this larval template list happens via nf_ct_put() */
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
{
struct ct_pcpu *pcpu;
// Set the template and confirmed status bits.
__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
// NOTE(review): presumably takes a reference on the template -- confirm.
nf_conntrack_get(&tmpl->ct_general);
/* add this conntrack to the (per cpu) tmpl list */
// Bottom halves are disabled here; the matching enable is the _bh unlock below.
local_bh_disable();
tmpl->cpu = smp_processor_id();
pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
spin_lock(&pcpu->lock);
/* Overload tuple linked list to put us in template list. */
hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&pcpu->tmpl);
spin_unlock_bh(&pcpu->lock);
}
数据流在PREROUTING链上匹配raw表中规则时,如果匹配到了此条rule,则执行rule的target,即xt_ct_target->xt_ct_target_v0,取出在checkentry分配的ct,将ct赋值给skb的skb->nfct,同时设置skb->nfctinfo = IP_CT_NEW。
/*
 * Target function for the v0 CT target: forwards the conntrack stored in the
 * rule's target info to xt_ct_target() and returns its verdict.
 */
static unsigned int xt_ct_target_v0(struct sk_buff *skb,
				    const struct xt_action_param *par)
{
	const struct xt_ct_target_info *tginfo = par->targinfo;

	/* tginfo->ct is the conntrack saved by the checkentry step. */
	return xt_ct_target(skb, tginfo->ct);
}
/*
 * Attach a conntrack to the skb unless it already carries one.
 *
 * A NULL ct (the "-j CT --notrack" case) selects the object returned by
 * nf_ct_untracked_get(); with a helper configured ct is non-NULL. The chosen
 * conntrack's use count is incremented and the skb is marked IP_CT_NEW.
 * Always returns XT_CONTINUE.
 */
static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
{
	/* Previously seen (loopback)? Ignore. */
	if (skb->nfct == NULL) {
		/* special case the untracked ct : we want the percpu object */
		struct nf_conn *attach = ct ? ct : nf_ct_untracked_get();

		atomic_inc(&attach->ct_general.use);
		skb->nfct = &attach->ct_general;
		skb->nfctinfo = IP_CT_NEW;
	}

	return XT_CONTINUE;
}
下一步是在PREROUTING链上执行nf_conntrack_in,因为skb已经在raw表上匹配成功,并分配过ct,此ct的作用主要是保存匹配到的helper信息。
// Abbreviated excerpt of nf_conntrack_in(): handling of a template conntrack
// already attached to the skb.
nf_conntrack_in
if (skb->nfct) {
/* Previously seen (loopback or untracked)? Ignore. */
tmpl = (struct nf_conn *)skb->nfct;
if (!nf_ct_is_template(tmpl)) {
NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
}
// Reaching this point means the attached object is a template: keep it in
// tmpl and clear skb->nfct, because the code that follows assigns a fresh
// conntrack to the skb.
skb->nfct = NULL;
}
// A new conntrack is allocated and the helper stored in tmpl is handed to it.
resolve_normal_ct -> init_conntrack -> __nf_ct_try_assign_helper
out:
if (tmpl) {
/* Special case: we have to repeat this hook, assign the
* template again to this packet. We assume that this packet
* has no conntrack assigned. This is used by nf_ct_tcp. */
if (ret == NF_REPEAT)
skb->nfct = (struct nf_conntrack *)tmpl;
else
// The template has done its job for this packet; drop the reference.
nf_ct_put(tmpl);
}
// Abbreviated excerpt of __nf_ct_try_assign_helper().
__nf_ct_try_assign_helper
// When the lookup in the global conntrack table fails, a new ct is allocated
// and tmpl is passed along, ending up in
// __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC), which takes the helper
// stored in tmpl and assigns it to the new ct.
if (tmpl != NULL) {
help = nfct_help(tmpl);
if (help != NULL) {
helper = help->helper;
set_bit(IPS_HELPER_BIT, &ct->status);
}
}
// Add the helper to the new ct's extension area; no second
// __nf_ct_helper_find() lookup is needed.
help = nf_ct_helper_ext_add(ct, helper, flags);
上面两种方法,只是将匹配rule或者满足要求(目的port为21或者为predefine的port值)的ct关联一个helper,helper的执行是在函数ipv4_helper,original和reply方向的报文都会执行helper去获取数据通道的端口,如果获取成功就创建相应的期望连接。
static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
//如果ct为空或者此数据流为期望连接,则返回NF_ACCEPT
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
//从ct扩展空间取出help,如果为空说明ct没有关联help,则返回NF_ACCEPT
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
//取出helper
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
//执行helper的help函数
return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
}
对于ftp来说,helper->help函数为 nf_conntrack_ftp.c文件中的help函数,如下:
/*
 * FTP control-channel helper callback.
 *
 * Once traffic has been seen in both directions, scans the TCP payload with
 * the search[dir] pattern table for a data-channel address/port. On a match
 * it allocates an expectation for the upcoming data connection and either
 * hands it to the NAT hook or registers it with nf_ct_expect_related().
 * Returns an NF_* verdict (NF_ACCEPT or NF_DROP; the NAT hook's return value
 * when that hook is used).
 */
static int help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
unsigned int dataoff, datalen;
const struct tcphdr *th;
struct tcphdr _tcph;
const char *fb_ptr;
int ret;
u32 seq;
int dir = CTINFO2DIR(ctinfo);
unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff);
struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct);
struct nf_conntrack_expect *exp;
union nf_inet_addr *daddr;
struct nf_conntrack_man cmd = {};
unsigned int i;
int found = 0, ends_in_nl;
typeof(nf_nat_ftp_hook) nf_nat_ftp;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED &&
ctinfo != IP_CT_ESTABLISHED_REPLY) {
pr_debug("ftp: Conntrackinfo = %u\n", ctinfo);
return NF_ACCEPT;
}
th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
dataoff = protoff + th->doff * 4;
/* No data? */
if (dataoff >= skb->len) {
pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
skb->len);
return NF_ACCEPT;
}
datalen = skb->len - dataoff;
/* nf_ftp_lock is held from here until the "out" label. */
spin_lock_bh(&nf_ftp_lock);
fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
BUG_ON(fb_ptr == NULL);
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
seq = ntohl(th->seq) + datalen;
/* Look up to see if we're just after a \n. */
if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
/* We're picking up this, clear flags and let it continue */
if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) {
ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP;
goto skip_nl_seq;
}
/* Now if this ends in \n, update ftp info. */
pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
ct_ftp_info->seq_aft_nl[dir][0],
ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
ct_ftp_info->seq_aft_nl[dir][1]);
ret = NF_ACCEPT;
goto out_update_nl;
}
skip_nl_seq:
/* Initialize IP/IPv6 addr to expected address (it's not mentioned
in EPSV responses) */
cmd.l3num = nf_ct_l3num(ct);
memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
sizeof(cmd.u3.all));
// Key step.
// Parse the TCP payload to extract the data-channel port number.
// Active mode: the PORT message in original-direction packets is parsed.
// Passive mode: the PASV response in reply-direction packets is parsed.
// In both cases the port is the destination port of the data connection
// and is stored in cmd.u.tcp.port.
// (The search[] pattern table itself is not shown in this excerpt.)
for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
found = find_pattern(fb_ptr, datalen,
search[dir][i].pattern,
search[dir][i].plen,
search[dir][i].skip,
search[dir][i].term,
&matchoff, &matchlen,
&cmd,
search[dir][i].getnum);
if (found) break;
}
if (found == -1) {
/* We don't usually drop packets. After all, this is
connection tracking, not packet filtering.
However, it is necessary for accurate tracking in
this case. */
nf_ct_helper_log(skb, ct, "partial matching of `%s'",
search[dir][i].pattern);
ret = NF_DROP;
goto out;
} else if (found == 0) { /* No match */
ret = NF_ACCEPT;
goto out_update_nl;
}
pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
matchlen, fb_ptr + matchoff,
matchlen, ntohl(th->seq) + matchoff);
// Allocate the expectation (exp) structure.
exp = nf_ct_expect_alloc(ct);
if (exp == NULL) {
nf_ct_helper_log(skb, ct, "cannot alloc expectation");
ret = NF_DROP;
goto out;
}
/* We refer to the reverse direction ("!dir") tuples here,
* because we're expecting something in the other direction.
* Doesn't matter unless NAT is happening. */
daddr = &ct->tuplehash[!dir].tuple.dst.u3;
/* Update the ftp info */
if ((cmd.l3num == nf_ct_l3num(ct)) &&
memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
sizeof(cmd.u3.all))) {
/* Enrico Scholz's passive FTP to partially RNAT'd ftp
server: it really wants us to connect to a
different IP address. Simply don't record it for
NAT. */
if (cmd.l3num == PF_INET) {
pr_debug("conntrack_ftp: NOT RECORDING: %pI4 != %pI4\n",
&cmd.u3.ip,
&ct->tuplehash[dir].tuple.src.u3.ip);
} else {
pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n",
cmd.u3.ip6,
ct->tuplehash[dir].tuple.src.u3.ip6);
}
/* Thanks to Cristiano Lincoln Mattos
<lincoln@cesar.org.br> for reporting this potential
problem (DMZ machines opening holes to internal
networks, or the packet filter itself). */
if (!loose) {
ret = NF_ACCEPT;
goto out_put_expect;
}
daddr = &cmd.u3;
}
// Initialize the expectation, mainly the contents of exp->tuple:
//   tuple->src.l3num    = cmd.l3num
//   tuple->dst.protonum = IPPROTO_TCP
//   tuple->src.u3       = saddr
//   tuple->dst.u3       = daddr
//   tuple->src.u.all    = unset (a NULL source-port pointer is passed)
//   tuple->dst.u.all    = cmd.u.tcp.port
// Note that the source port is left unset: in active mode the protocol says
// the server port is 20, but users may change it, so 20 cannot be assumed;
// in passive mode the client port is random and cannot be known either.
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, cmd.l3num,
&ct->tuplehash[!dir].tuple.src.u3, daddr,
IPPROTO_TCP, NULL, &cmd.u.tcp.port);
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
// If a NAT handler (nf_nat_ftp) is registered and the connection has a NAT
// status bit set, call it. Per the original note, besides the NAT work it
// also calls nf_ct_expect_related() to insert the expectation.
nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
protoff, matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
// Insert the expectation via nf_ct_expect_related().
if (nf_ct_expect_related(exp) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
ret = NF_ACCEPT;
}
out_put_expect:
nf_ct_expect_put(exp);
out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
}
初始化结构体struct nf_conntrack_expect
/*
 * Fill in an expectation's tuple and mask.
 *
 * @exp:    expectation to initialise
 * @class:  expectation class stored in exp->class
 * @family: address family; AF_INET uses 4 address bytes, anything else 16
 * @saddr:  expected source address, or NULL for "any" (tuple and mask zeroed)
 * @daddr:  expected destination address (required)
 * @proto:  layer 4 protocol number
 * @src:    expected source port, or NULL for "any" (port and mask zeroed)
 * @dst:    expected destination port (required)
 *
 * flags, expectfn and helper are reset. Address bytes beyond the family's
 * length are cleared so whole-tuple comparisons work.
 */
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	const int len = (family == AF_INET) ? 4 : 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	/* Source address and its mask. */
	if (saddr == NULL) {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	} else {
		const size_t addr_tail = sizeof(exp->tuple.src.u3) - len;
		const size_t mask_tail = sizeof(exp->mask.src.u3) - len;

		memcpy(&exp->tuple.src.u3, saddr, len);
		/* address needs to be cleared for nf_ct_tuple_equal */
		if (addr_tail > 0)
			memset((char *)&exp->tuple.src.u3 + len, 0x00,
			       addr_tail);

		memset(&exp->mask.src.u3, 0xFF, len);
		if (mask_tail > 0)
			memset((char *)&exp->mask.src.u3 + len, 0x00,
			       mask_tail);
	}

	/* Source port: exact match when given, wildcard otherwise. */
	exp->tuple.src.u.all = src ? *src : 0;
	exp->mask.src.u.all = src ? htons(0xFFFF) : 0;

	/* Destination address and port are always exact. */
	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) - len > 0) {
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((char *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);
	}
	exp->tuple.dst.u.all = *dst;

#ifdef CONFIG_NF_NAT_NEEDED
	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
#endif
}
检查期望连接数,如果不超过最大值则将期望连接插入全局hash链表。
nf_ct_expect_related -> nf_ct_expect_related_report ->
__nf_ct_expect_check -->检查当前期望连接数net->ct.expect_count是否大于期望连接最大值nf_ct_expect_max
nf_ct_expect_insert --> 将期望连接插入net->ct.expect_hash[h]
/*
 * Link an expectation into its master's expectation list and into the
 * per-net expectation hash, then arm its timeout timer. Always returns 0.
 */
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
/* Bucket index in net->ct.expect_hash, derived from the expected tuple. */
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
/* two references : one for hash insert, one for the timer */
atomic_add(2, &exp->use);
/* Per-master bookkeeping: list membership and per-class counter. */
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
/* Per-net bookkeeping: hash membership and total expectation count. */
hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
/* The expiry is only set here when the master has a helper: it comes from
 * the helper's expect_policy timeout (seconds, scaled by HZ). */
if (helper) {
exp->timeout.expires = jiffies +
helper->expect_policy[exp->class].timeout * HZ;
}
add_timer(&exp->timeout);
NF_CT_STAT_INC(net, expect_create);
return 0;
}
假如客户端ip为1.1.1.2,ftp服务器ip为1.1.1.3, 连接跟踪已经建立成功。两个方向tuple信息如下:
client访问server时: original:1.1.1.2: 2000->1.1.1.3:21
server响应client时: reply: 1.1.1.3:21 -> 1.1.1.2:2000
对于active模式下,期望连接如下,server主动发起数据通道连接
1.1.1.3:20(源端口在期望连接为0)->1.1.1.2:64523(64523为PORT消息携带)
对于passive模式下,期望连接如下,client主动发起数据通道连接
1.1.1.2:62345(client上随机端口,但在期望连接为0)->1.1.1.3:61232(61232为server响应PASV消息中携带)
所以不管哪种模式,第一个数据包都会在创建ct阶段找到期望连接,
并将ct状态设置为IPS_EXPECTED_BIT,将当前skb和后续数据包skb的ctinfo设置为IP_CT_RELATED,在filter表上匹配规则后放行。
注意:找到期望连接后,会将期望连接删除,并减少期望连接总数net->ct.expect_count。
nf_conntrack_in->resolve_normal_ct->init_conntrack:
/* Excerpt from init_conntrack(): check whether the new connection was expected. */
/* The lookup is skipped entirely when no expectation exists in this net. */
if (net->ct.expect_count) {
spin_lock(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
ct, exp);
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
/* If the expectation names a helper, attach it to the new conntrack. */
if (exp->helper) {
help = nf_ct_helper_ext_add(ct, exp->helper,
GFP_ATOMIC);
if (help)
rcu_assign_pointer(help->helper, exp->helper);
}
/* The new conntrack takes its mark/secmark from the master. */
#ifdef CONFIG_NF_CONNTRACK_MARK
ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
ct->secmark = exp->master->secmark;
#endif
NF_CT_STAT_INC(net, expect_new);
}
spin_unlock(&nf_conntrack_expect_lock);
}
/* If an expectation for this connection is found, it gets deleted from
* global list then returned. */
/*
 * Returns NULL when nothing matches, when the master is unconfirmed or
 * dying, or when the expectation's timer could not be deleted. On success a
 * reference on exp->master has been taken. A permanent expectation is
 * returned with its own use count raised and stays in the list.
 */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
if (!net->ct.expect_count)
return NULL;
h = nf_ct_expect_dst_hash(tuple);
/* First active entry in the bucket matching tuple (under its mask) and zone. */
hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
nf_ct_zone(i->master) == zone) {
exp = i;
break;
}
}
if (!exp)
return NULL;
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (!nf_ct_is_confirmed(exp->master))
return NULL;
/* Avoid race with other CPUs, that for exp->master ct, is
* about to invoke ->destroy(), or nf_ct_delete() via timeout
* or early_drop().
*
* The atomic_inc_not_zero() check tells: If that fails, we
* know that the ct is being destroyed. If it succeeds, we
* can be sure the ct cannot disappear underneath.
*/
if (unlikely(nf_ct_is_dying(exp->master) ||
!atomic_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
atomic_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
// The expectation was found and is not permanent: unlink it from the
// global list, which also lowers the total expectation count.
nf_ct_unlink_expect(exp);
return exp;
}
/* Undo exp->master refcnt increase, if del_timer() failed */
nf_ct_put(exp->master);
return NULL;
}
listen=NO
listen_ipv6=YES
anonymous_enable=NO
local_enable=YES
write_enable=YES
local_umask=022
dirmessage_enable=YES
use_localtime=YES
xferlog_enable=YES
connect_from_port_20=YES
chroot_local_user=YES
secure_chroot_dir=/var/run/vsftpd/empty
pam_service_name=vsftpd
pasv_enable=Yes
pasv_min_port=10000
pasv_max_port=11000
user_sub_token=$USER
local_root=/home/$USER/ftp
userlist_enable=YES
userlist_file=/etc/vsftpd.userlist
userlist_deny=NO
vim /etc/vsftpd.userlist 将允许登陆ftp的用户名添加到此文件,一行一个用户名
root@node1:~# cat /etc/vsftpd.userlist
test
添加用户test。注意用adduser添加的用户才生效,useradd添加的用户在连接ftp server时会认证失败。
root@node1:~# adduser test
Adding user `test' ...
Adding new group `test' (1000) ...
Adding new user `test' (1000) with group `test' ...
The home directory `/home/test' already exists. Not copying from `/etc/skel'.
New password:
Retype new password:
passwd: password updated successfully
Changing the user information for test
Enter the new value, or press ENTER for the default
Full Name []: test
Room Number []:
Work Phone []:
Home Phone []:
Other []:
Is the information correct? [Y/n]
mkdir /home/test/ftp
systemctl restart vsftpd
2.登陆ftp server
server ip为192.168.122.21
client ip为192.168.122.63
client登陆ftp server如下,-d打开debug开关
root@ubuntu:~# ftp -d 192.168.122.21
Connected to 192.168.122.21.
220 (vsFTPd 3.0.3)
ftp: setsockopt: Bad file descriptor
Name (192.168.122.21:root): test
---> USER test
331 Please specify the password.
Password:
---> PASS XXXX
230 Login successful.
---> SYST
215 UNIX Type: L8
Remote system type is UNIX.
Using binary mode to transfer files.
ftp>
这是ftp server上filter表规则都允许情况下的测试
iptables -P INPUT ACCEPT
iptables -P FORWARD ACCEPT
iptables -P OUTPUT ACCEPT
//修改默认策略为DROP
iptables -P INPUT DROP
iptables -P FORWARD DROP
iptables -P OUTPUT DROP
//因为需要ssh到server,所以放开22端口
//从client到server的规则
iptables -A INPUT -p tcp --dport 22 -j ACCEPT
//从server到client的规则
iptables -A OUTPUT -m state --state ESTABLISHED -j ACCEPT
数据流关联helper也有两种方法,下面分别配置验证
3.1 legacy helper用法
a. 首先保证ftp helper已经注册
root@node1:~# lsmod | grep ftp
nf_conntrack_ftp 24576 0
b. legacy下,net->ct.sysctl_auto_assign_helper为1时才会寻找helper,默认是关闭的,所以需要打开它
root@node1:~# sysctl -w net.netfilter.nf_conntrack_helper=1
net.netfilter.nf_conntrack_helper = 1
c. 添加规则,放行控制通道的数据
//下面这一条规则允许client到server的数据
iptables -A INPUT -p tcp --dport 21 -j ACCEPT
//server到client的数据是通过前面添加的规则实现的,又贴过来如下:
iptables -A OUTPUT -m state --state ESTABLISHED -j ACCEPT
此时控制通道已经可以工作,但是数据通道还不行,验证如下:
已经成功登陆server,但是LIST命令失败了,因为LIST命令结果需要通过数据通道传送,又没有放行数据通道,所以超时后失败
root@ubuntu:~# ftp -d 192.168.122.21
Connected to 192.168.122.21.
220 (vsFTPd 3.0.3)
ftp: setsockopt: Bad file descriptor
Name (192.168.122.21:root): test
---> USER test
331 Please specify the password.
Password:
---> PASS XXXX
230 Login successful.
---> SYST
215 UNIX Type: L8
Remote system type is UNIX.
Using binary mode to transfer files.
ftp> ls
---> PORT 192,168,122,63,158,9
200 PORT command successful. Consider using PASV.
---> LIST
425 Failed to establish connection.
d. 控制通道关联上helper后,控制通道的数据包都会执行helper函数,在helper函数中,解析ftp内容,目的是为了获取PORT消息或者PASV消息获取其中的端口号,如果获取成功,则创建期望连接。
数据通道的数据包在init_conntrack阶段查找期望连接,查找成功后会删除期望连接项。此时还没到filter过滤阶段,所以不管filter上规则设置与否,都可在server上观察到期望连接被创建,后被删除。
root@node1:~# conntrack -E expect
[NEW] 300 proto=6 src=192.168.122.21 dst=192.168.122.63 sport=0 dport=49855 mask-src=0.0.0.0 mask-dst=0.0.0.0 sport=0 dport=65535 master-src=192.168.122.63 master-dst=192.168.122.21 sport=35622 dport=21 class=0 helper=ftp
[DESTROY] 300 proto=6 src=192.168.122.21 dst=192.168.122.63 sport=0 dport=49855 mask-src=0.0.0.0 mask-dst=0.0.0.0 sport=0 dport=65535 master-src=192.168.122.63 master-dst=192.168.122.21 sport=35622 dport=21 class=0 helper=ftp
e. ftp默认情况下使用active模式,即server主动连接client。添加如下规则,可让数据通道正常工作。
//此规则添加在OUTPUT链上,因为是server主动连接,所以为original方向,所以第一个数据包状态为RELATED
iptables -A OUTPUT -m state --state RELATED -j ACCEPT
//此规则添加在INPUT链上,从client返回的数据相当于reply,所以状态为ESTABLISHED
iptables -A INPUT -m state --state ESTABLISHED -j ACCEPT
//因为已经收到reply方向的数据,所以original方向的后续数据包的状态为IP_CT_ESTABLISHED,需要如下规则,此规则已经在前面控制通道添加过,所以不用再次添加
iptables -A OUTPUT -m state --state ESTABLISHED -j ACCEPT
添加上面规则后,active模式下数据通道可正常传输数据。
root@ubuntu:~# ftp -d 192.168.122.21
Connected to 192.168.122.21.
220 (vsFTPd 3.0.3)
ftp: setsockopt: Bad file descriptor
Name (192.168.122.21:root): test
---> USER test
331 Please specify the password.
Password:
---> PASS XXXX
230 Login successful.
---> SYST
215 UNIX Type: L8
Remote system type is UNIX.
Using binary mode to transfer files.
ftp> ls
---> PORT 192,168,122,63,141,201
200 PORT command successful. Consider using PASV.
---> LIST
150 Here comes the directory listing.
226 Directory send OK.
f. 在ftp passive模式下,需要添加如下规则放行数据通道数据流
//因为是passive模式,client会主动连接server,称为original方向。第一个数据包会查找期望连接,并设置ctinfo为IP_CT_RELATED,所以匹配下面规则
iptables -A INPUT -m state --state RELATED -j ACCEPT
//从server发给client的数据包为reply方向,ctinfo为IP_CT_ESTABLISHED_REPLY,所以匹配下面规则
iptables -A OUTPUT -m state --state ESTABLISHED -j ACCEPT
//因为已经收到reply方向数据包,所以original方向后续数据包ctinfo为IP_CT_ESTABLISHED,匹配下面规则
iptables -A INPUT -m state --state ESTABLISHED -j ACCEPT
添加上面规则后,passive模式下(使用passive命令转换到passive模式)数据通道可正常传输数据。
root@ubuntu:~# ftp -d 192.168.122.21
Connected to 192.168.122.21.
220 (vsFTPd 3.0.3)
ftp: setsockopt: Bad file descriptor
Name (192.168.122.21:root): test
---> USER test
331 Please specify the password.
Password:
---> PASS XXXX
230 Login successful.
---> SYST
215 UNIX Type: L8
Remote system type is UNIX.
Using binary mode to transfer files.
ftp> passive
Passive mode on.
ftp> ls
---> PASV
227 Entering Passive Mode (192,168,122,21,252,179).
---> LIST
150 Here comes the directory listing.
226 Directory send OK.
3.2 -j CT --helper的方式
和legacy的区别就是,helper是通过raw表上的规则指定的。
filter上的规则都是一样的。
a. 因为ftp监听端口号21也是legacy默认的端口号,所以修改监听端口号为801,将 listen_port=801 添加到ftp配置文件/etc/vsftpd.conf中
root@node1:~# systemctl restart vsftpd
root@node1:~# netstat -nap | grep 801
tcp6 0 0 :::801 :::* LISTEN 7060/vsftpd
b. 也要保证nf_conntrack_ftp的存在,因为它是用来注册helper的
root@node1:~# lsmod | grep ftp
nf_conntrack_ftp 24576 0
c. 添加如下规则使client可以正常连接server并获取数据
//在raw表的PREROUTING 链上添加规则,将目的port为801的数据流分配ftp的helper
iptables -A PREROUTING -t raw -p tcp --dport 801 -j CT --helper ftp
//在filter表上添加规则,使控制通道可以正常工作
iptables -A INPUT -p tcp --dport 801 -j ACCEPT
iptables -A OUTPUT -m state --state ESTABLISHED -j ACCEPT
//在filter表上添加规则,使数据通道可以正常工作(active模式下)
iptables -A INPUT -m state --state ESTABLISHED -j ACCEPT
iptables -A OUTPUT -m state --state RELATED -j ACCEPT
d. 验证连接server的801端口,并获取数据,由此可得出 -j CT --helper指定的ftp helper也可正常工作。
root@ubuntu:~# ftp -d 192.168.122.21 801
Connected to 192.168.122.21.
220 (vsFTPd 3.0.3)
ftp: setsockopt: Bad file descriptor
Name (192.168.122.21:root): test
---> USER test
331 Please specify the password.
Password:
---> PASS XXXX
230 Login successful.
---> SYST
215 UNIX Type: L8
Remote system type is UNIX.
Using binary mode to transfer files.
ftp> ls
---> PORT 192,168,122,63,161,69
200 PORT command successful. Consider using PASV.
---> LIST
150 Here comes the directory listing.
226 Directory send OK.