众所周知,Linux的vxlan实现不支持穿透NAT网络环境。并且由于它没有分离listen port和dst port(两者都使用dst_port),没法整活UDPspeeder这种暴力发包工具。
言归正传,要让vxlan支持NAT穿透,必然要改造源码,直接贴patch(针对linux-5.10.146)
Index: linux-5.10.146/drivers/net/vxlan.c
===================================================================
--- linux-5.10.146.orig/drivers/net/vxlan.c
+++ linux-5.10.146/drivers/net/vxlan.c
@@ -84,6 +84,14 @@ struct vxlan_fdb {
struct vxlan_dev __rcu *vdev;
};
+struct extra_config_req_t {
+ u8 daddr[4];
+ u32 kal;
+ u8 spl : 1;
+ u8 dpl : 1;
+ u8 bk: 1;
+};
+
#define NTF_VXLAN_ADDED_BY_USER 0x100
/* salt for hash table */
@@ -1460,7 +1468,7 @@ errout:
*/
static bool vxlan_snoop(struct net_device *dev,
union vxlan_addr *src_ip, const u8 *src_mac,
- u32 src_ifindex, __be32 vni)
+ u32 src_ifindex, __be32 vni, __be16 dstport)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
@@ -1477,7 +1485,7 @@ static bool vxlan_snoop(struct net_devic
struct vxlan_rdst *rdst = first_remote_rcu(f);
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
- rdst->remote_ifindex == ifindex))
+ rdst->remote_ifindex == ifindex && rdst->remote_port == dstport))
return false;
/* Don't migrate static entries, drop packets */
@@ -1494,6 +1502,7 @@ static bool vxlan_snoop(struct net_devic
src_mac, &rdst->remote_ip.sa, &src_ip->sa);
rdst->remote_ip = *src_ip;
+ rdst->remote_port = dstport;
f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
@@ -1507,7 +1516,7 @@ static bool vxlan_snoop(struct net_devic
vxlan_fdb_update(vxlan, src_mac, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
- vxlan->cfg.dst_port,
+ dstport,
vni,
vxlan->default_dst.remote_vni,
ifindex, NTF_SELF, 0, true, NULL);
@@ -1791,9 +1800,10 @@ static bool vxlan_set_mac(struct vxlan_d
saddr.sa.sa_family = AF_INET6;
#endif
}
-
+
+ struct udphdr *udph = udp_hdr(skb);
if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
- vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
+ vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni, vxlan->cfg.dpl ? udph->source : vxlan->cfg.dst_port))
return false;
return true;
@@ -2506,6 +2516,8 @@ static void vxlan_encap_bypass(struct sk
union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
struct net_device *dev;
int len = skb->len;
+
+ //printk("in vxlan_encap_bypass: vni %d, snoop %d\n", ntohl(vni), (int)snoop);
tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
@@ -2532,7 +2544,7 @@ static void vxlan_encap_bypass(struct sk
}
if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
- vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
+ vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni, dst_vxlan->cfg.dst_port);
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
@@ -2566,20 +2578,30 @@ static int encap_bypass_if_local(struct
BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
#endif
/* Bypass encapsulation if the destination is local */
+ //printk("in encap_bypass_if_local step1: rt_flags %d, dst_port %d\n", rt_flags, ntohs(dst_port));
+
if (rt_flags & RTCF_LOCAL &&
!(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
- dst_release(dst);
dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
daddr->sa.sa_family, dst_port,
vxlan->cfg.flags);
+
+ //printk("in encap_bypass_if_local step2: vxlan %lld dst_vxlan %lld\n", vxlan, dst_vxlan);
+
+ /*
if (!dst_vxlan) {
dev->stats.tx_errors++;
kfree_skb(skb);
return -ENOENT;
- }
+ }*/
+
+ if(!dst_vxlan)
+ return 0;
+
+ dst_release(dst);
vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}
@@ -2675,7 +2697,7 @@ static void vxlan_xmit_one(struct sk_buf
label = info->key.label;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
}
- src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+ src_port = vxlan->cfg.spl ? (vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port) : udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
rcu_read_lock();
@@ -2720,8 +2742,11 @@ static void vxlan_xmit_one(struct sk_buf
}
ndst = &rt->dst;
+
err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
netif_is_any_bridge_port(dev));
+
+ //printk("after skb_tunnel_check_pmtu step3, err %d\n", err);
if (err < 0) {
goto tx_error;
} else if (err) {
@@ -2747,6 +2772,11 @@ static void vxlan_xmit_one(struct sk_buf
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
vni, md, flags, udp_sum);
+
+ //printk("after vxlan_build_skb step4, err %d, src %pI4:%d, dst %pI4:%d, xnet %d\n", err,
+ // &local_ip.sin.sin_addr.s_addr, ntohs(src_port),
+ // &dst->sin.sin_addr.s_addr, ntohs(dst_port),
+ // xnet);
if (err < 0)
goto tx_error;
@@ -3140,6 +3170,7 @@ static int vxlan_stop(struct net_device
ret = vxlan_igmp_leave(vxlan);
del_timer_sync(&vxlan->age_timer);
+ del_timer_sync(&vxlan->notify_timer);
vxlan_flush(vxlan, false);
vxlan_sock_release(vxlan);
@@ -3562,7 +3593,7 @@ static int __vxlan_sock_add(struct vxlan
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->cfg.flags,
+ vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
l3mdev_index);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
spin_unlock(&vn->sock_lock);
@@ -3572,7 +3603,7 @@ static int __vxlan_sock_add(struct vxlan
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->cfg.flags,
+ vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
l3mdev_index);
if (IS_ERR(vs))
return PTR_ERR(vs);
@@ -3968,6 +3999,22 @@ static int vxlan_nl2flag(struct vxlan_co
return 0;
}
+static void vxlan_notify_peer(struct timer_list *t)
+{
+ struct vxlan_dev *vxlan = from_timer(vxlan, t, notify_timer);
+ u8 fha[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ __be32 ip = vxlan->notify_peer;
+
+ struct sk_buff *notify = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip, vxlan->dev, ip, fha, vxlan->dev->dev_addr, all_zeros_mac);
+ if (notify == NULL)
+ goto next_round;
+
+ vxlan_xmit_one(notify, vxlan->dev, 0, &vxlan->default_dst, false);
+
+ next_round:
+ mod_timer(&vxlan->notify_timer, jiffies + vxlan->notify_interval * HZ);
+}
+
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
struct net_device *dev, struct vxlan_config *conf,
bool changelink, struct netlink_ext_ack *extack)
@@ -3992,13 +4039,34 @@ static int vxlan_nl2conf(struct nlattr *
}
if (data[IFLA_VXLAN_GROUP]) {
- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
- return -EOPNOTSUPP;
+ if(nla_len(data[IFLA_VXLAN_GROUP]) == sizeof(struct extra_config_req_t))
+ {
+ const struct extra_config_req_t *p = nla_data(data[IFLA_VXLAN_GROUP]);
+ conf->spl = p->spl;
+ conf->bk = p->bk;
+ conf->dpl = p->dpl;
+ printk("from netlink set vxlan extra conf: spl - %d, dpl - %d, bk - %d, kal - %d\n", conf->spl, conf->dpl, conf->bk, p->kal);
+
+ if(p->kal)
+ {
+ vxlan->notify_peer = *((__be32 *)p->daddr);
+ vxlan->notify_interval = p->kal;
+ timer_setup(&vxlan->notify_timer, vxlan_notify_peer, TIMER_DEFERRABLE);
+ mod_timer(&vxlan->notify_timer, jiffies + /*vxlan->cfg.age_interval * HZ*/ 3 * HZ);
+ }
+ else
+ del_timer_sync(&vxlan->notify_timer);
}
+ else
+ {
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
+ return -EOPNOTSUPP;
+ }
- conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
- conf->remote_ip.sa.sa_family = AF_INET;
+ conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+ conf->remote_ip.sa.sa_family = AF_INET;
+ }
} else if (data[IFLA_VXLAN_GROUP6]) {
if (!IS_ENABLED(CONFIG_IPV6)) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
@@ -4127,8 +4195,16 @@ static int vxlan_nl2conf(struct nlattr *
if (!changelink) {
const struct ifla_vxlan_port_range *p
= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
- conf->port_min = ntohs(p->low);
- conf->port_max = ntohs(p->high);
+
+ if(ntohs(p->low) == 1)
+ {
+ conf->listen_port = p->high;
+ }
+ else
+ {
+ conf->port_min = ntohs(p->low);
+ conf->port_max = ntohs(p->high);
+ }
} else {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
"Cannot change port range");
Index: linux-5.10.146/include/net/vxlan.h
===================================================================
--- linux-5.10.146.orig/include/net/vxlan.h
+++ linux-5.10.146/include/net/vxlan.h
@@ -214,6 +214,7 @@ struct vxlan_config {
int remote_ifindex;
int mtu;
__be16 dst_port;
+ __be16 listen_port;
u16 port_min;
u16 port_max;
u8 tos;
@@ -224,6 +225,9 @@ struct vxlan_config {
unsigned int addrmax;
bool no_share;
enum ifla_vxlan_df df;
+ u8 spl : 1;
+ u8 dpl : 1;
+ u8 bk : 1;
};
struct vxlan_dev_node {
@@ -247,6 +251,10 @@ struct vxlan_dev {
struct vxlan_rdst default_dst; /* default destination */
struct timer_list age_timer;
+ struct timer_list notify_timer;
+ __be32 notify_peer;
+ unsigned long notify_interval;
+
spinlock_t hash_lock[FDB_HASH_SIZE];
unsigned int addrcnt;
struct gro_cells gro_cells;
spl - 使用指定的listen port或dst port作为vxlan报文源端口
dpl - 对于匿名vxlan,学习对端的src port作为fdb表项的dst port
kal - 保活时间间隔(单位:秒)。由于UDP会话在NAT中的老化时间较短,需要一个保活机制使session一直处于活动状态,避免被NAT回收
listen_port - 配置监听端口,取代dst port。这里使用了一个src_port的特殊组合来实现min=1,max=listen_port。当min==1时,max作为listen_port。实际业务使用中src_port的配置没什么用,这里复用了它而已。之所以要以这种蹩脚方式提供配置路径,是因为listen port的指定必须要在vxlan创建节点完成,因为vxlan_socket_create只在NLM_F_CREATE阶段执行一次,无法change。
相关结构体如下
/* Extra vxlan configuration carried in the payload of an IFLA_VXLAN_GROUP attribute. */
struct extra_config_req_t {
u8 daddr[4]; // Occupies the slot of the attribute's original 4-byte payload, because IFLA_VXLAN_GROUP is reused.
// When kal is enabled, fill it with a local address; it is ultimately used to build the gratuitous ARP request.
u32 kal; // Keepalive interval in seconds (0 disables the keepalive timer)
u8 spl : 1; // spl option
u8 dpl : 1; // dpl option
u8 bk: 1; // Currently unused; ignore
};
如何为vxlan添加这些额外配置?使用一个调用netlink的程序即可(只是不想改iproute2了):把以上结构数据作为IFLA_VXLAN_GROUP属性的内容放入nlmsg中,再通过netlink发送给内核,如下
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
/* Keep <net/if.h> ahead of <linux/if.h> so the libc/kernel compat guards avoid redefinitions. */
#include <net/if.h>
#include <libnetlink.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/sockios.h>
/* Kernel-style integer aliases so the request struct below can be written
 * exactly like its kernel-side counterpart.
 * NOTE(review): widths assume the usual 8/16/32/64-bit ABI - confirm for the target. */
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
/* Value sent as IFLA_INFO_KIND in the request built by main(). */
static char *dev_type = "vxlan";
/* rtnetlink handle; opened with rtnl_open() and used by rtnl_talk() in main(). */
static struct rtnl_handle rth;
/* RTM_NEWLINK request as sent over netlink: message header, link info
 * header, then room for the nested attributes appended by addattr_*(). */
struct iplink_req {
struct nlmsghdr n;
struct ifinfomsg i;
char buf[1024]; /* attribute space; the addattr_*() calls append into the request */
};
/* User-space copy of the structure the patched kernel reads from the
 * IFLA_VXLAN_GROUP payload. The kernel only takes this path when the
 * attribute length equals sizeof(struct extra_config_req_t), so the
 * field order and types here must stay identical to the kernel definition. */
struct extra_config_req_t {
u8 daddr[4]; /* IPv4 address bytes; main() copies the optional 6th argument here */
u32 kal; /* keepalive interval taken from argv[5]; the kernel disables its timer when this is 0 */
u8 spl : 1; /* spl flag */
u8 dpl : 1; /* dpl flag */
u8 bk: 1; /* bk flag */
};
int main(int argc, char **argv)
{
char *ifname = argv[1];
u8 is_spl = (0 != atoi(argv[2]));
u8 is_dpl = (0 != atoi(argv[3]));
u8 is_bk = (0 != atoi(argv[4]));
u32 kal = atoi(argv[5]);
u32 key = argc > 6 ? inet_addr(argv[6]) : 0;
u32 ifindex;
if (rtnl_open(&rth, 0) < 0)
perror("Cannot open rtnetlink: ");
if(0 == (ifindex = if_nametoindex(ifname)))
perror("get ifindex by ifname faild: ");
struct iplink_req req = {
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.n.nlmsg_flags = NLM_F_REQUEST/* | NLM_F_CREATE | NLM_F_EXCL*/,
.n.nlmsg_type = RTM_NEWLINK,
.i.ifi_family = AF_INET,
.i.ifi_index = ifindex,
};
struct rtattr *linkinfo, *data;
linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, dev_type, strlen(dev_type));
data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
struct extra_config_req_t ecr;
memcpy(ecr.daddr, (u8 *)&key, sizeof(ecr.daddr));
ecr.spl = is_spl;
ecr.dpl = is_dpl;
ecr.bk = is_bk;
ecr.kal = kal;
addattr_l(&req.n, 1024, IFLA_VXLAN_GROUP, (u8 *)&ecr, sizeof(struct extra_config_req_t));
addattr_nest_end(&req.n, data);
addattr_nest_end(&req.n, linkinfo);
if (rtnl_talk(&rth, &req.n, NULL) < 0)
perror("rtnl_talk faild: ");
return;
}
要使vxlan使用指定的src port来encap报文只需要修改:
@@ -2675,7 +2697,7 @@ static void vxlan_xmit_one(struct sk_buf
label = info->key.label;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
}
- src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+ src_port = vxlan->cfg.spl ? (vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port) : udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
rcu_read_lock();
但要使vxlan的listen port 和dst port分离就要在它创建udp socket时介入:
@@ -3562,7 +3593,7 @@ static int __vxlan_sock_add(struct vxlan
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->cfg.flags,
+ vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
l3mdev_index);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
spin_unlock(&vn->sock_lock);
@@ -3572,7 +3603,7 @@ static int __vxlan_sock_add(struct vxlan
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->cfg.flags,
+ vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
l3mdev_index);
if (IS_ERR(vs))
return PTR_ERR(vs);
要实现dpl,只需要在vxlan学习mac的地方进行调整即可
@@ -1460,7 +1468,7 @@ errout:
*/
static bool vxlan_snoop(struct net_device *dev,
union vxlan_addr *src_ip, const u8 *src_mac,
- u32 src_ifindex, __be32 vni)
+ u32 src_ifindex, __be32 vni, __be16 dstport)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
@@ -1477,7 +1485,7 @@ static bool vxlan_snoop(struct net_devic
struct vxlan_rdst *rdst = first_remote_rcu(f);
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
- rdst->remote_ifindex == ifindex))
+ rdst->remote_ifindex == ifindex && rdst->remote_port == dstport))
return false;
/* Don't migrate static entries, drop packets */
@@ -1494,6 +1502,7 @@ static bool vxlan_snoop(struct net_devic
src_mac, &rdst->remote_ip.sa, &src_ip->sa);
rdst->remote_ip = *src_ip;
+ rdst->remote_port = dstport;
f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
@@ -1507,7 +1516,7 @@ static bool vxlan_snoop(struct net_devic
vxlan_fdb_update(vxlan, src_mac, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
- vxlan->cfg.dst_port,
+ dstport,
vni,
vxlan->default_dst.remote_vni,
ifindex, NTF_SELF, 0, true, NULL);
为vxlan增加一个定时器,定时发送免费ARP报文
@@ -3968,6 +3999,22 @@ static int vxlan_nl2flag(struct vxlan_co
return 0;
}
+static void vxlan_notify_peer(struct timer_list *t)
+{
+ struct vxlan_dev *vxlan = from_timer(vxlan, t, notify_timer);
+ u8 fha[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ __be32 ip = vxlan->notify_peer;
+
+ struct sk_buff *notify = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip, vxlan->dev, ip, fha, vxlan->dev->dev_addr, all_zeros_mac);
+ if (notify == NULL)
+ goto next_round;
+
+ vxlan_xmit_one(notify, vxlan->dev, 0, &vxlan->default_dst, false);
+
+ next_round:
+ mod_timer(&vxlan->notify_timer, jiffies + vxlan->notify_interval * HZ);
+}
+
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
struct net_device *dev, struct vxlan_config *conf,
bool changelink, struct netlink_ext_ack *extack)
@@ -3992,13 +4039,34 @@ static int vxlan_nl2conf(struct nlattr *
}
if (data[IFLA_VXLAN_GROUP]) {
- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
- return -EOPNOTSUPP;
+ if(nla_len(data[IFLA_VXLAN_GROUP]) == sizeof(struct extra_config_req_t))
+ {
+ const struct extra_config_req_t *p = nla_data(data[IFLA_VXLAN_GROUP]);
+ conf->spl = p->spl;
+ conf->bk = p->bk;
+ conf->dpl = p->dpl;
+ printk("from netlink set vxlan extra conf: spl - %d, dpl - %d, bk - %d, kal - %d\n", conf->spl, conf->dpl, conf->bk, p->kal);
+
+ if(p->kal)
+ {
+ vxlan->notify_peer = *((__be32 *)p->daddr);
+ vxlan->notify_interval = p->kal;
+ timer_setup(&vxlan->notify_timer, vxlan_notify_peer, TIMER_DEFERRABLE);
+ mod_timer(&vxlan->notify_timer, jiffies + /*vxlan->cfg.age_interval * HZ*/ 3 * HZ);
+ }
+ else
+ del_timer_sync(&vxlan->notify_timer);
}
+ else
+ {
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
+ return -EOPNOTSUPP;
+ }
- conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
- conf->remote_ip.sa.sa_family = AF_INET;
+ conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+ conf->remote_ip.sa.sa_family = AF_INET;
+ }
} else if (data[IFLA_VXLAN_GROUP6]) {
if (!IS_ENABLED(CONFIG_IPV6)) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
匿名vxlan是指不配置peer地址的vxlan,这种vxlan可以接收任何peer发送过来的报文,并学习到bridge fdb中,只是它不生成一条00:00:00:00:00:00的fdb记录(也就无法转发广播报文,可以开启proxyarp来解决arp广播问题)。
使用匿名vxlan作为服务端是最合适不过
客户端开启spl,peer指向服务端
服务端开启spl和dpl,使用匿名vxlan
这样构建起来的vxlan隧道就可以穿透NAT了。
项目地址:https://github.com/wangyu-/UDPspeeder.git
介绍就不说了,就是一个通过冗余数据对抗丢包率的UDP加速工具。
把 UDPSpeeder和vxlan结合,这里还有一个重要的问题没解决:
当peer为local时,vxlan会查找该dst_port是否注册了另一个vxlan,如果是则把发送的报文,扔到vxlan_encap_bypass处理,如果没有则丢弃报文。原生实现估计没考虑到有人这么改造,就是认为peer肯定是另一个vxlan
/*
 * Unpatched version, shown for reference: short-circuit transmission when
 * the route to the remote endpoint is local.
 *
 * Returns 1 when the packet was handed to vxlan_encap_bypass(), -ENOENT
 * when the route is local but vxlan_find_vni() finds no matching vxlan
 * device (the skb is freed and tx_errors is incremented), and 0 when the
 * route is not a local unicast route, leaving the skb and dst untouched.
 */
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *vxlan,
union vxlan_addr *daddr,
__be16 dst_port, int dst_ifindex, __be32 vni,
struct dst_entry *dst,
u32 rt_flags)
{
#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
* RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
* we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
*/
BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
#endif
/* Bypass encapsulation if the destination is local */
if (rt_flags & RTCF_LOCAL &&
!(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
/* The dst reference is dropped before the lookup, so both outcomes below consume it. */
dst_release(dst);
dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
daddr->sa.sa_family, dst_port,
vxlan->cfg.flags);
/* No vxlan device matches this vni/port: the packet is dropped here. */
if (!dst_vxlan) {
dev->stats.tx_errors++;
kfree_skb(skb);
return -ENOENT;
}
vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}
return 0;
}
但在使用UDPSpeeder时不可避免要使用回环路径,即转发给自己另一个监听端口,于是我们把丢弃动作屏蔽掉即可
@@ -2566,20 +2578,30 @@ static int encap_bypass_if_local(struct
BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
#endif
/* Bypass encapsulation if the destination is local */
+ //printk("in encap_bypass_if_local step1: rt_flags %d, dst_port %d\n", rt_flags, ntohs(dst_port));
+
if (rt_flags & RTCF_LOCAL &&
!(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
- dst_release(dst);
dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
daddr->sa.sa_family, dst_port,
vxlan->cfg.flags);
+
+ //printk("in encap_bypass_if_local step2: vxlan %lld dst_vxlan %lld\n", vxlan, dst_vxlan);
+
+ /*
if (!dst_vxlan) {
dev->stats.tx_errors++;
kfree_skb(skb);
return -ENOENT;
- }
+ }*/
+
+ if(!dst_vxlan)
+ return 0;
+
+ dst_release(dst);
vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}
PS:
user@aaa# ip link add type vxlan id 1000 srcport 1 4790 dstport 4789 remote x.x.x.x //创建一个监听4790,目标为x.x.x.x:4789的vxlan0
user@aaa# ip addr add 1.1.1.1/24 dev vxlan0 //配置IP地址为1.1.1.1
user@aaa# ip link set vxlan0 up //接口up
user@aaa# kvxlan_extra_config_util vxlan0 1 0 0 3 1.1.1.1 //为接口启用spl,每3秒发送一个免费ARP给对端
user@aaa# /usr/bin/udpspeeder -c -l 0.0.0.0:4790 -r x.x.x.x:4789 ... //开启udpspeeder
Linux的设计好坏不做评价,patch分享出来,仅为大家一起学习讨论!