当前位置: 首页 > 工具软件 > UDPspeeder > 使用案例 >

Linux vxlan的NAT穿透和UDPspeeder黑科技

刘成礼
2023-12-01

众所周知,Linux的vxlan实现不支持穿透NAT网络环境。并且由于它没有分离listen port和dst port(两者都使用dst_port),没法整活UDPspeeder这种暴力发包工具。

  • 为什么要折腾Linux vxlan?只是因为直面kernel性能好,实现(网络层面)简单高效,用来作为隧道传输通道,在带宽和延迟上爆杀一众用户态VPN工具。在我的测试环境,vxlan能随便跑满千兆接口,而openvpn即使去掉加密选项也只能跑250M

言归正传,要让vxlan支持NAT穿透,必然要改造源码,直接贴patch(针对linux-5.10.146)

Index: linux-5.10.146/drivers/net/vxlan.c
===================================================================
--- linux-5.10.146.orig/drivers/net/vxlan.c
+++ linux-5.10.146/drivers/net/vxlan.c
@@ -84,6 +84,14 @@ struct vxlan_fdb {
 	struct vxlan_dev  __rcu *vdev;
 };
 
+struct extra_config_req_t {
+	u8 daddr[4];
+	u32 kal;
+	u8 spl : 1;
+	u8 dpl : 1;
+	u8 bk: 1;
+};
+
 #define NTF_VXLAN_ADDED_BY_USER 0x100
 
 /* salt for hash table */
@@ -1460,7 +1468,7 @@ errout:
  */
 static bool vxlan_snoop(struct net_device *dev,
 			union vxlan_addr *src_ip, const u8 *src_mac,
-			u32 src_ifindex, __be32 vni)
+			u32 src_ifindex, __be32 vni, __be16 dstport)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_fdb *f;
@@ -1477,7 +1485,7 @@ static bool vxlan_snoop(struct net_devic
 		struct vxlan_rdst *rdst = first_remote_rcu(f);
 
 		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
-			   rdst->remote_ifindex == ifindex))
+			   rdst->remote_ifindex == ifindex && rdst->remote_port == dstport))
 			return false;
 
 		/* Don't migrate static entries, drop packets */
@@ -1494,6 +1502,7 @@ static bool vxlan_snoop(struct net_devic
 				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);
 
 		rdst->remote_ip = *src_ip;
+		rdst->remote_port = dstport;
 		f->updated = jiffies;
 		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
 	} else {
@@ -1507,7 +1516,7 @@ static bool vxlan_snoop(struct net_devic
 			vxlan_fdb_update(vxlan, src_mac, src_ip,
 					 NUD_REACHABLE,
 					 NLM_F_EXCL|NLM_F_CREATE,
-					 vxlan->cfg.dst_port,
+					 dstport,
 					 vni,
 					 vxlan->default_dst.remote_vni,
 					 ifindex, NTF_SELF, 0, true, NULL);
@@ -1791,9 +1800,10 @@ static bool vxlan_set_mac(struct vxlan_d
 		saddr.sa.sa_family = AF_INET6;
 #endif
 	}
-
+	
+	struct udphdr *udph = udp_hdr(skb);
 	if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
-	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
+	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni, vxlan->cfg.dpl ? udph->source : vxlan->cfg.dst_port))
 		return false;
 
 	return true;
@@ -2506,6 +2516,8 @@ static void vxlan_encap_bypass(struct sk
 	union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
 	struct net_device *dev;
 	int len = skb->len;
+	
+	//printk("in vxlan_encap_bypass: vni %d, snoop %d\n", ntohl(vni), (int)snoop);
 
 	tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
 	rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
@@ -2532,7 +2544,7 @@ static void vxlan_encap_bypass(struct sk
 	}
 
 	if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
-		vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
+		vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni, dst_vxlan->cfg.dst_port);
 
 	u64_stats_update_begin(&tx_stats->syncp);
 	tx_stats->tx_packets++;
@@ -2566,20 +2578,30 @@ static int encap_bypass_if_local(struct
 	BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
 #endif
 	/* Bypass encapsulation if the destination is local */
+	//printk("in encap_bypass_if_local step1: rt_flags %d, dst_port %d\n", rt_flags, ntohs(dst_port));
+						 
 	if (rt_flags & RTCF_LOCAL &&
 	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
 		struct vxlan_dev *dst_vxlan;
 
-		dst_release(dst);
 		dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
 					   daddr->sa.sa_family, dst_port,
 					   vxlan->cfg.flags);
+					   
+		//printk("in encap_bypass_if_local step2: vxlan %lld dst_vxlan %lld\n", vxlan, dst_vxlan);
+		
+		/*
 		if (!dst_vxlan) {
 			dev->stats.tx_errors++;
 			kfree_skb(skb);
 
 			return -ENOENT;
-		}
+		}*/
+		
+		if(!dst_vxlan)
+			return 0;
+		
+		dst_release(dst);
 		vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
 		return 1;
 	}
@@ -2675,7 +2697,7 @@ static void vxlan_xmit_one(struct sk_buf
 		label = info->key.label;
 		udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
 	}
-	src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+	src_port = vxlan->cfg.spl ? (vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port) : udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
 				     vxlan->cfg.port_max, true);
 
 	rcu_read_lock();
@@ -2720,8 +2742,11 @@ static void vxlan_xmit_one(struct sk_buf
 		}
 
 		ndst = &rt->dst;
+		
 		err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
 					    netif_is_any_bridge_port(dev));
+						
+		//printk("after skb_tunnel_check_pmtu step3, err %d\n", err);
 		if (err < 0) {
 			goto tx_error;
 		} else if (err) {
@@ -2747,6 +2772,11 @@ static void vxlan_xmit_one(struct sk_buf
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
 				      vni, md, flags, udp_sum);
+					  
+		//printk("after vxlan_build_skb step4, err %d, src %pI4:%d, dst %pI4:%d, xnet %d\n", err, 
+		//			&local_ip.sin.sin_addr.s_addr, ntohs(src_port),
+		//			&dst->sin.sin_addr.s_addr, ntohs(dst_port),
+		//			xnet);
 		if (err < 0)
 			goto tx_error;
 
@@ -3140,6 +3170,7 @@ static int vxlan_stop(struct net_device
 		ret = vxlan_igmp_leave(vxlan);
 
 	del_timer_sync(&vxlan->age_timer);
+	del_timer_sync(&vxlan->notify_timer);
 
 	vxlan_flush(vxlan, false);
 	vxlan_sock_release(vxlan);
@@ -3562,7 +3593,7 @@ static int __vxlan_sock_add(struct vxlan
 	if (!vxlan->cfg.no_share) {
 		spin_lock(&vn->sock_lock);
 		vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
-				     vxlan->cfg.dst_port, vxlan->cfg.flags,
+				     vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
 				     l3mdev_index);
 		if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
 			spin_unlock(&vn->sock_lock);
@@ -3572,7 +3603,7 @@ static int __vxlan_sock_add(struct vxlan
 	}
 	if (!vs)
 		vs = vxlan_socket_create(vxlan->net, ipv6,
-					 vxlan->cfg.dst_port, vxlan->cfg.flags,
+					 vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
 					 l3mdev_index);
 	if (IS_ERR(vs))
 		return PTR_ERR(vs);
@@ -3968,6 +3999,22 @@ static int vxlan_nl2flag(struct vxlan_co
 	return 0;
 }
 
+static void vxlan_notify_peer(struct timer_list *t)
+{
+	struct vxlan_dev *vxlan = from_timer(vxlan, t, notify_timer);
+	u8 fha[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	__be32 ip = vxlan->notify_peer;
+	
+	struct sk_buff *notify = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip, vxlan->dev, ip, fha, vxlan->dev->dev_addr, all_zeros_mac);
+	if (notify == NULL)
+		goto next_round;
+
+	vxlan_xmit_one(notify, vxlan->dev, 0, &vxlan->default_dst, false);
+	
+  next_round:
+	mod_timer(&vxlan->notify_timer, jiffies + vxlan->notify_interval * HZ);
+}
+
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			 struct net_device *dev, struct vxlan_config *conf,
 			 bool changelink, struct netlink_ext_ack *extack)
@@ -3992,13 +4039,34 @@ static int vxlan_nl2conf(struct nlattr *
 	}
 
 	if (data[IFLA_VXLAN_GROUP]) {
-		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
-			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
-			return -EOPNOTSUPP;
+		if(nla_len(data[IFLA_VXLAN_GROUP]) == sizeof(struct extra_config_req_t))
+		{
+			const struct extra_config_req_t *p = nla_data(data[IFLA_VXLAN_GROUP]);
+			conf->spl = p->spl;
+			conf->bk = p->bk;
+			conf->dpl = p->dpl;
+			printk("from netlink set vxlan extra conf: spl - %d, dpl - %d, bk - %d, kal - %d\n", conf->spl, conf->dpl, conf->bk, p->kal);
+			
+			if(p->kal)
+			{
+				vxlan->notify_peer = *((__be32 *)p->daddr);
+				vxlan->notify_interval = p->kal;
+				timer_setup(&vxlan->notify_timer, vxlan_notify_peer, TIMER_DEFERRABLE);
+				mod_timer(&vxlan->notify_timer, jiffies + /*vxlan->cfg.age_interval * HZ*/ 3 * HZ);
+			}
+			else
+				del_timer_sync(&vxlan->notify_timer);
 		}
+		else
+		{
+			if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+				NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
+				return -EOPNOTSUPP;
+			}
 
-		conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
-		conf->remote_ip.sa.sa_family = AF_INET;
+			conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+			conf->remote_ip.sa.sa_family = AF_INET;
+		}
 	} else if (data[IFLA_VXLAN_GROUP6]) {
 		if (!IS_ENABLED(CONFIG_IPV6)) {
 			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
@@ -4127,8 +4195,16 @@ static int vxlan_nl2conf(struct nlattr *
 		if (!changelink) {
 			const struct ifla_vxlan_port_range *p
 				= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
-			conf->port_min = ntohs(p->low);
-			conf->port_max = ntohs(p->high);
+				
+			if(ntohs(p->low) == 1)
+			{
+				conf->listen_port = p->high;
+			}
+			else
+			{
+				conf->port_min = ntohs(p->low);
+				conf->port_max = ntohs(p->high);
+			}
 		} else {
 			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
 					    "Cannot change port range");
Index: linux-5.10.146/include/net/vxlan.h
===================================================================
--- linux-5.10.146.orig/include/net/vxlan.h
+++ linux-5.10.146/include/net/vxlan.h
@@ -214,6 +214,7 @@ struct vxlan_config {
 	int			remote_ifindex;
 	int			mtu;
 	__be16			dst_port;
+	__be16			listen_port;
 	u16			port_min;
 	u16			port_max;
 	u8			tos;
@@ -224,6 +225,9 @@ struct vxlan_config {
 	unsigned int		addrmax;
 	bool			no_share;
 	enum ifla_vxlan_df	df;
+	u8 spl : 1;
+	u8 dpl : 1;
+	u8 bk : 1;
 };
 
 struct vxlan_dev_node {
@@ -247,6 +251,10 @@ struct vxlan_dev {
 	struct vxlan_rdst default_dst;	/* default destination */
 
 	struct timer_list age_timer;
+	struct timer_list notify_timer;
+	__be32 notify_peer;
+	unsigned long	notify_interval;
+	
 	spinlock_t	  hash_lock[FDB_HASH_SIZE];
 	unsigned int	  addrcnt;
 	struct gro_cells  gro_cells;
 

1、vxlan增加配置:

        spl - 使用指定的listen port或dst port作为vxlan报文源端口

        dpl - 对于匿名vxlan,学习对端的src port作为fdb表项的dst port

        kal - 保活时间间隔,由于udp在nat中老化时间较短,需要一个保活机制使session一直处于活动,避免被nat回收

        listen_port - 配置监听端口,取代dst port。这里使用了一个src_port的特殊组合来实现min=1,max=listen_port。当min==1时,max作为listen_port。实际业务使用中src_port的配置没什么用,这里复用了它而已。之所以要以这种蹩脚方式提供配置路径,是因为listen port的指定必须要在vxlan创建节点完成,因为vxlan_socket_create只在NLM_F_CREATE阶段执行一次,无法change。

     相关结构体如下

/*
 * Extra vxlan options carried in the payload of a reused IFLA_VXLAN_GROUP
 * netlink attribute.
 */
struct extra_config_req_t {
	u8 daddr[4];        // IFLA_VXLAN_GROUP is reused, so this field lines up with that attribute's original payload length;
                        // when kal is enabled, fill it with a local address, which is ultimately used to build the gratuitous ARP request
	u32 kal;            // keepalive interval (keepalive mechanism)
	u8 spl : 1;         // spl option
	u8 dpl : 1;         // dpl option
	u8 bk: 1;           // currently unused, ignore
};

如何为vxlan添加这些额外配置?使用一个调用netlink的小程序即可(只是不想改iproute2了):把以上结构数据放入nlmsg的IFLA_VXLAN_GROUP属性中,再通过netlink发送给内核,如下

#include <arpa/inet.h>
#include <errno.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/sockios.h>

#include <libnetlink.h>

/*
 * Short integer aliases using the kernel's u8/u16/u32/u64 names.
 * NOTE(review): assumes the usual 8/16/32/64-bit widths on the target ABI.
 */
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

/* Link kind string sent as IFLA_INFO_KIND by main(). */
static char *dev_type = "vxlan";
/* rtnetlink handle opened and used by main(). */
static struct rtnl_handle rth;

/*
 * RTM_NEWLINK request as built by main(): netlink header, interface info
 * message, then buf as space for the attributes appended with addattr_*().
 */
struct iplink_req {
	struct nlmsghdr		n;	/* netlink message header */
	struct ifinfomsg	i;	/* interface selector (family, ifindex) */
	char			buf[1024];	/* room for appended attributes */
};

/*
 * Userland copy of the extra vxlan option block; main() copies it raw into
 * an IFLA_VXLAN_GROUP attribute.
 * NOTE(review): must stay layout-identical to the kernel-side definition.
 */
struct extra_config_req_t {
	u8 daddr[4];	/* IPv4 address bytes; main() fills them from its optional last argument */
	u32 kal;	/* keepalive interval; 0 when keepalive is not wanted */
	u8 spl : 1;	/* spl option flag */
	u8 dpl : 1;	/* dpl option flag */
	u8 bk: 1;	/* bk flag, passed through as given */
};

int main(int argc, char **argv)
{
	char *ifname = argv[1];
	u8 is_spl = (0 != atoi(argv[2]));
	u8 is_dpl = (0 != atoi(argv[3]));
	u8 is_bk = (0 != atoi(argv[4]));
	u32 kal = atoi(argv[5]);
	u32 key = argc > 6 ? inet_addr(argv[6]) : 0;
	u32 ifindex;
	
	if (rtnl_open(&rth, 0) < 0)
		perror("Cannot open rtnetlink: ");
	
	if(0 == (ifindex = if_nametoindex(ifname)))
		perror("get ifindex by ifname faild: ");
	
	struct iplink_req req = {
		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
		.n.nlmsg_flags = NLM_F_REQUEST/* | NLM_F_CREATE | NLM_F_EXCL*/,
		.n.nlmsg_type = RTM_NEWLINK,
		.i.ifi_family = AF_INET,
		.i.ifi_index = ifindex,
	};
	
	struct rtattr *linkinfo, *data;
	linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, dev_type, strlen(dev_type));
	
	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
	
	struct extra_config_req_t ecr;
	memcpy(ecr.daddr, (u8 *)&key, sizeof(ecr.daddr));
	ecr.spl = is_spl;
	ecr.dpl = is_dpl;
	ecr.bk = is_bk;
	ecr.kal = kal;
	addattr_l(&req.n, 1024, IFLA_VXLAN_GROUP, (u8 *)&ecr, sizeof(struct extra_config_req_t));
	
	addattr_nest_end(&req.n, data);
	
	addattr_nest_end(&req.n, linkinfo);
	
	if (rtnl_talk(&rth, &req.n, NULL) < 0)
		perror("rtnl_talk faild: ");
	
	return;
}

 2、spl功能

要使vxlan使用指定的src port来encap报文只需要修改:

@@ -2675,7 +2697,7 @@ static void vxlan_xmit_one(struct sk_buf
 		label = info->key.label;
 		udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
 	}
-	src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+	src_port = vxlan->cfg.spl ? (vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port) : udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
 				     vxlan->cfg.port_max, true);
 
 	rcu_read_lock();

但要使vxlan的listen port 和dst port分离就要在它创建udp socket时介入:

@@ -3562,7 +3593,7 @@ static int __vxlan_sock_add(struct vxlan
 	if (!vxlan->cfg.no_share) {
 		spin_lock(&vn->sock_lock);
 		vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
-				     vxlan->cfg.dst_port, vxlan->cfg.flags,
+				     vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
 				     l3mdev_index);
 		if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
 			spin_unlock(&vn->sock_lock);
@@ -3572,7 +3603,7 @@ static int __vxlan_sock_add(struct vxlan
 	}
 	if (!vs)
 		vs = vxlan_socket_create(vxlan->net, ipv6,
-					 vxlan->cfg.dst_port, vxlan->cfg.flags,
+					 vxlan->cfg.listen_port ? vxlan->cfg.listen_port : vxlan->cfg.dst_port, vxlan->cfg.flags,
 					 l3mdev_index);
 	if (IS_ERR(vs))
 		return PTR_ERR(vs);

 3、dpl功能

要实现dpl,只需要在vxlan学习mac的地方进行调整即可

@@ -1460,7 +1468,7 @@ errout:
  */
 static bool vxlan_snoop(struct net_device *dev,
 			union vxlan_addr *src_ip, const u8 *src_mac,
-			u32 src_ifindex, __be32 vni)
+			u32 src_ifindex, __be32 vni, __be16 dstport)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_fdb *f;
@@ -1477,7 +1485,7 @@ static bool vxlan_snoop(struct net_devic
 		struct vxlan_rdst *rdst = first_remote_rcu(f);
 
 		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
-			   rdst->remote_ifindex == ifindex))
+			   rdst->remote_ifindex == ifindex && rdst->remote_port == dstport))
 			return false;
 
 		/* Don't migrate static entries, drop packets */
@@ -1494,6 +1502,7 @@ static bool vxlan_snoop(struct net_devic
 				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);
 
 		rdst->remote_ip = *src_ip;
+		rdst->remote_port = dstport;
 		f->updated = jiffies;
 		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
 	} else {
@@ -1507,7 +1516,7 @@ static bool vxlan_snoop(struct net_devic
 			vxlan_fdb_update(vxlan, src_mac, src_ip,
 					 NUD_REACHABLE,
 					 NLM_F_EXCL|NLM_F_CREATE,
-					 vxlan->cfg.dst_port,
+					 dstport,
 					 vni,
 					 vxlan->default_dst.remote_vni,
 					 ifindex, NTF_SELF, 0, true, NULL);

4、kal功能 

为vxlan增加一个定时器,定时发送免费ARP报文

@@ -3968,6 +3999,22 @@ static int vxlan_nl2flag(struct vxlan_co
 	return 0;
 }
 
+static void vxlan_notify_peer(struct timer_list *t)
+{
+	struct vxlan_dev *vxlan = from_timer(vxlan, t, notify_timer);
+	u8 fha[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	__be32 ip = vxlan->notify_peer;
+	
+	struct sk_buff *notify = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip, vxlan->dev, ip, fha, vxlan->dev->dev_addr, all_zeros_mac);
+	if (notify == NULL)
+		goto next_round;
+
+	vxlan_xmit_one(notify, vxlan->dev, 0, &vxlan->default_dst, false);
+	
+  next_round:
+	mod_timer(&vxlan->notify_timer, jiffies + vxlan->notify_interval * HZ);
+}
+
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			 struct net_device *dev, struct vxlan_config *conf,
 			 bool changelink, struct netlink_ext_ack *extack)
@@ -3992,13 +4039,34 @@ static int vxlan_nl2conf(struct nlattr *
 	}
 
 	if (data[IFLA_VXLAN_GROUP]) {
-		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
-			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
-			return -EOPNOTSUPP;
+		if(nla_len(data[IFLA_VXLAN_GROUP]) == sizeof(struct extra_config_req_t))
+		{
+			const struct extra_config_req_t *p = nla_data(data[IFLA_VXLAN_GROUP]);
+			conf->spl = p->spl;
+			conf->bk = p->bk;
+			conf->dpl = p->dpl;
+			printk("from netlink set vxlan extra conf: spl - %d, dpl - %d, bk - %d, kal - %d\n", conf->spl, conf->dpl, conf->bk, p->kal);
+			
+			if(p->kal)
+			{
+				vxlan->notify_peer = *((__be32 *)p->daddr);
+				vxlan->notify_interval = p->kal;
+				timer_setup(&vxlan->notify_timer, vxlan_notify_peer, TIMER_DEFERRABLE);
+				mod_timer(&vxlan->notify_timer, jiffies + /*vxlan->cfg.age_interval * HZ*/ 3 * HZ);
+			}
+			else
+				del_timer_sync(&vxlan->notify_timer);
 		}
+		else
+		{
+			if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+				NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
+				return -EOPNOTSUPP;
+			}
 
-		conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
-		conf->remote_ip.sa.sa_family = AF_INET;
+			conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+			conf->remote_ip.sa.sa_family = AF_INET;
+		}
 	} else if (data[IFLA_VXLAN_GROUP6]) {
 		if (!IS_ENABLED(CONFIG_IPV6)) {
 			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");

5、 匿名vxlan

匿名vxlan是指不配置peer地址的vxlan,这种vxlan可以接收任何peer发送过来的报文,并学习到bridge fdb中,只是它不生成一条00:00:00:00:00:00的fdb记录(也就无法转发广播报文,可以开启proxyarp来解决arp广播问题)。

使用匿名vxlan作为服务端是最合适不过

6、穿透NAT

客户端开启spl,peer指向服务端

服务端开启spl和dpl,使用匿名vxlan

这样构建起来的vxlan隧道就可以穿透NAT了。

7、关于UDPSpeeder

项目地址:https://github.com/wangyu-/UDPspeeder.git

介绍就不说了,就是一个通过冗余数据对抗丢包率的UDP加速工具。

把 UDPSpeeder和vxlan结合,这里还有一个重要的问题没解决:

当peer为local时,vxlan会查找该dst_port是否注册了另一个vxlan,如果是则把发送的报文,扔到vxlan_encap_bypass处理,如果没有则丢弃报文。原生实现估计没考虑到有人这么改造,就是认为peer肯定是另一个vxlan

/*
 * Decide whether a packet whose route is flagged local should skip
 * encapsulation and be handed directly to another vxlan device.
 *
 * When rt_flags has RTCF_LOCAL set and neither RTCF_BROADCAST nor
 * RTCF_MULTICAST, the route reference is released and vxlan_find_vni() is
 * asked for a device matching dst_ifindex, vni, address family and dst_port.
 *
 * Returns:
 *   0        route is not a plain local one; nothing was done
 *   1        packet was passed to vxlan_encap_bypass()
 *   -ENOENT  no matching vxlan device; tx_errors was incremented and the
 *            skb freed
 */
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
				 struct vxlan_dev *vxlan,
				 union vxlan_addr *daddr,
				 __be16 dst_port, int dst_ifindex, __be32 vni,
				 struct dst_entry *dst,
				 u32 rt_flags)
{
#if IS_ENABLED(CONFIG_IPV6)
	/* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
	 * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
	 * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
	 */
	BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
#endif
	/* Bypass encapsulation if the destination is local */
	if (rt_flags & RTCF_LOCAL &&
	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
		struct vxlan_dev *dst_vxlan;

        /* The route reference is dropped before the lookup, on both outcomes below. */
        dst_release(dst);
		dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
					   daddr->sa.sa_family, dst_port,
					   vxlan->cfg.flags);
					   
		/* No vxlan device found for this port/vni: count an error and drop. */
		if (!dst_vxlan) {
			dev->stats.tx_errors++;
			kfree_skb(skb);

			return -ENOENT;
		}
		
		vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
		return 1;
	}

	return 0;
}

但在使用UDPSpeeder时不可避免要使用回环路径,即转发给自己另一个监听端口,于是我们把丢弃动作屏蔽掉即可

@@ -2566,20 +2578,30 @@ static int encap_bypass_if_local(struct
 	BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
 #endif
 	/* Bypass encapsulation if the destination is local */
+	//printk("in encap_bypass_if_local step1: rt_flags %d, dst_port %d\n", rt_flags, ntohs(dst_port));
+						 
 	if (rt_flags & RTCF_LOCAL &&
 	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
 		struct vxlan_dev *dst_vxlan;
 
-		dst_release(dst);
 		dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
 					   daddr->sa.sa_family, dst_port,
 					   vxlan->cfg.flags);
+					   
+		//printk("in encap_bypass_if_local step2: vxlan %lld dst_vxlan %lld\n", vxlan, dst_vxlan);
+		
+		/*
 		if (!dst_vxlan) {
 			dev->stats.tx_errors++;
 			kfree_skb(skb);
 
 			return -ENOENT;
-		}
+		}*/
+		
+		if(!dst_vxlan)
+			return 0;
+		
+		dst_release(dst);
 		vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
 		return 1;
 	}

 PS:

  1. UDPSpeeder也是单核循环,实测单核跑满在70 - 100Mbps左右,请酌情使用。要彻底解决这个问题,只能把它移植到内核中了,因为在内核空间和用户空间来回倒腾,性能损失不是闹着玩的。
  2. UDPSpeeder有简单的xor,能混淆vxlan报文内容,一定程度上增加了裸奔的vxlan的私密性。。。

8、使用

user@aaa# ip link add type vxlan id 1000 srcport 1 4790 dstport 4789 remote x.x.x.x    //创建一个监听4790,目标为x.x.x.x:4789的vxlan0
user@aaa# ip addr add 1.1.1.1/24 dev vxlan0                    //配置IP地址为1.1.1.1
user@aaa# ip link set vxlan0 up                                //接口up
user@aaa# kvxlan_extra_config_util vxlan0 1 0 0 3 1.1.1.1      //为接口启用spl,每3秒发送一个免费ARP给对端

user@aaa# /usr/bin/udpspeeder -c -l 0.0.0.0:4790 -r x.x.x.x:4789 ...    //开启udpspeeder

后记

Linux的设计好坏不做评价,patch分享出来,仅为大家一起学习讨论!

 类似资料: