k8s DNS access failures caused by a blackhole route

司空锋
2023-12-01

Environment

A three-node k8s cluster:

[root@kube-196 ~]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
kube-195   Ready    <none>   8d    v1.14.2
kube-196   Ready    master   12d   v1.14.1
kube-197   Ready    <none>   12d   v1.14.1

The network stack is flannel host-gw plus kube-proxy in ipvs mode.
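A quick way to double-check this on any node is shown below. These commands are not part of the original capture; they assume the stock kube-flannel manifest (configmap kube-flannel-cfg), kube-proxy's default metrics port 10249, and that ipvsadm is installed:

[root@kube-196 ~]# kubectl -n kube-system get cm kube-flannel-cfg -o jsonpath='{.data.net-conf\.json}'   # flannel backend: host-gw vs vxlan
[root@kube-196 ~]# curl -s http://127.0.0.1:10249/proxyMode                                              # kube-proxy mode: ipvs or iptables
[root@kube-196 ~]# ipvsadm -Ln | head                                                                    # ipvs virtual servers, if ipvs is active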

Deployment status

[root@kube-196 ~]# kubectl get all -A -o wide
NAMESPACE     NAME                                             READY   STATUS             RESTARTS   AGE     IP                NODE       NOMINATED NODE   READINESS GATES
default       pod/apm-kafka-0                                  0/1     CrashLoopBackOff   16         47m     10.244.1.16       kube-197   <none>           <none>
default       pod/apm-kafka-zookeeper-0                        1/1     Running            0          47m     10.244.1.15       kube-197   <none>           <none>
default       pod/apm-kafka-zookeeper-1                        1/1     Running            0          46m     10.244.2.31       kube-195   <none>           <none>
default       pod/apm-kafka-zookeeper-2                        1/1     Running            1          45m     10.244.2.32       kube-195   <none>           <none>
 
default       pod/testclient                                   1/1     Running            0          29m     10.244.1.17       kube-197   <none>           <none>
 
kube-system   pod/coredns-5f6bc7cdd5-phjzg                     1/1     Running            0          4d23h   10.244.2.10       kube-195   <none>           <none>
kube-system   pod/coredns-5f6bc7cdd5-tvrx8                     1/1     Running            0          79m     10.244.1.14       kube-197   <none>           <none>
kube-system   pod/coredns-5f6bc7cdd5-v96x8                     1/1     Running            0          99m     10.244.1.13       kube-197   <none>           <none>
kube-system   pod/etcd-kube-196                                1/1     Running            4          12d     192.168.200.196   kube-196   <none>           <none>
kube-system   pod/kube-apiserver-kube-196                      1/1     Running            4          22h     192.168.200.196   kube-196   <none>           <none>
kube-system   pod/kube-controller-manager-kube-196             1/1     Running            4          22h     192.168.200.196   kube-196   <none>           <none>
kube-system   pod/kube-flannel-ds-amd64-24l6b                  1/1     Running            0          81m     192.168.200.196   kube-196   <none>           <none>
kube-system   pod/kube-flannel-ds-amd64-d9tbn                  1/1     Running            2          22h     192.168.200.197   kube-197   <none>           <none>
kube-system   pod/kube-flannel-ds-amd64-scmfp                  1/1     Running            0          22h     192.168.200.195   kube-195   <none>           <none>
kube-system   pod/kube-proxy-5nd72                             1/1     Running            0          80m     192.168.200.196   kube-196   <none>           <none>
kube-system   pod/kube-proxy-qrgnf                             1/1     Running            12         12d     192.168.200.197   kube-197   <none>           <none>
kube-system   pod/kube-proxy-sfgr2                             1/1     Running            0          8d      192.168.200.195   kube-195   <none>           <none>
kube-system   pod/kube-scheduler-kube-196                      1/1     Running            5          12d     192.168.200.196   kube-196   <none>           <none>
 
NAMESPACE     NAME                                   TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)                      AGE     SELECTOR
default       service/apm-kafka                      ClusterIP   10.110.242.169   <none>        9092/TCP                     47m     app=kafka,release=apm-kafka
default       service/apm-kafka-0-external           NodePort    10.101.213.10    <none>        19092:31090/TCP              47m     app=kafka,release=apm-kafka,statefulset.kubernetes.io/pod-name=apm-kafka-0
default       service/apm-kafka-1-external           NodePort    10.111.193.151   <none>        19092:31091/TCP              47m     app=kafka,release=apm-kafka,statefulset.kubernetes.io/pod-name=apm-kafka-1
default       service/apm-kafka-2-external           NodePort    10.96.221.224    <none>        19092:31092/TCP              47m     app=kafka,release=apm-kafka,statefulset.kubernetes.io/pod-name=apm-kafka-2
default       service/apm-kafka-headless             ClusterIP   None             <none>        9092/TCP                     47m     app=kafka,release=apm-kafka
default       service/apm-kafka-zookeeper            ClusterIP   10.107.245.32    <none>        2181/TCP                     47m     app=zookeeper,release=apm-kafka
default       service/apm-kafka-zookeeper-headless   ClusterIP   None             <none>        2181/TCP,3888/TCP,2888/TCP   47m     app=zookeeper,release=apm-kafka
 
default       service/kubernetes                     ClusterIP   10.96.0.1        <none>        443/TCP                      12d     <none>
kube-system   service/kube-dns                       ClusterIP   10.96.0.10       <none>        53/UDP,53/TCP,9153/TCP       4d23h   k8s-app=kube-dns


NAMESPACE     NAME                                     DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR                     AGE   CONTAINERS     IMAGES                                   SELECTOR
kube-system   daemonset.apps/kube-flannel-ds-amd64     3         3         3       3            3           beta.kubernetes.io/arch=amd64     22h   kube-flannel   quay.io/coreos/flannel:v0.11.0-amd64     app=flannel,tier=node
kube-system   daemonset.apps/kube-proxy                3         3         3       3            3           <none>                            12d   kube-proxy     k8s.gcr.io/kube-proxy:v1.14.0            k8s-app=kube-proxy

NAMESPACE     NAME                                         READY   UP-TO-DATE   AVAILABLE   AGE     CONTAINERS                   IMAGES                                                               SELECTOR
kube-system   deployment.apps/coredns                      3/3     3            3           4d23h   coredns                      k8s.gcr.io/coredns:1.5.0                                             k8s-app=kube-dns
 

NAMESPACE     NAME                                                   DESIRED   CURRENT   READY   AGE     CONTAINERS                   IMAGES                                                               SELECTOR
kube-system   replicaset.apps/coredns-5f6bc7cdd5                     3         3         3       4d23h   coredns                      k8s.gcr.io/coredns:1.5.0                                             k8s-app=kube-dns,pod-template-hash=5f6bc7cdd5
 

NAMESPACE   NAME                                   READY   AGE   CONTAINERS     IMAGES
default     statefulset.apps/apm-kafka             0/3     47m   kafka-broker   confluentinc/cp-kafka:5.0.1
default     statefulset.apps/apm-kafka-zookeeper   3/3     47m   zookeeper      registry.cn-hangzhou.aliyuncs.com/appstore/k8szk:v2

As the captures below show, from the testclient container on kube-197 (192.168.200.197) the other containers on that same node are reachable, both by pod IP and by headless service name:

[root@kube-196 ~]# kubectl exec -it testclient /bin/sh 
# ping apm-kafka-headless
PING apm-kafka-headless.default.svc.cluster.local (10.244.1.16): 56 data bytes
64 bytes from 10.244.1.16: icmp_seq=0 ttl=64 time=0.097 ms
64 bytes from 10.244.1.16: icmp_seq=1 ttl=64 time=0.103 ms
64 bytes from 10.244.1.16: icmp_seq=2 ttl=64 time=0.081 ms
^C--- apm-kafka-headless.default.svc.cluster.local ping statistics ---
3 packets transmitted, 3 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.081/0.094/0.103/0.000 ms
# ping apm-kafka-zookeeper-headless 
PING apm-kafka-zookeeper-headless.default.svc.cluster.local (10.244.1.15): 56 data bytes
64 bytes from 10.244.1.15: icmp_seq=0 ttl=64 time=0.116 ms
64 bytes from 10.244.1.15: icmp_seq=1 ttl=64 time=0.109 ms
64 bytes from 10.244.1.15: icmp_seq=2 ttl=64 time=0.100 ms
64 bytes from 10.244.1.15: icmp_seq=3 ttl=64 time=0.098 ms
64 bytes from 10.244.1.15: icmp_seq=4 ttl=64 time=0.093 ms
64 bytes from 10.244.1.15: icmp_seq=5 ttl=64 time=0.107 ms
64 bytes from 10.244.1.15: icmp_seq=6 ttl=64 time=0.103 ms
^C--- apm-kafka-zookeeper-headless.default.svc.cluster.local ping statistics ---
7 packets transmitted, 7 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.093/0.104/0.116/0.000 ms
 
# ping apm-kafka-zookeeper-headless
PING apm-kafka-zookeeper-headless.default.svc.cluster.local (10.244.1.15): 56 data bytes
64 bytes from 10.244.1.15: icmp_seq=0 ttl=64 time=0.095 ms
64 bytes from 10.244.1.15: icmp_seq=1 ttl=64 time=0.099 ms
64 bytes from 10.244.1.15: icmp_seq=2 ttl=64 time=0.118 ms
64 bytes from 10.244.1.15: icmp_seq=3 ttl=64 time=0.113 ms
64 bytes from 10.244.1.15: icmp_seq=4 ttl=64 time=0.145 ms
^C--- apm-kafka-zookeeper-headless.default.svc.cluster.local ping statistics ---
5 packets transmitted, 5 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.095/0.114/0.145/0.000 ms
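The lookups above also show that cluster DNS itself resolves the headless service names correctly. To check name resolution separately from connectivity, something like the following can be run inside testclient (not from the original session; nslookup availability depends on the image, and 10.96.0.10 is the kube-dns ClusterIP from the service listing above):

# nslookup apm-kafka-zookeeper-headless
# nslookup apm-kafka-zookeeper-headless.default.svc.cluster.local 10.96.0.10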

But pinging a container on the other node, 192.168.200.195 (pod IP 10.244.2.31), gets no reply:

# ping 10.244.2.31
PING 10.244.2.31 (10.244.2.31): 56 data bytes

Pinging the host 192.168.200.195 itself still works, which means the packets do reach host 195 but are not routed on into the container network there.
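One way to confirm this is to capture on kube-195 while the ping is running. Roughly (commands not from the original session; with the host-gw backend the inner ICMP arrives on eth0, with vxlan it shows up on flannel.1 after decapsulation):

[root@kube-195 ~]# tcpdump -ni flannel.1 icmp and host 10.244.1.17   # traffic from testclient reaching the node
[root@kube-195 ~]# tcpdump -ni cni0 icmp and host 10.244.1.17        # traffic actually forwarded to the local pods

If packets appear in the first capture but never on cni0, they are being dropped by the host's routing decision.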

Check the routing table on 195:

[root@kube-195 ~]# ip route
default via 192.168.200.254 dev eth0  proto static  metric 100 
10.244.0.0/24 via 10.244.0.0 dev flannel.1 onlink 
10.244.1.0/24 via 10.244.1.0 dev flannel.1 onlink 
blackhole 10.244.2.0/24  proto bird 
10.244.2.0/24 dev cni0  proto kernel  scope link  src 10.244.2.1 
169.254.169.254 via 192.168.200.130 dev eth0  proto dhcp  metric 100 
192.168.82.0/24 dev docker0  proto kernel  scope link  src 192.168.82.1 
192.168.200.0/24 dev eth0  proto kernel  scope link  src 192.168.200.195  metric 100 

Notice this route:

blackhole 10.244.2.0/24  proto bird

It is listed before

10.244.2.0/24 dev cni0  proto kernel  scope link  src 10.244.2.1

in the table. As a result, every packet on host 195 destined for the 10.244.2.0/24 subnet matches the blackhole entry first and is silently dropped.
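The kernel's verdict can be checked directly with a single-destination lookup; when the lookup hits a blackhole route, ip route get typically reports an error instead of a next hop, which makes it easy to see which of the two 10.244.2.0/24 entries wins:

[root@kube-195 ~]# ip route get 10.244.2.31   # route lookup for one destination, as the kernel would perform it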

[root@kube-195 ~]# ip route del 10.244.2.0/24

This removes the blackhole route.
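Note that ip route del 10.244.2.0/24 deletes the first matching entry, which in this case is the blackhole route. To be explicit about which of the two same-prefix entries is removed, the route type and protocol can be spelled out; the following should be equivalent:

[root@kube-195 ~]# ip route del blackhole 10.244.2.0/24 proto bird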

[root@kube-195 ~]# ip route
default via 192.168.200.254 dev eth0  proto static  metric 100 
10.244.0.0/24 via 10.244.0.0 dev flannel.1 onlink 
10.244.1.0/24 via 10.244.1.0 dev flannel.1 onlink 
10.244.2.0/24 dev cni0  proto kernel  scope link  src 10.244.2.1 
169.254.169.254 via 192.168.200.130 dev eth0  proto dhcp  metric 100 
192.168.82.0/24 dev docker0  proto kernel  scope link  src 192.168.82.1 
192.168.200.0/24 dev eth0  proto kernel  scope link  src 192.168.200.195  metric 100 

After that, the ping goes through (this time the headless name resolves to the pod on kube-195, 10.244.2.31):

# ping apm-kafka-zookeeper-headless
PING apm-kafka-zookeeper-headless.default.svc.cluster.local (10.244.2.31): 56 data bytes
64 bytes from 10.244.2.31: icmp_seq=0 ttl=62 time=0.920 ms
64 bytes from 10.244.2.31: icmp_seq=1 ttl=62 time=0.765 ms
64 bytes from 10.244.2.31: icmp_seq=2 ttl=62 time=0.940 ms
64 bytes from 10.244.2.31: icmp_seq=3 ttl=62 time=0.924 ms
^C--- apm-kafka-zookeeper-headless.default.svc.cluster.local ping statistics ---
4 packets transmitted, 4 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.765/0.887/0.940/0.071 ms

Calico has a similar known issue:
https://github.com/projectcalico/calico/issues/2457
Our cluster previously had Calico installed, and the proto bird tag shows this blackhole route was installed by Calico's BIRD daemon for local access to the node's own pod CIDR block; it was simply left behind after Calico was removed.
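Since the route carries the proto bird tag, other leftovers from that Calico installation may still be around. A few places worth checking (a sketch; exact names depend on the Calico version that was installed):

[root@kube-195 ~]# ip route show proto bird          # any other routes installed by BIRD
[root@kube-195 ~]# ip link show tunl0                # Calico's IPIP tunnel device, if IPIP was enabled
[root@kube-195 ~]# iptables-save | grep -c 'cali-'   # leftover Felix iptables chains and rules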

Reference

A blackhole route can also be used to block unwanted logins.

In the world of systems administration, having the commands to add or remove a blackhole / nullroute for a host or problematic IP address can be very useful. So in this simple yet useful tutorial, we'll cover exactly how to go about doing this on nearly any type of Linux OS!

  • How to add a blackhole nullroute:

For this example, let’s assume that we are receiving unwanted SSH login attempts from 192.168.0.195 .

root@server:~# netstat -na | grep :22
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN
tcp 0 0 192.168.0.197:22 192.168.0.195:57776 ESTABLISHED

To add the blackhole for 192.168.0.195:

root@server:~# ip route add blackhole 192.168.0.195/32

To verify the route is in place we will use "ip route show":

root@server:~# ip route show
default via 192.168.0.1 dev eth0 metric 100
blackhole 192.168.0.195
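On a host with a long routing table, the output can also be filtered by route type (standard iproute2 syntax):

root@server:~# ip route show type blackhole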

Subsequently, connections to and from that IP will fail:

root@attacker:~$ ssh 192.168.0.2
ssh: connect to host 192.168.0.2 port 22: No route to host

  • Removing a blackhole:

Typically, blackholes are useful when your server is under attack. Once the attack has subsided, or whenever you simply wish to remove the blackhole, you can do so as follows:

root@server:~# ip route del 192.168.0.195
root@server:~# ip route show
default via 192.168.0.1 dev eth0 metric 100