A three-node Kubernetes cluster:
[root@kube-196 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
kube-195 Ready <none> 8d v1.14.2
kube-196 Ready master 12d v1.14.1
kube-197 Ready <none> 12d v1.14.1
The networking is flannel host-gw plus kube-proxy in IPVS mode.
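The backend and proxy mode can be double-checked roughly as follows (a sketch, not part of the original session; the ConfigMap names assume a stock kubeadm + flannel install):
[root@kube-196 ~]# kubectl -n kube-system get cm kube-flannel-cfg -o jsonpath='{.data.net-conf\.json}'
[root@kube-196 ~]# kubectl -n kube-system get cm kube-proxy -o yaml | grep mode
[root@kube-196 ~]# ipvsadm -Ln | head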
[root@kube-196 ~]# kubectl get all -A -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
default pod/apm-kafka-0 0/1 CrashLoopBackOff 16 47m 10.244.1.16 kube-197 <none> <none>
default pod/apm-kafka-zookeeper-0 1/1 Running 0 47m 10.244.1.15 kube-197 <none> <none>
default pod/apm-kafka-zookeeper-1 1/1 Running 0 46m 10.244.2.31 kube-195 <none> <none>
default pod/apm-kafka-zookeeper-2 1/1 Running 1 45m 10.244.2.32 kube-195 <none> <none>
default pod/testclient 1/1 Running 0 29m 10.244.1.17 kube-197 <none> <none>
kube-system pod/coredns-5f6bc7cdd5-phjzg 1/1 Running 0 4d23h 10.244.2.10 kube-195 <none> <none>
kube-system pod/coredns-5f6bc7cdd5-tvrx8 1/1 Running 0 79m 10.244.1.14 kube-197 <none> <none>
kube-system pod/coredns-5f6bc7cdd5-v96x8 1/1 Running 0 99m 10.244.1.13 kube-197 <none> <none>
kube-system pod/etcd-kube-196 1/1 Running 4 12d 192.168.200.196 kube-196 <none> <none>
kube-system pod/kube-apiserver-kube-196 1/1 Running 4 22h 192.168.200.196 kube-196 <none> <none>
kube-system pod/kube-controller-manager-kube-196 1/1 Running 4 22h 192.168.200.196 kube-196 <none> <none>
kube-system pod/kube-flannel-ds-amd64-24l6b 1/1 Running 0 81m 192.168.200.196 kube-196 <none> <none>
kube-system pod/kube-flannel-ds-amd64-d9tbn 1/1 Running 2 22h 192.168.200.197 kube-197 <none> <none>
kube-system pod/kube-flannel-ds-amd64-scmfp 1/1 Running 0 22h 192.168.200.195 kube-195 <none> <none>
kube-system pod/kube-proxy-5nd72 1/1 Running 0 80m 192.168.200.196 kube-196 <none> <none>
kube-system pod/kube-proxy-qrgnf 1/1 Running 12 12d 192.168.200.197 kube-197 <none> <none>
kube-system pod/kube-proxy-sfgr2 1/1 Running 0 8d 192.168.200.195 kube-195 <none> <none>
kube-system pod/kube-scheduler-kube-196 1/1 Running 5 12d 192.168.200.196 kube-196 <none> <none>
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
default service/apm-kafka ClusterIP 10.110.242.169 <none> 9092/TCP 47m app=kafka,release=apm-kafka
default service/apm-kafka-0-external NodePort 10.101.213.10 <none> 19092:31090/TCP 47m app=kafka,release=apm-kafka,statefulset.kubernetes.io/pod-name=apm-kafka-0
default service/apm-kafka-1-external NodePort 10.111.193.151 <none> 19092:31091/TCP 47m app=kafka,release=apm-kafka,statefulset.kubernetes.io/pod-name=apm-kafka-1
default service/apm-kafka-2-external NodePort 10.96.221.224 <none> 19092:31092/TCP 47m app=kafka,release=apm-kafka,statefulset.kubernetes.io/pod-name=apm-kafka-2
default service/apm-kafka-headless ClusterIP None <none> 9092/TCP 47m app=kafka,release=apm-kafka
default service/apm-kafka-zookeeper ClusterIP 10.107.245.32 <none> 2181/TCP 47m app=zookeeper,release=apm-kafka
default service/apm-kafka-zookeeper-headless ClusterIP None <none> 2181/TCP,3888/TCP,2888/TCP 47m app=zookeeper,release=apm-kafka
default service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 12d <none>
kube-system service/kube-dns ClusterIP 10.96.0.10 <none> 53/UDP,53/TCP,9153/TCP 4d23h k8s-app=kube-dns
NAMESPACE NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE CONTAINERS IMAGES SELECTOR
kube-system daemonset.apps/kube-flannel-ds-amd64 3 3 3 3 3 beta.kubernetes.io/arch=amd64 22h kube-flannel quay.io/coreos/flannel:v0.11.0-amd64 app=flannel,tier=node
kube-system daemonset.apps/kube-proxy 3 3 3 3 3 <none> 12d kube-proxy k8s.gcr.io/kube-proxy:v1.14.0 k8s-app=kube-proxy
NAMESPACE NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR
kube-system deployment.apps/coredns 3/3 3 3 4d23h coredns k8s.gcr.io/coredns:1.5.0 k8s-app=kube-dns
NAMESPACE NAME DESIRED CURRENT READY AGE CONTAINERS IMAGES SELECTOR
kube-system replicaset.apps/coredns-5f6bc7cdd5 3 3 3 4d23h coredns k8s.gcr.io/coredns:1.5.0 k8s-app=kube-dns,pod-template-hash=5f6bc7cdd5
NAMESPACE NAME READY AGE CONTAINERS IMAGES
default statefulset.apps/apm-kafka 0/3 47m kafka-broker confluentinc/cp-kafka:5.0.1
default statefulset.apps/apm-kafka-zookeeper 3/3 47m zookeeper registry.cn-hangzhou.aliyuncs.com/appstore/k8szk:v2
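Note that apm-kafka-0 is stuck in CrashLoopBackOff, presumably because the broker cannot reach zookeeper on the other node. A first look at the crashing pod (a sketch, not captured in the original transcript) could be:
[root@kube-196 ~]# kubectl logs apm-kafka-0 --previous | tail -n 20
[root@kube-196 ~]# kubectl describe pod apm-kafka-0 | tail -n 20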
From the testclient container on host 192.168.200.197, pinging the other containers on kube-197 by IP or by headless service name works:
[root@kube-196 ~]# kubectl exec -it testclient /bin/sh
# ping apm-kafka-headless
PING apm-kafka-headless.default.svc.cluster.local (10.244.1.16): 56 data bytes
64 bytes from 10.244.1.16: icmp_seq=0 ttl=64 time=0.097 ms
64 bytes from 10.244.1.16: icmp_seq=1 ttl=64 time=0.103 ms
64 bytes from 10.244.1.16: icmp_seq=2 ttl=64 time=0.081 ms
^C--- apm-kafka-headless.default.svc.cluster.local ping statistics ---
3 packets transmitted, 3 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.081/0.094/0.103/0.000 ms
# ping apm-kafka-zookeeper-headless
PING apm-kafka-zookeeper-headless.default.svc.cluster.local (10.244.1.15): 56 data bytes
64 bytes from 10.244.1.15: icmp_seq=0 ttl=64 time=0.116 ms
64 bytes from 10.244.1.15: icmp_seq=1 ttl=64 time=0.109 ms
64 bytes from 10.244.1.15: icmp_seq=2 ttl=64 time=0.100 ms
64 bytes from 10.244.1.15: icmp_seq=3 ttl=64 time=0.098 ms
64 bytes from 10.244.1.15: icmp_seq=4 ttl=64 time=0.093 ms
64 bytes from 10.244.1.15: icmp_seq=5 ttl=64 time=0.107 ms
64 bytes from 10.244.1.15: icmp_seq=6 ttl=64 time=0.103 ms
^C--- apm-kafka-zookeeper-headless.default.svc.cluster.local ping statistics ---
7 packets transmitted, 7 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.093/0.104/0.116/0.000 ms
# ping apm-kafka-zookeeper-headless
PING apm-kafka-zookeeper-headless.default.svc.cluster.local (10.244.1.15): 56 data bytes
64 bytes from 10.244.1.15: icmp_seq=0 ttl=64 time=0.095 ms
64 bytes from 10.244.1.15: icmp_seq=1 ttl=64 time=0.099 ms
64 bytes from 10.244.1.15: icmp_seq=2 ttl=64 time=0.118 ms
64 bytes from 10.244.1.15: icmp_seq=3 ttl=64 time=0.113 ms
64 bytes from 10.244.1.15: icmp_seq=4 ttl=64 time=0.145 ms
^C--- apm-kafka-zookeeper-headless.default.svc.cluster.local ping statistics ---
5 packets transmitted, 5 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.095/0.114/0.145/0.000 ms
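Cluster DNS returns one A record per ready pod for a headless service, so the full backend list can also be checked directly (a sketch; nslookup assumes the testclient image ships it, and the endpoints query is run from a node with kubectl):
# nslookup apm-kafka-zookeeper-headless
[root@kube-196 ~]# kubectl get endpoints apm-kafka-zookeeper-headless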
However, pinging the container 10.244.2.31 on host 192.168.200.195 fails:
# ping 10.244.2.31
PING 10.244.2.31 (10.244.2.31): 56 data bytes
Pinging the 192.168.200.195 host itself does work, which means the packets reach host 195 but are not routed into the container.
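One way to confirm this (a sketch, assuming tcpdump is available on kube-195) is to capture on the node while the ping runs: if the ICMP requests show up on the flannel interface (or on eth0 in host-gw mode) but never on cni0, the packets are being dropped by the host's routing decision.
[root@kube-195 ~]# tcpdump -nn -i flannel.1 icmp
[root@kube-195 ~]# tcpdump -nn -i cni0 icmp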
Check the routes on 195:
[root@kube-195 ~]# ip route
default via 192.168.200.254 dev eth0 proto static metric 100
10.244.0.0/24 via 10.244.0.0 dev flannel.1 onlink
10.244.1.0/24 via 10.244.1.0 dev flannel.1 onlink
blackhole 10.244.2.0/24 proto bird
10.244.2.0/24 dev cni0 proto kernel scope link src 10.244.2.1
169.254.169.254 via 192.168.200.130 dev eth0 proto dhcp metric 100
192.168.82.0/24 dev docker0 proto kernel scope link src 192.168.82.1
192.168.200.0/24 dev eth0 proto kernel scope link src 192.168.200.195 metric 100
There is a rule
blackhole 10.244.2.0/24 proto bird
which comes before
10.244.2.0/24 dev cni0 proto kernel scope link src 10.244.2.1
so on host 195 every packet destined for the 10.244.2.0/24 subnet is dropped.
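A quick way to see which of the two overlapping 10.244.2.0/24 routes the kernel actually selects for that destination is ip route get (not part of the original session); with the blackhole rule in place it should report an error rather than the expected dev cni0 route:
[root@kube-195 ~]# ip route get 10.244.2.31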
Delete the blackhole route:
[root@kube-195 ~]# ip route del 10.244.2.0/24
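Since two routes now match 10.244.2.0/24, ip route del with only the prefix removes the first matching entry, which here is the blackhole rule (the output below confirms it). Spelling out the route type makes the intent explicit:
[root@kube-195 ~]# ip route del blackhole 10.244.2.0/24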
[root@kube-195 ~]# ip route
default via 192.168.200.254 dev eth0 proto static metric 100
10.244.0.0/24 via 10.244.0.0 dev flannel.1 onlink
10.244.1.0/24 via 10.244.1.0 dev flannel.1 onlink
10.244.2.0/24 dev cni0 proto kernel scope link src 10.244.2.1
169.254.169.254 via 192.168.200.130 dev eth0 proto dhcp metric 100
192.168.82.0/24 dev docker0 proto kernel scope link src 192.168.82.1
192.168.200.0/24 dev eth0 proto kernel scope link src 192.168.200.195 metric 100
Now the ping goes through:
# ping apm-kafka-zookeeper-headless
PING apm-kafka-zookeeper-headless.default.svc.cluster.local (10.244.2.31): 56 data bytes
64 bytes from 10.244.2.31: icmp_seq=0 ttl=62 time=0.920 ms
64 bytes from 10.244.2.31: icmp_seq=1 ttl=62 time=0.765 ms
64 bytes from 10.244.2.31: icmp_seq=2 ttl=62 time=0.940 ms
64 bytes from 10.244.2.31: icmp_seq=3 ttl=62 time=0.924 ms
^C--- apm-kafka-zookeeper-headless.default.svc.cluster.local ping statistics ---
4 packets transmitted, 4 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.765/0.887/0.940/0.071 ms
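With cross-node pod traffic restored, apm-kafka-0 should be able to reach zookeeper again and eventually leave CrashLoopBackOff; hypothetically this can be watched with:
[root@kube-196 ~]# kubectl get pods -l app=kafka -w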
It turns out calico networking has a similar issue:
https://github.com/projectcalico/calico/issues/2457
This is because our cluster previously had calico installed as well; the rule (note the proto bird) is a calico leftover, applied by calico to the node's own pod subnet for local access.
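If calico was uninstalled but left state behind, it is worth checking for other leftovers as well (a sketch; the paths assume a standard calico install):
[root@kube-195 ~]# ip route show proto bird
[root@kube-195 ~]# ip link show | grep -E 'cali|tunl0'
[root@kube-195 ~]# ls /etc/cni/net.d/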
Blackhole routes can also be used to block unwanted access, such as SSH logins from a particular host.
In the world of systems administration, knowing how to add or remove a blackhole / null route for a problematic host or IP address can be very useful. This short tutorial covers how to do exactly that on nearly any Linux distribution.
For this example, let's assume that we are receiving unwanted SSH login attempts from 192.168.0.195.
root@server:~# netstat -na | grep :22
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN
tcp 0 0 192.168.0.197:22 192.168.0.195:57776 ESTABLISHED
To add the blackhole for 192.168.0.195:
root@server:~# ip route add blackhole 192.168.0.195/32
To verify the route is in place, we will use "ip route show":
root@server:~# ip route show
default via 192.168.0.1 dev eth0 metric 100
blackhole 192.168.0.195
Subsequently, connections to and from that IP will fail:
root@attacker:~$ ssh 192.168.0.197
ssh: connect to host 192.168.0.197 port 22: No route to host
Typically, blackhole routes are most useful while your server is under attack. Once the attack has subsided, or whenever you want to remove the blackhole, you can do so as follows:
root@server:~# ip route del 192.168.0.195
root@server:~# ip route show
default via 192.168.0.1 dev eth0 metric 100