
k8s Networking Basics: the Calico Network Plugin

裴欣荣
2023-12-01

Lesson 10: k8s Networking Basics - the Calico Network Plugin

tags:

  • k8s networking
  • calico
  • proxy arp
  • bgp full mesh
  • bgp RR

Section 1: Introduction to Calico

1.1 Installing Calico

  1. Calico docs: https://projectcalico.docs.tigera.io/about/about-calico
  2. Download the Calico manifest: https://docs.projectcalico.org/manifests/calico.yaml
  3. The default mode is IPIP. Note that VXLAN mode does not need BGP at all, but IPIP mode does.
data:
  # Typha is disabled.
  typha_service_name: "none" 
  # Configure the backend to use.
  calico_backend: "bird" # optional in VXLAN mode; required in IPIP mode

containers:
        # Runs calico-node container on each Kubernetes node. This
        # container programs network policy and routes on each
        # host.
        - name: calico-node
          image: docker.io/calico/node:v3.18.1
          envFrom:
          - configMapRef:
              # Allow KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT to be overridden for eBPF mode.
              name: kubernetes-services-endpoint
              optional: true
          env:
            # Use Kubernetes API as the backing datastore.
            - name: DATASTORE_TYPE
              value: "kubernetes"
            # Wait for the datastore.
            - name: WAIT_FOR_DATASTORE
              value: "true"
            # Set based on the k8s node name.
            - name: NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Choose the backend to use.
            - name: CALICO_NETWORKING_BACKEND
              valueFrom:
                configMapKeyRef:
                  name: calico-config
                  key: calico_backend
            # Cluster type to identify the deployment type
            - name: CLUSTER_TYPE
              value: "k8s,bgp"
            # Auto-detect the BGP IP address.
            - name: IP
              value: "autodetect"
            # Enable IPIP
            - name: CALICO_IPV4POOL_IPIP                            # IPIP mode setting
              value: "Always"                                       # allowed values: Always, Never, CrossSubnet
            # Enable or Disable VXLAN on the default IP pool. 
            - name: CALICO_IPV4POOL_VXLAN                           # VXLAN mode setting
              value: "Never"                                        # allowed values: Always, Never, CrossSubnet
            # Set MTU for tunnel device used if ipip is enabled
            - name: FELIX_IPINIPMTU
              valueFrom:
                configMapKeyRef:
                  name: calico-config
                  key: veth_mtu
            # Set MTU for the VXLAN tunnel device.
            - name: FELIX_VXLANMTU
              valueFrom:
                configMapKeyRef:
                  name: calico-config
                  key: veth_mtu
            # Set MTU for the Wireguard tunnel device.
            - name: FELIX_WIREGUARDMTU
              valueFrom:
                configMapKeyRef:
                  name: calico-config
                  key: veth_mtu
            # The default IPv4 pool to create on startup if none exists. Pod IPs will be
            # chosen from this range. Changing this value after installation will have
            # no effect. This should fall within `--cluster-cidr`.
            # - name: CALICO_IPV4POOL_CIDR
            #   value: "192.168.0.0/16"
            # Disable file logging so `kubectl logs` works.
            - name: CALICO_DISABLE_FILE_LOGGING
              value: "true"
            # Set Felix endpoint to host default action to ACCEPT.
            - name: FELIX_DEFAULTENDPOINTTOHOSTACTION
              value: "ACCEPT"
            # Disable IPv6 on Kubernetes.
            - name: FELIX_IPV6SUPPORT
              value: "false"
            # Set Felix logging to "info"
            - name: FELIX_LOGSEVERITYSCREEN
              value: "info"
            - name: FELIX_HEALTHENABLED
              value: "true"
          securityContext:
            privileged: true # run the pod in privileged mode
  4. Install the calicoctl tool.
# Install calicoctl
curl -O -L https://github.com/projectcalico/calicoctl/releases/download/v3.18.1/calicoctl
chmod +x calicoctl
mv calicoctl /usr/local/bin

export DATASTORE_TYPE=kubernetes
export KUBECONFIG=~/.kube/config

# Some commonly used commands
calicoctl node status
calicoctl get ippool -o wide
# SELECTOR shows which nodes this pool applies to for IPAM
NAME                  CIDR            NAT    IPIPMODE   VXLANMODE   DISABLED   SELECTOR
default-ipv4-ippool   10.244.0.0/16   true   Always     Never       false      all()
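The CALICO_IPV4POOL_* env vars only shape the pool when it is first created; afterwards the mode lives on the IPPool resource and can be patched with calicoctl. A hedged sketch, assuming the pool is named default-ipv4-ippool as above:

# Switch the default pool from Always to CrossSubnet IPIP after installation (sketch)
calicoctl patch ippool default-ipv4-ippool -p '{"spec": {"ipipMode": "CrossSubnet"}}'
# Confirm the change
calicoctl get ippool default-ipv4-ippool -o yaml | grep ipipMode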

Section 2: Calico IPIP Mode

2.1 Pod-to-pod communication on the same node

  1. Create test pods.
kubectl run cc --image=burlyluo/nettoolbox:v3
kubectl run cc1 --image=burlyluo/nettoolbox:v3
kubectl run cc2 --image=burlyluo/nettoolbox:v3
  2. Exec into one of the pods on node1 and inspect.
[root@k8s-master ~]# kubectl exec -it cc bash
# Note Mask:255.255.255.255: the /32 mask means this host is not on the same subnet as any other host
bash-5.0# ifconfig
eth0      Link encap:Ethernet  HWaddr 76:4B:D7:80:EA:20
          inet addr:10.244.36.66  Bcast:0.0.0.0  Mask:255.255.255.255
          UP BROADCAST RUNNING MULTICAST  MTU:1480  Metric:1
          RX packets:5 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0
          RX bytes:446 (446.0 B)  TX bytes:0 (0.0 B)

lo        Link encap:Local Loopback
          inet addr:127.0.0.1  Mask:255.0.0.0
          UP LOOPBACK RUNNING  MTU:65536  Metric:1
          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)

# Pinging 10.244.36.67 on the same node: the routing table says to use the MAC of 169.254.1.1, but no interface on the host owns that address, and eth0 is not a tunl device so it cannot do raw IP encapsulation
bash-5.0# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         169.254.1.1     0.0.0.0         UG    0      0        0 eth0
169.254.1.1     0.0.0.0         255.255.255.255 UH    0      0        0 eth0

# Capture packets first
bash-5.0# tcpdump -ne -i eth0
14:54:30.652308 76:4b:d7:80:ea:20 > ff:ff:ff:ff:ff:ff, ethertype ARP (0x0806), length 42: Request who-has 169.254.1.1 tell 10.244.36.66, length 28
14:54:30.652353 ee:ee:ee:ee:ee:ee > 76:4b:d7:80:ea:20, ethertype ARP (0x0806), length 42: Reply 169.254.1.1 is-at ee:ee:ee:ee:ee:ee, length 28
14:54:30.652356 76:4b:d7:80:ea:20 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 98: 10.244.36.66 > 10.244.36.67: ICMP echo request, id 6144, seq 0, length 64
14:54:30.652508 ee:ee:ee:ee:ee:ee > 76:4b:d7:80:ea:20, ethertype IPv4 (0x0800), length 98: 10.244.36.67 > 10.244.36.66: ICMP echo reply, id 6144, seq 0, length 64

# eth0 must hand traffic to its veth peer, so find the peer first
bash-5.0# ethtool -S eth0
NIC statistics:
     peer_ifindex: 6

# On node1. Why not configure 169.254.1.1 on cali70648170f23@if4? Because there are many such interfaces on one machine, and the same address cannot be assigned to all of them
6: cali70648170f23@if4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1480 qdisc noqueue state UP group default
    link/ether ee:ee:ee:ee:ee:ee brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet6 fe80::ecee:eeff:feee:eeee/64 scope link
       valid_lft forever preferred_lft forever
       
# So why does the ping still succeed? The answer is proxy ARP. Official explanation: docs.projectcalico.org/reference/faq
# With flannel, same-node pod traffic is layer-2 switched through the cni bridge; Calico forwards at layer 3, because the mask is /32. See the proxy ARP experiment in Lesson 3
# Calico delivers the packet to the veth peer in the root namespace, cali70648170f23@if4
[root@k8s-node1 ~]# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         192.168.44.2    0.0.0.0         UG    100    0        0 ens33
10.244.36.64    0.0.0.0         255.255.255.192 U     0      0        0 *
10.244.36.66    0.0.0.0         255.255.255.255 UH    0      0        0 cali70648170f23
10.244.36.67    0.0.0.0         255.255.255.255 UH    0      0        0 cali78a0dc3b7f9
10.244.169.128  192.168.44.131  255.255.255.192 UG    0      0        0 tunl0
10.244.235.192  192.168.44.129  255.255.255.192 UG    0      0        0 tunl0
172.17.0.0      0.0.0.0         255.255.0.0     U     0      0        0 docker0
192.168.44.0    0.0.0.0         255.255.255.0   U     100    0        0 ens33

# The entry "10.244.36.64  0.0.0.0  255.255.255.192  U ... *" above is a blackhole route: traffic for an
# address inside this node's /26 block that has no local pod falls through to it and is dropped, rather
# than going out the default route; it also guards against an address from this block being misused on
# another machine. The per-pod /32 routes below are more specific and take precedence:
10.244.36.66    0.0.0.0         255.255.255.255 UH    0      0        0 cali70648170f23
10.244.36.67    0.0.0.0         255.255.255.255 UH    0      0        0 cali78a0dc3b7f9


# Note that all pod addresses are /32 host addresses, which means no two pods belong to the same subnet. This matters.
# 2. Now simplify the network topology:
              Pod(cni-j7klb)                                      Pod(cnitest)
10.244.36.66/32   MAC:76:4B:D7:80:EA:20         IP:10.244.36.67/32  MAC:FE:92:69:10:B1:04
                  |                                                     |
                   \                                                   /
                    \__________________    Router   __________________/                       
                                         |       |
                               169.254.1.1       169.254.1.1
                                         Proxy_ARP
# From the topology above:
The Linux host acts as a router, so the problem becomes ordinary communication between two hosts behind a router.
# The blackhole route, illustrated with 10.244.36.64/26:
This ties into Calico's IP allocation strategy. Once the /26 IP block 10.244.36.64/26 is bound to machine A, if an address from that block, say 10.244.36.64, later appears on machine B, the blackhole route on A (blackhole 10.244.36.64/26 proto bird) makes every pod on A fail outright when accessing 10.244.36.64. It can be viewed as an auxiliary safeguard against address conflicts.
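To observe the proxy ARP mechanism directly, check the host-side veth; a sketch using the interface name from this environment (adjust to your own):

# On node1: the Calico CNI plugin enables proxy_arp on each cali* interface,
# which is why the host answers the pod's ARP for 169.254.1.1 with ee:ee:ee:ee:ee:ee
cat /proc/sys/net/ipv4/conf/cali70648170f23/proxy_arp   # expect: 1
# Inside the pod: the resolved neighbour entry for the fake gateway
ip neigh show dev eth0                                  # 169.254.1.1 ... ee:ee:ee:ee:ee:ee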

2.2 Pod-to-pod communication across nodes

[root@k8s-node1 ~]# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         192.168.44.2    0.0.0.0         UG    100    0        0 ens33
10.244.36.64    0.0.0.0         255.255.255.192 U     0      0        0 *
10.244.36.66    0.0.0.0         255.255.255.255 UH    0      0        0 cali70648170f23
10.244.36.67    0.0.0.0         255.255.255.255 UH    0      0        0 cali78a0dc3b7f9
10.244.169.128  192.168.44.131  255.255.255.192 UG    0      0        0 tunl0
10.244.235.192  192.168.44.129  255.255.255.192 UG    0      0        0 tunl0
172.17.0.0      0.0.0.0         255.255.0.0     U     0      0        0 docker0
192.168.44.0    0.0.0.0         255.255.255.0   U     100    0        0 ens33

# From this routing table, a ping to 10.244.169.138
# leaves the container over the veth pair exactly as before; the host's routing table then selects tunl0, which encapsulates the packet as raw IP (IPIP). The overall flow mirrors flannel's tunnel mode.
ip -d link show tunl0
# Capture on the tunnel device: the inner pod-to-pod traffic
tcpdump  -ne -i tunl0
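The outer IPIP header is only visible on the physical NIC; a hedged sketch (interface name taken from this environment):

# Outer packets on the physical NIC: node-to-node IPIP (IP protocol 4) carrying the pod-to-pod ICMP inside
tcpdump -ne -i ens33 ip proto 4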

Section 3: Calico VXLAN Mode

3.1 Enabling VXLAN mode

  1. IPIP mode and VXLAN mode cannot be used at the same time. Enable VXLAN as follows.
# First, change the backend
data:
  # Typha is disabled.
  typha_service_name: "none" 
  # Configure the backend to use.
  calico_backend: "vxlan" 
    
# Second, enable VXLAN on the default pool
            # Enable IPIP
            - name: CALICO_IPV4POOL_IPIP                            # IPIP mode setting
              value: "Never"                                        # allowed values: Always, Never, CrossSubnet
            # Enable or Disable VXLAN on the default IP pool. 
            - name: CALICO_IPV4POOL_VXLAN                           # VXLAN mode setting
              value: "Always"                                       # allowed values: Always, Never, CrossSubnet

# Third, adjust the probes (bird is not running in VXLAN mode, so drop the bird checks)
          livenessProbe:
            exec:
              command:
              - /bin/calico-node
              - -felix-live
             # - -bird-live
          readinessProbe:
            exec:
              command:
              - /bin/calico-node
              # - -bird-ready
              - -felix-ready

3.2 Pod networking across nodes

  1. Same-node pod-to-pod traffic works exactly as in IPIP mode: across the veth pair into the root namespace, with proxy ARP resolving the gateway. A verification sketch follows the notes below.
# Inspect the vxlan.calico device; it is a vxlan-type interface
ip -d link show vxlan.calico
# Which node hosts the peer pod is looked up in the FDB table, just like Flannel's VXLAN.
bridge fdb show | grep 66:c8:b2:49:0d:d9 # MAC of the peer node's vxlan.calico
# Flow: proxy ARP pulls the packet onto the host, vxlan.calico adds the VXLAN encapsulation and sends it out via ens33; the peer's ens33 receives it and hands it to its vxlan.calico for kernel decapsulation.
# Calico's docs note this mode runs without BGP; staying close to Flannel's design makes migrating from Flannel to Calico easier.
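A hedged sketch for verifying the VXLAN data path (MAC and interface names are from this environment; Calico uses the standard VXLAN UDP port 4789 by default):

# VTEP forwarding entries: which node IP owns each remote vxlan.calico MAC
bridge fdb show dev vxlan.calico
# Neighbour entries for remote pods resolved over the tunnel
ip neigh show dev vxlan.calico
# Outer packets on the physical NIC: UDP-encapsulated VXLAN frames
tcpdump -ne -i ens33 udp port 4789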

Section 4: Calico BGP Mode

4.1 Configuring BGP full mesh

  1. Because IPIP and VXLAN both pay an encapsulation/decapsulation cost, some performance is lost. Analogous to flannel's host-gw mode, Calico offers a BGP full mesh mode that lets large clusters learn plain routes with no tunneling. See Lesson 7 for the underlying theory and experiments. A verification sketch follows the snippet.
# First, set the backend
data:
  # Typha is disabled.
  typha_service_name: "none" 
  # Configure the backend to use.
  calico_backend: "bird"
    
# Second, disable both tunnel modes on the pool
            # Enable IPIP
            - name: CALICO_IPV4POOL_IPIP                            # IPIP mode setting
              value: "Never"                                        # allowed values: Always, Never, CrossSubnet
            # Enable or Disable VXLAN on the default IP pool. 
            - name: CALICO_IPV4POOL_VXLAN                           # VXLAN mode setting
              value: "Never"                                        # allowed values: Always, Never, CrossSubnet

4.2 Configuring BGP RR mode

  1. BGP route reflection (RR) replaces the full mesh: in an AS with n routers, a full iBGP mesh requires n(n-1)/2 sessions. With many devices the configuration becomes very complex, and both network and CPU resources are heavily consumed. Using route reflectors between iBGP peers solves this. See Lesson 7 for the underlying theory and experiments.
  2. Configuring BGP RR mode in Calico.
# Step 1: disable the node-to-node BGP full mesh
[root@k8s-1 ~]# cat patch-bgp-rr.sh 
# if a default BGPConfiguration already exists, patch it:
#calicoctl patch bgpconfiguration default -p '{"spec": {"nodeToNodeMeshEnabled": false}}'

# if it does not exist, create it:
cat << EOF | calicoctl create -f -
apiVersion: projectcalico.org/v3
kind: BGPConfiguration
metadata:
  name: default
spec:
  logSeverityScreen: Info
  nodeToNodeMeshEnabled: false
  asNumber: 64512
EOF
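After applying, the mesh setting can be confirmed (sketch):

# nodeToNodeMeshEnabled should now read false
calicoctl get bgpconfiguration default -o yaml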

# Step 2: designate a node as the RR, e.g. k8s-1: add a route-reflector label and set routeReflectorClusterID.
[root@k8s-1 ~]# cat 4-config-rr.yaml 
apiVersion: projectcalico.org/v3
kind: Node
metadata:
  creationTimestamp: null
  name: k8s-1
  labels:
    # label the node (label values must be quoted strings)
    i-am-a-route-reflector: "true"
spec:
  bgp:
    ipv4Address: 172.12.1.11/24
    # set the cluster ID
    routeReflectorClusterID: 224.0.0.1
  orchRefs:
  - nodeName: k8s-1
    orchestrator: k8s
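With the Kubernetes datastore, Calico Node labels mirror the Kubernetes node labels, so the label can also be applied with kubectl; a sketch (the routeReflectorClusterID still has to be set on the Calico Node resource as above):

# Label the RR node via kubectl instead of editing the Node YAML
kubectl label node k8s-1 i-am-a-route-reflector=true --overwrite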


# Step 3: create BGPPeer resources that point ordinary nodes at the route reflector.
[root@k8s-1 ~]# cat 3-rr-peer.sh 
cat << EOF | calicoctl create -f -
kind: BGPPeer
apiVersion: projectcalico.org/v3
metadata:
  name: peer-to-rrs
spec:
  # Rule 1: ordinary BGP nodes peer with the RRs (the leading ! requires quoting in YAML)
  nodeSelector: "!has(i-am-a-route-reflector)"
  peerSelector: has(i-am-a-route-reflector)

---
kind: BGPPeer
apiVersion: projectcalico.org/v3
metadata:
  name: rr-mesh
spec:
  # Rule 2: route reflectors also peer with each other
  nodeSelector: has(i-am-a-route-reflector)
  peerSelector: has(i-am-a-route-reflector)
EOF
 
 
# Step 4: check the BGP RR status.
# On the RR:
[root@k8s-1 ~]# calicoctl node status 
Calico process is running.

IPv4 BGP status
+--------------+---------------+-------+------------+-------------+
| PEER ADDRESS |   PEER TYPE   | STATE |   SINCE    |    INFO     |
+--------------+---------------+-------+------------+-------------+
| 172.12.1.12  | node specific | up    | 2021-08-09 | Established |
+--------------+---------------+-------+------------+-------------+

IPv6 BGP status
No IPv6 peers found.
 
# On a BGP client:
[root@k8s-2 ~]# calicoctl node status
Calico process is running.

IPv4 BGP status
+--------------+-----------+-------+----------+-------------+
| PEER ADDRESS | PEER TYPE | STATE |  SINCE   |    INFO     |
+--------------+-----------+-------+----------+-------------+
| 172.12.1.11  | global    | up    | 13:12:37 | Established |
+--------------+-----------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.

# Test environment template:
[root@k8s-1 ~]# cat cni_demo_template.yaml 
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: cni
  name: cni
spec:
  selector:
    matchLabels:
      app: cni
  template:
    metadata:
      labels:
        app: cni
    spec:
      containers:
      - image: burlyluo/nettoolbox
        name: nettoolbox

# Environment:
# k8s-1-[BGP RR]
[root@k8s-1 ~]# kubectl get pods -o wide
NAME        READY   STATUS    RESTARTS   AGE    IP               NODE    NOMINATED NODE   READINESS GATES
cni-78pg6   1/1     Running   1          128d   10.244.231.194   k8s-1   <none>           <none>
cni-gjt72   1/1     Running   1          128d   10.244.200.200   k8s-2   <none>           <none>

# Routing tables:
[root@k8s-1 ~]# route -n 
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         172.12.1.2      0.0.0.0         UG    100    0        0 ens33
10.244.200.192  172.12.1.12     255.255.255.192 UG    0      0        0 ens33
10.244.231.192  0.0.0.0         255.255.255.192 U     0      0        0 *
10.244.231.194  0.0.0.0         255.255.255.255 UH    0      0        0 calieb5ce3f8586
172.12.1.0      0.0.0.0         255.255.255.0   U     100    0        0 ens33
172.17.0.0      0.0.0.0         255.255.0.0     U     0      0        0 docker0


# k8s-2-[BGP Client]
[root@k8s-2 ~]# route -n 
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         172.12.1.2      0.0.0.0         UG    100    0        0 ens33
10.244.200.192  0.0.0.0         255.255.255.192 U     0      0        0 *
10.244.200.197  0.0.0.0         255.255.255.255 UH    0      0        0 cali2e2bedebfa9
10.244.200.198  0.0.0.0         255.255.255.255 UH    0      0        0 cali6068269d8a9
10.244.200.199  0.0.0.0         255.255.255.255 UH    0      0        0 calib3f6025fba8
10.244.200.200  0.0.0.0         255.255.255.255 UH    0      0        0 calia83bc313c94
10.244.231.192  172.12.1.11     255.255.255.192 UG    0      0        0 ens33
172.12.1.0      0.0.0.0         255.255.255.0   U     100    0        0 ens33
172.17.0.0      0.0.0.0         255.255.0.0     U     0      0        0 docker0

Section 5: Calico IPAM

5.1 How Calico allocates IPs

  1. The allocation logic:
    • If the node already has IP blocks bound to it, allocate an IP from those blocks.
    • If that fails (no bound block, or the bound blocks are exhausted) and AutoAllocateBlocks is true, find an unbound IP block, bind it to this node, and allocate from it.
    • If that fails (AutoAllocateBlocks is false, or no free blocks remain) and StrictAffinity is false, borrow an unused IP from any block.
    • If all of the above fail, allocation fails.
  2. Under a configuration of StrictAffinity: true and AutoAllocateBlocks: false, the failure above occurs as soon as the node's bound blocks have no free IPs and no free blocks remain. Worse, because Calico's BIRD installs a blackhole route for every bound IP block when advertising BGP routes, pod IPs served from another node's block become unreachable.
  3. Given the above, our current use of Calico still has some problems, particularly around IPPool handling.
    • Partly it is Calico's implementation: as this issue (https://github.com/projectcalico/calico/issues/2713) notes, IP blocks are only ever bound to nodes automatically, never unbound; unbinding happens only when the corresponding Calico Node object is deleted. So if the cluster changes, say a machine goes offline and a new node replaces it, and nobody manually deletes the old Calico Node, its IP block is never released and can never be bound to another node.
    • Partly it is our own usage: we did not keep up with Calico releases, and our older production version only supports a fixed /26 BlockSize, i.e. 64 IPs per block. With the default per-node pod limit of 110, a node ends up consuming two blocks (128 IPs), which wastes a fair number of addresses. This was improved in "Allow the blockSize to be configured for Calico IPAM" (https://github.com/projectcalico/libcalico-go/pull/931), targeted at v3.3.0.
    • Therefore, on Calico v3.3.0 and later the BlockSize can be customized; setting it to 30, for instance, gives 4 IPs per block and much better utilization. The trade-off is more advertised routes, so pick a BlockSize that balances the two. A sketch of both knobs follows.
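A hedged sketch of both knobs on a recent Calico; the pool name and CIDR below are made up for illustration:

# IPPool with a small block size: /30 blocks, i.e. 4 IPs per block
cat << EOF | calicoctl create -f -
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: small-block-pool
spec:
  cidr: 10.245.0.0/16
  blockSize: 30
  ipipMode: Always
  natOutgoing: true
EOF
# Toggle strict block affinity cluster-wide
calicoctl ipam configure --strictaffinity=true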

5.2 Assigning IPs by topology

# Assign blocks of IP addresses from an IP pool for different topological areas.

If you have workloads in different regions, zones, or racks, you may want them to get IP addresses from the same IP pool. This strategy is useful for reducing the number of routes that are required in the network, or to meet requirements imposed by an external firewall device or policy. Calico makes it easy to do this using an IP pool resource with node labels and node selectors.

# Concepts
# IP address assignment
Topology-based IP address assignment requires addresses to be per-host (node). As such, Kubernetes annotations cannot be used because annotations are only per-namespace and per-pod. And although you can configure IP addresses for nodes in the CNI configuration, you are making changes within the host’s file system. The best option is to use node-selection IP address assignment using IP pools.

# Node-selection IP address management
Node selection-based IP address assignment is exactly what it sounds like: node labels are set, and Calico uses node selectors to decide which IP pools to use when assigning IP addresses to the node.

# Best practice
Nodes only assign workload addresses from IP pools which select them. To avoid having a workload not get an IP and fail to start, it is important to ensure that all nodes are selected by at least one IP pool.

# Tutorial
In this tutorial, we create a cluster with four nodes across two racks (two nodes/rack).

       -------------------
       |    router       |
       -------------------
       |                 |
---------------   ---------------
| rack-0      |   | rack-1      |
---------------   ---------------
| kube-node-0 |   | kube-node-2 |
- - - - - - - -   - - - - - - - -
| kube-node-1 |   | kube-node-3 |
- - - - - - - -   - - - - - - - -
Using the pod IP range 192.168.0.0/16, we target the following setup: reserve the 192.168.0.0/24 and 192.168.1.0/24 pools for rack-0 and rack-1 respectively. Let's get started.

By installing Calico without setting the default IP pool to match, running calicoctl get ippool -o wide shows that Calico created its default IP pool of 192.168.0.0/16:

NAME                  CIDR             NAT    IPIPMODE   DISABLED   SELECTOR
default-ipv4-ippool   192.168.0.0/16   true   Always     false      all()



#1.Delete the default IP pool.

Since the default-ipv4-ippool IP pool resource already exists and accounts for the entire /16 block, we will have to delete this first:

calicoctl delete ippools default-ipv4-ippool

#2.Label the nodes.
To assign IP pools to specific nodes, these nodes must be labelled using kubectl label.

kubectl label nodes kube-node-0 rack=0
kubectl label nodes kube-node-1 rack=0
kubectl label nodes kube-node-2 rack=1
kubectl label nodes kube-node-3 rack=1

#3.Create an IP pool for each rack.

calicoctl create -f -<<EOF
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: rack-0-ippool
spec:
  cidr: 192.168.0.0/24
  ipipMode: Always
  natOutgoing: true
  nodeSelector: rack == "0"
EOF
calicoctl create -f -<<EOF
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: rack-1-ippool
spec:
  cidr: 192.168.1.0/24
  ipipMode: Always
  natOutgoing: true
  nodeSelector: rack == "1"
EOF

We should now have two enabled IP pools, which we can see when running calicoctl get ippool -o wide:

NAME                  CIDR             NAT    IPIPMODE   DISABLED   SELECTOR
rack-0-ippool         192.168.0.0/24   true   Always     false      rack == "0"
rack-1-ippool         192.168.1.0/24   true   Always     false      rack == "1"

#4.Verify that the IP pool node selectors are being respected.
We will create an nginx deployment with five replicas to get a workload running on each node.
kubectl run nginx --image nginx --replicas 5
Check that the new workloads now have an address in the proper IP pool allocated for the rack that the node is on with kubectl get pods -owide.

NAME                   READY   STATUS    RESTARTS   AGE    IP             NODE          NOMINATED NODE   READINESS GATES
nginx-5c7588df-prx4z   1/1     Running   0          6m3s   192.168.0.64   kube-node-0   <none>           <none>
nginx-5c7588df-s7qw6   1/1     Running   0          6m7s   192.168.0.129  kube-node-1   <none>           <none>
nginx-5c7588df-w7r7g   1/1     Running   0          6m3s   192.168.1.65   kube-node-2   <none>           <none>
nginx-5c7588df-62lnf   1/1     Running   0          6m3s   192.168.1.1    kube-node-3   <none>           <none>
nginx-5c7588df-pnsvv   1/1     Running   0          6m3s   192.168.1.64   kube-node-2   <none>           <none>
The grouping of IP addresses assigned to the workloads differ based on what node that they were scheduled to. Additionally, the assigned address for each workload falls within the respective IP pool that selects the rack that they run on.
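To see the block-to-node bindings that produce this grouping, a sketch:

# Show which blocks were carved from each rack's pool and which node owns them
calicoctl ipam show --show-blocks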

5.3 Assigning a fixed IP address to a pod

  1. Docs: docs.projectcalico.org/networking/use-specific-ip
  2. First confirm that the CNI config's ipam is calico-ipam:
   "ipam": {
         "type": "calico-ipam"
    }, 
  3. Give the pod a specific IP address
#1. Create a new ippool:
[root@ovs ~]# cat net.yaml 
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: external-pool
spec:
  cidr: 172.16.0.0/26
  blockSize: 29
  ipipMode: Always
  natOutgoing: true
#2. Specify the IP address:
[root@ovs ~]# cat calico_static.yaml 
apiVersion: v1
kind: Pod
metadata:
  name: calico-static-pod
  labels:
    app: myapp
  annotations:
    cni.projectcalico.org/ipAddrs: "[\"172.16.0.2\"]"
spec:
  containers:
  - name: static-container
    image: burlyluo/nettoolbox
 
# show output:
[root@k8s-1 ~]# calicoctl get ippool -o wide 
NAME                  CIDR            NAT    IPIPMODE   VXLANMODE   DISABLED   SELECTOR   
default-ipv4-ippool   10.244.0.0/16   true   Always     Never       false      all()     
external-pool         172.16.0.0/26   true   Always     Never       false      all()      
# Address info:
[root@k8s-1 ~]# kubectl get pods -o wide 
NAME                READY   STATUS    RESTARTS   AGE     IP               NODE    NOMINATED NODE   READINESS GATES
calico-static-pod   1/1     Running   0          4m22s   172.16.0.2       k8s-2   <none>           <none>  
# Deploy DEMO:
apiVersion: apps/v1 
kind: Deployment
metadata:
  name: static-ip
spec:
  selector:
    matchLabels:
      app: nginx
  replicas: 1 # tells deployment to run 1 pod matching the template
  template:
    metadata:
      labels:
        app: nginx
      annotations:
        "cni.projectcalico.org/ipAddrs": "[\"172.16.0.5\"]"
    spec:
      containers:
      - name: nginx
        image: burlyluo/nettoolbox
 
#
# Note the use of the escaped \" for the inner double quotes around the addresses.
# The address must be within a configured Calico IP pool and not currently in use. The annotation must be present when the pod is created; adding it later has no effect.
# Note that currently only a single IP address is supported per-pod using this annotation
# With a Deployment, the annotation therefore effectively allows only one pod with one fixed address
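Before pinning an address, it can be checked against IPAM; a sketch using the address from the Deployment above:

# Reports whether the address is in use and which pool it belongs to
calicoctl ipam show --ip=172.16.0.5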

5.4 Migrating a Calico IP pool

# Step 1: Add a new IP pool
We add a new IPPool with the CIDR range, 10.0.0.0/16.

apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: new-pool
spec:
  cidr: 10.0.0.0/16
  ipipMode: Always
  natOutgoing: true
# List the ippools on the system:
[root@k8s-1 ~]# calicoctl get ippool -o wide 
NAME                  CIDR            NAT    IPIPMODE   VXLANMODE   DISABLED   SELECTOR   
default-ipv4-ippool   10.244.0.0/16   true   Always     Never       false       all()      
external-pool         172.16.0.0/26   true   Always     Never       false       all()      
new-pool              10.0.0.0/16     true   Always     Never       false       all()     

 # Step 2: Disable the old IP pools:
 calicoctl get ippool -o yaml > pool.yaml
 # Edit pool.yaml, disable the old pools by setting disabled: true, then re-apply with: calicoctl apply -f pool.yaml
 
 [root@k8s-1 ~]# cat pool.yaml 
apiVersion: projectcalico.org/v3
items:
- apiVersion: projectcalico.org/v3
  kind: IPPool
  metadata:
    creationTimestamp: "2021-04-04T04:55:21Z"
    name: default-ipv4-ippool
    resourceVersion: "127281"
    uid: ce563f34-8423-4b39-9653-d98cd77bad8a
  spec:
    blockSize: 26
    cidr: 10.244.0.0/16
    disabled: true   # disable this ippool
    ipipMode: Always
    natOutgoing: true
    nodeSelector: all()
    vxlanMode: Never
- apiVersion: projectcalico.org/v3
  kind: IPPool
  metadata:
    creationTimestamp: "2021-08-19T03:05:39Z"
    name: external-pool
    resourceVersion: "127282"
    uid: 9a8de79a-90af-4a46-a4b7-0649ba5b2453
  spec:
    blockSize: 29
    cidr: 172.16.0.0/26
    ipipMode: Always
    natOutgoing: true
    nodeSelector: all()
    vxlanMode: Never
    disabled: true   # disable this ippool
- apiVersion: projectcalico.org/v3
  kind: IPPool
  metadata:
    creationTimestamp: "2021-08-20T04:53:15Z"
    name: new-pool
    resourceVersion: "127286"
    uid: adddcb37-e7c9-4db4-b25b-d78795f9619f
  spec:
    blockSize: 26
    cidr: 10.0.0.0/16
    ipipMode: Always
    natOutgoing: true
    nodeSelector: all()
    vxlanMode: Never
kind: IPPoolList
metadata:
  resourceVersion: "127644"


# Step 3: Delete pods so they are recreated with addresses from the new IP pool
[root@ovs ~]# kubectl get pods -o wide -w
NAME                         READY   STATUS        RESTARTS   AGE     IP               NODE    NOMINATED NODE   READINESS GATES
cni-cgchc                    1/1     Terminating   0          3m38s   172.16.0.17      k8s-1   <none>           <none>
cni-mm9pd                    1/1     Terminating   1          137d    10.244.200.194   k8s-2   <none>           <none>
myapp-pod                    1/1     Running       1          90m     172.16.0.2       k8s-2   <none>           <none>
static-ip-5b7f96bbfd-sztpq   1/1     Running       0          76m     172.16.0.5       k8s-2   <none>           <none>
cni-cgchc                    1/1     Terminating   0          3m50s   172.16.0.17      k8s-1   <none>           <none>
cni-mm9pd                    1/1     Terminating   1          137d    10.244.200.194   k8s-2   <none>           <none>
cni-cgchc                    0/1     Terminating   0          3m51s   <none>           k8s-1   <none>           <none>
cni-mm9pd                    0/1     Terminating   1          137d    <none>           k8s-2   <none>           <none>
cni-cgchc                    0/1     Terminating   0          3m52s   <none>           k8s-1   <none>           <none>
cni-cgchc                    0/1     Terminating   0          3m52s   <none>           k8s-1   <none>           <none>
cni-wpg5g                    0/1     Pending       0          0s      <none>           <none>   <none>           <none>
cni-wpg5g                    0/1     Pending       0          0s      <none>           k8s-1    <none>           <none>
cni-wpg5g                    0/1     ContainerCreating   0          0s      <none>           k8s-1    <none>           <none>
cni-wpg5g                    0/1     ContainerCreating   0          1s      <none>           k8s-1    <none>           <none>
cni-mm9pd                    0/1     Terminating         1          137d    <none>           k8s-2    <none>           <none>
cni-mm9pd                    0/1     Terminating         1          137d    <none>           k8s-2    <none>           <none>
cni-zc2jl                    0/1     Pending             0          0s      <none>           <none>   <none>           <none>
cni-zc2jl                    0/1     Pending             0          0s      <none>           k8s-2    <none>           <none>
cni-zc2jl                    0/1     ContainerCreating   0          0s      <none>           k8s-2    <none>           <none>
cni-zc2jl                    0/1     ContainerCreating   0          0s      <none>           k8s-2    <none>           <none>
cni-wpg5g                    1/1     Running             0          16s     10.0.231.193     k8s-1    <none>           <none>
cni-zc2jl                    1/1     Running             0          16s     10.0.200.193     k8s-2    <none>           <none>
 
 
# Step 4: Delete the old ippool:
 calicoctl delete pool default-ipv4-ippool
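A short verification sketch after the migration:

# Only new-pool (and any extra pools) should remain
calicoctl get ippool -o wide
# All recreated pod IPs should now fall inside 10.0.0.0/16
kubectl get pods -A -o wide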

5.5 Selecting specific IP pools

# Pinning multiple pods to specific IP pools
# Requires extra IP pools beyond the default one, selected with the cni.projectcalico.org/ipv4pools annotation.

# Example YAML:

[root@k8s-master3 ~]# cat ippool1.yaml 
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: new-pool1
spec:
  blockSize: 31
  cidr: 10.21.0.0/31
  ipipMode: Never
  natOutgoing: true
[root@k8s-master3 ~]# calicoctl create -f ippool1.yaml 
Successfully created 1 'IPPool' resource(s)
[root@k8s-master3 ~]# cat ippool2.yaml 
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
  name: new-pool2
spec:
  blockSize: 31
  cidr: 10.21.0.2/31
  ipipMode: Never
  natOutgoing: true
[root@k8s-master3 ~]# calicoctl create -f ippool2.yaml 
Successfully created 1 'IPPool' resource(s)
[root@k8s-master3 ~]# 
[root@k8s-master3 ~]# 
[root@k8s-master3 ~]# calicoctl get ippool
NAME                  CIDR           SELECTOR   
default-ipv4-ippool   10.20.0.0/16   all()      
new-pool1             10.21.0.0/31   all()      
new-pool2             10.21.0.2/31   all() 

root@k8s-master3 ~]# cat nginx.yaml 
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  selector:
    matchLabels:
      app: nginx
  replicas: 4 # tells deployment to run 4 pods matching the template
  template:
    metadata:
      labels:
        app: nginx
      annotations:
        "cni.projectcalico.org/ipv4pools": "[\"new-pool1\",\"new-pool2\"]"
    spec:
      containers:
      - name: nginx
        image: nginx:1.7.9
        ports:
        - containerPort: 80
[root@k8s-master3 ~]# kubectl create -f nginx.yaml 
deployment.apps/nginx-deployment created
[root@k8s-master3 ~]# kubectl get pods -o wide
NAME                               READY   STATUS    RESTARTS   AGE   IP          NODE          NOMINATED NODE   READINESS GATES
nginx-deployment-f49447c5d-4k4px   1/1     Running   0          11s   10.21.0.0   k8s-master4   <none>           <none>
nginx-deployment-f49447c5d-5sbrx   1/1     Running   0          11s   10.21.0.2   k8s-master4   <none>           <none>
nginx-deployment-f49447c5d-flfb8   1/1     Running   0          11s   10.21.0.3   k8s-master4   <none>           <none>
nginx-deployment-f49447c5d-q4945   1/1     Running   0          11s   10.21.0.1   k8s-master4   <none>           <none>


# Assigning a pool per namespace:
kubectl get ns 
[root@k8s-1 ~]# kubectl edit ns cc 

# Please edit the object below. Lines beginning with a '#' will be ignored,
# and an empty file will abort the edit. If an error occurs while saving this file will be
# reopened with the relevant failures.
#
apiVersion: v1
kind: Namespace
metadata:
  annotations:
    cni.projectcalico.org/ipv4pools: '["new-pool-ns"]'   ### add this annotation
  creationTimestamp: "2021-08-20T05:20:44Z"
  name: cc
  resourceVersion: "130057"
  uid: bb8ed59d-04c8-4449-864b-ced54e64bb59
spec:
  finalizers:
  - kubernetes
status:
  phase: Active
# Create a pod in the namespace:
kubectl run ccc --image=burlyluo/nettoolbox -n cc
# ippool info:
[root@k8s-1 ~]# calicoctl get ippool -o wide 
NAME                  CIDR            NAT    IPIPMODE   VXLANMODE   DISABLED   SELECTOR   
default-ipv4-ippool   10.244.0.0/16   true   Always     Never       true       all()      
external-pool         172.16.0.0/26   true   Always     Never       true       all()      
new-pool              10.0.0.0/16     true   Always     Never       false      all()      
new-pool-ns           11.0.0.0/16     true   Always     Never       false      all()      
[root@k8s-1 ~]# kubectl get pods -o wide -n cc
NAME   READY   STATUS    RESTARTS   AGE   IP             NODE    NOMINATED NODE   READINESS GATES
ccc    1/1     Running   0          57m   11.0.200.193   k8s-2   <none>           <none>

# Inspect overall IPAM usage:
calicoctl ipam show