使用"kubectl get cs"查看集群状态,发现scheduler&controller-manager组件的状态都是 Unhealthy的,且提示"Get “http://127.0.0.1:10251/healthz”: dial tcp 127.0.0.1:10251: connect: connection refused"
查看相应的yaml文件,发现是kube-controller-manager.yaml和kube-scheduler.yaml设置的默认端口是0。
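Before touching the manifests, you can confirm the cause on the control-plane node: the componentstatus check talks to the legacy insecure HTTP ports, 10251 for kube-scheduler and 10252 for kube-controller-manager, and --port=0 turns those off. With the flag in place, neither of the following should find a listener or get a reply (a quick sanity check, assuming ss and curl are available on the node):

ss -lntp | grep -E '10251|10252'
curl http://127.0.0.1:10251/healthz
curl http://127.0.0.1:10252/healthz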
[root@master1 ~]# cd /etc/kubernetes/manifests/
[root@master1 manifests]# ll
total 16
-rw------- 1 root root 2109 Sep 6 16:27 etcd.yaml
-rw------- 1 root root 3171 Sep 6 16:27 kube-apiserver.yaml
-rw------- 1 root root 2859 Sep 7 14:02 kube-controller-manager.yaml
-rw------- 1 root root 1414 Sep 7 14:03 kube-scheduler.yaml
[root@master1 manifests]# vim kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=172.10.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
    # - --port=0    ### comment out this line
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
    - --service-account-private-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.96.0.0/12
    - --use-service-account-credentials=true
    image: registry.aliyuncs.com/google_containers/kube-controller-manager:v1.19.0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10257
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-controller-manager
    resources:
      requests:
        cpu: 200m
    startupProbe:
      failureThreshold: 24
      httpGet:
"kube-controller-manager.yaml" 97L, 2859C
[root@master1 manifests]# vim kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
    # - --port=0
    image: registry.aliyuncs.com/google_containers/kube-scheduler:v1.19.0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-scheduler
    resources:
      requests:
        cpu: 100m
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /etc/kubernetes/scheduler.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-node-critical
"kube-scheduler.yaml" 57L, 1414C
With "- --port=0" commented out in both kube-controller-manager.yaml and kube-scheduler.yaml, restart the kubelet service so the scheduler and controller-manager static pods are recreated:
[root@master1 manifests]# systemctl restart kubelet.service
[root@master1 manifests]# systemctl status kubelet.service
● kubelet.service - kubelet: The Kubernetes Node Agent
Loaded: loaded (/usr/lib/systemd/system/kubelet.service; enabled; vendor preset: disabled)
Drop-In: /usr/lib/systemd/system/kubelet.service.d
└─10-kubeadm.conf
Active: active (running) since Wed 2022-09-07 14:03:31 CST; 13s ago
Docs: https://kubernetes.io/docs/
Main PID: 15560 (kubelet)
Tasks: 14
Memory: 35.7M
CGroup: /system.slice/kubelet.service
└─15560 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --network-plugin=cni --pod-i...
Sep 07 14:03:39 master1 kubelet[15560]: I0907 14:03:39.068149 15560 reconciler.go:224] operationExecutor.VerifyControllerAttachedVolume started for volume "policysync" (UniqueName: "kubernet...977c317300f5")
Sep 07 14:03:39 master1 kubelet[15560]: I0907 14:03:39.068174 15560 reconciler.go:224] operationExecutor.VerifyControllerAttachedVolume started for volume "calico-node-token-kqjsk" (UniqueName: "kubernete...
Sep 07 14:03:39 master1 kubelet[15560]: I0907 14:03:39.068261 15560 reconciler.go:224] operationExecutor.VerifyControllerAttachedVolume started for volume "host-local-net-dir" (UniqueName: "kubernetes.io/...
Sep 07 14:03:39 master1 kubelet[15560]: I0907 14:03:39.068281 15560 reconciler.go:157] Reconciler: start to sync state
Sep 07 14:03:39 master1 kubelet[15560]: E0907 14:03:39.168525 15560 kubelet.go:1576] Failed creating a mirror pod for "etcd-master1_kube-system(82d637c4e972cf9835c7ee326b9ea1e3)": pods "etcd...already exists
Sep 07 14:03:39 master1 kubelet[15560]: I0907 14:03:39.952571 15560 request.go:645] Throttling request took 1.001693839s, request: GET:https://10.0.16.15:6443/api/v1/namespaces/kube-system/c...ourceVersion=0
Sep 07 14:03:40 master1 kubelet[15560]: E0907 14:03:40.170529 15560 secret.go:195] Couldn't get secret kube-system/calico-kube-controllers-token-zhdg5: failed to sync secret cache: timed out... the condition
Sep 07 14:03:40 master1 kubelet[15560]: E0907 14:03:40.170654 15560 nestedpendingoperations.go:301] Operation for "{volumeName:kubernetes.io/secret/ebedf9a1-043b-4f01-b211-6ca205c61c28-calic...-09-07 14:03:4
Sep 07 14:03:40 master1 kubelet[15560]: E0907 14:03:40.170970 15560 secret.go:195] Couldn't get secret kube-system/calico-node-token-kqjsk: failed to sync secret cache: timed out waiting for the condition
Sep 07 14:03:40 master1 kubelet[15560]: E0907 14:03:40.171047 15560 nestedpendingoperations.go:301] Operation for "{volumeName:kubernetes.io/secret/c5abc60b-c027-4eea-bf2c-977c317300f5-calic...:40.671007578
Hint: Some lines were ellipsized, use -l to show in full.
[root@master1 ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS    MESSAGE             ERROR
scheduler            Healthy   ok
controller-manager   Healthy   ok
etcd-0               Healthy   {"health":"true"}
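As a final check, the insecure healthz endpoints that the componentstatus probe uses should now respond, and the recreated static pods should be running (output omitted here):

curl http://127.0.0.1:10251/healthz
curl http://127.0.0.1:10252/healthz
kubectl get pods -n kube-system | grep -E 'kube-scheduler|kube-controller-manager'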