Management
logging
View pod log
kubectl logs pod/redis | grep WARNING
kubectl logs pod/redis | grep WARNING > ~/tmp/01
// 1. create pod
kubectl run foobar --image=busybox --generator=run-pod/v1 --command -- sh -c "echo 'hello' ; echo 'yes' ; echo 'error file-not-found' ; sleep 3600"
// 2. show all logs
# kubectl logs pod/foobar
hello
yes
error file-not-found
// 3. grep error
# kubectl logs pod/foobar | grep error
error file-not-found
// 4. grep error to a file
# kubectl logs pod/foobar | grep error > foobar.log
# cat foobar.log
error file-not-found
https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands
sort-by
Pod
kubectl get pods --all-namespaces --sort-by=.metadata.name
kubectl get pods --all-namespaces --sort-by=.metadata.name > ~/tmp/02
PersistentVolume
kubectl get pv
kubectl get pv --sort-by=.metadata.name
kubectl get pv --sort-by=.metadata.name > my_volumes
// delete all pv
for i in $(kubectl get pv --no-headers | awk '{print $1}') ; do kubectl delete pv $i ; done
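The loop above works, but kubectl can delete them all in one command:
kubectl delete pv --all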
Daemonset
// 1. sample daemonset
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  selector:
    matchLabels:
      name: nginx
  template:
    metadata:
      labels:
        name: nginx
    spec:
      containers:
      - name: nginx
        image: nginx
// 2. create
kubectl apply -f deployments.yaml
// 3. view the daemonset
kubectl get daemonset
https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/
nodeSelector
Create a Pod named nginx-abc101 with the nginx image, and place it on a node labeled disk=ssd.
// 1. assign a label to the node
kubectl label node machine03.example.com disk=ssd
kubectl get nodes --show-labels
// 2. pod yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx-abc101
  labels:
    env: test
spec:
  containers:
  - name: nginx
    image: nginx
    imagePullPolicy: IfNotPresent
  nodeSelector:
    disk: ssd
// 3. deploy
kubectl apply -f deploy.yaml
// 4. check the deployment
kubectl get pod -o wide
// 5. clean up
kubectl delete all --all
kubectl label node machine03.example.com disk-
initContainer
apiVersion: v1
kind: Pod
metadata:
  name: lumpy--koala
  labels:
    app: myapp
spec:
  containers:
  - name: lumpy--koala
    image: busybox:1.28
    command: ['sh', '-c', 'echo The app is running! && sleep 3600']
    livenessProbe:
      exec:
        # fail the probe when the file is missing
        command: ['test', '-e', '/workdir/calm.txt']
    volumeMounts:
    - mountPath: /workdir
      name: workdir-volume
  initContainers:
  - name: lumpy--initi
    image: busybox:1.28
    # create the file the liveness probe checks for
    command: ['sh', '-c', 'touch /workdir/calm.txt']
    volumeMounts:
    - mountPath: /workdir
      name: workdir-volume
  volumes:
  - name: workdir-volume
    emptyDir: {}
kubectl apply -f deploy.yaml
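A quick check that the init container created the file and the liveness probe keeps passing:
// the pod should reach Running and stay Ready with no restarts
kubectl get pod lumpy--koala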
https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
Containers
Create a Pod named kucc4 that runs four containers: nginx, redis, memcached, and consul.
kubectl run kucc4 --image=nginx --generator=run-pod/v1 --dry-run -o yaml > pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: kucc4
  labels:
    app: kucc4
spec:
  containers:
  - name: nginx
    image: nginx
  - name: redis
    image: redis
  - name: memcached
    image: memcached
  - name: consul
    image: consul
kubectl apply -f pod.yaml
kubectl logs pod/kucc4 -c consul
https://kubernetes.io/docs/concepts/workloads/pods/pod-overview/
Deployments
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-app
  labels:
    app: nginx
spec:
  replicas: 3
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:1.11.9-alpine
        ports:
        - containerPort: 80
kubectl apply -f deploy.yaml
kubectl set image deployment/nginx-app nginx=nginx:1.12.0-alpine --record
kubectl rollout undo deployment/nginx-app
https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
Service
Create and configure a Service named front-end-service that is reachable via NodePort/ClusterIP and routes to the front-end Pod.
kubectl expose pod front-end --name=front-end-service --type='NodePort' --port=80
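A quick check that the Service exists and picked up the front-end Pod as an endpoint:
kubectl get svc front-end-service
kubectl describe svc front-end-service | grep Endpoints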
Namespace
Create a Pod named Jenkins, using the Jenkins image, in a new namespace ns01.
kubectl create namespace ns01
kubectl apply -f pod.yaml -n ns01
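The pod.yaml referenced above is not shown in the notes; a minimal sketch (using the lowercase name jenkins, since object names must be DNS-1123 compliant):
apiVersion: v1
kind: Pod
metadata:
  name: jenkins
spec:
  containers:
  - name: jenkins
    image: jenkins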
yaml
Create a Deployment spec file: use the redis image, 7 replicas, label app_enb_stage=dev, Deployment name abc. Save the spec file to /opt/abc/deploy_spec.yaml. When finished, clean up (delete) any new Kubernetes API objects created during this task.
kubectl run abc --image=redis --replicas=7 --labels=app_enb_stage=dev --dry-run -o yaml > /opt/abc/deploy_spec.yaml
kubectl apply -f /opt/abc/deploy_spec.yaml
kubectl delete all -l app_enb_stage=dev
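For reference, the generated deploy_spec.yaml looks roughly like this (exact defaults vary by kubectl version):
apiVersion: apps/v1
kind: Deployment
metadata:
  name: abc
  labels:
    app_enb_stage: dev
spec:
  replicas: 7
  selector:
    matchLabels:
      app_enb_stage: dev
  template:
    metadata:
      labels:
        app_enb_stage: dev
    spec:
      containers:
      - name: redis
        image: redis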
Resource Limits
// 1. set resource limit
cat <<EOF > ./pod.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: busybox
  name: busybox
spec:
  containers:
  - image: busybox
    name: busybox
    command: ["sh", "-c", "sleep 3600"]
    resources:
      requests:
        cpu: 100m
        memory: 20Mi
EOF
// 2. create pod
kubectl create -f pod.yaml
Taints & Tolerations
This part shows how Taints and Tolerations control which nodes Pods are scheduled onto.
// 1. create taint
kubectl taint node machine02.example.com node-type=prod:NoSchedule
// 2. verify taints
# kubectl describe nodes | grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: node-type=prod:NoSchedule
Taints: <none>
// 3. deploy dev pods
cat <<EOF > ./dev.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: dev
  name: dev
spec:
  replicas: 3
  selector:
    matchLabels:
      app: dev
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: dev
    spec:
      containers:
      - image: busybox:1.28
        name: busybox
        command: ["sh", "-c", "sleep 3600"]
EOF
kubectl create -f dev.yaml
// 4. verify no pods run on the tainted prod node; all 3 pods should land on machine03
# kubectl get pods -o wide -l app=dev --no-headers
dev-74cfd5fb55-c9x87 1/1 Running 0 95s 192.168.208.247 machine03.example.com <none> <none>
dev-74cfd5fb55-dblvb 1/1 Running 0 95s 192.168.208.239 machine03.example.com <none> <none>
dev-74cfd5fb55-dsfd4 1/1 Running 0 95s 192.168.208.242 machine03.example.com <none> <none>
// 5. deploy prod pods
cat <<EOF > ./prod.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: prod
  name: prod
spec:
  replicas: 3
  selector:
    matchLabels:
      app: prod
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: prod
    spec:
      containers:
      - image: busybox:1.28
        name: busybox
        command: ["sh", "-c", "sleep 3600"]
      tolerations:
      - key: node-type
        operator: Equal
        value: prod
        effect: NoSchedule
EOF
kubectl create -f prod.yaml
// 6. verify prod pods can now be scheduled onto the tainted prod node (the toleration permits, but does not force, placement there)
# kubectl get pods -o wide -l app=prod --no-headers
prod-8598bf8b7b-5mhb9 1/1 Running 0 59s 192.168.251.52 machine02.example.com <none> <none>
prod-8598bf8b7b-n2smj 1/1 Running 0 59s 192.168.208.245 machine03.example.com <none> <none>
prod-8598bf8b7b-tbq8x 1/1 Running 0 59s 192.168.251.53 machine02.example.com <none> <none>
// 7. remove taint
kubectl taint node machine02.example.com node-type:NoSchedule-
kubectl describe node machine02.example.com | grep Taints
Secret
Create a Kubernetes Secret as follows:
Name: super-secret
Data: credential=alice, username=bob
Create a Pod named pod-secrets-via-file using the redis image which mounts a secret named super-secret at /secrets
Create a second Pod named pod-secrets-via-env using the redis image, which exports credential/username as TOPSECRET/CREDENTIALS
kubectl create secret generic super-secret --from-literal=credential=alice --from-literal=username=bob
apiVersion: v1
kind: Pod
metadata:
  name: pod-secrets-via-file
spec:
  containers:
  - name: pod-secrets-via-file
    image: redis
    volumeMounts:
    - name: super-secret
      mountPath: "/secrets"
  volumes:
  - name: super-secret
    secret:
      secretName: super-secret
apiVersion: v1
kind: Pod
metadata:
  name: pod-secrets-via-env
spec:
  containers:
  - name: pod-secrets-via-env
    image: redis
    env:
    - name: TOPSECRET
      valueFrom:
        secretKeyRef:
          name: super-secret
          key: credential
    - name: CREDENTIALS
      valueFrom:
        secretKeyRef:
          name: super-secret
          key: username
  restartPolicy: Never
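A quick sanity check (not part of the task) that both Pods actually see the secret:
kubectl exec pod-secrets-via-file -- ls /secrets
kubectl exec pod-secrets-via-env -- env | grep -E 'TOPSECRET|CREDENTIALS'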
Labels
Create a file /opt/KUCC00302/kucc00302.txt that lists all pods that implement Service foo in Namespace production.
kubectl get svc foo -o yaml
kubectl describe svc foo
// the label selector below comes from spec.selector of Service foo
kubectl get pods -l app=redis,role=slave,tier=backend
kubectl get pods -l app=redis,role=slave,tier=backend --no-headers
kubectl get pods -l app=redis,role=slave,tier=backend --no-headers | awk '{print $1}'
kubectl get pods -l app=redis,role=slave,tier=backend --no-headers | awk '{print $1}' > /opt/KUCC00302/kucc00302.txt
emptyDir
apiVersion: v1
kind: Pod
metadata:
  name: non-persistent-redis
spec:
  containers:
  - image: redis
    name: redis
    volumeMounts:
    - mountPath: "/data/redis"
      name: cache-control
  volumes:
  - name: cache-control
    emptyDir: {}
Scale
Scale the deployment webserver to 6 pods
kubectl scale deployment.apps/webserver --replicas=6
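Confirm the new replica count:
kubectl get deployment.apps/webserver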
top
kubectl top pods -l name=cpu-utilizer
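If the kubectl version supports --sort-by for top (newer releases), sorting puts the busiest Pod first:
kubectl top pods -l name=cpu-utilizer --sort-by=cpu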
Nodes
Check to see how many nodes are ready (not including nodes tainted NoSchedule) and write the number
kubectl get nodes | grep -w Ready
kubectl get nodes | grep -w Ready | wc -l
kubectl describe nodes | grep Taints | grep NoSchedule
kubectl describe nodes | grep Taints | grep NoSchedule | wc -l
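A sketch that combines the two checks; it assumes the NoSchedule-tainted nodes are themselves Ready, and /opt/nodenum is only an example output path:
READY=$(kubectl get nodes --no-headers | grep -wc Ready)
TAINTED=$(kubectl describe nodes | grep Taints | grep -c NoSchedule)
echo $((READY - TAINTED)) > /opt/nodenum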
DNS
kubectl expose deployment nginx-dns --name=nginx-dns --port=80
kubectl exec -ti busybox1 -- nslookup nginx-dns
kubectl exec -ti busybox1 -- nslookup 10.105.132.132
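The busybox1 Pod used above is assumed to already exist; a minimal way to create it (same busybox:1.28 image used elsewhere in these notes):
kubectl run busybox1 --image=busybox:1.28 --generator=run-pod/v1 --command -- sleep 3600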
etcd snapshot
General Kubernetes backup procedure (etcd holds the cluster's persisted state, so only etcd needs to be backed up).
// 1. install etcdctl (etcdctl is not installed by default; API version 3 is required)
ETCDCTL_API=3 etcdctl --help
// 2. execute backup
ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key snapshot save etcd-2020-0305.db
// 3. view the backup
# ETCDCTL_API=3 etcdctl --write-out=table snapshot status etcd-2020-0305.db
+----------+----------+------------+------------+
| HASH | REVISION | TOTAL KEYS | TOTAL SIZE |
+----------+----------+------------+------------+
| 9613afde | 1101819 | 1758 | 2.9 MB |
+----------+----------+------------+------------+
Remote backup
ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 \
  --cacert=/opt/KUCM00302/ca.crt \
  --cert=/opt/KUCM00302/etcd-client.crt \
  --key=/opt/KUCM00302/etcd-client.key \
  snapshot save /data/backup/etcd-snapshot.db
Drain
General node maintenance procedure
// 1. drain the node for maintenance
kubectl drain wk8s-node-1 --ignore-daemonsets=true --delete-local-data=true --force=true
// 2. do maintenance
// 3. add back the node
kubectl uncordon wk8s-node-1
Delete node
kubectl delete node [node_name]
Node NotReady
kubectl get node
systemctl status kubelet
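If kubelet turns out to be stopped or disabled on the NotReady node, the usual fix is:
systemctl enable kubelet && systemctl start kubelet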
Static Pod
// 1. yaml
cat <<EOF > ./myservice.yaml
apiVersion: v1
kind: Pod
metadata:
  name: myservice
spec:
  containers:
  - name: myservice
    image: nginx
    ports:
    - name: web
      containerPort: 80
      protocol: TCP
EOF
// 2. place to manifests
cp myservice.yaml /etc/kubernetes/manifests/
// 3. verify the manifests path
# cat /var/lib/kubelet/config.yaml | grep staticPodPath
staticPodPath: /etc/kubernetes/manifests
// 4. restart service
systemctl restart kubelet
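After the restart, kubelet creates a mirror Pod named after the manifest plus the node name; it can be checked with:
kubectl get pods --all-namespaces | grep myservice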
kubelet.service
systemctl list-units | grep schedule
systemctl list-units | grep etcd
systemctl list-units | grep controller-manager
systemctl list-units | grep apiserver
# cat /var/lib/kubelet/config.yaml | grep staticPodPath
staticPodPath: /etc/kubernetes/manifests
PersistentVolume
apiVersion: v1
kind: PersistentVolume
metadata:
  name: app-config
spec:
  capacity:
    storage: 1Gi
  volumeMode: Filesystem
  accessModes:
  - ReadWriteOnce
  hostPath:
    path: /srv/app-config
https://kubernetes.io/docs/concepts/storage/persistent-volumes/
Security
RBAC (authentication and authorization)
1. Create the namespace
kubectl create ns web
2. role.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: web
  name: service-reader
rules:
- apiGroups: [""]
  resources: ["services"]
  verbs: ["get", "list"]
3. Create a RoleBinding that binds the service-reader Role created above to a ServiceAccount
kubectl create rolebinding test --role=service-reader --serviceaccount=web:default -n web
4. Test (assumes kubectl proxy is running on localhost:8001)
curl localhost:8001/api/v1/namespaces/web/services
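An alternative check that exercises the binding directly by impersonating the ServiceAccount:
kubectl auth can-i list services -n web --as=system:serviceaccount:web:default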
Cluster troubleshooting steps
Control Plane
// 1. look for clues in events
kubectl get events
kubectl get events -n kube-system
// 2. look for clues in the pod logs
kubectl logs [kube_scheduler_pod_name] -n kube-system
// 3. is the docker service running as expected?
systemctl status docker
systemctl enable docker && systemctl start docker
// 4. is the kubelet service running as expected?
systemctl status kubelet
systemctl enable kubelet && systemctl start kubelet
// 5. is swap turned off?
swapoff -a && sed -i '/ swap / s/^/#/' /etc/fstab
// 6. is the firewall blocking API calls?
systemctl status firewalld
systemctl disable firewalld && systemctl stop firewalld
Worker Node
// 1. look for clues in the node status
kubectl get nodes
kubectl get nodes -o wide
// 2. connect to the affected node and run the same checks as for the control plane
// 3. look for clues in the system logs
journalctl -u kubelet
tail -120 /var/log/syslog | grep kubelet
Networking
// 1. deploy busybox
kubectl run -it --rm --restart=Never busybox --image=busybox:1.28 -- sh
# nslookup <NAME>
# cat /etc/resolv.conf
# nslookup kubernetes.default
// 2. check specific process
ps auxw | grep kube-proxy
// 3. iptables-save
iptables-save | grep <NAME>
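It is also worth confirming the cluster DNS Pods themselves are healthy (k8s-app=kube-dns is the standard label CoreDNS keeps for compatibility):
kubectl get pods -n kube-system -l k8s-app=kube-dns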