#AWS EKS 创建k8s生产环境实例
- 在AWS部署海外节点时, 为图简单先用web控制台创建VPC和k8s集群, 结果k8s集群创建出错; 改用cli命令行工具后创建成功
- 本实例为复盘, 记录aws命令行工具创建eks, 安装efs驱动、LBS、ingress-nginx,使用ECR镜像储存等
#安装命令行工具
| #安装aws cli |
| cd /tmp |
| curl -kL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" |
| unzip awscliv2.zip |
| sudo ./aws/install |
| aws --version |
| #配置aws key |
| aws configure |
| #查看配置 |
| aws configure list |
| |
| #安装kubectl |
| curl -o kubectl https://s3.us-west-2.amazonaws.com/amazon-eks/1.22.6/2022-03-09/bin/linux/amd64/kubectl |
| chmod +x ./kubectl |
| sudo mv kubectl /usr/local/bin |
| kubectl version --short --client |
| |
| #安装eksctl |
| curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp |
| sudo mv /tmp/eksctl /usr/local/bin |
| eksctl version |
| |
#创建VPC网络和子网
- 创建VPC网络和子网的步骤已单独发帖, 参见:
- 《aws命令行cli创建VPC网络、公有子网、私有子网、nat网关+EIP》 - Elvin™ - 博客园
#创建k8s集群
| #env |
| k8s_name=aws-k8s |
| Region=ap-southeast-1 #新加坡 |
| #获取aws账户id |
| OwnerId=$(aws ec2 describe-vpcs --region ${Region} |jq -r ".Vpcs[0].OwnerId") |
| #使用已有子网(shell变量名不能包含"-", 故使用下划线) |
| private_subnets_id="subnet-lan-a-xxx,subnet-lan-b-xxx" |
| public_subnets_id="subnet-public-a-xxx,subnet-public-b-xxx" |
| # k8s cluster |
| eksctl create cluster \ |
| --region ${Region} \ |
| --name ${k8s_name} \ |
| --version 1.22 \ |
| --vpc-private-subnets ${private_subnets_id} \ |
| --vpc-public-subnets ${public_subnets_id} \ |
| --managed \ |
| --without-nodegroup \ |
| --dry-run |
| |
| # 查看 |
| eksctl get cluster --name ${k8s_name} --region ${Region} |
| |
| # 出错或不要了,可删除 |
| # eksctl delete cluster --name=${k8s_name} |
| |
| # --dry-run 试运行,正式创建时去掉 |
| # --without-nodegroup 不创建node节点 |
| # --vpc-xx 添加已有网络,若不指定会自动创建 |
| # 建议使用多个可用区网络,k8s集群创建后无法更改 |
| # eksctl create cluster --help #查看帮助 |
#创建k8s计算节点组
| #创建b区k8s节点 |
| #k8s nodegroup test |
| eksctl create nodegroup \ |
| --region ${Region} \ |
| --cluster ${k8s_name} \ |
| --name k8s-work-test \ |
| --node-type m5.large \ |
| --nodes 1 \ |
| --nodes-min 1 \ |
| --nodes-max 10 \ |
| --instance-name test-node-b \ |
| --node-ami-family Ubuntu2004 \ |
| --node-private-networking \ |
| --node-zones ${Region}b \ |
| --node-security-groups sg-xxxxxxx \ |
| --ssh-access \ |
| --ssh-public-key aws-bastion \ |
| --full-ecr-access \ |
| --managed \ |
| --dry-run |
| |
| # --nodes 1 创建1个node节点, 规格 m5.large 2核8G |
| # --node-ami-family Ubuntu2004 操作系统Ubuntu20.04 |
| # --node-private-networking 使用私有子网 |
| # --node-zones 可用区 |
| # --node-security-groups 使用已创建的安全组 |
| # --full-ecr-access ECR镜像仓库权限,一定要 |
| # eksctl create nodegroup --help #查看帮助 |
| |
| #节点扩容 |
| eksctl scale nodegroup --region ${Region} \ |
| --cluster ${k8s_name} --nodes=2 --name k8s-work-test |
| |
| # 测试正常就可以删除, 创建配置更高的正式节点 |
| # delete node |
| # eksctl delete nodegroup --cluster=${k8s_name} --name=k8s-work-test |
| |
| #创建b区正式节点组 |
| eksctl create nodegroup \ |
| --region ${Region} \ |
| --cluster ${k8s_name} \ |
| --name k8s-work-b \ |
| --node-type m5.4xlarge \ |
| --nodes 2 \ |
| --nodes-min 1 \ |
| --nodes-max 10 \ |
| --instance-name k8s-node-b \ |
| --max-pods-per-node 110 \ |
| --node-ami-family Ubuntu2004 \ |
| --node-private-networking \ |
| --node-zones ${Region}b \ |
| --node-security-groups sg-xxxxxxx \ |
| --ssh-access \ |
| --ssh-public-key aws-bastion \ |
| --full-ecr-access \ |
| --external-dns-access \ |
| --managed \ |
| --dry-run |
| |
| #规格m5.4xlarge 16核64G |
| #node-zones创建多区,可用于高可用 |
#为k8s集群创建IAM OIDC提供商
| # IAM OIDC即 AWS Identity and Access Management (IAM) OpenID Connect (OIDC) |
| # 创建IAM权限角色时, 需要先开启此功能 |
| |
| #查看是否有OIDC,没有则创建 |
| oidc_id=$(aws eks describe-cluster --region ${Region} --name ${k8s_name} --query "cluster.identity.oidc.issuer" --output text |cut -d'/' -f 5) |
| if [ $(aws iam list-open-id-connect-providers | grep $oidc_id | wc -l ) -eq 0 ]; then |
| eksctl utils associate-iam-oidc-provider --region ${Region} --cluster ${k8s_name} --approve |
| fi |
#eks安装efs csi驱动
- k8s使用AWS EFS储存时用到csi驱动
- efs可使用nfs协议挂载,但k8s节点默认没安装nfs客户端
| #创建IAM policy和角色 |
| curl -o iam-policy-efs.json \ |
| https://raw.githubusercontent.com/kubernetes-sigs/aws-efs-csi-driver/master/docs/iam-policy-example.json |
| |
| aws iam create-policy \ |
| --policy-name EKS_EFS_CSI_Driver_Policy \ |
| --policy-document file://iam-policy-efs.json |
| |
| #创建权限 |
| eksctl create iamserviceaccount \ |
| --cluster ${k8s_name} \ |
| --namespace kube-system \ |
| --name efs-csi-controller-sa \ |
| --attach-policy-arn arn:aws:iam::${OwnerId}:policy/EKS_EFS_CSI_Driver_Policy \ |
| --approve \ |
| --region ${Region} |
| |
| # 更新kubeconfig ~/.kube/config |
| aws eks update-kubeconfig --region ${Region} --name ${k8s_name} |
| |
| #下载yaml文件 |
| kubectl kustomize \ |
| "github.com/kubernetes-sigs/aws-efs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-1.4" > aws-eks-efs-csi.1.4.yaml |
| |
| # vim aws-eks-efs-csi.1.4.yaml |
| # 手动删除如下部分(该ServiceAccount已由前面的eksctl create iamserviceaccount创建, 无需重复创建) |
| apiVersion: v1 |
| kind: ServiceAccount |
| metadata: |
| labels: |
| app.kubernetes.io/name: aws-efs-csi-driver |
| name: efs-csi-controller-sa |
| namespace: kube-system |
| --- |
| |
| #部署efs csi |
| kubectl apply -f aws-eks-efs-csi.1.4.yaml |
#使用efs创建pvc实例
| apiVersion: v1 |
| kind: PersistentVolume |
| metadata: |
| name: aws-efs-test |
| spec: |
| capacity: |
| storage: 2000Gi |
| accessModes: |
| - ReadWriteMany |
| persistentVolumeReclaimPolicy: Retain |
| csi: |
| driver: efs.csi.aws.com |
| volumeHandle: fs-xxx:/data |
| --- |
| apiVersion: v1 |
| kind: PersistentVolumeClaim |
| metadata: |
| name: aws-efs-test |
| spec: |
| accessModes: |
| - ReadWriteMany |
| resources: |
| requests: |
| storage: 2000Gi |
| |
| # fs-xxx 为efs实例id,需要单独创建 |
| # 创建efs后需添加子网和安全组,否则无法访问 |
#安装AWS LB Controller
- k8s中type为LoadBalancer的Service, 在AWS上默认创建的是Classic Load Balancer
- 若要使用NLB、ALB模式的负载均衡器, 或给负载均衡器绑定EIP(固定IP), 必须安装AWS LB Controller
| #创建IAM角色 |
| curl -o iam_lbs_v2.4.2.json \ |
| https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/v2.4.2/docs/install/iam_policy.json |
| |
| aws iam create-policy \ |
| --policy-name iam_lbs_v2.4.2 \ |
| --policy-document file://iam_lbs_v2.4.2.json |
| |
| eksctl create iamserviceaccount \ |
| --cluster=${k8s_name} \ |
| --namespace=kube-system \ |
| --name=aws-load-balancer-controller \ |
| --role-name "AmazonEKSLoadBalancerControllerRole" \ |
| --attach-policy-arn=arn:aws:iam::${OwnerId}:policy/iam_lbs_v2.4.2 \ |
| --approve \ |
| --region ${Region} |
| |
| #安装cert-manager |
| kubectl apply \ |
| --validate=false \ |
| -f https://github.com/jetstack/cert-manager/releases/download/v1.5.4/cert-manager.yaml |
| |
| #下载yaml |
| curl -Lo aws-load-balancer-controller_2.4.2.yaml \ |
| https://github.com/kubernetes-sigs/aws-load-balancer-controller/releases/download/v2.4.2/v2_4_2_full.yaml |
| |
| #更改k8s集群名称 |
| sed -i.bak -e "s|your-cluster-name|${k8s_name}|" aws-load-balancer-controller_2.4.2.yaml |
| |
| #手动删除如下部分(该ServiceAccount已由前面的eksctl create iamserviceaccount创建, 无需重复创建) |
| apiVersion: v1 |
| kind: ServiceAccount |
| metadata: |
| labels: |
| app.kubernetes.io/component: controller |
| app.kubernetes.io/name: aws-load-balancer-controller |
| name: aws-load-balancer-controller |
| namespace: kube-system |
| --- |
| |
| #部署lbs |
| kubectl apply -f aws-load-balancer-controller_2.4.2.yaml |
| |
| #查看 |
| kubectl get deployment -n kube-system aws-load-balancer-controller |
| |
#安装ingress-nginx-controller
| #下载yaml |
| curl -o aws-ingress-nginx.nlb.v1.3.0.yml \ |
| https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.3.0/deploy/static/provider/aws/deploy.yaml |
| |
| #增加spec.ipFamilyPolicy: SingleStack |
#修改LoadBalancer部分的Service如下
| --- |
| apiVersion: v1 |
| kind: Service |
| metadata: |
| annotations: |
| #负载均衡器自定义名称 |
| service.beta.kubernetes.io/aws-load-balancer-name: k8s-ingress-slb |
| #负载均衡 NLB模式 |
| service.beta.kubernetes.io/aws-load-balancer-type: "external" |
| service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip" |
| #使用EIP,互联网模式 |
| service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" |
| #public子网 |
| service.beta.kubernetes.io/aws-load-balancer-subnets: subnet-axxx, subnet-bxxx |
| #弹性IP地址 |
| service.beta.kubernetes.io/aws-load-balancer-eip-allocations: eipalloc-axxx, eipalloc-bxxx |
| #获取客户端真实IP |
| service.beta.kubernetes.io/aws-load-balancer-target-group-attributes: preserve_client_ip.enabled=true |
| labels: |
| app.kubernetes.io/component: controller |
| app.kubernetes.io/instance: ingress-nginx |
| app.kubernetes.io/name: ingress-nginx |
| app.kubernetes.io/part-of: ingress-nginx |
| app.kubernetes.io/version: 1.3.0 |
| name: ingress-nginx-controller |
| namespace: ingress-nginx |
| spec: |
| type: LoadBalancer |
| # externalTrafficPolicy: Local |
| ipFamilyPolicy: SingleStack |
| ipFamilies: |
| - IPv4 |
| ports: |
| - appProtocol: http |
| name: http |
| port: 80 |
| protocol: TCP |
| targetPort: http |
| - appProtocol: https |
| name: https |
| port: 443 |
| protocol: TCP |
| targetPort: https |
| selector: |
| app.kubernetes.io/component: controller |
| app.kubernetes.io/instance: ingress-nginx |
| app.kubernetes.io/name: ingress-nginx |
| |
| #部署 |
| kubectl apply -f aws-ingress-nginx.nlb.v1.3.0.yml |
| |
| #查看, 获取EXTERNAL-IP地址 |
| kubectl get svc ingress-nginx-controller -n ingress-nginx |
| |
| #ping测试: 确认EXTERNAL-IP域名解析出的ip是否为自己的EIP地址 |
| ping k8s-ingress-slb-xxx.elb.${Region}.amazonaws.com |
| |
| #访问测试 |
| curl -I k8s-ingress-slb-xxx.elb.${Region}.amazonaws.com |
#使用私有镜像仓库,并部署服务测试
| #创建存储库nginx |
| aws ecr create-repository \ |
| --repository-name nginx \ |
| --region $Region |
| |
| #登录储存库(缓存的登录凭证有效期12小时) |
| aws ecr get-login-password --region $Region \ |
| | docker login --username AWS --password-stdin ${OwnerId}.dkr.ecr.${Region}.amazonaws.com |
| |
| #下载公共镜像, 改tag为私有储存库地址 |
| docker pull public.ecr.aws/nginx/nginx:alpine |
| docker tag public.ecr.aws/nginx/nginx:alpine \ |
| ${OwnerId}.dkr.ecr.${Region}.amazonaws.com/nginx:alpine |
| |
| #push镜像到新建的储存库 |
| docker push ${OwnerId}.dkr.ecr.${Region}.amazonaws.com/nginx:alpine |
| |
| #deploy test |
| kubectl create deployment nginx --port=80 \ |
| --image=${OwnerId}.dkr.ecr.${Region}.amazonaws.com/nginx:alpine |
| |
| #查看 |
| kubectl get pod |
| |
| #生命周期策略示例, 只保留3个镜像版本(tag), 超出的自动过期删除 |
| cat >aws-ecr-policy.json <<EOF |
| { |
| "rules": [ |
| { |
| "rulePriority": 1, |
| "description": "Keep only 3 image", |
| "selection": { |
| "tagStatus": "any", |
| "countType": "imageCountMoreThan", |
| "countNumber": 3 |
| }, |
| "action": { |
| "type": "expire" |
| } |
| } |
| ] |
| } |
| EOF |
| #创建策略 |
| aws ecr put-lifecycle-policy --region $Region \ |
| --repository-name nginx \ |
| --lifecycle-policy-text file://aws-ecr-policy.json |
| |
| #删除清理pod |
| kubectl delete deploy/nginx |
| |
| #删除存储库 |
| aws ecr delete-repository \ |
| --region $Region --force \ |
| --repository-name nginx |
- k8s节点有pull私有镜像仓库(ECR)的权限, 是因为创建节点组时使用了参数--full-ecr-access
- AWS ECR镜像储存服务不支持目录,只能分别给每个镜像创建储存库
- aws ecr get-login-password生成的凭证有效期12小时,可使用定时任务每天登录2次解决