vi /etc/kubernetes/manifests/kube-apiserver.yaml
......
spec:
containers:
- command:
- kube-apiserver
- --advertise-address=192.168.10.65
- --allow-privileged=true
- --authorization-mode=Node,RBAC
- --client-ca-file=/etc/kubernetes/pki/ca.crt
- --enable-admission-plugins=NodeRestriction
- --enable-bootstrap-token-auth=true
- --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
- --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
- --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
- --etcd-servers=https://127.0.0.1:2379
- --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
- --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt
- --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key
- --requestheader-allowed-names=front-proxy-client
- --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
- --requestheader-extra-headers-prefix=X-Remote-Extra-
- --requestheader-group-headers=X-Remote-Group
- --requestheader-username-headers=X-Remote-User
- --secure-port=6443
- --service-account-issuer=https://kubernetes.default.svc.cluster.local
- --service-account-key-file=/etc/kubernetes/pki/sa.pub
- --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
- --service-cluster-ip-range=10.96.0.0/12
- --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
- --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
- --feature-gates=RemoveSelfLink=false
# 添加如下两行配置
- --default-not-ready-toleration-seconds=40
- --default-unreachable-toleration-seconds=20
......
重启 kubelet(kube-apiserver 是静态 Pod,保存清单后 kubelet 通常会自动重建它;若配置未生效,再手动重启 kubelet)
systemctl daemon-reload
systemctl restart kubelet
注意:修改的参数只对之后创建的 Pod 生效,已存在的旧 Pod 不生效(容忍时间是在 Pod 创建时写入其 spec 的,旧 Pod 需要重建后才会使用新值)
[root@master ~]#kubectl get nodes
NAME STATUS ROLES AGE VERSION
master Ready control-plane,master 46d v1.22.5
node01 Ready <none> 46d v1.22.5
node02 Ready <none> 46d v1.22.5
[root@master ~]#kubectl get pods -o wide -n test
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
open-test-59fd6fdfc4-2tmlj 1/1 Running 0 27m 10.244.2.78 node02 <none> <none>
open-test-59fd6fdfc4-6ft4b 1/1 Running 0 26m 10.244.0.64 master <none> <none>
open-test-59fd6fdfc4-drcph 1/1 Running 0 27m 10.244.2.81 node02 <none> <none>
open-test-59fd6fdfc4-k85jj 1/1 Running 0 26m 10.244.2.82 node02 <none> <none>
open-test-59fd6fdfc4-snk8l 1/1 Running 0 27m 10.244.2.79 node02 <none> <none>
open-test-59fd6fdfc4-t2lk5 1/1 Running 0 27m 10.244.0.60 master <none> <none>
open-test-59fd6fdfc4-vz4tb 1/1 Running 0 27m 10.244.2.80 node02 <none> <none>
open-test-59fd6fdfc4-w2dq7 1/1 Running 0 27m 10.244.0.61 master <none> <none>
open-test-59fd6fdfc4-xlkdm 1/1 Running 0 26m 10.244.0.63 master <none> <none>
open-test-59fd6fdfc4-xt5b8 1/1 Running 0 27m 10.244.0.62 master <none> <none>
目前 Pod 分布在 master 和 node02 上。关停 node02,观察 node02 节点多久后变为 NotReady,以及 Pod 多久后开始漂移到其他节点
# 10.37
[root@master ~]#kubectl get nodes
NAME STATUS ROLES AGE VERSION
master Ready control-plane,master 46d v1.22.5
node01 Ready <none> 46d v1.22.5
node02 Ready <none> 46d v1.22.5
# 10.37 用秒表实测,大约 40s 左右 node02 变为 NotReady
[root@master ~]#kubectl get nodes
NAME STATUS ROLES AGE VERSION
master Ready control-plane,master 46d v1.22.5
node01 Ready <none> 46d v1.22.5
node02 NotReady <none> 46d v1.22.5
# 10.38 一分钟左右 Pod 开始漂移(新 Pod 被调度到 node01,node02 上的旧 Pod 处于 Terminating 状态)
[root@master ~]#kubectl get pods -o wide -n test
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
open-test-59fd6fdfc4-2c52v 1/1 Running 0 12s 10.244.1.63 node01 <none> <none>
open-test-59fd6fdfc4-2tmlj 1/1 Terminating 0 35m 10.244.2.78 node02 <none> <none>
open-test-59fd6fdfc4-6ft4b 1/1 Running 0 33m 10.244.0.64 master <none> <none>
open-test-59fd6fdfc4-bc2bb 1/1 Running 0 12s 10.244.1.67 node01 <none> <none>
open-test-59fd6fdfc4-drcph 1/1 Terminating 0 35m 10.244.2.81 node02 <none> <none>
open-test-59fd6fdfc4-k85jj 1/1 Terminating 0 33m 10.244.2.82 node02 <none> <none>
open-test-59fd6fdfc4-mnz6v 1/1 Running 0 12s 10.244.1.64 node01 <none> <none>
open-test-59fd6fdfc4-snk8l 1/1 Terminating 0 35m 10.244.2.79 node02 <none> <none>
open-test-59fd6fdfc4-srxx5 1/1 Running 0 12s 10.244.1.65 node01 <none> <none>
open-test-59fd6fdfc4-t2lk5 1/1 Running 0 35m 10.244.0.60 master <none> <none>
open-test-59fd6fdfc4-v4ntz 1/1 Running 0 12s 10.244.1.66 node01 <none> <none>
open-test-59fd6fdfc4-vz4tb 1/1 Terminating 0 35m 10.244.2.80 node02 <none> <none>
open-test-59fd6fdfc4-w2dq7 1/1 Running 0 35m 10.244.0.61 master <none> <none>
open-test-59fd6fdfc4-xlkdm 1/1 Running 0 33m 10.244.0.63 master <none> <none>
open-test-59fd6fdfc4-xt5b8 1/1 Running 0 35m 10.244.0.62 master <none> <none>
官方默认的参数:节点大概 1m 会变成 NotReady,约 5m 后 Pod 才开始漂移;修改上述参数后速度明显变快。需要注意的是,节点变为 NotReady 的时间和 Pod 漂移的时间会受集群规模、网络延迟等因素影响,不能一概而论。
如果只想针对个别 Deployment 调整,可以新建 toleration-custom.yaml,内容如下:
spec:
template:
spec:
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 40
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 20
使用 patch 的方式将上述配置添加到指定的 Deployment
kubectl patch deploy nginx-check --patch "$(cat toleration-custom.yaml)"
这种方式只对需要的 Pod 应用自定义的容忍时间;而修改 kube-apiserver.yaml 是全局设置,会作用于之后创建的所有 Pod