环境:centos7.6 docker-ce-20.10.9 kubernetes-version v1.22.17
本篇来讲解如何在centos下安装部署高可用k8s集群
#准备5台服务器,角色分配如下
192.168.100.23 master01、etcd01、keepalived+nginx(vip:192.168.100.200)
192.168.100.24 master02、etcd02、keepalived+nginx(vip:192.168.100.200)
192.168.100.25 master03、etcd03、keepalived+nginx(vip:192.168.100.200)
192.168.100.26 node01
192.168.100.27 node02
keepalived+nginx实现高可用+反向代理,这里为了节约服务器,将keepalived+nginx部署在master节点上。
keepalived会虚拟一个vip,vip任意绑定在一台master节点上,使用nginx对3台master节点进行反向代理。在初始化k8s集群的时候,IP填写的是vip,这样安装好k8s集群之后,对kubectl客户端而言,访问的是vip:16443端口,该端口是nginx监听的端口,nginx会将请求反向代理到3个master节点上的6443端口。
#yum install ntp -y && systemctl start ntpd && systemctl enable ntpd;
yum install chrony -y && systemctl enable --now chronyd
yum install epel-release -y && yum install jq -y
yum install vim lsof net-tools zip unzip tree wget curl bash-completion pciutils gcc make lrzsz tcpdump bind-utils -y
sed -ri 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
setenforce 0
echo "检查是否关闭selinux:";getenforce && grep 'SELINUX=disabled' /etc/selinux/config
systemctl stop firewalld.service && systemctl disable firewalld.service
echo "检查是否关闭防火墙:";systemctl status firewalld.service | grep -E 'Active|disabled'
sed -ri 's/.*swap.*/#&/' /etc/fstab
swapoff -a
echo "检查swap是否关闭:";grep -i 'swap' /etc/fstab;free -h | grep -i 'swap'
systemctl stop NetworkManager.service && systemctl disable NetworkManager.service
echo "检查是否关闭NetworkManager:";systemctl status NetworkManager.service | grep -E 'Active|disabled'
#每台主机设置自己的主机名
hostnamectl set-hostname master01
hostnamectl set-hostname master02
hostnamectl set-hostname master03
hostnamectl set-hostname node01
hostnamectl set-hostname node02
#写入/etc/hosts文件
cat >> /etc/hosts <<EOF
192.168.100.23 master01
192.168.100.24 master02
192.168.100.25 master03
192.168.100.26 node01
192.168.100.27 node02
EOF
3台master节点都要安装keepalived软件:
#3台master节点操作
yum install keepalived -y
cp /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf_bak
#keepalived配置文件的参数含义可以参考:https://blog.csdn.net/MssGuo/article/details/127330115
#master01节点的keepalived配置文件内容
#这里没有配置keepalived检测16443端口或nginx挂掉的情况,有需要自行添加即可
[root@master01 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_strict
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state MASTER
interface ens192
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.100.200
}
}
#master02节点的keepalived配置文件内容
[root@master02 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_strict
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state BACKUP
interface ens192
virtual_router_id 51
priority 60
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.100.200
}
}
#master03节点的keepalived配置文件内容
[root@master03 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_strict
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state BACKUP
interface ens192
virtual_router_id 51
priority 40
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.100.200
}
}
#3台依次启动keepalived
systemctl start keepalived.service && systemctl enable keepalived.service
systemctl status keepalived.service
#查看vip,发现vip现在是在master01上,master02和master03均没有vip
ip a | grep '192.168.100.200'
#检测vip是否会漂移,关闭master01节点的keepalived
systemctl stop keepalived.service
#这时发现vip漂移到了master02上,master01和master03均没有vip
#重启keepalived服务之后vip又回到了master01节点,因为默认配置的是vip抢占模式,符合设计逻辑
在3台master节点上安装nginx软件:
#nginx需要用到pcre库,pcre库全称是Perl Compatible Regular Expressions,翻译为Perl兼容正则表达式,是为了
#让Nginx支持URL重写的rewrite模块,若不安装pcre库,则Nginx无法使用rewrite模块。
#安装nginx的依赖
yum -y install gcc gcc-c++ make pcre pcre-devel zlib-devel zlib openssl-devel openssl
#参照官网安装nginx,官网地址:http://nginx.org/en/linux_packages.html#RHEL
yum install yum-utils
cat >/etc/yum.repos.d/nginx.repo<<'EOF'
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
[nginx-mainline]
name=nginx mainline repo
baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
gpgcheck=1
enabled=0
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
EOF
yum-config-manager --enable nginx-mainline
yum install nginx -y
#注意:nginx配置为4层反向代理,配置7层反向代理时好像协议方面存在问题,暂未解决,配置4层就没有问题
#直接修改主配置文件,添加下面的这段stream内容
[root@master01 nginx]# cat /etc/nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
#添加了stream 这一段,其他的保持默认即可
stream {
log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
access_log /var/log/nginx/k8s-access.log main;
upstream k8s-apiserver {
server 192.168.100.23:6443; #master01的IP和6443端口
server 192.168.100.24:6443; #master02的IP和6443端口
server 192.168.100.25:6443; #master03的IP和6443端口
}
server {
listen 16443; #监听的是16443端口,因为nginx和master复用机器,所以不能是6443端口
proxy_pass k8s-apiserver; #使用proxy_pass模块进行反向代理
}
}
#http模块保持默认即可
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
#gzip on;
include /etc/nginx/conf.d/*.conf;
}
[root@master01 nginx]#
systemctl enable --now nginx
systemctl status nginx
netstat -lntup| grep 16443
#将nginx配置文件发送到master02、master03
scp /etc/nginx/nginx.conf root@master02:/etc/nginx/
scp /etc/nginx/nginx.conf root@master03:/etc/nginx/
#同样启动master02、master03上的nginx
systemctl enable --now nginx
systemctl status nginx
netstat -lntup| grep 16443
#master节点和node节点都要配置
touch /etc/sysctl.d/k8s.conf
cat >> /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
net.ipv4.ip_forward=1
vm.swappiness=0
EOF
sysctl --system
#配置k8s的yum源,master节点和node节点都要配置
cat >/etc/yum.repos.d/kubernetes.repo <<'EOF'
[kubernetes]
name = Kubernetes
baseurl = https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled = 1
gpgcheck = 0
repo_gpgcheck = 0
gpgkey = https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
每台k8s节点都要安装docker:
#在所有k8s节点上,包含master节点和node节点上都要安装docker
yum remove docker \
docker-client \
docker-client-latest \
docker-common \
docker-latest \
docker-latest-logrotate \
docker-logrotate \
docker-engine \
docker-ce
yum install -y yum-utils
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum list docker-ce --showduplicates | sort -r
#yum -y install docker-ce docker-ce-cli containerd.io
#安装docker-ce-20.10而不是安装最新的docker版本,因为k8s 1.22.17不一定支持最新的docker版本
yum -y install docker-ce-20.10.9 docker-ce-cli-20.10.9 containerd.io
mkdir /etc/docker/
cat>> /etc/docker/daemon.json <<'EOF'
{
"registry-mirrors": ["https://ghj8urvv.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
#注意,上面这两句是添加镜像加速器地址和修改docker的cgroupdriver为systemd,镜像加速器可以去阿里云获取
#每个人的阿里云账号里面的镜像加速器都是不同的,不要使用我这个,当然也可以不配置镜像加速器
systemctl enable --now docker
systemctl status docker
#检查加速器配置和cgroup是否配置成功
docker info |grep 'Cgroup Driver' ;docker info | grep -A 1 'Registry Mirrors'
#master节点和node节点都安装kubeadm、kubelet、kubectl
yum list --showduplicates | grep kubeadm
#正常情况下kubectl只需在master节点安装,但是这里如果不显式安装kubectl的话,yum会默认将其作为依赖安装,而安装的版本可能不是1.22.17
#所以干脆所有节点都安装了
yum -y install kubelet-1.22.17 kubeadm-1.22.17 kubectl-1.22.17
systemctl enable kubelet
#仅在master01节点执行初始化
#注意
#apiserver-advertise-address设置master01本机的ip地址
#apiserver-bind-port是api-server的6443端口,默认也是6443端口
#control-plane-endpoint设置为vip+nginx的端口
#可以使用kubeadm init --help查看一下命令帮助
#模拟执行,这里加了--dry-run只是模拟执行看看有没有报错,并未真正安装
kubeadm init \
--apiserver-advertise-address=192.168.100.23 \
--apiserver-bind-port=6443 \
--control-plane-endpoint=192.168.100.200:16443 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.22.17 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 --dry-run
#如果输出没有报错,去掉--dry-run参数,开始真正执行:
kubeadm init \
--apiserver-advertise-address=192.168.100.23 \
--apiserver-bind-port=6443 \
--control-plane-endpoint=192.168.100.200:16443 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.22.17 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16
#这时再开一个终端执行docker images就可以看到拉取了很多k8s的镜像
#如果报错了,需要排查错误,然后清空环境
kubeadm reset
rm -rf /etc/cni
iptables -F
yum install ipvsadm -y
ipvsadm --clear
rm -rf $HOME/.kube/config
#然后重新执行kubeadm init命令初始化即可
#最终初始化成功后会输出以下信息
.......................................
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.168.100.200:16443 --token x1v36a.lqe5ul9zpzx55b10 \
--discovery-token-ca-cert-hash sha256:869a5df85403ce519a47b6444dd120d88feccbf54356e510dc3c09f55a76f678 \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.100.200:16443 --token x1v36a.lqe5ul9zpzx55b10 \
--discovery-token-ca-cert-hash sha256:869a5df85403ce519a47b6444dd120d88feccbf54356e510dc3c09f55a76f678
[root@master01 nginx]#
#按照上面的信息提示,执行对应的步骤即可
#上面初始化完成master01节点之后会提示你在master节点或node节点执行对应的命令来将master节点或node节点加入k8s集群
#注意:这段kubeadm join命令的token有效期只有24h,24h后就过期,需要执行kubeadm token create --print-join-command 重新生成token,但是
#要注意,重新生成的加入集群命令默认是node节点角色加入的,如果新节点是作为master角色加入集群,需要在打印出来的命令后面添加--control-plane 参数再执行。
#首先需要在master02和master03上下载镜像
#可以在master01上看看需要下载哪些镜像
[root@master01 ~]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
registry.aliyuncs.com/google_containers/kube-apiserver v1.22.17 2b5e9c96248f 9 months ago 128MB
registry.aliyuncs.com/google_containers/kube-controller-manager v1.22.17 c7ab721dfdae 9 months ago 122MB
registry.aliyuncs.com/google_containers/kube-scheduler v1.22.17 d4893b67e97f 9 months ago 52.7MB
registry.aliyuncs.com/google_containers/kube-proxy v1.22.17 77c8bfac1781 9 months ago 104MB
registry.aliyuncs.com/google_containers/etcd 3.5.6-0 fce326961ae2 9 months ago 299MB
registry.aliyuncs.com/google_containers/coredns v1.8.4 8d147537fb7d 2 years ago 47.6MB
registry.aliyuncs.com/google_containers/pause 3.5 ed210e3e4a5b 2 years ago 683kB
[root@master01 ~]#
#然后去master02和master03上下载这些镜像即可
docker pull registry.aliyuncs.com/google_containers/kube-apiserver:v1.22.17
docker pull registry.aliyuncs.com/google_containers/kube-controller-manager:v1.22.17
docker pull registry.aliyuncs.com/google_containers/kube-scheduler:v1.22.17
docker pull registry.aliyuncs.com/google_containers/kube-proxy:v1.22.17
docker pull registry.aliyuncs.com/google_containers/etcd:3.5.6-0
docker pull registry.aliyuncs.com/google_containers/coredns:v1.8.4
docker pull registry.aliyuncs.com/google_containers/pause:3.5
#master02、master03节点上创建目录
mkdir /etc/kubernetes/pki/etcd -p
# 在master01节点上,将master01节点上的证书拷贝到master02、master03节点上
scp -rp /etc/kubernetes/pki/ca.* master02:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/sa.* master02:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master02:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/etcd/ca.* master02:/etc/kubernetes/pki/etcd/
scp -rp /etc/kubernetes/admin.conf master02:/etc/kubernetes/
scp -rp /etc/kubernetes/pki/ca.* master03:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/sa.* master03:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master03:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/etcd/ca.* master03:/etc/kubernetes/pki/etcd/
scp -rp /etc/kubernetes/admin.conf master03:/etc/kubernetes/
#根据上面初始化成功的信息提示,复制粘贴命令到master02、master03节点执行即可
kubeadm join 192.168.100.200:16443 --token x1v36a.lqe5ul9zpzx55b10 \
--discovery-token-ca-cert-hash sha256:869a5df85403ce519a47b6444dd120d88feccbf54356e510dc3c09f55a76f678 \
--control-plane
#执行成功如下,按照提示操作即可
[mark-control-plane] Marking the node master02 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
[root@master02 pki]# mkdir -p $HOME/.kube
[root@master02 pki]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 pki]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
#node节点直接执行命令即可,不需要做什么配置
#在node01、node02节点执行下面命令
kubeadm join 192.168.100.200:16443 --token x1v36a.lqe5ul9zpzx55b10 \
--discovery-token-ca-cert-hash sha256:869a5df85403ce519a47b6444dd120d88feccbf54356e510dc3c09f55a76f678
以上,就创建了3个master节点+2个node节点的k8s集群,在任意一个master节点检查集群:
[root@master01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
master01 NotReady control-plane,master 50m v1.22.17
master02 NotReady control-plane,master 6m58s v1.22.17
master03 NotReady control-plane,master 6m10s v1.22.17
node01 NotReady <none> 39s v1.22.17
node02 NotReady <none> 12s v1.22.17
[root@master01 ~]#
[root@master01 ~]# kubectl config view
apiVersion: v1
clusters:
- cluster:
certificate-authority-data: DATA+OMITTED
server: https://192.168.100.200:16443 #可以看到,监听在vip和16443端口上
name: kubernetes
contexts:
- context:
cluster: kubernetes
user: kubernetes-admin
name: kubernetes-admin@kubernetes
current-context: kubernetes-admin@kubernetes
kind: Config
preferences: {}
users:
- name: kubernetes-admin
user:
client-certificate-data: DATA+OMITTED
client-key-data: DATA+OMITTED
#安装flannel网络
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
kubectl apply -f kube-flannel.yml
[root@master01 nginx]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-flannel kube-flannel-ds-6tzzk 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-8n6nc 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-8rtgx 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-bwwrv 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-nmbzq 1/1 Running 0 5m43s
kube-system coredns-7f6cbbb7b8-mf22c 1/1 Running 0 60m
kube-system coredns-7f6cbbb7b8-n2w94 1/1 Running 0 60m
kube-system etcd-master01 1/1 Running 4 60m
kube-system etcd-master02 1/1 Running 0 17m
kube-system etcd-master03 1/1 Running 0 16m
kube-system kube-apiserver-master01 1/1 Running 4 60m
kube-system kube-apiserver-master02 1/1 Running 0 17m
kube-system kube-apiserver-master03 1/1 Running 1 (16m ago) 16m
kube-system kube-controller-manager-master01 1/1 Running 5 (17m ago) 60m
kube-system kube-controller-manager-master02 1/1 Running 0 17m
kube-system kube-controller-manager-master03 1/1 Running 0 15m
kube-system kube-proxy-6lzs9 1/1 Running 0 11m
kube-system kube-proxy-9tljk 1/1 Running 0 17m
kube-system kube-proxy-jzq49 1/1 Running 0 60m
kube-system kube-proxy-mk5w8 1/1 Running 0 10m
kube-system kube-proxy-rhmnv 1/1 Running 0 16m
kube-system kube-scheduler-master01 1/1 Running 5 (17m ago) 60m
kube-system kube-scheduler-master02 1/1 Running 0 17m
kube-system kube-scheduler-master03 1/1 Running 0 16m
[root@master01 nginx]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready control-plane,master 61m v1.22.17
master02 Ready control-plane,master 17m v1.22.17
master03 Ready control-plane,master 16m v1.22.17
node01 Ready <none> 11m v1.22.17
node02 Ready <none> 11m v1.22.17
[root@master01 nginx]#
#每个节点都配置docker命令自动补全功能
yum install bash-completion -y
curl -L https://raw.githubusercontent.com/docker/compose/1.24.1/contrib/completion/bash/docker-compose -o /etc/bash_completion.d/docker-compose
source /etc/bash_completion.d/docker-compose
#master节点配置kubectl命令补全功能
yum install -y bash-completion
echo 'source /usr/share/bash-completion/bash_completion' >>/root/.bashrc
echo 'source <(kubectl completion bash)' >>/root/.bashrc
source /root/.bashrc
kubectl create deployment httpd --image=httpd
kubectl expose deployment httpd --port=80 --type=NodePort
#验证正常
[root@master01 nginx]# kubectl get svc httpd
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
httpd NodePort 10.106.207.85 <none> 80:30251/TCP 4s
[root@master01 nginx]# curl master02:30251
<html><body><h1>It works!</h1></body></html>
[root@master01 nginx]#
#把master01节点关机测试
#发现关掉任意一台master节点后,在其余master节点上执行kubectl get nodes时而成功时而失败,原因未知,不知是否与etcd有关,因为etcd都是安装在
#master节点上,也有可能是nginx仍然把请求发送给关机的master节点导致无法响应。
如果服务器足够,建议将keepalived+nginx单独准备两台服务器,如下:
#准备7台服务器,角色分配如下
192.168.100.21 keepalived+nginx(vip:192.168.100.200)
192.168.100.22 keepalived+nginx(vip:192.168.100.200)
192.168.100.23 master01、etcd01
192.168.100.24 master02、etcd02
192.168.100.25 master03、etcd03
192.168.100.26 node01
192.168.100.27 node02
其余安装步骤与上面相似。