• CentOS 7 安装 Kubernetes 1.24.x 保姆级教程


    基础环境

    升级内核

    # 启用 ELRepo 仓库
    sudo rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
    sudo rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-6.el7.elrepo.noarch.rpm
    
    # 查看可用的系统内核包
    sudo yum --disablerepo="*" --enablerepo="elrepo-kernel" list available
    # lt:长期支持版本;ml:主线稳定版
    # kernel-lt.x86_64                                                               5.4.212-1.el7.elrepo                                              elrepo-kernel
    # kernel-lt-tools-libs.x86_64                                                    5.4.212-1.el7.elrepo                                              elrepo-kernel
    # kernel-lt-tools-libs-devel.x86_64                                              5.4.212-1.el7.elrepo                                              elrepo-kernel
    # kernel-ml.x86_64                                                               5.19.8-1.el7.elrepo                                               elrepo-kernel
    # kernel-ml-tools-libs.x86_64                                                    5.19.8-1.el7.elrepo                                               elrepo-kernel
    # kernel-ml-tools-libs-devel.x86_64                                              5.19.8-1.el7.elrepo                                               elrepo-kernel
    
    # 安装
    sudo yum --enablerepo=elrepo-kernel install kernel-ml
    # 查看系统上的所有可用内核
    sudo awk -F\' '$1=="menuentry " {print i++ " : " $2}' /etc/grub2.cfg
    # 以下两种方式均可以设置默认使用的内核
    sudo grub2-set-default 0
    # 或者编辑 /etc/default/grub 并写入以下内容(不包括 >)
    > GRUB_TIMEOUT=5
    > GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
    > GRUB_DEFAULT=0
    > GRUB_DISABLE_SUBMENU=true
    > GRUB_TERMINAL_OUTPUT="console"
    > GRUB_CMDLINE_LINUX="crashkernel=auto console=ttyS0 console=tty0 panic=5"
    > GRUB_DISABLE_RECOVERY="true"
    > GRUB_TERMINAL="serial console"
    > GRUB_TERMINAL_OUTPUT="serial console"
    > GRUB_SERIAL_COMMAND="serial --speed=9600 --unit=0 --word=8 --parity=no --stop=1"
    
    # 生成 grub 配置文件并重启
    sudo grub2-mkconfig -o /boot/grub2/grub.cfg
    sudo reboot
    
    # 重启后验证
    uname -a
    
    # (可选)删除旧内核
    # 查看现有内核
    sudo rpm -qa | grep kernel
    sudo yum remove kernel-tools-libs-3.10.0-1160.el7.x86_64 kernel-3.10.0-1160.el7.x86_64 kernel-tools-3.10.0-1160.el7.x86_64
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43

    设置 yum 源

    # 系统源替换为阿里源
    sudo curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
    # 添加 kubernetes 源。需提前切换为管理员权限
    cat <<EOF > /etc/yum.repos.d/kubernetes.repo
    [kubernetes]
    name=Kubernetes
    baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
    enabled=1
    gpgcheck=0
    repo_gpgcheck=0
    EOF
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    安装必要工具

    sudo yum install wget jq psmisc vim net-tools telnet yum-utils device-mapper-persistent-data lvm2 git -y
    
    • 1

    系统设置

    # 关闭防火墙、dnsmasq 和 NetworkManager
    sudo systemctl disable --now firewalld 
    sudo systemctl disable --now dnsmasq
    sudo systemctl disable --now NetworkManager
    
    # 关闭 selinux
    sudo setenforce 0
    sudo sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
    sudo sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
    
    # 关闭交换分区
    sudo swapoff -a && sysctl -w vm.swappiness=0
    sudo sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    (可选)时间同步

    让集群中的所有节点保持相同的时间,防止因为时间误差导致节点之间通信失败

    # 安装 ntpdate
    sudo rpm -ivh http://mirrors.wlnmp.com/centos/wlnmp-release-centos.noarch.rpm
    sudo yum install ntpdate -y
    
    # 设置时区为 Asia/Shanghai,并立即同步一次时间
    sudo ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
    sudo echo 'Asia/Shanghai' >/etc/timezone
    sudo ntpdate time2.aliyun.com
    # 将以下定时任务加入 crontab 文件(可通过 sudo crontab -e 编辑),每 5 分钟同步一次时间
    */5 * * * * /usr/sbin/ntpdate time2.aliyun.com
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10

    配置 limit 参数,防止资源不够用

    sudo ulimit -SHn 65535
    sudo vim /etc/security/limits.conf
    # 末尾添加如下内容
    * soft nofile 65536
    * hard nofile 131072
    * soft nproc 65535
    * hard nproc 655350
    * soft memlock unlimited
    * hard memlock unlimited
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    安装并配置 ipvs

    sudo yum install ipvsadm ipset sysstat conntrack libseccomp -y
    
    sudo modprobe -- ip_vs
    sudo modprobe -- ip_vs_rr
    sudo modprobe -- ip_vs_wrr
    sudo modprobe -- ip_vs_sh
    sudo modprobe -- nf_conntrack
    
    vim /etc/modules-load.d/ipvs.conf 
    # 加入以下内容
    ip_vs
    ip_vs_lc
    ip_vs_wlc
    ip_vs_rr
    ip_vs_wrr
    ip_vs_lblc
    ip_vs_lblcr
    ip_vs_dh
    ip_vs_sh
    ip_vs_fo
    ip_vs_nq
    ip_vs_sed
    ip_vs_ftp
    ip_vs_sh
    nf_conntrack
    ip_tables
    ip_set
    xt_set
    ipt_set
    ipt_rpfilter
    ipt_REJECT
    ipip
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
    overlay
    br_netfilter
    EOF
    
    • 1
    • 2
    • 3
    • 4
    sudo modprobe overlay
    sudo modprobe br_netfilter
    sudo systemctl enable --now systemd-modules-load.service
    
    • 1
    • 2
    • 3

    内核参数设置

    cat <<EOF > /etc/sysctl.d/k8s.conf
    net.ipv4.ip_forward = 1
    net.bridge.bridge-nf-call-iptables = 1
    net.bridge.bridge-nf-call-ip6tables = 1
    fs.may_detach_mounts = 1
    net.ipv4.conf.all.route_localnet = 1
    vm.overcommit_memory=1
    vm.panic_on_oom=0
    fs.inotify.max_user_watches=89100
    fs.file-max=52706963
    fs.nr_open=52706963
    net.netfilter.nf_conntrack_max=2310720
    
    net.ipv4.tcp_keepalive_time = 600
    net.ipv4.tcp_keepalive_probes = 3
    net.ipv4.tcp_keepalive_intvl =15
    net.ipv4.tcp_max_tw_buckets = 36000
    net.ipv4.tcp_tw_reuse = 1
    net.ipv4.tcp_max_orphans = 327680
    net.ipv4.tcp_orphan_retries = 3
    net.ipv4.tcp_syncookies = 1
    net.ipv4.tcp_max_syn_backlog = 16384
    net.ipv4.ip_conntrack_max = 65536
    net.ipv4.tcp_max_syn_backlog = 16384
    net.ipv4.tcp_timestamps = 0
    net.core.somaxconn = 16384
    EOF
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    sudo sysctl --system
    sudo reboot
    
    • 1
    • 2

    docker

    安装 docker

    安装 docker 请参照官网

    sudo systemctl start docker
    sudo systemctl enable docker
    
    # 修改 cgroup
    cat > /etc/docker/daemon.json <<EOF
    {
      "exec-opts": ["native.cgroupdriver=systemd"]
    }
    EOF
    # 让配置生效
    sudo systemctl daemon-reload
    sudo systemctl restart docker
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12

    安装 cri-dockerd

    参照 https://github.com/Mirantis/cri-dockerd

    如果使用 go 编译,可以使用 https://goproxy.cn/ 作为依赖镜像。

    安装 kubeadm

    # 查看 kubeadm 版本
    yum list kubeadm.x86_64 --showduplicates
    # 安装最新 1.24 版
    sudo yum install -y kubelet-1.24* kubeadm-1.24* kubectl-1.24*
    # 设置开机启动
    sudo systemctl enable --now kubelet
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6

    初始化 kubeadm

    hostnamectl set-hostname k8s-master
    # 修改 /etc/hosts 添加如下内容
    <ip> k8s-master
    
    # 初始化
    kubeadm init --cri-socket unix:///var/run/cri-dockerd.sock --image-repository registry.cn-hangzhou.aliyuncs.com/google_containers --pod-network-cidr=172.20.0.0/16 --service-cidr=172.21.0.0/16 --kubernetes-version=1.24.8
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6

    也可以使用如下 kubeadm-config.yaml 文件(注意:示例中的 serviceSubnet、podSubnet 网段与上面命令行中的不同,请根据实际规划统一)

    apiVersion: kubeadm.k8s.io/v1beta3
    kind: InitConfiguration
    nodeRegistration:
      criSocket: "unix:///var/run/cri-dockerd.sock"
    ---
    apiVersion: kubeadm.k8s.io/v1beta3
    kind: ClusterConfiguration
    kubernetesVersion: v1.24.8
    networking:
      serviceSubnet: 172.18.0.0/16
      podSubnet: 172.19.0.0/16
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    命令可以简化为:

    kubeadm init --config kubeadm-config.yaml --image-repository registry.cn-hangzhou.aliyuncs.com/google_containers
    
    • 1

    错误修复

    Unfortunately, an error has occurred:
            timed out waiting for the condition
    
    This error is likely caused by:
            - The kubelet is not running
            - The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
    
    If you are on a systemd-powered system, you can try to troubleshoot the error with the following commands:
            - 'systemctl status kubelet'
            - 'journalctl -xeu kubelet'
    
    Additionally, a control plane component may have crashed or exited when started by the container runtime.
    To troubleshoot, list all containers using your preferred container runtimes CLI.
    Here is one example how you may list all running Kubernetes containers by using crictl:
            - 'crictl --runtime-endpoint unix:///var/run/cri-dockerd.sock ps -a | grep kube | grep -v pause'
            Once you have found the failing container, you can inspect its logs with:
            - 'crictl --runtime-endpoint unix:///var/run/cri-dockerd.sock logs CONTAINERID'
    error execution phase wait-control-plane: couldn't initialize a Kubernetes cluster
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18

    发生上述错误时,可以按照提示使用 journalctl -xeu kubelet 查看错误信息。一般这种错误有两种常见原因:

    1. docker 的 cgroup 驱动没有设置为 systemd。可以参照前面安装 docker 的内容,将其设置成 systemd
    2. 默认使用了 k8s.gcr.io 源的镜像,因网络原因下载失败。

    本人写此 demo 时,遇到的是第二个问题。通过 journalctl -xeu kubelet 查看日志为:

    Sep 12 06:42:57 k8s-master kubelet[4391]: E0912 06:42:57.628841    4391 remote_runtime.go:212] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed pulling image \"k8s.gcr.io/pause:3.6\": Error response from daemon: Get \"https://k8s.gcr.io/v2/\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"
    Sep 12 06:42:57 k8s-master kubelet[4391]: E0912 06:42:57.629112    4391 kuberuntime_sandbox.go:70] "Failed to create sandbox for pod" err="rpc error: code = Unknown desc = failed pulling image \"k8s.gcr.io/pause:3.6\": Error response from daemon: Get \"https://k8s.gcr.io/v2/\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)" pod="kube-system/kube-scheduler-k8s-master"
    
    • 1
    • 2

    解决方法如下:

    kubeadm reset -f --cri-socket unix:///var/run/cri-dockerd.sock
    docker image pull registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.6
    docker image tag registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.6 k8s.gcr.io/pause:3.6
    
    # 重新执行初始化命令
    
    • 1
    • 2
    • 3
    • 4
    • 5

    子节点加入

    初始化成功后,控制台会打印两个信息:

    1. 使用非 root 账户操作 k8s 时需要执行的配置命令
    2. 子节点加入 k8s 集群的命令

    因为子节点加入集群所用的 token 有有效期,后续有新的节点加入时,可以通过如下命令生成新的 token 及对应的加入命令

    kubeadm token create --print-join-command
    
    • 1

    安装 Calico

    按照官网指导安装即可。为了提高安装速度,下面这几个镜像可以提前下载

    quay.io/tigera/operator
    calico/kube-controllers
    calico/apiserver
    calico/cni
    calico/node-driver-registrar
    calico/csi
    calico/pod2daemon-flexvol
    calico/node
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
  • 相关阅读:
    DataGridView可以点击列排序 Sort(使用BindingList改写)
    gradle 改为 国内镜像 下载
    CentOS安装指定版本的Docker(包括卸载)
    npm install报错 code:128
    数据库安全定义以及重要性简单讲解
    38.迪杰斯特拉(Dijkstra)算法
    使用HTML制作静态网站作业——我的校园运动会(HTML+CSS)
    原来定时器中断是个伪中断
    mysql中的date、datetime、timestamp你还不知道怎么使用吗
    Java集合
  • 原文地址:https://blog.csdn.net/kaiyuanheshang/article/details/126789556