• 一键部署k8s集群


    前置动作

    关闭防火墙

    systemctl disable firewalld && systemctl stop firewalld

    关闭SELinux

    1. sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config && grep 'SELINUX=disabled' /etc/selinux/config
    2. setenforce 0
    3. getenforce

     关闭swap

    1. # 关闭swap
    2. swapoff -a
    3. # 永久关闭swap
    4. sed -ri 's/.*swap.*/#&/' /etc/fstab

    部署k8s集群

    机器至少配置

    序号

    类型

    主机名

    IP

    备注(CPU/内存/硬盘)

    1

    Master

    k8s-api.bcs.local

    192.168.46.128

    8C16G,100G

    2

    Node1

    node-192-168-46-129

    192.168.46.129

    4C8G,100G

    3

    Node2

    node-192-168-46-130

    192.168.46.130

    4C8G,100G

    4

    Node3

    node-192-168-46-131

    192.168.46.131

    4C8G,100G

    软件需求

    需求项具体要求检查命令
    操作系统CentOS 7.9 64 位cat /etc/centos-release
    kernel3.10.0 及以上uname -r
    Swap关闭。防止 io 飙升影响 kubelet 进程。free -m Swap 这行值为 0
    防火墙关闭iptables -vnL 无其他规则
    SELinux关闭。k8s 官方要求。getenforce 的输出为 Disabled
    时区所有服务器时区应该统一,建议使用北京时间使用 timedatectl set-timezone Asia/Shanghai 设置为北京时间。
    时间同步etcd 选举时要求节点间时间差小于 1s配置 chronyd 同步时间
    docker 版本19.03 及更高docker version
    kubernetes 版本限 1.18 或 1.20,其他版本未经测试。用户报告 1.22 以上版本不兼容,1.17 版本部署 bcs 会失败。kubectl version

    部署初始 master

     一键部署的bcs.sh脚本内容如下:

    1. #!/usr/bin/env bash
    2. # Usage: Install BCS
    3. # bcs
    # --- Tunable defaults: every variable below uses ${VAR:-default}, so each
    # --- can be overridden from the environment before invoking this script.
    4. BCS_DIR="$HOME/.bcs"
    5. BK_HOME=${BK_HOME:-/data/bcs}
    6. bcs_override=${bcs_override:-false}
    7. bcs_sysctl=${bcs_sysctl:-1}
    8. # yum
    9. MIRROR_IP=${MIRROR_IP:-null}
    10. # Local yum mirror repository
    11. # A local yum mirror can be configured, e.g.: MIRROR_URL=${MIRROR_URL:-http://192.168.10.41/local/}
    12. # The default below uses the Tencent public mirror
    13. MIRROR_URL=${MIRROR_URL:-https://mirrors.tencent.com}
    14. # helm
    15. BKREPO_URL=${BKREPO_URL:-null}
    16. # docker
    17. DOCKER_LIB=${DOCKER_LIB:-${BK_HOME}/lib/docker}
    18. DOCKER_VERSION=${DOCKER_VERSION:-19.03.9}
    19. DOCKER_LIVE_RESTORE=${DOCKER_LIVE_RESTORE:-false}
    20. # A local registry mirror can be configured, e.g.: REPO_MIRRORS=${REPO_MIRRORS:-[\"http://192.168.10.41/local/\"]}
    21. REPO_MIRRORS=${REPO_MIRRORS:-[\"https://mirror.ccs.tencentyun.com\"]}
    22. DOCKER_BRIDGE=${DOCKER_BRIDGE:-null}
    23. # A local docker registry can be configured, e.g.:
    24. # BK_PUBLIC_REPO=${BK_PUBLIC_REPO:-192.168.10.42:5000}
    25. # BK_RELEASE_REPO=${BK_RELEASE_REPO:-192.168.10.42:5000}
    26. BK_PUBLIC_REPO=${BK_PUBLIC_REPO:-hub.bktencent.com}
    27. BK_RELEASE_REPO=${BK_RELEASE_REPO:-hub.bktencent.com/blueking}
    28. # k8s
    29. KUBELET_LIB=${KUBELET_LIB:-${BK_HOME}/lib/kubelet}
    # NOTE(review): $LAN_IP is referenced here but is only assigned later
    # (cleared at line 64, detected in _on_cloud/_on_baremetal). At this point
    # it is empty unless exported by the caller, so the default of
    # BCS_K8S_CTRL_IP is effectively empty — confirm this is intended.
    30. BCS_K8S_CTRL_IP=${BCS_K8S_CTRL_IP:-$LAN_IP}
    31. K8S_VER=${K8S_VER:-1.20.11}
    32. K8S_SVC_CIDR=${K8S_SVC_CIDR:-10.96.0.0/12}
    33. K8S_POD_CIDR=${K8S_POD_CIDR:-10.244.0.0/16}
    34. K8S_EXTRA_ARGS=${K8S_EXTRA_ARGS:-allowed-unsafe-sysctls: 'net.ipv4.tcp_tw_reuse'}
    35. ETCD_LIB=${ETCD_LIB:-${BK_HOME}/lib/etcd}
    36. BCS_CP_WORKER=${BCS_CP_WORKER:-0}
    37. K8S_CNI=${K8S_CNI:-flannel}
    # Base64-encoded join parameters handed over from the first master node.
    38. join_cmd_b64=${join_cmd_b64:-null}
    39. cluster_env=${cluster_env:-null}
    40. master_join_cmd_b64=${master_join_cmd_b64:-null}
    41. # If a local yum mirror is configured, write its address here; otherwise ignore
    42. # rm -f /etc/yum.repos.d/*.repo
    43. # cat >/etc/yum.repos.d/CentOS-Base.repo<<EOF
    44. # [centos]
    45. # name=CentOS-releasever - Base
    46. # baseurl=http://192.168.10.41/local/
    47. # gpgcheck=0
    48. # enabled=1
    49. # EOF
    50. # yum clean all
    51. # yum makecache
    52. # yum repolist
    53. # strict mode
    54. set -euo pipefail
    55. # reset PATH to a known-good value
    56. PATH=/usr/local/bin:/usr/local/sbin:/usr/sbin:/usr/bin:/sbin:/bin
    57. export PATH
    58. # generic script-framework variables
    59. #SELF_DIR=$(dirname "$(readlink -f "$0")")
    60. #PROGRAM=$(basename "$0")
    61. VERSION=1.0
    62. EXITCODE=0
    63. OP_TYPE=
    64. LAN_IP=
    65. # global defaults
    66. PROJECTS=( bcsenv op helm k8smaster k8snode )
    67. PROJECT=
    68. ON_CLOUD="bare-metal"
    69. # error exit handler
    # Reports the script name, failing line number and exit code on stderr;
    # installed on the ERR trap below.
    70. err_trap_handler () {
    71. MYSELF="$0"
    72. LASTLINE="$1"
    73. LASTERR="$2"
    74. echo "${MYSELF}: line ${LASTLINE} with exit code ${LASTERR}" >&2
    75. }
    76. trap 'err_trap_handler ${LINENO} $?' ERR
    # Print the command-line help text to stdout. The supported module list is
    # interpolated from the global PROJECTS array.
    77. usage () {
    78. cat <<EOF
    79. 用法:
    80. bcs.sh [ -h --help -? 查看帮助 ]
    81. [ -i, --install 支持安装模块(${PROJECTS[*]}) ]
    82. [ -c, --clean 清理安装模块(${PROJECTS[*]}) ]
    83. [ -r, --render 渲染模块配置(${PROJECTS[*]}) ]
    84. [ -v, --version [可选] 查看脚本版本号 ]
    85. EOF
    86. }
    # Print the help text, then exit with the status passed in $1.
    87. usage_and_exit () {
    88. usage
    89. exit "$1"
    90. }
    # Emit an informational message on stdout, prefixed with "[INFO]: ".
    log () {
      printf '[INFO]: %s\n' "$*"
    }
    # Emit a warning on stderr and bump the global EXITCODE counter, so the
    # script can report an aggregate non-zero status at the end.
    warning () {
      printf '[WARN]: %s\n' "$*" >&2
      EXITCODE=$(( EXITCODE + 1 ))
    }
    # Report the script version (global VERSION) on stdout.
    version () {
      printf 'bcs.sh version %s\n' "$VERSION"
    }
    # Print the arguments wrapped in ANSI reverse-video so they stand out.
    # %b keeps the original `echo -e` behaviour of expanding backslash escapes
    # contained in the arguments themselves.
    highlight () {
      printf '\033[7m %b \033[0m\n' "$*"
    }
    # Print a highlighted error to stderr, then show usage and exit 1.
    104. error () {
    105. highlight "[ERROR]: $*" 1>&2
    106. usage_and_exit 1
    107. }
    # Welcome banner: reports the detected LAN IP and platform (ON_CLOUD).
    108. ok_bcs () {
    109. cat <<EOF
    110. $(
    111. log "LAN_IP: $LAN_IP"
    112. highlight "Welcome to BCS on $ON_CLOUD"
    113. )
    114. EOF
    115. }
    # Farewell banner printed when the script finishes.
    116. bye_bcs () {
    117. cat <<EOF
    118. $(
    119. highlight "Finish"
    120. )
    121. EOF
    122. }
    # Run "$@", retrying once after a short pause; aborts the script via
    # error() once max_attempts have failed.
    _retry () {
      local attempt=1 max_attempts=2 pause=1
      until "$@"; do
        if (( attempt >= max_attempts )); then
          error "The command $* has failed after $attempt attempts."
        fi
        attempt=$(( attempt + 1 ))
        warning "Command failed. Attempt $attempt/$max_attempts:"
        sleep "$pause"
      done
    }
    139. ### Operations helpers: shell completion and the helm chart repository
    # Install operator conveniences: bash-completion plus completion snippets
    # for kubeadm/kubectl/minikube/helm, and register the BlueKing helm repo.
    140. install_op () {
    141. _install_common
    142. op_kubeadm
    143. op_kubectl
    144. op_minikube
    145. op_helm
    146. op_bkrepo "${BKREPO_URL}"
    147. log "Complete"
    148. }
    # Ensure the bash-completion package is present (needed by the op_* helpers).
    149. _install_common () {
    150. if ! rpm -q bash-completion &>/dev/null; then
    151. yum -y install bash-completion || error "Install bash-completion Failed"
    152. fi
    153. }
    # Idempotently (re)write the kubeadm completion block in bcs.env: the old
    # begin/end block is deleted before a fresh one is appended.
    154. op_kubeadm () {
    155. # only when the kubeadm binary is installed
    156. if command -v kubeadm &>/dev/null; then
    157. sed -ri '/bcs config begin for kubeadm/,/bcs config end for kubeadm/d' "$BCS_DIR/bcs.env"
    158. cat >> "$BCS_DIR/bcs.env" << 'EOF'
    159. # bcs config begin for kubeadm
    160. # kubeadm 命令补全
    161. source <(kubeadm completion bash)
    162. # bcs config end for kubeadm
    163. EOF
    164. fi
    165. }
    # Idempotently (re)write the kubectl completion block in bcs.env.
    166. op_kubectl () {
    167. if command -v kubectl &>/dev/null; then
    168. sed -ri '/bcs config begin for kubectl/,/bcs config end for kubectl/d' "$BCS_DIR/bcs.env"
    169. cat >> "$BCS_DIR/bcs.env" << 'EOF'
    170. # bcs config begin for kubectl
    171. # kubectl 命令补全
    172. source <(kubectl completion bash)
    173. # bcs config end for kubectl
    174. EOF
    175. fi
    176. }
    # Idempotently (re)write the minikube completion block in bcs.env.
    177. op_minikube () {
    178. if command -v minikube &>/dev/null; then
    179. sed -ri '/bcs config begin for minikube/,/bcs config end for minikube/d' "$BCS_DIR/bcs.env"
    180. cat >> "$BCS_DIR/bcs.env" << 'EOF'
    181. # bcs config begin for minikube
    182. # minikube 命令补全
    183. source <(minikube completion bash)
    184. # bcs config end for minikube
    185. EOF
    186. fi
    187. }
    # Idempotently (re)write the helm completion + OCI-support block in bcs.env.
    188. op_helm () {
    189. if command -v helm &>/dev/null; then
    190. sed -ri '/bcs config begin for helm/,/bcs config end for helm/d' "$BCS_DIR/bcs.env"
    191. cat >> "$BCS_DIR/bcs.env" << 'EOF'
    192. # bcs config begin for helm
    193. # Helm 命令补全
    194. source <(helm completion bash)
    195. # Helm 激活对 OCI 的支持
    196. export HELM_EXPERIMENTAL_OCI=1
    197. # bcs config end for helm
    198. EOF
    199. fi
    200. }
    # Register the BlueKing chart repository (named "bk") with helm.
    # $1 - repository URL; the literal string "null" means "not configured".
    op_bkrepo () {
      local BKREPO_URL="$1"
      if ! command -v helm &>/dev/null; then
        warning "Add bkrepo: helm not found, skipping"
        return 0
      fi
      if [[ $BKREPO_URL == "null" ]]; then
        warning "BKREPO_URL is ${BKREPO_URL}, skipping"
        return 0
      fi
      highlight "Add bkrepo: ${BKREPO_URL}"
      # When using a local chart repository, comment out the two helm lines
      # below; with the Tencent-hosted repository they are required.
      helm repo add bk "${BKREPO_URL}"
      helm repo update
      log "bkrepo added"
    }
    # Undo the op_* configuration: deregister the helm chart repo and strip
    # the bcs env blocks from bcs.env / ~/.bashrc.
    clean_op () {
      # BUGFIX: op_bkrepo registers the repository under the name "bk", not
      # "bkrepo" — `helm repo remove bkrepo` could never succeed and always
      # fell through to the warning.
      helm repo remove bk || warning "remove bk repo failed"
      clean_bcsenv
    }
    222. ### Environment / system initialisation
    # Generate ~/.bcs/bcs.env, tune the kernel, patch /etc/hosts and render the
    # kubeadm config. The local bcs_override=true shadows the global flag so
    # the env file is always regenerated when this module runs.
    223. install_bcsenv () {
    224. local bcs_override=true
    225. _on_cloud
    226. _add_sysctl
    227. _add_hosts
    228. cat -n "$BCS_DIR/bcs.env"
    229. _init_kubeadmconfig
    230. log "Complete"
    231. }
    # Render $BCS_DIR/kubeadm-config. Chooses InitConfiguration (first master)
    # vs JoinConfiguration (extra master / worker) from master_join_cmd_b64 /
    # join_cmd_b64, and picks the kubeadm API version matching K8S_VER.
    # NOTE(review): the version regexes use unescaped '.' and '[3|4]' (which
    # also matches a literal '|'); harmless for real version strings, but
    # worth tightening — confirm before changing the generated file.
    232. _init_kubeadmconfig () {
    233. local join_cmd
    234. local node_name
    235. local node_type
    236. # sanity check: the control-plane IP must be known
    237. [[ -n ${BCS_K8S_CTRL_IP} ]] || error "Kubernetes控制平面IP未指定"
    238. if [[ ${join_cmd_b64} != "null" ]]; then
    239. join_cmd="$(echo -n "${join_cmd_b64}" | base64 -d)"
    240. echo -n "${join_cmd}" | grep -q "kubeadm join" || error "添加节点命令参数异常"
    241. node_name="node-$(echo "$LAN_IP" | tr '.' '-')"
    242. node_type="JoinConfiguration"
    243. elif [[ ${master_join_cmd_b64} != "null" ]]; then
    244. join_cmd="$(echo -n "${master_join_cmd_b64}" | base64 -d)"
    245. echo -n "${join_cmd}" | grep -q "kubeadm join" || error "master扩容命令参数异常"
    246. node_name="master-$(echo "$LAN_IP" | tr '.' '-')"
    247. node_type="JoinConfiguration"
    248. else
    249. node_name="master-$(echo "$LAN_IP" | tr '.' '-')"
    250. node_type="InitConfiguration"
    251. fi
    # Everything from here to the matching EOF is written (with $-expansion)
    # into $BCS_DIR/kubeadm-config — do not add comments inside the heredoc.
    252. cat > "$BCS_DIR/kubeadm-config" << EOF
    253. apiVersion: kubeadm.k8s.io/$(
    254. [[ $K8S_VER =~ ^1.12 ]] && { echo "v1alpha3"; exit; }
    255. [[ $K8S_VER =~ ^1.1[3|4] ]] && { echo "v1beta1"; exit; }
    256. [[ $K8S_VER =~ ^1.(1[5-9]|2[0-2]) ]] && { echo "v1beta2"; exit; }
    257. )
    258. apiServer:
    259. extraArgs:
    260. authorization-mode: Node,RBAC
    261. timeoutForControlPlane: 4m0s
    262. certificatesDir: /etc/kubernetes/pki
    263. clusterName: kubernetes
    264. controlPlaneEndpoint: k8s-api.bcs.local:6443
    265. controllerManager: {}
    266. dns:
    267. type: CoreDNS
    268. etcd:
    269. local:
    270. dataDir: ${ETCD_LIB}
    271. # 如果使用本地镜像仓库,需要改成自己镜像仓库路径,比如:imageRepository: ${BK_PUBLIC_REPO}
    272. imageRepository: ${BK_PUBLIC_REPO}/k8s.gcr.io
    273. kind: ClusterConfiguration
    274. kubernetesVersion: v${K8S_VER}
    275. networking:
    276. dnsDomain: cluster.local
    277. podSubnet: ${K8S_POD_CIDR}
    278. serviceSubnet: ${K8S_SVC_CIDR}
    279. scheduler: {}
    280. ---
    281. apiVersion: kubeadm.k8s.io/$(
    282. [[ $K8S_VER =~ ^1.12 ]] && { echo "v1alpha3"; exit; }
    283. [[ $K8S_VER =~ ^1.1[3|4] ]] && { echo "v1beta1"; exit; }
    284. [[ $K8S_VER =~ ^1.(1[5-9]|2[0-2]) ]] && { echo "v1beta2"; exit; }
    285. )
    286. kind: $node_type
    287. nodeRegistration:
    288. name: $node_name
    289. kubeletExtraArgs:
    290. root-dir: ${KUBELET_LIB}
    291. $(
    292. if [[ -n ${K8S_EXTRA_ARGS} ]]; then
    293. cat << EOFF
    294. ${K8S_EXTRA_ARGS}
    295. EOFF
    296. fi
    297. )
    298. $(
    299. if [[ $K8S_VER =~ ^1.12 ]]; then
    300. cat << EOFF
    301. pod-infra-container-image: ${BK_PUBLIC_REPO}/k8s.gcr.io/pause:3.1
    302. EOFF
    303. fi
    304. if [[ $K8S_VER =~ ^1.12 ]] && [[ $node_type == "JoinConfiguration" ]]; then
    305. cat << EOFF
    306. #discoveryToken: $(echo ${join_cmd} | grep -Po '(?<=discovery-token-ca-cert-hash )sha256:[a-z0-9]{64}' )
    307. discoveryTokenAPIServers:
    308. - k8s-api.bcs.local:6443
    309. discoveryTokenUnsafeSkipCAVerification: true
    310. tlsBootstrapToken: $(echo ${join_cmd} | grep -Po '(?<=token )[a-z0-9.]{23}' )
    311. token: $(echo ${join_cmd} | grep -Po '(?<=token )[a-z0-9.]{23}' )
    312. EOFF
    313. elif [[ $node_type == "JoinConfiguration" ]]; then
    314. cat << EOFF
    315. discovery:
    316. bootstrapToken:
    317. apiServerEndpoint: k8s-api.bcs.local:6443
    318. caCertHashes:
    319. - $(echo ${join_cmd} | grep -Po '(?<=discovery-token-ca-cert-hash )sha256:[a-z0-9]{64}' )
    320. token: $(echo ${join_cmd} | grep -Po '(?<=token )[a-z0-9.]{23}' )
    321. EOFF
    322. if [[ $node_name =~ ^master ]]; then
    323. cat << EOFF
    324. controlPlane:
    325. certificateKey: $(echo ${join_cmd} | grep -Po '(?<=certificate-key )[a-z0-9]{64}' )
    326. EOFF
    327. fi
    328. fi
    329. )
    330. ---
    331. apiVersion: kubeproxy.config.k8s.io/v1alpha1
    332. kind: KubeProxyConfiguration
    333. mode: ipvs
    334. $(
    335. if ! [[ $BCS_K8S_CTRL_IP =~ $LAN_IP ]]; then
    336. cat << EOFF
    337. ipvs:
    338. excludeCIDRs:
    339. - "$BCS_K8S_CTRL_IP/32"
    340. EOFF
    341. fi
    342. )
    343. EOF
    344. highlight "$node_name: init bcsenv"
    345. }
    # Fallback when no cloud metadata service answers: derive LAN_IP from the
    # local routing table (source address of the route towards 10/8), then
    # write the env file.
    _on_baremetal () {
      log "NOT on cloud"
      if [[ -z $LAN_IP ]]; then
        LAN_IP=$(ip -4 -o route get 10/8 | sed -n 's/.*src \([0-9.]\+\).*/\1/p')
      fi
      _init_bcsenv
    }
    # Detect the platform (qcloud / aws / bare metal) by probing the cloud
    # metadata endpoints, resolve LAN_IP and the package mirror, then write
    # bcs.env. An existing bcs.env is respected unless bcs_override=true.
    351. _on_cloud () {
    352. install -dv "${BCS_DIR}" || warning "create ${BCS_DIR} dir failed"
    353. touch "${BCS_DIR}/bcs.env"
    354. if [[ $bcs_override != "true" ]]; then
    355. #set -a
    356. # shellcheck disable=SC1091
    357. source "$BCS_DIR/bcs.env"
    358. #set +a
    359. [[ -z $LAN_IP ]] || return 0
    360. fi
    361. QCLOUD_META_API="http://169.254.0.23/latest/meta-data"
    362. AWS_META_API="http://169.254.169.254/latest/meta-data"
    363. local META_API
    364. if curl -m 2 -qIfs "${QCLOUD_META_API}" >/dev/null; then
    365. ON_CLOUD="qcloud"
    366. META_API="${QCLOUD_META_API}"
    367. elif curl -m 2 -Ifs "${AWS_META_API}" >/dev/null; then
    368. ON_CLOUD="aws"
    369. META_API="${AWS_META_API}"
    370. else
    371. _on_baremetal
    372. return 0
    373. fi
    374. LAN_IP="$( curl -sSf ${META_API}/local-ipv4 )"
    375. [[ -n $LAN_IP ]] || LAN_IP=$(ip -4 -o route get 10/8 | sed -n 's/.*src \([0-9.]\+\).*/\1/p')
    376. case "$ON_CLOUD" in
    377. qcloud)
    378. # for a local yum mirror use e.g. mirror_url="http://192.168.10.41/local/"
    379. mirror_url="http://mirrors.tencentyun.com"
    380. ;;
    381. aws)
    382. # for a local yum mirror use e.g. mirror_url="http://192.168.10.41/local/"
    383. mirror_url="https://mirrors.tencent.com"
    384. ;;
    385. esac
    386. _init_bcsenv
    387. }
    # Write $BCS_DIR/bcs.env from the current settings and make ~/.bashrc
    # source it (old begin/end blocks are removed first). cluster_env, when
    # given, is a base64-encoded env snippet handed over by the first master.
    388. _init_bcsenv () {
    389. highlight "Add envfile"
    390. # shellcheck disable=SC1090
    391. [[ ${cluster_env} == "null" ]] || source <( echo "${cluster_env}" | base64 -d )
    392. [[ -n ${MIRROR_URL} ]] || MIRROR_URL=${mirror_url}
    393. # local LAN_IP="$1"
    394. # local MIRROR_URL="$2"
    395. cat > "$BCS_DIR/bcs.env" << EOF
    396. # bcs config begin for $ON_CLOUD
    397. ON_CLOUD="${ON_CLOUD}"
    398. BCS_DIR="${BCS_DIR}"
    399. BK_HOME="${BK_HOME}"
    400. bcs_sysctl="${bcs_sysctl}"
    401. MIRROR_IP="${MIRROR_IP}"
    402. MIRROR_URL="${MIRROR_URL}"
    403. BKREPO_URL="${BKREPO_URL}"
    404. DOCKER_LIB="${DOCKER_LIB}"
    405. DOCKER_VERSION="${DOCKER_VERSION}"
    406. DOCKER_LIVE_RESTORE="${DOCKER_LIVE_RESTORE}"
    407. REPO_MIRRORS='${REPO_MIRRORS}'
    408. DOCKER_BRIDGE="${DOCKER_BRIDGE}"
    409. BK_PUBLIC_REPO="${BK_PUBLIC_REPO}"
    410. BK_RELEASE_REPO="${BK_RELEASE_REPO}"
    411. KUBELET_LIB="${KUBELET_LIB}"
    412. K8S_VER="${K8S_VER}"
    413. K8S_SVC_CIDR="${K8S_SVC_CIDR}"
    414. K8S_POD_CIDR="${K8S_POD_CIDR}"
    415. K8S_EXTRA_ARGS="${K8S_EXTRA_ARGS}"
    416. ETCD_LIB="${ETCD_LIB}"
    417. LAN_IP="${LAN_IP}"
    418. BCS_K8S_CTRL_IP="${BCS_K8S_CTRL_IP:-$LAN_IP}"
    419. # bcs config end for $ON_CLOUD
    420. EOF
    421. sed -ri "/bcs config begin for $ON_CLOUD/,/bcs config end for $ON_CLOUD/d" "$HOME/.bashrc"
    422. cat >> "$HOME/.bashrc" << EOF
    423. # bcs config begin for $ON_CLOUD
    424. source "${BCS_DIR}/bcs.env"
    425. # bcs config end for $ON_CLOUD
    426. EOF
    427. # shellcheck disable=SC1091
    428. source "${BCS_DIR}/bcs.env"
    429. }
    # Load the kernel modules kube-proxy/IPVS need and apply the BCS kernel
    # tuning: sysctl settings plus process/file ulimits. The sysctl step is
    # skipped unless bcs_sysctl == 1.
    _add_sysctl () {
      # shellcheck disable=SC1091
      source /etc/os-release
      if [[ $VERSION_ID != "2.2" ]]; then
        echo br_netfilter ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh nf_conntrack | xargs -n1 modprobe
      fi
      # Windows-flannel on tlinux needs vxlan reloaded with UDP port 4789.
      if [[ -f /etc/tlinux-release ]] && [[ $K8S_CNI == "ws_flannel" ]]; then
        # BUGFIX: both branches previously grepped for the misspelled module
        # name "vlxan", so the module was never unloaded and the error branch
        # was unreachable. Unload vxlan if loaded, then verify it is gone.
        if lsmod | grep -q vxlan; then
          rmmod vxlan
        fi
        if lsmod | grep -q vxlan; then
          error "vxlan模块卸载失败"
        fi
        modprobe vxlan udp_port=4789
        log "Winodws flannel VXLAN using $(cat /sys/module/vxlan/parameters/udp_port)"
      fi
      [[ ${bcs_sysctl} == "1" ]] || return 0
      highlight "Add sysctl"
      # Memory/page-size figures feed the dynamic limits at the bottom of the
      # sysctl block; fall back to 16 GiB / 4 KiB if detection fails.
      TOTAL_MEM=$(free -b | awk 'NR==2{print $2}')
      TOTAL_MEM=${TOTAL_MEM:-$(( 16 * 1024 * 1024 *1024 ))}
      PAGE_SIZE=$(getconf PAGE_SIZE)
      PAGE_SIZE=${PAGE_SIZE:-4096}
      THREAD_SIZE=$(( PAGE_SIZE << 2 ))
      sed -ri.bcs.bak '/bcs config begin/,/bcs config end/d' /etc/sysctl.conf
      cat >> "/etc/sysctl.conf" << EOF
# bcs config begin
# 系统中每一个端口最大的监听队列的长度,这是个全局的参数,默认值128太小,32768跟友商一致
net.core.somaxconn=32768
# 大量短连接时,开启TIME-WAIT端口复用
net.ipv4.tcp_tw_reuse=1
# TCP半连接队列长度。值太小的话容易造成高并发时客户端连接请求被拒绝
net.ipv4.tcp_max_syn_backlog=8096
# RPS是将内核网络rx方向报文处理的软中断分配到合适CPU核,以提升网络应用整体性能的技术。这个参数设置RPS flow table大小
fs.inotify.max_user_instances=8192
# inotify watch总数量限制。调大该参数避免"Too many open files"错误
fs.inotify.max_user_watches=524288
# 使用bpf需要开启
net.core.bpf_jit_enable=1
# 使用bpf需要开启
net.core.bpf_jit_harden=1
# 使用bpf需要开启
net.core.bpf_jit_kallsyms=1
# 用于调节rx软中断周期中内核可以从驱动队列获取的最大报文数,以每CPU为基础有效,计算公式(dev_weight * dev_weight_tx_bias)。主要用于调节网络栈和CPU在tx上的不对称
net.core.dev_weight_tx_bias=1
# socket receive buffer大小
net.core.rmem_max=16777216
# RPS是将内核网络rx方向报文处理的软中断分配到合适CPU核,以提升网络应用整体性能的技术。这个参数设置RPS flow table大小
net.core.rps_sock_flow_entries=8192
# socket send buffer大小
net.core.wmem_max=16777216
# 避免"neighbor table overflow"错误(发生过真实客户案例,触发场景为节点数量超过1024,并且某应用需要跟所有节点通信)
net.ipv4.neigh.default.gc_thresh1=2048
# 同上
net.ipv4.neigh.default.gc_thresh2=8192
# 同上
net.ipv4.neigh.default.gc_thresh3=16384
# orphan socket是应用以及close但TCP栈还没有释放的socket(不包含TIME_WAIT和CLOSE_WAIT)。 适当调大此参数避免负载高时报'Out of socket memory'错误。32768跟友商一致。
net.ipv4.tcp_max_orphans=32768
# 代理程序(如nginx)容易产生大量TIME_WAIT状态的socket。适当调大这个参数避免"TCP: time wait bucket table overflow"错误。
net.ipv4.tcp_max_tw_buckets=16384
# TCP socket receive buffer大小。 太小会造成TCP连接throughput降低
net.ipv4.tcp_rmem=4096 12582912 16777216
# TCP socket send buffer大小。 太小会造成TCP连接throughput降低
net.ipv4.tcp_wmem=4096 12582912 16777216
# 控制每个进程的内存地址空间中 virtual memory area的数量
vm.max_map_count=262144
# 为了支持k8s service, 必须开启
net.ipv4.ip_forward=1
# ubuntu系统上这个参数缺省为"/usr/share/apport/apport %p %s %c %P"。在容器中会造成无法生成core文件
kernel.core_pattern=core
# 内核在发生死锁或者死循环的时候可以触发panic,默认值是0.
kernel.softlockup_panic=0
# 使得iptable可以作用在网桥上
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
# 系统全局PID号数值的限制。
kernel.pid_max=$(( 4 * 1024 * 1024))
# 系统进程描述符总数量限制,根据内存大小动态计算得出,TOTAL_MEM为系统的内存总量,单位是字节,THREAD_SIZE默认为16,单位是kb。
kernel.threads-max=$((TOTAL_MEM / (8 * THREAD_SIZE) ))
# 整个系统fd(包括socket)的总数量限制。根据内存大小动态计算得出,TOTAL_MEM为系统的内存总量,单位是字节,调大该参数避免"Too many open files"错误。
fs.file-max=$(( TOTAL_MEM / 10240 ))
# bcs config end
EOF
      sysctl --system
      # ulimit
      cat > /etc/security/limits.d/99-bcs.conf << EOF
# bcs config begin
* soft nproc 1028546
* hard nproc 1028546
* soft nofile 204800
* hard nofile 204800
# bcs config end
EOF
    }
    # Map mirrors.tencentyun.com to MIRROR_IP in /etc/hosts (qcloud only).
    # No-op when MIRROR_IP is the sentinel "null"; previous bcs blocks are
    # removed first so the edit stays idempotent.
    523. _add_hosts () {
    524. [[ ${MIRROR_IP} != "null" ]] || return 0
    525. highlight "Add hosts"
    526. sed -ri.bcs.bak '/bcs config begin for bcs/,/bcs config end for bcs/d' /etc/hosts
    527. cat >> "/etc/hosts" << EOF
    528. # bcs config begin for bcs
    529. $(
    530. if [[ ${ON_CLOUD} == qcloud ]] && [[ -n ${MIRROR_IP} ]]; then
    531. echo "${MIRROR_IP} mirrors.tencentyun.com"
    532. fi
    533. )
    534. # bcs config end for bcs
    535. EOF
    536. }
    ### Container runtime: Docker
    # Install docker-ce at DOCKER_VERSION from the configured mirror, render
    # its configuration, enable the daemon and verify it can run a container.
    # For a local yum mirror, point MIRROR_URL at it before running.
    install_docker () {
      local yum_repo
      yum_repo="${MIRROR_URL}/docker-ce/linux/centos/docker-ce.repo"
      if docker info &>/dev/null && [[ -d ${DOCKER_LIB} ]];then
        warning "Already installed, skipping"
        return 0
      fi
      if ! curl -Ifs "$yum_repo" > /dev/null; then
        error "Unable to curl repository file $yum_repo, is it valid?"
      fi
      # Rewrite the upstream URLs inside the repo file to use the mirror.
      curl -fs "$yum_repo" | sed "s#https://download.docker.com#${MIRROR_URL}/docker-ce#g" | tee "$BCS_DIR/docker-ce.repo"
      # tlinux reports a $releasever yum cannot resolve; pin it to el7.
      [[ ! -f /etc/tlinux-release ]] || sed -i "s/\$releasever/7/g" "$BCS_DIR/docker-ce.repo"
      yum install -y -q yum-utils
      yum-config-manager --add-repo "$BCS_DIR/docker-ce.repo"
      yum makecache fast
      # Resolve the requested DOCKER_VERSION against what the repo offers.
      pkg_pattern="$(echo "${DOCKER_VERSION}" | sed "s/-ce-/\\\\.ce.*/g" | sed "s/-/.*/g").*el"
      pkg_version=$(yum list --showduplicates 'docker-ce' | grep "$pkg_pattern" | tail -1 | awk '{print $2}' | cut -d':' -f 2)
      # BUGFIX: job_fail is not defined anywhere in this script; use error().
      [[ -n $pkg_version ]] || error "ERROR: $DOCKER_VERSION not found amongst yum list results"
      cli_pkg_version=$(yum list --showduplicates 'docker-ce-cli' | grep "$pkg_pattern" | tail -1 | awk '{print $2}' | cut -d':' -f 2)
      # Install
      yum -y install docker-ce-cli-"$cli_pkg_version" docker-ce-"$pkg_version" containerd.io
      # Setting
      render_docker
      # Enable
      systemctl enable docker
      systemctl restart docker
      # Testing
      docker info
      if ! docker --version; then
        error "Did Docker get installed?"
      fi
      if ! docker run --rm "$BK_PUBLIC_REPO"/library/hello-world:latest; then
        error "Could not get docker to run the hello world container"
      fi
    }
    # Write /etc/docker/daemon.json and a systemd drop-in, then reload
    # systemd. A pre-existing daemon.json is backed up once to
    # daemon.json.bcs.bak before being overwritten.
    # Tuning references:
    #   https://docs.docker.com/engine/reference/commandline/dockerd/
    #   https://docs.rancher.cn/docs/rancher2/best-practices/2.0-2.4/optimize/docker/_index
    render_docker () {
      ## data directory
      install -dv "${DOCKER_LIB}"
      ## configuration directories
      install -dv /etc/docker/
      install -dv /etc/systemd/system/docker.service.d/
      if [[ -s /etc/docker/daemon.json ]] && [[ ! -f /etc/docker/daemon.json.bcs.bak ]]; then
        warning "/etc/docker/daemon.json已存在,备份中..."
        # BUGFIX: job_fail is not defined anywhere in this script; use error().
        cp -av /etc/docker/daemon.json{,.bcs.bak} || error "备份原配置文件失败"
      fi
      log "开始写入配置docker文件..."
      # For a private registry add e.g.: "insecure-registries": ["192.168.10.42:5000"]
      cat > /etc/docker/daemon.json << EOF
{
"data-root": "${DOCKER_LIB}",
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors": ${REPO_MIRRORS},
"max-concurrent-downloads": 10,
"live-restore": ${DOCKER_LIVE_RESTORE},
"log-level": "info",
"log-opts": {
"max-size": "100m",
"max-file": "5"
},
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF
      ## When a bridge CIDR is supplied, pre-create docker0 manually (avoids
      ## route conflicts with the default bridge network) and inject "bip".
      if [[ ${DOCKER_BRIDGE} != "null" ]]; then
        ip link add name docker0 type bridge
        ip addr add dev docker0 "${DOCKER_BRIDGE}"
        sed -i "/\"data-root\":/i \"bip\": \"${DOCKER_BRIDGE}\"," /etc/docker/daemon.json
      fi
      ## systemd drop-in: restore the iptables FORWARD policy docker flips to DROP
      cat>/etc/systemd/system/docker.service.d/bcs-docker.conf<<EOF
[Service]
ExecStartPost=/sbin/iptables -P FORWARD ACCEPT
EOF
      systemctl daemon-reload
      log "Complete"
    }
    # Strip every "bcs config begin/end" block from bcs.env and ~/.bashrc,
    # keeping .bcs.bak backups via sed's in-place suffix.
    clean_bcsenv () {
      local env_file="$BCS_DIR/bcs.env"
      if [[ -f "$env_file" ]] && grep -q "bcs config begin" "$env_file" "$HOME/.bashrc"; then
        sed -ri.bcs.bak "/bcs config begin/,/bcs config end/d" "$env_file" "$HOME/.bashrc"
      fi
      log "Complete"
    }
    651. ### Kubernetes
    # Install kubeadm/kubelet/kubectl at K8S_VER from the mirror, and map
    # k8s-api.bcs.local to the first control-plane IP in /etc/hosts.
    652. install_k8stool () {
    653. local mirror_url
    654. master_iplist=${BCS_K8S_CTRL_IP:-$LAN_IP}
    655. read -r -a master_iplist <<< "${master_iplist//,/ }"
    656. if [[ -z ${master_iplist[0]} ]]; then
    657. error "BCS_K8S_CTRL_IP is null"
    658. fi
    659. highlight "Add kube-apiserver hosts"
    660. sed -ri.bcs.bak '/bcs config begin for kube-apiserver/,/bcs config end for kube-apiserver/d' /etc/hosts
    661. cat >> /etc/hosts << EOF
    662. # bcs config begin for kube-apiserver
    663. ${master_iplist[0]} k8s-api.bcs.local
    664. # bcs config end for kube-apiserver
    665. EOF
    666. # Pre
    667. # add the kubernetes yum repository
    668. mirror_url="${MIRROR_URL}/kubernetes"
    669. # when a local package mirror is configured, comment out this repo block
    670. cat > "$BCS_DIR/kubernetes.repo" << EOF
    671. [kubernetes]
    672. name=Kubernetes
    673. baseurl=${mirror_url}/yum/repos/kubernetes-el7-x86_64
    674. enabled=1
    675. gpgcheck=0
    676. EOF
    677. yum install -y -q yum-utils bash-completion
    678. # when a local package mirror is configured, comment out this repo setup
    679. yum-config-manager --add-repo "$BCS_DIR/kubernetes.repo"
    680. yum clean all
    681. yum makecache fast
    682. ## kubelet data directory
    683. install -dv "${KUBELET_LIB}"
    684. # cat > /etc/sysconfig/kubelet << EOF
    685. # KUBELET_EXTRA_ARGS="--root-dir=${KUBELET_LIB}"
    686. # EOF
    687. # the next 6 lines could be replaced with pinned versions, e.g.:
    688. # yum -y install \
    689. # "kubectl-1.20.11-0" \
    690. # "kubelet-1.20.11-0" \
    691. # "kubeadm-1.20.11-0"
    692. pkg_pattern="${K8S_VER}"
    693. pkg_version=$(yum list --showduplicates 'kubeadm' | grep -w "$pkg_pattern" | tail -1 | awk '{print $2}' | cut -d':' -f 2)
    694. yum -y install \
    695. "kubectl-${pkg_version}" \
    696. "kubeadm-${pkg_version}" \
    697. "kubelet-${pkg_version}"
    698. # kubeadm config images pull --config="$BCS_DIR/kubeadm-config" -v 11
    699. # kubeadm config images pull --image-repository="${BK_PUBLIC_REPO}/k8s.gcr.io" \
    700. # -v 11 --kubernetes-version "${K8S_VER}" || error "pull kubernetes images failed"
    701. highlight "kubectl $(kubectl version --client --short || error "Did kubectl get installed?" )"
    702. highlight "kubeadm Version: $(kubeadm version -o short || error "Did kubectl get installed?" )"
    703. }
    # Ensure a helm v3 client is available: when missing, copy the binary out
    # of the alpine/helm image into /usr/bin, then report the version.
    install_helm () {
      if ! command -v docker &>/dev/null; then
        error "docker client is not found"
      fi
      if ! helm version --short 2>/dev/null | grep -qoE '^v3\.'; then
        docker run -v /usr/bin:/tmp --rm --entrypoint /bin/cp "${BK_PUBLIC_REPO}"/alpine/helm:3.7.2 -f /usr/bin/helm /tmp/ || error "pull helm image failed"
      fi
      highlight "helm Version: $(helm version --short)"
    }
    # Print (via install_k8sctrl) the join commands for scaling the cluster.
    711. get_joincmd () {
    712. install_k8sctrl
    713. }
    # Bootstrap the control plane with kubeadm — idempotent: the init section
    # is skipped when `kubectl cluster-info` already succeeds — install the
    # CNI, then emit base64-encoded join commands (and the matching env
    # snippet) for extra control-plane nodes and worker nodes.
    714. install_k8sctrl () {
    715. local node_name
    716. local join_cmd
    717. local cert_key
    718. local master_join_cmd
    719. if ! kubectl cluster-info 2>/dev/null ; then
    720. systemctl enable --now kubelet
    721. ## etcd data directory (symlinked into /var/lib for kubeadm)
    722. install -dv "${ETCD_LIB}"
    723. ln -sfv "${ETCD_LIB}" /var/lib/
    724. node_name="master-$(echo "$LAN_IP" | tr '.' '-')"
    725. highlight "Setup K8S Control Plane: $node_name"
    726. kubeadm init --config="$BCS_DIR/kubeadm-config" "$( [[ $K8S_VER =~ ^1.12 ]] && echo --ignore-preflight-errors=SystemVerification || echo --upload-certs)" || error "install k8s master failed"
    727. # kubeadm init --node-name "$node_name" --kubernetes-version "${K8S_VER}" \
    728. # --control-plane-endpoint k8s-api.bcs.local \
    729. # --image-repository="${BK_PUBLIC_REPO}/k8s.gcr.io" \
    730. # --service-cidr="${K8S_SVC_CIDR}" --pod-network-cidr="${K8S_POD_CIDR}" --upload-certs || error "install k8s master failed"
    731. install -dv "$HOME/.kube"
    732. install -v -m 600 -o "$(id -u)" -g "$(id -g)" /etc/kubernetes/admin.conf "$HOME/.kube/config"
    733. # create the CNI (install_flannel by default)
    734. if [[ -n ${K8S_CNI} ]]; then
    735. "install_${K8S_CNI}"
    736. else
    737. install_flannel
    738. fi
    739. fi
    740. install_op
    741. join_cmd="$(kubeadm token create --print-join-command)"
    742. if [[ $K8S_VER =~ ^1.12 ]]; then
    743. join_cmd="$join_cmd --ignore-preflight-errors=SystemVerification"
    744. kubectl set image deployment/coredns coredns="${BK_PUBLIC_REPO}/k8s.gcr.io/coredns:1.2.6" -n kube-system
    745. # kubectl get nodes -l kubernetes.io/os || kubectl label node -l node-role.kubernetes.io/master= kubernetes.io/os=linux
    746. highlight "Kubernetes控制节点启动成功"
    747. else
    748. cert_key="$(kubeadm init phase upload-certs --upload-certs | grep -E '[a-z0-9]{64}')"
    749. [[ -n $cert_key ]] || error "not found certificate key"
    750. master_join_cmd="$join_cmd --control-plane --certificate-key $cert_key"
    751. [[ "$BCS_CP_WORKER" == "0" ]] || kubectl taint node -l node-role.kubernetes.io/master= node-role.kubernetes.io/master:NoSchedule-
    752. # echo "master_join_cmd:${master_join_cmd}"
    753. cluster_env=$( grep -vE "LAN_IP=|^#|^source |^export " "${BCS_DIR}"/bcs.env | base64 -w 0)
    754. master_join_cmd_b64=$(echo -n "${master_join_cmd}" | base64 -w 0)
    755. echo "master_join_cmd:${master_join_cmd}"
    756. echo "cluster_env:${cluster_env}"
    757. echo "master_join_cmd_b64:${master_join_cmd_b64}"
    758. fi
    # NOTE(review): cluster_env is recomputed here identically to line 753
    # above; presumably redundant but harmless — confirm before deduplicating.
    759. cluster_env=$( grep -vE "LAN_IP=|^#|^source |^export " "${BCS_DIR}"/bcs.env | base64 -w 0)
    760. join_cmd_b64=$(echo -n "${join_cmd}" | base64 -w 0)
    761. echo "join_cmd:${join_cmd}"
    762. echo "cluster_env:${cluster_env}"
    763. echo "join_cmd_b64:${join_cmd_b64}"
    764. cat <<EOF
    765. ======================
    766. $( highlight "Kubernetes控制节点启动成功" )
    767. $(
    768. [[ $K8S_VER =~ ^1.12 ]] && exit
    769. highlight "扩容控制平面执行以下命令"
    770. echo "set -a"
    771. echo "cluster_env=${cluster_env}"
    772. echo "master_join_cmd_b64=${master_join_cmd_b64}"
    773. echo "set +a"
    774. echo "curl -fsSL https://bkopen-1252002024.file.myqcloud.com/ce7/bcs.sh | bash -s -- install k8s-control-plane"
    775. )
    776. $(
    777. highlight "扩容节点执行以下命令"
    778. echo "set -a"
    779. echo "cluster_env=${cluster_env}"
    780. echo "join_cmd_b64=${join_cmd_b64}"
    781. echo "set +a"
    782. echo "curl -fsSL https://bkopen-1252002024.file.myqcloud.com/ce7/bcs.sh | bash -s -- install k8s-node"
    783. )
    784. EOF
    785. }
    # Dispatch: with no cluster_env this host becomes the first control plane;
    # otherwise it joins the existing cluster described by cluster_env.
    786. install_k8s () {
    787. if [[ ${cluster_env} == "null" ]]; then
    788. install_k8s-1st-ctrl
    789. else
    790. install_k8s-node
    791. fi
    792. }
    # Module alias used by the "install k8smaster" project name.
    793. install_k8smaster () {
    794. install_k8s-1st-ctrl
    795. }
    # Full bring-up of the first control-plane node: env, docker, k8s tools,
    # helm, then kubeadm init.
    796. install_k8s-1st-ctrl () {
    797. install_bcsenv
    798. install_docker
    799. install_k8stool
    800. install_helm
    801. install_k8sctrl
    802. }
    803. clean_k8snode () {
    804. clean_k8s-node
    805. }
    806. clean_k8s-node () {
    807. systemctl disable --now kubelet
    808. if [[ $K8S_VER =~ ^1.12 ]]; then
    809. kubeadm reset phase cleanup-node -f
    810. else
    811. kubeadm reset phase cleanup-node
    812. fi
    813. bak_dir="/data/backup/$(date +%s)"
    814. install -dv "$bak_dir" || error "create backup dir $bak_dir failed"
    815. docker ps | grep -qv NAME && docker rm -f "$(docker ps -aq)"
    816. [[ -d /etc/kubernetes ]] && mv -v /etc/kubernetes "$bak_dir"/
    817. [[ -d /var/lib/kubelet ]] && mv -v /var/lib/kubelet "$bak_dir"/
    818. [[ -d ${KUBELET_LIB} ]] && mv -v "${KUBELET_LIB}" "$bak_dir"/kubelet
    819. systemctl disable --now docker
    820. log "Uninstall docker, kubelet >>> Done"
    821. }
# Back-compat alias for "clean k8smaster".
clean_k8smaster () {
    clean_k8s-control-plane
}
# Back-compat alias for "clean k8s-master".
clean_k8s-master () {
    clean_k8s-control-plane
}
# Tear down a control-plane node: detach it from the cluster/etcd membership,
# run the shared node cleanup, then back up kubeconfig and etcd data.
# NOTE(review): relies on the global bak_dir assigned inside clean_k8snode;
# do not reorder the clean_k8snode call below the mv commands.
clean_k8s-control-plane () {
    # kubeadm 1.12 still required -f on these reset phases
    if [[ $K8S_VER =~ ^1.12 ]]; then
        kubeadm reset phase update-cluster-status -f
        kubeadm reset phase remove-etcd-member -f
    else
        kubeadm reset phase update-cluster-status
        kubeadm reset phase remove-etcd-member
    fi
    clean_k8snode
    [[ -d "$HOME"/.kube ]] && mv -v "$HOME"/.kube "$bak_dir"/
    [[ -d ${ETCD_LIB} ]] && mv -v "${ETCD_LIB}" "$bak_dir"/
    # /var/lib/etcd may be a symlink (created at install time) or a real dir
    [[ -L /var/lib/etcd ]] && rm -vf /var/lib/etcd
    [[ -d /var/lib/etcd ]] && mv -v /var/lib/etcd "$bak_dir"/
    log "Uninstall Kubernetes Control Plane >>> Done"
}
# Back-compat alias for "install k8snode".
install_k8snode (){
    install_k8s-node
}
# Scale out the control plane. Joining an extra master shares the code path
# with joining a worker; install_k8s-node distinguishes the two cases by
# master_join_cmd_b64 vs join_cmd_b64.
install_k8s-control-plane () {
    install_k8s-node
}
# Join this host to an existing cluster, either as a worker node
# (join_cmd_b64 set) or as an additional control plane (master_join_cmd_b64
# set). Requires cluster_env / BCS_K8S_CTRL_IP exported by the caller.
install_k8s-node () {
    local join_cmd
    local node_name
    [[ ${cluster_env} != "null" ]] || error "cluster_env未指定 请运行完整的执行命令"
    install_bcsenv
    install_docker
    # Argument checks: the control-plane IP must be set and must not be this host
    if [[ -z ${BCS_K8S_CTRL_IP} ]]; then
        error "Kubernetes控制平面IP未指定"
    elif [[ ${BCS_K8S_CTRL_IP} == "${LAN_IP}" ]]; then
        error "该节点为Kubernetes第一台控制平面,请至其它节点执行该命令"
    fi
    # Decode whichever base64 join command was provided and derive a node name
    # from the LAN IP (dots replaced with dashes).
    if [[ ${join_cmd_b64} != "null" ]] && [[ ${master_join_cmd_b64} == "null" ]]; then
        join_cmd="$(echo -n "${join_cmd_b64}" | base64 -d)"
        join_cmd="$(echo -n "${join_cmd_b64}" | base64 -d)" 2>/dev/null || true # (no-op guard removed; see NOTE below)
        echo -n "${join_cmd}" | grep -q "kubeadm join" || error "添加节点命令参数异常"
        node_name="node-$(echo "$LAN_IP" | tr '.' '-')"
    elif [[ ${master_join_cmd_b64} != "null" ]]; then
        join_cmd="$(echo -n "${master_join_cmd_b64}" | base64 -d)"
        echo -n "${join_cmd}" | grep -q "kubeadm join" || error "master扩容命令参数异常"
        node_name="master-$(echo "$LAN_IP" | tr '.' '-')"
    else
        error "添加参数有误"
    fi
    install_localpv_dir
    # Only join when the node is not already part of a cluster (no API access
    # and no pause containers running).
    if ! kubectl cluster-info 2>/dev/null && ! docker ps | grep -q pause; then
        install_k8stool
        systemctl enable --now kubelet
        ## etcd data directory
        install -dv "${ETCD_LIB}"
        ln -sfv "${ETCD_LIB}" /var/lib/
        cat "$BCS_DIR/kubeadm-config"
        highlight "$node_name: kubeadm join --config=$BCS_DIR/kubeadm-config -v 11"
        kubeadm join --config="$BCS_DIR/kubeadm-config" -v 11
        if [[ ${master_join_cmd_b64} != "null" ]]; then
            # New control plane: install kubeconfig for root and the op tooling
            install -dv "$HOME/.kube"
            install -v -m 600 -o "$(id -u)" -g "$(id -g)" /etc/kubernetes/admin.conf "$HOME/.kube/config"
            log "Kubernetes Control Plane扩容成功"
            install_op
        else
            log "添加Kubernetes节点成功"
        fi
    fi
}
    892. ## CNI
## CNI
# Deploy flannel (upstream kube-flannel.yml pinned at v0.19.2). The pod CIDR
# placeholder 10.244.0.0/16 is rewritten to ${K8S_POD_CIDR} by sed before the
# manifest reaches kubectl; when K8S_CNI is "ws_flannel" the vxlan backend is
# pinned to VNI 4096 / port 4789 (required by Windows overlay networking).
install_flannel () {
    cat << EOF | sed "s#10.244.0.0/16#${K8S_POD_CIDR}#g" | kubectl apply -f -
---
kind: Namespace
apiVersion: v1
metadata:
  name: kube-flannel
  labels:
    pod-security.kubernetes.io/enforce: privileged
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "vxlan"$([[ $K8S_CNI == "ws_flannel" ]] && echo ', "VNI" : 4096, "Port": 4789' )
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
        #image: flannelcni/flannel-cni-plugin:v1.1.0 for ppc64le and mips64le (dockerhub limitations may apply)
        # 配置了本地docker源的需要注意修改自己的路径镜像 image: ${BK_PUBLIC_REPO}/docker.io/rancher/mirrored-flannelcni-flannel-cni-plugin:v1.1.0
        image: docker.io/rancher/mirrored-flannelcni-flannel-cni-plugin:v1.1.0
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
        #image: flannelcni/flannel:v0.19.2 for ppc64le and mips64le (dockerhub limitations may apply)
        image: ${BK_PUBLIC_REPO}/flannelcni/flannel:v0.19.2
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        #image: flannelcni/flannel:v0.19.2 for ppc64le and mips64le (dockerhub limitations may apply)
        image: ${BK_PUBLIC_REPO}/flannelcni/flannel:v0.19.2
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
EOF
}
    1102. ## Windows
    1103. install_ws_flannel () {
    1104. if [[ -f /etc/tlinux-release ]]; then
    1105. rmmod vxlan
    1106. lsmod |grep -q vxlan && error "vxlan模块卸载失败"
    1107. modprobe vxlan udp_port=4789
    1108. log "Winodws flannel VXLAN UDP Port using $(cat /sys/module/vxlan/parameters/udp_port)"
    1109. fi
    1110. install_flannel
    1111. install_ws_kubeproxy
    1112. install_ws_flannel_overlay
    1113. }
# Deploy kube-proxy for Windows nodes (sig-windows-tools kube-proxy.yml).
# The heredoc delimiter is quoted ('EOF') so the embedded PowerShell
# $variables are NOT expanded by the shell; sed substitutes the literal
# VERSION token with v${K8S_VER} to pick the matching image tag.
install_ws_kubeproxy () {
    cat << 'EOF' | sed "s/VERSION/v${K8S_VER}/g" | kubectl apply -f -
# https://github.com/kubernetes-sigs/sig-windows-tools/releases/latest/download/kube-proxy.yml
apiVersion: v1
data:
  run-script.ps1: |-
    $ErrorActionPreference = "Stop";
    # Get newest cni conf file that is not 0-containerd-nat.json or spin until one shows up.
    # With Docker the kube-proxy pod should not be scheduled to Windows nodes until host networking is configured.
    # With contianerD host networking is required to schedule any pod including the CNI pods so a basic nat network is
    # configured. This network should not be used by kube-proxy.
    function Get-NetConfFile {
      while ($true) {
        if (Test-Path /host/etc/cni/net.d/) {
          $files = @()
          $files += Get-ChildItem -Path /host/etc/cni/net.d/ -Exclude "0-containerd-nat.json"
          if ($files.Length -gt 0) {
            $file = (($files | Sort-Object LastWriteTime | Select-Object -Last 1).Name)
            Write-Host "Using CNI conf file: $file"
            return $file
          }
        }
        Write-Host "Waiting for CNI file..."
        Start-Sleep 10
      }
    }
    mkdir -force /host/var/lib/kube-proxy/var/run/secrets/kubernetes.io/serviceaccount
    mkdir -force /host/k/kube-proxy
    cp -force /k/kube-proxy/* /host/k/kube-proxy
    cp -force /var/lib/kube-proxy/* /host/var/lib/kube-proxy
    cp -force /var/run/secrets/kubernetes.io/serviceaccount/* /host/var/lib/kube-proxy/var/run/secrets/kubernetes.io/serviceaccount #FIXME?
    # If live patching kube-proxy, make sure and patch it inside this container, so that the SHA
    # matches that of what is on the host. i.e. uncomment the below line...
    # wget <download-path-to-kube-proxy.exe> -outfile k/kube-proxy/kube-proxy.exe
    cp -force /k/kube-proxy/* /host/k/kube-proxy
    $cniConfFile = Get-NetConfFile
    $networkName = (Get-Content "/host/etc/cni/net.d/$cniConfFile" | ConvertFrom-Json).name
    $sourceVip = ($env:POD_IP -split "\.")[0..2] + 0 -join "."
    yq w -i /host/var/lib/kube-proxy/config.conf winkernel.sourceVip $sourceVip
    yq w -i /host/var/lib/kube-proxy/config.conf winkernel.networkName $networkName
    yq w -i /host/var/lib/kube-proxy/config.conf featureGates.WinOverlay true
    yq w -i /host/var/lib/kube-proxy/config.conf mode "kernelspace"
    # Start the kube-proxy as a wins process on the host.
    # Note that this will rename kube-proxy.exe to rancher-wins-kube-proxy.exe on the host!
    wins cli process run --path /k/kube-proxy/kube-proxy.exe --args "--v=6 --config=/var/lib/kube-proxy/config.conf --hostname-override=$env:NODE_NAME --feature-gates=WinOverlay=true"
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: kube-proxy
  name: kube-proxy-windows
  namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    k8s-app: kube-proxy
  name: kube-proxy-windows
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: kube-proxy-windows
  template:
    metadata:
      labels:
        k8s-app: kube-proxy-windows
    spec:
      serviceAccountName: kube-proxy
      containers:
      - command:
        - pwsh
        args:
        - -file
        - /var/lib/kube-proxy-windows/run-script.ps1
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        image: sigwindowstools/kube-proxy:VERSION-nanoserver
        name: kube-proxy
        volumeMounts:
        - name: host
          mountPath: /host
        - name: wins
          mountPath: \\.\pipe\rancher_wins
        - mountPath: /var/lib/kube-proxy
          name: kube-proxy
        - mountPath: /var/lib/kube-proxy-windows
          name: kube-proxy-windows
      nodeSelector:
        kubernetes.io/os: windows
      tolerations:
      - key: CriticalAddonsOnly
        operator: Exists
      - operator: Exists
      volumes:
      - configMap:
          defaultMode: 420
          name: kube-proxy-windows
        name: kube-proxy-windows
      - configMap:
          name: kube-proxy
        name: kube-proxy
      - hostPath:
          path: /
        name: host
      - name: wins
        hostPath:
          path: \\.\pipe\rancher_wins
          type: null
  updateStrategy:
    type: RollingUpdate
EOF
}
# Remove the Windows kube-proxy daemonset and its configmap.
clean_ws_kubeproxy () {
    kubectl delete -n kube-system daemonset.apps/kube-proxy-windows
    kubectl delete -n kube-system configmap/kube-proxy-windows
}
# Deploy the Windows flannel overlay DaemonSet (sig-windows-tools
# flannel-overlay.yml). The quoted 'EOF' delimiter keeps the embedded
# PowerShell $variables from being expanded by the shell.
install_ws_flannel_overlay () {
    cat << 'EOF' | kubectl apply -f -
# https://github.com/kubernetes-sigs/sig-windows-tools/releases/latest/download/flannel-overlay.yml
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-windows-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  run.ps1: |
    $ErrorActionPreference = "Stop";
    mkdir -force /host/etc/cni/net.d
    mkdir -force /host/etc/kube-flannel
    mkdir -force /host/opt/cni/bin
    mkdir -force /host/k/flannel
    mkdir -force /host/k/flannel/var/run/secrets/kubernetes.io/serviceaccount
    $containerRuntime = "docker"
    if (Test-Path /host/etc/cni/net.d/0-containerd-nat.json) {
      $containerRuntime = "containerd"
    }
    Write-Host "Configuring CNI for $containerRuntime"
    $serviceSubnet = yq r /etc/kubeadm-config/ClusterConfiguration networking.serviceSubnet
    $podSubnet = yq r /etc/kubeadm-config/ClusterConfiguration networking.podSubnet
    $networkJson = wins cli net get | convertfrom-json
    if ($containerRuntime -eq "docker") {
      $cniJson = get-content /etc/kube-flannel-windows/cni-conf.json | ConvertFrom-Json
      $cniJson.delegate.policies[0].Value.ExceptionList = $serviceSubnet, $podSubnet
      $cniJson.delegate.policies[1].Value.DestinationPrefix = $serviceSubnet
      Set-Content -Path /host/etc/cni/net.d/10-flannel.conf ($cniJson | ConvertTo-Json -depth 100)
    } elseif ($containerRuntime -eq "containerd") {
      $cniJson = get-content /etc/kube-flannel-windows/cni-conf-containerd.json | ConvertFrom-Json
      $cniJson.delegate.AdditionalArgs[0].Value.Settings.Exceptions = $serviceSubnet, $podSubnet
      $cniJson.delegate.AdditionalArgs[1].Value.Settings.DestinationPrefix = $serviceSubnet
      $cniJson.delegate.AdditionalArgs[2].Value.Settings.ProviderAddress = $networkJson.AddressCIDR.Split('/')[0]
      Set-Content -Path /host/etc/cni/net.d/10-flannel.conf ($cniJson | ConvertTo-Json -depth 100)
    }
    cp -force /etc/kube-flannel/net-conf.json /host/etc/kube-flannel
    cp -force -recurse /cni/* /host/opt/cni/bin
    cp -force /k/flannel/* /host/k/flannel/
    cp -force /kube-proxy/kubeconfig.conf /host/k/flannel/kubeconfig.yml
    cp -force /var/run/secrets/kubernetes.io/serviceaccount/* /host/k/flannel/var/run/secrets/kubernetes.io/serviceaccount/
    wins cli process run --path /k/flannel/setup.exe --args "--mode=overlay --interface=Ethernet"
    wins cli route add --addresses 169.254.169.254
    wins cli process run --path /k/flannel/flanneld.exe --args "--kube-subnet-mgr --kubeconfig-file /k/flannel/kubeconfig.yml" --envs "POD_NAME=$env:POD_NAME POD_NAMESPACE=$env:POD_NAMESPACE"
  cni-conf.json: |
    {
      "name": "flannel.4096",
      "cniVersion": "0.3.0",
      "type": "flannel",
      "capabilities": {
        "dns": true
      },
      "delegate": {
        "type": "win-overlay",
        "policies": [
          {
            "Name": "EndpointPolicy",
            "Value": {
              "Type": "OutBoundNAT",
              "ExceptionList": []
            }
          },
          {
            "Name": "EndpointPolicy",
            "Value": {
              "Type": "ROUTE",
              "DestinationPrefix": "",
              "NeedEncap": true
            }
          }
        ]
      }
    }
  cni-conf-containerd.json: |
    {
      "name": "flannel.4096",
      "cniVersion": "0.2.0",
      "type": "flannel",
      "capabilities": {
        "portMappings": true,
        "dns": true
      },
      "delegate": {
        "type": "sdnoverlay",
        "AdditionalArgs": [
          {
            "Name": "EndpointPolicy",
            "Value": {
              "Type": "OutBoundNAT",
              "Settings" : {
                "Exceptions": []
              }
            }
          },
          {
            "Name": "EndpointPolicy",
            "Value": {
              "Type": "SDNROUTE",
              "Settings": {
                "DestinationPrefix": "",
                "NeedEncap": true
              }
            }
          },
          {
            "Name":"EndpointPolicy",
            "Value":{
              "Type":"ProviderAddress",
              "Settings":{
                "ProviderAddress":""
              }
            }
          }
        ]
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds-windows-amd64
  labels:
    tier: node
    app: flannel
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - windows
              - key: kubernetes.io/arch
                operator: In
                values:
                - amd64
      hostNetwork: true
      serviceAccountName: flannel
      tolerations:
      - operator: Exists
        effect: NoSchedule
      containers:
      - name: kube-flannel
        image: sigwindowstools/flannel:v0.13.0-nanoserver
        command:
        - pwsh
        args:
        - -file
        - /etc/kube-flannel-windows/run.ps1
        volumeMounts:
        - name: wins
          mountPath: \\.\pipe\rancher_wins
        - name: host
          mountPath: /host
        - name: kube-proxy
          mountPath: /kube-proxy
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: flannel-windows-cfg
          mountPath: /etc/kube-flannel-windows/
        - name: kubeadm-config
          mountPath: /etc/kubeadm-config/
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
      volumes:
      - name: opt
        hostPath:
          path: /opt
      - name: host
        hostPath:
          path: /
      - name: cni
        hostPath:
          path: /etc
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: flannel-windows-cfg
        configMap:
          name: kube-flannel-windows-cfg
      - name: kube-proxy
        configMap:
          name: kube-proxy
      - name: kubeadm-config
        configMap:
          name: kubeadm-config
      - name: wins
        hostPath:
          path: \\.\pipe\rancher_wins
          type: null
EOF
}
# Remove the Windows flannel overlay daemonset and its configmap.
clean_ws_flannel_overlay () {
    kubectl delete -n kube-system daemonset.apps/kube-flannel-ds-windows-amd64
    kubectl delete -n kube-system configmap/kube-flannel-windows-cfg
}
    1463. ### BCS
### BCS
# Create the bk-system namespace and label it bcs-webhook=false.
# NOTE(review): the JSON patch adds a whole /metadata/labels object, which
# would replace any labels already on the namespace — harmless for a freshly
# created namespace, but confirm before reusing on existing ones.
_init_bk_ns () {
    kubectl create ns bk-system
    kubectl patch ns bk-system --type=json -p='[{"op": "add", "path": "/metadata/labels", "value": {"bcs-webhook": "false"}}]'
}
    1468. ### Kubernetes生态工具
    1469. # k8s >= 1.18
### Kubernetes ecosystem tooling
# Requires k8s >= 1.18. Install ingress-nginx (chart 3.36.0, controller
# v0.49.0) into bk-system via helm, exposed as NodePort 32080/32443 and set
# as the default ingress class, then wait for the deployment rollout.
install_ingress-nginx () {
    local NAMESPACE="bk-system"
    kubectl get ns "$NAMESPACE" || _init_bk_ns
    # When using a local helm repo, change the repo lines, e.g.:
    # helm repo add localrepo http://192.168.10.42/helmChart
    # helm repo update
    helm repo add mirrors https://hub.bktencent.com/chartrepo/mirrors
    helm repo update
    cat << EOF | helm upgrade --install ingress-nginx mirrors/ingress-nginx -n $NAMESPACE --version 3.36.0 --debug -f - || error "helm upgrade failed"
controller:
  metrics:
    enabled: true
  image:
    # 如果配置了本地镜像仓库,需要修改自己的路径,比如: registry: ${BK_PUBLIC_REPO}
    registry: ${BK_PUBLIC_REPO}/k8s.gcr.io
    tag: "v0.49.0"
    digest: ""
  config:
    # nginx 与 client 保持的一个长连接能处理的请求数量,默认 100,高并发场景建议调高。
    # 参考: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#keep-alive-requests
    keep-alive-requests: "10000"
    # nginx 与 upstream 保持长连接的最大空闲连接数 (不是最大连接数),默认 32,在高并发下场景下调大,避免频繁建连导致 TIME_WAIT 飙升。
    # 参考: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#upstream-keepalive-connections
    upstream-keepalive-connections: "200"
    # 每个 worker 进程可以打开的最大连接数,默认 16384
    # 参考: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#max-worker-connections
    max-worker-connections: "65536"
    # 上传文件需要
    proxy-body-size: "2G"
    proxy-read-timeout: "600"
  service:
    type: NodePort
    nodePorts:
      http: 32080
      https: 32443
  ingressClassResource:
    enabled: true
    default: true
  admissionWebhooks:
    patch:
      image:
        registry: ${BK_PUBLIC_REPO}
        tag: "v1.5.1"
        digest: ""
EOF
    kubectl -n $NAMESPACE wait --for=condition=available --timeout=600s deployment --all
    kubectl -n $NAMESPACE get deployments --output name | xargs -I{} kubectl -n $NAMESPACE rollout status --timeout=600s {}
}
# Install kubeapps from the bitnami chart repo into its own namespace.
# With a local helm repo use instead:
#   kubectl create namespace kubeapps
#   helm install kubeapps --namespace kubeapps localrepo/kubeapps
install_kubeapps () {
    helm repo add bitnami https://charts.bitnami.com/bitnami
    kubectl create namespace kubeapps
    helm install kubeapps --namespace kubeapps bitnami/kubeapps
}
# Uninstall the kubeapps helm release.
clean_kubeapps () {
    helm uninstall kubeapps --namespace kubeapps
}
    1529. install_localpv_dir () {
    1530. install -dv /mnt/blueking/vol{01..20} "${BK_HOME}/localpv"/vol{01..20} || error "create dir failed"
    1531. for i in {01..20}; do
    1532. src_dir="${BK_HOME}/localpv/vol$i"
    1533. dst_dir="/mnt/blueking/vol$i"
    1534. if grep -w "$src_dir" /etc/fstab; then
    1535. warning "WARN: /etc/fstab [$src_dir] already exists"
    1536. else
    1537. echo "$src_dir $dst_dir none defaults,bind 0 0" | tee -a /etc/fstab || error "add /etc/fstab failed"
    1538. fi
    1539. done
    1540. # 挂载
    1541. mount -av || error "mount local pv dir failed"
    1542. }
# Install the local-volume-provisioner chart (2.4.0) into bk-system, making
# "local-storage" (backed by /mnt/blueking) the default StorageClass, then
# wait for the daemonset rollout.
install_localpv () {
    local NAMESPACE="bk-system"
    kubectl get ns "$NAMESPACE" || _init_bk_ns
    # Comment out the next two lines when a local helm repo is configured
    helm repo add mirrors https://hub.bktencent.com/chartrepo/mirrors
    helm repo update
    cat << EOF | helm upgrade --install provisioner mirrors/provisioner -n $NAMESPACE --version 2.4.0 --debug -f - || error "helm upgrade failed"
daemonset:
  image: ${BK_PUBLIC_REPO}/k8s.gcr.io/sig-storage/local-volume-provisioner:v2.4.0
classes:
- name: local-storage
  hostDir: /mnt/blueking
  volumeMode: Filesystem
  storageClass:
    # create and set storage class as default
    isDefaultClass: true
    reclaimPolicy: Delete
EOF
    kubectl -n $NAMESPACE get daemonset --output name | xargs -I{} kubectl -n $NAMESPACE rollout status --timeout=600s {}
}
# Deploy metrics-server v0.5.2 (upstream components.yaml, image rewritten to
# ${BK_PUBLIC_REPO}; --kubelet-insecure-tls=true skips kubelet cert checks),
# then wait for the deployment rollout.
install_metrics-server () {
    cat << EOF | kubectl apply -f - || error "install metrics-server failed"
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  - nodes/stats
  - namespaces
  - configmaps
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        - --kubelet-insecure-tls=true
        image: ${BK_PUBLIC_REPO}/k8s.gcr.io/metrics-server/metrics-server:v0.5.2
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
EOF
    kubectl -n kube-system rollout status --timeout=600s deployment/metrics-server
}
    ## Remove every metrics-server resource that install_metrics-server created.
    ## Selection is by the shared label k8s-app=metrics-server, so one command
    ## covers the APIService, Deployment, Service, RBAC objects and ServiceAccount.
    ## error() is a fatal-error helper defined elsewhere in bcs.sh.
    clean_metrics-server () {
      kubectl -n kube-system delete apiservice,deployment,service,clusterrolebinding,rolebinding,clusterrole,serviceaccount -l k8s-app=metrics-server || error "uninstall metrics-server failed"
    }
    1764. ## 脚本框架
    ## check_args VALUE
    ## Validate that an option actually received a value.
    ## Returns 0 when $1 is non-empty; otherwise reports the missing value via
    ## error() and exits through usage_and_exit (both defined elsewhere in bcs.sh).
    check_args () {
      if [[ -n "$1" ]]; then
        return 0
      else
        error "缺少参数值"
        usage_and_exit 1
      fi
    }
    ## check_func OP_TYPE PROJECT
    ## Verify that a handler function exists for the requested operation.
    ## Handlers follow the naming convention <OP_TYPE>_<PROJECT>, e.g. install_k8s.
    ## Returns 0 when either argument is empty (nothing to validate yet);
    ## otherwise calls error() (fatal, defined elsewhere) if no handler exists.
    check_func () {
      local OP_TYPE="$1"
      local PROJECT="$2"
      # Guard clause: an incomplete op/project pair is checked later by usage handling.
      if [[ -z "${OP_TYPE}" || -z "${PROJECT}" ]]; then
        return 0
      fi
      type "${OP_TYPE}_${PROJECT}" &>/dev/null || error "${OP_TYPE} [$PROJECT] NOT SUPPORT"
    }
    # Parse command-line arguments, mixed long/short style.
    # Sets OP_TYPE (install|get|clean|render) and PROJECT (the handler target).
    # Fix: the original never called check_args, so a value-taking option with no
    # value (e.g. "./bcs.sh -i") left PROJECT empty and later failed with a
    # confusing "install_: command not found"; validate each value up front.
    (( $# == 0 )) && usage_and_exit 1
    while (( $# > 0 )); do
      case "$1" in
        --install | -i | install )
          shift
          check_args "$1"   # fail early when the project name is missing
          PROJECT="$1"
          OP_TYPE="install"
          ;;
        --get | get )
          shift
          check_args "$1"
          PROJECT="$1"
          OP_TYPE="get"
          ;;
        --clean | -c | clean )
          shift
          check_args "$1"
          PROJECT="$1"
          OP_TYPE="clean"
          ;;
        --render | -r | render )
          shift
          check_args "$1"
          PROJECT="$1"
          OP_TYPE="render"
          ;;
        --help | -h | '-?' | help )
          usage_and_exit 0
          ;;
        --version | -v | -V | version )
          version
          exit 0
          ;;
        -*)
          error "不可识别的参数: $1"
          ;;
        *)
          # First non-option word: stop parsing, leave the rest in "$@".
          break
          ;;
      esac
      shift
    done
    # Dispatch: confirm the <OP_TYPE>_<PROJECT> handler exists, then run it.
    check_func "${OP_TYPE}" "${PROJECT}"
    # bcsenv prepares the environment itself; everything else needs cloud detection.
    [[ ${PROJECT} == "bcsenv" ]] || _on_cloud
    case "${OP_TYPE}" in
      install)
        highlight "INSTALL: ${PROJECT}"
        "install_${PROJECT}"
        ok_bcs
        ;;
      get)
        highlight "Get: ${PROJECT}"
        "get_${PROJECT}"
        ok_bcs
        ;;
      clean)
        highlight "CLEAN: ${PROJECT}"
        "clean_${PROJECT}"
        bye_bcs
        ;;
      render)
        highlight "RENDER CONFIG TEMPLATE: ${PROJECT}"
        "render_${PROJECT}"
        ok_bcs
        ;;
      -*)
        # NOTE(review): unreachable — the parser only ever sets OP_TYPE to one of
        # the four literal operations above (or leaves it unset); kept defensively.
        error "不可识别的参数: $1"
        ;;
      *)
        # OP_TYPE unset (only positional args were given): show usage.
        usage_and_exit 0
        ;;
    esac

     给文件设置权限

    chmod +x bcs.sh   # 可执行权限即可,无需开放 777

    在第一台master 机器执行命令

    ./bcs.sh  -i k8s

    执行成功获得以下信息 

     执行成功后,复制以下命令

    1. Kubernetes控制节点启动成功
    2. 扩容控制平面执行以下命令
    3. cluster_env=T05fQ0xPVUQ9ImJhcmUtbWV0YWwiCkJDU19ESVI9Ii9yb290Ly5iY3MiCkJLX0hPTUU9Ii9kYXRhL2JjcyIKYmNzX3N5c2N0bD0iMSIKTUlSUk9SX0lQPSJudWxsIgpNSVJST1JfVVJMPSJodHRwczovL21pcnJvcnMudGVuY2VudC5jb20iCkJLUkVQT19VUkw9Im51bGwiCkRPQ0tFUl9MSUI9Ii9kYXRhL2Jjcy9saWIvZG9ja2VyIgpET0NLRVJfVkVSU0lPTj0iMTkuMDMuOSIKRE9DS0VSX0xJVkVfUkVTVE9SRT0iZmFsc2UiClJFUE9fTUlSUk9SUz0nWyJodHRwczovL21pcnJvci5jY3MudGVuY2VudHl1bi5jb20iXScKRE9DS0VSX0JSSURHRT0ibnVsbCIKQktfUFVCTElDX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tIgpCS19SRUxFQVNFX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tL2JsdWVraW5nIgpLVUJFTEVUX0xJQj0iL2RhdGEvYmNzL2xpYi9rdWJlbGV0IgpLOFNfVkVSPSIxLjIwLjExIgpLOFNfU1ZDX0NJRFI9IjEwLjk2LjAuMC8xMiIKSzhTX1BPRF9DSURSPSIxMC4yNDQuMC4wLzE2IgpLOFNfRVhUUkFfQVJHUz0iYWxsb3dlZC11bnNhZmUtc3lzY3RsczogbmV0LmlwdjQudGNwX3R3X3JldXNlIgpFVENEX0xJQj0iL2RhdGEvYmNzL2xpYi9ldGNkIgpCQ1NfSzhTX0NUUkxfSVA9IjE5Mi4xNjguNDYuMTI4Igo=
    4. master_join_cmd_b64=a3ViZWFkbSBqb2luIGs4cy1hcGkuYmNzLmxvY2FsOjY0NDMgLS10b2tlbiA5dXgzYWkuZW00ZmlxMGV4cWh2N2l6YiAgICAgLS1kaXNjb3ZlcnktdG9rZW4tY2EtY2VydC1oYXNoIHNoYTI1Njo0MjIzNWUyMzFmYzk3YzJiNWNkYTNiOWUxYTE2OTcxYzYyYzkwOGFmOTdhZTExOWE3ZDNjNzdhZDhlNTdjMjk1ICAtLWNvbnRyb2wtcGxhbmUgLS1jZXJ0aWZpY2F0ZS1rZXkgZjk3NjM3MDY0OGMwYzA5ZmQ0ZDk0OWMwYjEyOGNiYTdmNzMwOTVhN2YxZTJmM2QwNGM3MWYxNDIwNjdlMjQxMA==
    5. set +a
    6. curl -fsSL https://bkopen-1252002024.file.myqcloud.com/ce7/bcs.sh | bash -s -- install k8s-control-plane
    7. 扩容节点执行以下命令
    8. set -a
    9. cluster_env=T05fQ0xPVUQ9ImJhcmUtbWV0YWwiCkJDU19ESVI9Ii9yb290Ly5iY3MiCkJLX0hPTUU9Ii9kYXRhL2JjcyIKYmNzX3N5c2N0bD0iMSIKTUlSUk9SX0lQPSJudWxsIgpNSVJST1JfVVJMPSJodHRwczovL21pcnJvcnMudGVuY2VudC5jb20iCkJLUkVQT19VUkw9Im51bGwiCkRPQ0tFUl9MSUI9Ii9kYXRhL2Jjcy9saWIvZG9ja2VyIgpET0NLRVJfVkVSU0lPTj0iMTkuMDMuOSIKRE9DS0VSX0xJVkVfUkVTVE9SRT0iZmFsc2UiClJFUE9fTUlSUk9SUz0nWyJodHRwczovL21pcnJvci5jY3MudGVuY2VudHl1bi5jb20iXScKRE9DS0VSX0JSSURHRT0ibnVsbCIKQktfUFVCTElDX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tIgpCS19SRUxFQVNFX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tL2JsdWVraW5nIgpLVUJFTEVUX0xJQj0iL2RhdGEvYmNzL2xpYi9rdWJlbGV0IgpLOFNfVkVSPSIxLjIwLjExIgpLOFNfU1ZDX0NJRFI9IjEwLjk2LjAuMC8xMiIKSzhTX1BPRF9DSURSPSIxMC4yNDQuMC4wLzE2IgpLOFNfRVhUUkFfQVJHUz0iYWxsb3dlZC11bnNhZmUtc3lzY3RsczogbmV0LmlwdjQudGNwX3R3X3JldXNlIgpFVENEX0xJQj0iL2RhdGEvYmNzL2xpYi9ldGNkIgpCQ1NfSzhTX0NUUkxfSVA9IjE5Mi4xNjguNDYuMTI4Igo=
    10. join_cmd_b64=a3ViZWFkbSBqb2luIGs4cy1hcGkuYmNzLmxvY2FsOjY0NDMgLS10b2tlbiA5dXgzYWkuZW00ZmlxMGV4cWh2N2l6YiAgICAgLS1kaXNjb3ZlcnktdG9rZW4tY2EtY2VydC1oYXNoIHNoYTI1Njo0MjIzNWUyMzFmYzk3YzJiNWNkYTNiOWUxYTE2OTcxYzYyYzkwOGFmOTdhZTExOWE3ZDNjNzdhZDhlNTdjMjk1IA==
    11. set +a
    12. curl -fsSL https://bkopen-1252002024.file.myqcloud.com/ce7/bcs.sh | bash -s -- install k8s-node

    扩容master

    将bcs.sh上传到扩容的master节点后,在另外要扩容的master节点执行:

    1. Kubernetes控制节点启动成功
    2. 扩容控制平面执行以下命令
    3. cluster_env=T05fQ0xPVUQ9ImJhcmUtbWV0YWwiCkJDU19ESVI9Ii9yb290Ly5iY3MiCkJLX0hPTUU9Ii9kYXRhL2JjcyIKYmNzX3N5c2N0bD0iMSIKTUlSUk9SX0lQPSJudWxsIgpNSVJST1JfVVJMPSJodHRwczovL21pcnJvcnMudGVuY2VudC5jb20iCkJLUkVQT19VUkw9Im51bGwiCkRPQ0tFUl9MSUI9Ii9kYXRhL2Jjcy9saWIvZG9ja2VyIgpET0NLRVJfVkVSU0lPTj0iMTkuMDMuOSIKRE9DS0VSX0xJVkVfUkVTVE9SRT0iZmFsc2UiClJFUE9fTUlSUk9SUz0nWyJodHRwczovL21pcnJvci5jY3MudGVuY2VudHl1bi5jb20iXScKRE9DS0VSX0JSSURHRT0ibnVsbCIKQktfUFVCTElDX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tIgpCS19SRUxFQVNFX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tL2JsdWVraW5nIgpLVUJFTEVUX0xJQj0iL2RhdGEvYmNzL2xpYi9rdWJlbGV0IgpLOFNfVkVSPSIxLjIwLjExIgpLOFNfU1ZDX0NJRFI9IjEwLjk2LjAuMC8xMiIKSzhTX1BPRF9DSURSPSIxMC4yNDQuMC4wLzE2IgpLOFNfRVhUUkFfQVJHUz0iYWxsb3dlZC11bnNhZmUtc3lzY3RsczogbmV0LmlwdjQudGNwX3R3X3JldXNlIgpFVENEX0xJQj0iL2RhdGEvYmNzL2xpYi9ldGNkIgpCQ1NfSzhTX0NUUkxfSVA9IjE5Mi4xNjguNDYuMTI4Igo=
    4. master_join_cmd_b64=a3ViZWFkbSBqb2luIGs4cy1hcGkuYmNzLmxvY2FsOjY0NDMgLS10b2tlbiA5dXgzYWkuZW00ZmlxMGV4cWh2N2l6YiAgICAgLS1kaXNjb3ZlcnktdG9rZW4tY2EtY2VydC1oYXNoIHNoYTI1Njo0MjIzNWUyMzFmYzk3YzJiNWNkYTNiOWUxYTE2OTcxYzYyYzkwOGFmOTdhZTExOWE3ZDNjNzdhZDhlNTdjMjk1ICAtLWNvbnRyb2wtcGxhbmUgLS1jZXJ0aWZpY2F0ZS1rZXkgZjk3NjM3MDY0OGMwYzA5ZmQ0ZDk0OWMwYjEyOGNiYTdmNzMwOTVhN2YxZTJmM2QwNGM3MWYxNDIwNjdlMjQxMA==
    5. set +a

    再执行

    1. curl -fsSL https://bkopen-1252002024.file.myqcloud.com/ce7/bcs.sh | bash -s -- install k8s-control-plane
    2. 或者
    3. ./bcs.sh -i k8s-control-plane

    扩容node节点

    将bcs.sh上传到扩容的node节点后,在要扩容的node节点执行:

    1. 扩容节点执行以下命令
    2. set -a
    3. cluster_env=T05fQ0xPVUQ9ImJhcmUtbWV0YWwiCkJDU19ESVI9Ii9yb290Ly5iY3MiCkJLX0hPTUU9Ii9kYXRhL2JjcyIKYmNzX3N5c2N0bD0iMSIKTUlSUk9SX0lQPSJudWxsIgpNSVJST1JfVVJMPSJodHRwczovL21pcnJvcnMudGVuY2VudC5jb20iCkJLUkVQT19VUkw9Im51bGwiCkRPQ0tFUl9MSUI9Ii9kYXRhL2Jjcy9saWIvZG9ja2VyIgpET0NLRVJfVkVSU0lPTj0iMTkuMDMuOSIKRE9DS0VSX0xJVkVfUkVTVE9SRT0iZmFsc2UiClJFUE9fTUlSUk9SUz0nWyJodHRwczovL21pcnJvci5jY3MudGVuY2VudHl1bi5jb20iXScKRE9DS0VSX0JSSURHRT0ibnVsbCIKQktfUFVCTElDX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tIgpCS19SRUxFQVNFX1JFUE89Imh1Yi5ia3RlbmNlbnQuY29tL2JsdWVraW5nIgpLVUJFTEVUX0xJQj0iL2RhdGEvYmNzL2xpYi9rdWJlbGV0IgpLOFNfVkVSPSIxLjIwLjExIgpLOFNfU1ZDX0NJRFI9IjEwLjk2LjAuMC8xMiIKSzhTX1BPRF9DSURSPSIxMC4yNDQuMC4wLzE2IgpLOFNfRVhUUkFfQVJHUz0iYWxsb3dlZC11bnNhZmUtc3lzY3RsczogbmV0LmlwdjQudGNwX3R3X3JldXNlIgpFVENEX0xJQj0iL2RhdGEvYmNzL2xpYi9ldGNkIgpCQ1NfSzhTX0NUUkxfSVA9IjE5Mi4xNjguNDYuMTI4Igo=
    4. join_cmd_b64=a3ViZWFkbSBqb2luIGs4cy1hcGkuYmNzLmxvY2FsOjY0NDMgLS10b2tlbiA5dXgzYWkuZW00ZmlxMGV4cWh2N2l6YiAgICAgLS1kaXNjb3ZlcnktdG9rZW4tY2EtY2VydC1oYXNoIHNoYTI1Njo0MjIzNWUyMzFmYzk3YzJiNWNkYTNiOWUxYTE2OTcxYzYyYzkwOGFmOTdhZTExOWE3ZDNjNzdhZDhlNTdjMjk1IA==
    5. set +a

    再执行

    1. curl -fsSL https://bkopen-1252002024.file.myqcloud.com/ce7/bcs.sh | bash -s -- install k8s-node
    2. 或者
    3. ./bcs.sh -i k8s-node

    查看集群

     使用命令

    1. kubectl get node
    2. 或者
    3. kubectl get node -o wide

    会出现的问题

    出现 The connection to the server localhost:8080 was refused - did you specify the right host or port? 的错误信息。

    #从主master节点复制admin.conf到有问题的机器

    scp /etc/kubernetes/admin.conf 192.168.46.129:/etc/kubernetes/admin.conf   # 目标IP请替换为本集群中有问题的节点,如 192.168.46.129

    输入密码

    传过去后,在有问题机器输入

    1. echo "export KUBECONFIG=/etc/kubernetes/admin.conf" >> ~/.bash_profile
    2. source ~/.bash_profile

  • 相关阅读:
    flutter布局中的一些细节
    springboot中实现权限认证的两个框架
    各位同志,Android studio打不开,提示什么动态链接库
    怎么禁止用户点击f12查看数据,查看网页代码
    ESP8266-Arduino编程实例-L3G4200D三轴陀螺仪驱动
    阿里旗下语雀宕机近10小时 企业关键业务如何避免被“云”拖累
    基于单片机的指纹密码锁系统
    【华为机试 Python实现】图的存储结构_图的遍历_最短路径问题
    CMU 15-213 CSAPP. Ch11. Dynamic Memory Allocation
    MBA-day26 数的概念与性质
  • 原文地址:https://blog.csdn.net/qq_63431773/article/details/132733207