• k8s from Beginner to Mastery


    Contents

    Introduction to k8s

    1. Deploying Kubernetes

    1.0 One-click deployment with sealos

    2.0 Binary installation package download

    2. Building a Harbor registry

    3. Ingress: the k8s traffic entry point

    3.1 Installing ingress-nginx with helm3 (requires k8s 1.20 or later)

    4. HPA: automatic horizontal scaling of pods

    5. k8s storage

    5.1 k8s persistent storage: PV and PVC

    5.2 StorageClass

    5.3 Ubuntu 20.04 system notes

    6. Stateful services with StatefulSet (k8s architect course)

    7. k8s one-off and scheduled tasks

    8. k8s RBAC role-based access control

    9. Collecting k8s business logs: overview and hands-on

    10. Hands-on Prometheus monitoring in k8s

    11. Installing the Kuboard graphical UI

    Offline installation download:

    12. GitLab-based CI/CD automation (k8s architect course)

    13. Installing KubeSphere 3.3


    Introduction to k8s

    k8s architecture diagram


    1. Deploying Kubernetes

    1.0 One-click deployment with sealos

    sealos offline installation of k8s - CSDN blog
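
    As a rough orientation only (the install actually used in this document is the binary/kubeasz script below), a sealos 4.x one-liner for the host plan in the next section might look like the sketch below; the image names and versions are illustrative assumptions, not taken from this document:

    sealos run labring/kubernetes:v1.25.0 labring/helm:v3.8.2 labring/calico:v3.24.1 \
      --masters 20.6.100.220,20.6.100.221,20.6.100.222 \
      --nodes 20.6.100.223,20.6.100.224 \
      -p <root-password>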

    1. Environment plan

    1. OS version: CentOS 7.9.2009
    2. Server spec: 2 CPU cores, 4 GB RAM
    3. k8s version: 1.20.2
    4. docker version: docker-ce-19.03.14, docker-ce-cli-19.03.14
    5. IP plan: 3 masters and 2 worker nodes (an optional /etc/hosts sketch follows this list)
    6. 20.6.100.220 k8s-m01
    7. 20.6.100.221 k8s-m02
    8. 20.6.100.222 k8s-m03
    9. 20.6.100.223 k8s-node01
    10. 20.6.100.224 k8s-node02
    11. 20.6.100.225 k8s-ck
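
    Optionally, the plan above can be mirrored into /etc/hosts on every machine so the hostnames resolve; this is only a convenience sketch and is not required by the install script:

    cat >> /etc/hosts <<EOF
    20.6.100.220 k8s-m01
    20.6.100.221 k8s-m02
    20.6.100.222 k8s-m03
    20.6.100.223 k8s-node01
    20.6.100.224 k8s-node02
    20.6.100.225 k8s-ck
    EOF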

    2. System configuration and yum sources

    1. #添加访问互联路由
    2. cat > /etc/resolv.conf <<EOF
    3. nameserver 8.8.8.8
    4. nameserver 114.114.114.114
    5. EOF
    6. #设置为阿里云yum源
    7. mkdir -p /etc/yum.repos.d/bak && mv /etc/yum.repos.d/CentOS* /etc/yum.repos.d/bak
    8. curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
    9. yum clean all
    10. yum -y install sshpass wget conntrack ntpdate ntp ipvsadm ipset jq iptables curl sysstat libseccomp wget vim net-tools git lrzsz unzip gcc telnet

    2.0 Binary installation package download

    1. 链接:https://pan.baidu.com/s/1OBT9pxcZiuHx0hLxS2Fd1g?pwd=fhbg
    2. 提取码:fhbg

    2.1 The installation script, from Boge (博哥爱运维)

    cat k8s_install_new.sh

    1. #!/bin/bash
    2. # auther: boge
    3. # descriptions: the shell scripts will use ansible to deploy K8S at binary for siample
    4. # 传参检测
    5. [ $# -ne 6 ] && echo -e "Usage: $0 rootpasswd netnum nethosts cri cni k8s-cluster-name\nExample: bash $0 newpasswd 20.6.100 220\ 221\ 222\ 223\ 224 [containerd|docker] [calico|flannel] test\n" && exit 11
    6. # 变量定义
    7. export release=3.0.0
    8. export k8s_ver=v1.20.2 # v1.20.2, v1.19.7, v1.18.15, v1.17.17
    9. rootpasswd=$1
    10. netnum=$2
    11. nethosts=$3
    12. cri=$4
    13. cni=$5
    14. clustername=$6
    15. if ls -1v ./kubeasz*.tar.gz &>/dev/null;then software_packet="$(ls -1v ./kubeasz*.tar.gz )";else software_packet="";fi
    16. pwd="/etc/kubeasz"
    17. # deploy机器升级软件库
    18. if cat /etc/redhat-release &>/dev/null;then
    19. yum update -y
    20. else
    21. apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y
    22. [ $? -ne 0 ] && apt-get -yf install
    23. fi
    24. # deploy机器检测python环境
    25. python2 -V &>/dev/null
    26. if [ $? -ne 0 ];then
    27. if cat /etc/redhat-release &>/dev/null;then
    28. yum install gcc openssl-devel bzip2-devel wget -y
    29. wget https://www.python.org/ftp/python/2.7.16/Python-2.7.16.tgz
    30. tar xzf Python-2.7.16.tgz
    31. cd Python-2.7.16
    32. ./configure --enable-optimizations
    33. make altinstall
    34. ln -s /usr/bin/python2.7 /usr/bin/python
    35. cd -
    36. else
    37. apt-get install -y python2.7 && ln -s /usr/bin/python2.7 /usr/bin/python
    38. fi
    39. fi
    40. # deploy机器设置pip安装加速源
    41. if [[ $clustername != 'aws' ]]; then
    42. mkdir ~/.pip
    43. cat > ~/.pip/pip.conf <<CB
    44. [global]
    45. index-url = https://mirrors.aliyun.com/pypi/simple
    46. [install]
    47. trusted-host=mirrors.aliyun.com
    48. CB
    49. fi
    50. # deploy机器安装相应软件包
    51. if cat /etc/redhat-release &>/dev/null;then
    52. yum install git python-pip sshpass wget -y
    53. [ -f ./get-pip.py ] && python ./get-pip.py || {
    54. wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && python get-pip.py
    55. }
    56. else
    57. apt-get install git python-pip sshpass -y
    58. [ -f ./get-pip.py ] && python ./get-pip.py || {
    59. wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && python get-pip.py
    60. }
    61. fi
    62. python -m pip install --upgrade "pip < 21.0"
    63. pip -V
    64. pip install --no-cache-dir ansible netaddr
    65. # 在deploy机器做其他node的ssh免密操作
    66. for host in `echo "${nethosts}"`
    67. do
    68. echo "============ ${netnum}.${host} ===========";
    69. if [[ ${USER} == 'root' ]];then
    70. [ ! -f /${USER}/.ssh/id_rsa ] &&\
    71. ssh-keygen -t rsa -P '' -f /${USER}/.ssh/id_rsa
    72. else
    73. [ ! -f /home/${USER}/.ssh/id_rsa ] &&\
    74. ssh-keygen -t rsa -P '' -f /home/${USER}/.ssh/id_rsa
    75. fi
    76. sshpass -p ${rootpasswd} ssh-copy-id -o StrictHostKeyChecking=no ${USER}@${netnum}.${host}
    77. if cat /etc/redhat-release &>/dev/null;then
    78. ssh -o StrictHostKeyChecking=no ${USER}@${netnum}.${host} "yum update -y"
    79. else
    80. ssh -o StrictHostKeyChecking=no ${USER}@${netnum}.${host} "apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y"
    81. [ $? -ne 0 ] && ssh -o StrictHostKeyChecking=no ${USER}@${netnum}.${host} "apt-get -yf install"
    82. fi
    83. done
    84. # deploy机器下载k8s二进制安装脚本
    85. if [[ ${software_packet} == '' ]];then
    86. curl -C- -fLO --retry 3 https://github.com/easzlab/kubeasz/releases/download/${release}/ezdown
    87. sed -ri "s+^(K8S_BIN_VER=).*$+\1${k8s_ver}+g" ezdown
    88. chmod +x ./ezdown
    89. # 使用工具脚本下载
    90. ./ezdown -D && ./ezdown -P
    91. else
    92. tar xvf ${software_packet} -C /etc/
    93. chmod +x ${pwd}/{ezctl,ezdown}
    94. fi
    95. # 初始化一个名为my的k8s集群配置
    96. CLUSTER_NAME="$clustername"
    97. ${pwd}/ezctl new ${CLUSTER_NAME}
    98. if [[ $? -ne 0 ]];then
    99. echo "cluster name [${CLUSTER_NAME}] was exist in ${pwd}/clusters/${CLUSTER_NAME}."
    100. exit 1
    101. fi
    102. if [[ ${software_packet} != '' ]];then
    103. # 设置参数,启用离线安装
    104. sed -i 's/^INSTALL_SOURCE.*$/INSTALL_SOURCE: "offline"/g' ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    105. fi
    106. # to check ansible service
    107. ansible all -m ping
    108. #---------------------------------------------------------------------------------------------------
    109. #修改二进制安装脚本配置 config.yml
    110. sed -ri "s+^(CLUSTER_NAME:).*$+\1 \"${CLUSTER_NAME}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    111. ## k8s上日志及容器数据存独立磁盘步骤(参考阿里云的)
    112. [ ! -d /var/lib/container ] && mkdir -p /var/lib/container/{kubelet,docker}
    113. ## cat /etc/fstab
    114. # UUID=105fa8ff-bacd-491f-a6d0-f99865afc3d6 / ext4 defaults 1 1
    115. # /dev/vdb /var/lib/container/ ext4 defaults 0 0
    116. # /var/lib/container/kubelet /var/lib/kubelet none defaults,bind 0 0
    117. # /var/lib/container/docker /var/lib/docker none defaults,bind 0 0
    118. ## tree -L 1 /var/lib/container
    119. # /var/lib/container
    120. # ├── docker
    121. # ├── kubelet
    122. # └── lost+found
    123. # docker data dir
    124. DOCKER_STORAGE_DIR="/var/lib/container/docker"
    125. sed -ri "s+^(STORAGE_DIR:).*$+STORAGE_DIR: \"${DOCKER_STORAGE_DIR}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    126. # containerd data dir
    127. CONTAINERD_STORAGE_DIR="/var/lib/container/containerd"
    128. sed -ri "s+^(STORAGE_DIR:).*$+STORAGE_DIR: \"${CONTAINERD_STORAGE_DIR}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    129. # kubelet logs dir
    130. KUBELET_ROOT_DIR="/var/lib/container/kubelet"
    131. sed -ri "s+^(KUBELET_ROOT_DIR:).*$+KUBELET_ROOT_DIR: \"${KUBELET_ROOT_DIR}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    132. if [[ $clustername != 'aws' ]]; then
    133. # docker aliyun repo
    134. REG_MIRRORS="https://pqbap4ya.mirror.aliyuncs.com"
    135. sed -ri "s+^REG_MIRRORS:.*$+REG_MIRRORS: \'[\"${REG_MIRRORS}\"]\'+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    136. fi
    137. # [docker]信任的HTTP仓库
    138. sed -ri "s+127.0.0.1/8+${netnum}.0/24+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    139. # disable dashboard auto install
    140. sed -ri "s+^(dashboard_install:).*$+\1 \"no\"+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    141. # 融合配置准备
    142. CLUSEER_WEBSITE="${CLUSTER_NAME}k8s.gtapp.xyz"
    143. lb_num=$(grep -wn '^MASTER_CERT_HOSTS:' ${pwd}/clusters/${CLUSTER_NAME}/config.yml |awk -F: '{print $1}')
    144. lb_num1=$(expr ${lb_num} + 1)
    145. lb_num2=$(expr ${lb_num} + 2)
    146. sed -ri "${lb_num1}s+.*$+ - "${CLUSEER_WEBSITE}"+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    147. sed -ri "${lb_num2}s+(.*)$+#\1+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    148. # node节点最大pod 数
    149. MAX_PODS="120"
    150. sed -ri "s+^(MAX_PODS:).*$+\1 ${MAX_PODS}+g" ${pwd}/clusters/${CLUSTER_NAME}/config.yml
    151. # 修改二进制安装脚本配置 hosts
    152. # clean old ip
    153. sed -ri '/192.168.1.1/d' ${pwd}/clusters/${CLUSTER_NAME}/hosts
    154. sed -ri '/192.168.1.2/d' ${pwd}/clusters/${CLUSTER_NAME}/hosts
    155. sed -ri '/192.168.1.3/d' ${pwd}/clusters/${CLUSTER_NAME}/hosts
    156. sed -ri '/192.168.1.4/d' ${pwd}/clusters/${CLUSTER_NAME}/hosts
    157. # 输入准备创建ETCD集群的主机位
    158. echo "enter etcd hosts here (example: 222 221 220) ↓"
    159. read -p "" ipnums
    160. for ipnum in `echo ${ipnums}`
    161. do
    162. echo $netnum.$ipnum
    163. sed -i "/\[etcd/a $netnum.$ipnum" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    164. done
    165. # 输入准备创建KUBE-MASTER集群的主机位
    166. echo "enter kube-master hosts here (example: 222 221 220) ↓"
    167. read -p "" ipnums
    168. for ipnum in `echo ${ipnums}`
    169. do
    170. echo $netnum.$ipnum
    171. sed -i "/\[kube_master/a $netnum.$ipnum" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    172. done
    173. # 输入准备创建KUBE-NODE集群的主机位
    174. echo "enter kube-node hosts here (example: 224 223) ↓"
    175. read -p "" ipnums
    176. for ipnum in `echo ${ipnums}`
    177. do
    178. echo $netnum.$ipnum
    179. sed -i "/\[kube_node/a $netnum.$ipnum" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    180. done
    181. # 配置容器运行时CNI
    182. case ${cni} in
    183. flannel)
    184. sed -ri "s+^CLUSTER_NETWORK=.*$+CLUSTER_NETWORK=\"${cni}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    185. ;;
    186. calico)
    187. sed -ri "s+^CLUSTER_NETWORK=.*$+CLUSTER_NETWORK=\"${cni}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    188. ;;
    189. *)
    190. echo "cni need be flannel or calico."
    191. exit 11
    192. esac
    193. # 配置K8S的ETCD数据备份的定时任务
    194. if cat /etc/redhat-release &>/dev/null;then
    195. if ! grep -w '94.backup.yml' /var/spool/cron/root &>/dev/null;then echo "00 00 * * * `which ansible-playbook` ${pwd}/playbooks/94.backup.yml &> /dev/null" >> /var/spool/cron/root;else echo exists ;fi
    196. chown root.crontab /var/spool/cron/root
    197. chmod 600 /var/spool/cron/root
    198. else
    199. if ! grep -w '94.backup.yml' /var/spool/cron/crontabs/root &>/dev/null;then echo "00 00 * * * `which ansible-playbook` ${pwd}/playbooks/94.backup.yml &> /dev/null" >> /var/spool/cron/crontabs/root;else echo exists ;fi
    200. chown root.crontab /var/spool/cron/crontabs/root
    201. chmod 600 /var/spool/cron/crontabs/root
    202. fi
    203. rm /var/run/cron.reboot
    204. service crond restart
    205. #---------------------------------------------------------------------------------------------------
    206. # 准备开始安装了
    207. rm -rf ${pwd}/{dockerfiles,docs,.gitignore,pics,dockerfiles} &&\
    208. find ${pwd}/ -name '*.md'|xargs rm -f
    209. read -p "Enter to continue deploy k8s to all nodes >>>" YesNobbb
    210. # now start deploy k8s cluster
    211. cd ${pwd}/
    212. # to prepare CA/certs & kubeconfig & other system settings
    213. ${pwd}/ezctl setup ${CLUSTER_NAME} 01
    214. sleep 1
    215. # to setup the etcd cluster
    216. ${pwd}/ezctl setup ${CLUSTER_NAME} 02
    217. sleep 1
    218. # to setup the container runtime(docker or containerd)
    219. case ${cri} in
    220. containerd)
    221. sed -ri "s+^CONTAINER_RUNTIME=.*$+CONTAINER_RUNTIME=\"${cri}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    222. ${pwd}/ezctl setup ${CLUSTER_NAME} 03
    223. ;;
    224. docker)
    225. sed -ri "s+^CONTAINER_RUNTIME=.*$+CONTAINER_RUNTIME=\"${cri}\"+g" ${pwd}/clusters/${CLUSTER_NAME}/hosts
    226. ${pwd}/ezctl setup ${CLUSTER_NAME} 03
    227. ;;
    228. *)
    229. echo "cri need be containerd or docker."
    230. exit 11
    231. esac
    232. sleep 1
    233. # to setup the master nodes
    234. ${pwd}/ezctl setup ${CLUSTER_NAME} 04
    235. sleep 1
    236. # to setup the worker nodes
    237. ${pwd}/ezctl setup ${CLUSTER_NAME} 05
    238. sleep 1
    239. # to setup the network plugin(flannel、calico...)
    240. ${pwd}/ezctl setup ${CLUSTER_NAME} 06
    241. sleep 1
    242. # to setup other useful plugins(metrics-server、coredns...)
    243. ${pwd}/ezctl setup ${CLUSTER_NAME} 07
    244. sleep 1
    245. # [可选]对集群所有节点进行操作系统层面的安全加固 https://github.com/dev-sec/ansible-os-hardening
    246. #ansible-playbook roles/os-harden/os-harden.yml
    247. #sleep 1
    248. cd `dirname ${software_packet:-/tmp}`
    249. k8s_bin_path='/opt/kube/bin'
    250. echo "------------------------- k8s version list ---------------------------"
    251. ${k8s_bin_path}/kubectl version
    252. echo
    253. echo "------------------------- All Healthy status check -------------------"
    254. ${k8s_bin_path}/kubectl get componentstatus
    255. echo
    256. echo "------------------------- k8s cluster info list ----------------------"
    257. ${k8s_bin_path}/kubectl cluster-info
    258. echo
    259. echo "------------------------- k8s all nodes list -------------------------"
    260. ${k8s_bin_path}/kubectl get node -o wide
    261. echo
    262. echo "------------------------- k8s all-namespaces's pods list ------------"
    263. ${k8s_bin_path}/kubectl get pod --all-namespaces
    264. echo
    265. echo "------------------------- k8s all-namespaces's service network ------"
    266. ${k8s_bin_path}/kubectl get svc --all-namespaces
    267. echo
    268. echo "------------------------- k8s welcome for you -----------------------"
    269. echo
    270. # you can use k alias kubectl to siample
    271. echo "alias k=kubectl && complete -F __start_kubectl k" >> ~/.bashrc
    272. # get dashboard url
    273. ${k8s_bin_path}/kubectl cluster-info|grep dashboard|awk '{print $NF}'|tee -a /root/k8s_results
    274. # get login token
    275. ${k8s_bin_path}/kubectl -n kube-system describe secret $(${k8s_bin_path}/kubectl -n kube-system get secret | grep admin-user | awk '{print $1}')|grep 'token:'|awk '{print $NF}'|tee -a /root/k8s_results
    276. echo
    277. echo "you can look again dashboard and token info at >>> /root/k8s_results <<<"
    278. #echo ">>>>>>>>>>>>>>>>> You can excute command [ source ~/.bashrc ] <<<<<<<<<<<<<<<<<<<<"
    279. echo ">>>>>>>>>>>>>>>>> You need to excute command [ reboot ] to restart all nodes <<<<<<<<<<<<<<<<<<<<"
    280. rm -f $0
    281. [ -f ${software_packet} ] && rm -f ${software_packet}
    282. #rm -f ${pwd}/roles/deploy/templates/${USER_NAME}-csr.json.j2
    283. #sed -ri "s+${USER_NAME}+admin+g" ${pwd}/roles/prepare/tasks/main.yml

    2.2 Run the script with its arguments; newpasswd is the servers' root password

    1. bash k8s_install_new.sh newpasswd 20.6.100 220\ 221\ 222\ 223\ 224 docker calico test

    # During the run, the script prompts for the following input

    (screenshot: the etcd, kube-master and kube-node host suffixes entered at the script's prompts)

    2. Building a Harbor registry

    1. Installation

    1. # Upload docker-compose and harbor-offline-installer-v2.4.1.tgz to /root
    2. mv /root/docker-compose /usr/local/bin/
    3. chmod a+x /usr/local/bin/docker-compose
    4. ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
    5. tar -zxvf harbor-offline-installer-v2.4.1.tgz
    6. mv harbor /usr/local/
    7. cd /usr/local/harbor/
    8. cp harbor.yml.tmpl harbor.yml
    9. sed -i 's/hostname: reg.mydomain.com/hostname: 20.6.100.225/g' harbor.yml
    10. sed -i 's/https/#https/g' harbor.yml
    11. sed -i 's/port: 443/#port: 443/g' harbor.yml
    12. sed -i 's/certificate/#certificate/g' harbor.yml
    13. sed -i 's/private_key/#private_key/g' harbor.yml
    14. mkdir -p /etc/docker/
    15. cat >/etc/docker/daemon.json<<EOF
    16. {
    17. "registry-mirrors": ["https://gv33cz42.mirror.aliyuncs.com", "https://registry.cn-hangzhou.aliyuncs.com", "https://registry.docker-cn.com", "https://docker.mirrors.ustc.edu.cn", "https://dockerhub.azk8s.cn"],
    18. "exec-opts": ["native.cgroupdriver=systemd"],
    19. "max-concurrent-downloads": 20,
    20. "live-restore": true,
    21. "storage-driver": "overlay2",
    22. "max-concurrent-uploads": 10,
    23. "debug": true,
    24. "log-opts": {
    25. "max-size": "100m",
    26. "max-file": "10"
    27. },
    28. "insecure-registries": ["20.6.100.225:80"]
    29. }
    30. EOF
    31. systemctl daemon-reload && systemctl restart docker && systemctl status docker
    32. #安装
    33. ./install.sh
    34. ## 重启harbor
    35. cd /usr/local/harbor/
    36. docker-compose down -v
    37. docker-compose up -d
    38. docker ps|grep harbor
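
    After install.sh finishes, a quick health check can be run from any machine; this sketch assumes the Harbor 2.x API (the offline installer above is v2.4.1) and the default admin password Harbor12345:

    curl -s http://20.6.100.225:80/api/v2.0/health
    curl -s -u admin:Harbor12345 http://20.6.100.225:80/api/v2.0/projects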

    2. On every other node that needs to access the registry, add the following to its daemon.json

    1. cat >/etc/docker/daemon.json<<EOF
    2. {
    3. "registry-mirrors": ["https://gv33cz42.mirror.aliyuncs.com", "https://registry.cn-hangzhou.aliyuncs.com", "https://registry.docker-cn.com", "https://docker.mirrors.ustc.edu.cn", "https://dockerhub.azk8s.cn"],
    4. "exec-opts": ["native.cgroupdriver=systemd"],
    5. "max-concurrent-downloads": 20,
    6. "live-restore": true,
    7. "storage-driver": "overlay2",
    8. "max-concurrent-uploads": 10,
    9. "debug": true,
    10. "log-opts": {
    11. "max-size": "100m",
    12. "max-file": "10"
    13. },
    14. "insecure-registries": ["20.6.100.225:80"]
    15. }
    16. EOF
    17. systemctl daemon-reload && systemctl restart docker && systemctl status docker

    3. Using the registry from a node (a sketch of pulling from it inside k8s follows these commands)

    1. #登入仓库网站
    2. docker login -u admin -p Harbor12345 20.6.100.225:80
    3. #下载镜像
    4. docker pull daocloud.io/library/nginx:1.9.1
    5. #给镜像打上标签
    6. docker tag daocloud.io/library/nginx:1.9.1 20.6.100.225:80/library/nginx:1.9.1
    7. #镜像上传
    8. docker push 20.6.100.225:80/library/nginx:1.9.1
    9. #删除镜像
    10. docker rmi 20.6.100.225:80/library/nginx:1.9.1
    11. #打包
    12. docker save daocloud.io/library/nginx:1.9.1 > /root/nginx-1.9.1.tar
    13. #加载包
    14. docker load -i /root/nginx-1.9.1.tar
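
    To let k8s itself pull images from this private registry (rather than only the docker daemon on each node), a minimal sketch using standard kubectl commands; the secret name harbor-login and the deployment name harbor-nginx are placeholders invented here:

    kubectl create secret docker-registry harbor-login \
      --docker-server=20.6.100.225:80 \
      --docker-username=admin \
      --docker-password=Harbor12345
    # attach the pull secret to the default ServiceAccount of the current namespace
    kubectl patch serviceaccount default -p '{"imagePullSecrets": [{"name": "harbor-login"}]}'
    # any pod created afterwards in this namespace can now pull from Harbor
    kubectl create deployment harbor-nginx --image=20.6.100.225:80/library/nginx:1.9.1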

    3. Ingress: the k8s traffic entry point

    1. Logical diagram


    An Ingress is a set of rules that allow inbound connections to reach cluster Services, i.e. a set of forwarding rules sitting between the physical network and the cluster's Services.
    In effect it implements L4/L7 load balancing.
    Note: the Ingress here does not forward external traffic to the pods through the Service; it only uses the Service to look up the corresponding Endpoints, discovers the pods, and forwards to them directly.

       
        internet
            |
       [ Ingress ]   ---> [ Services ] ---> [ Endpoint ]
       --|-----|--                                 |
       [ Pod,pod,...... ]<-------------------------|

    aliyun-ingress-controller carries one very important modification: it supports dynamic updates of the routing configuration.
    Anyone who has used Nginx knows that after changing its configuration you have to run nginx -s reload for the change to take effect.
    The behaviour inside K8s is the same, but a cluster runs a great many services, so its configuration changes very frequently.
    Without dynamic updates, frequent Nginx reloads in such a fast-changing environment cause noticeable problems for incoming requests:

    1. QPS jitter and some failed requests
    2. long-lived connections being dropped repeatedly
    3. a large number of Nginx worker processes stuck in "shutting down", which in turn inflates memory usage

    A detailed analysis is in this article: https://developer.aliyun.com/article/692732

    2. Now deploy aliyun-ingress-controller. The YAML below is exactly the configuration used in production; we save it as aliyun-ingress-nginx.yaml and get ready to deploy:

    1. cat > /data/k8s/aliyun-ingress-nginx.yaml <<EOF
    2. apiVersion: v1
    3. kind: Namespace
    4. metadata:
    5. name: ingress-nginx
    6. labels:
    7. app: ingress-nginx
    8. ---
    9. apiVersion: v1
    10. kind: ServiceAccount
    11. metadata:
    12. name: nginx-ingress-controller
    13. namespace: ingress-nginx
    14. labels:
    15. app: ingress-nginx
    16. ---
    17. apiVersion: rbac.authorization.k8s.io/v1beta1
    18. kind: ClusterRole
    19. metadata:
    20. name: nginx-ingress-controller
    21. labels:
    22. app: ingress-nginx
    23. rules:
    24. - apiGroups:
    25. - ""
    26. resources:
    27. - configmaps
    28. - endpoints
    29. - nodes
    30. - pods
    31. - secrets
    32. - namespaces
    33. - services
    34. verbs:
    35. - get
    36. - list
    37. - watch
    38. - apiGroups:
    39. - "extensions"
    40. - "networking.k8s.io"
    41. resources:
    42. - ingresses
    43. verbs:
    44. - get
    45. - list
    46. - watch
    47. - apiGroups:
    48. - ""
    49. resources:
    50. - events
    51. verbs:
    52. - create
    53. - patch
    54. - apiGroups:
    55. - "extensions"
    56. - "networking.k8s.io"
    57. resources:
    58. - ingresses/status
    59. verbs:
    60. - update
    61. - apiGroups:
    62. - ""
    63. resources:
    64. - configmaps
    65. verbs:
    66. - create
    67. - apiGroups:
    68. - ""
    69. resources:
    70. - configmaps
    71. resourceNames:
    72. - "ingress-controller-leader-nginx"
    73. verbs:
    74. - get
    75. - update
    76. ---
    77. apiVersion: rbac.authorization.k8s.io/v1beta1
    78. kind: ClusterRoleBinding
    79. metadata:
    80. name: nginx-ingress-controller
    81. labels:
    82. app: ingress-nginx
    83. roleRef:
    84. apiGroup: rbac.authorization.k8s.io
    85. kind: ClusterRole
    86. name: nginx-ingress-controller
    87. subjects:
    88. - kind: ServiceAccount
    89. name: nginx-ingress-controller
    90. namespace: ingress-nginx
    91. ---
    92. apiVersion: v1
    93. kind: Service
    94. metadata:
    95. labels:
    96. app: ingress-nginx
    97. name: nginx-ingress-lb
    98. namespace: ingress-nginx
    99. spec:
    100. # DaemonSet need:
    101. # ----------------
    102. type: ClusterIP
    103. # ----------------
    104. # Deployment need:
    105. # ----------------
    106. # type: NodePort
    107. # ----------------
    108. ports:
    109. - name: http
    110. port: 80
    111. targetPort: 80
    112. protocol: TCP
    113. - name: https
    114. port: 443
    115. targetPort: 443
    116. protocol: TCP
    117. - name: metrics
    118. port: 10254
    119. protocol: TCP
    120. targetPort: 10254
    121. selector:
    122. app: ingress-nginx
    123. ---
    124. kind: ConfigMap
    125. apiVersion: v1
    126. metadata:
    127. name: nginx-configuration
    128. namespace: ingress-nginx
    129. labels:
    130. app: ingress-nginx
    131. data:
    132. keep-alive: "75"
    133. keep-alive-requests: "100"
    134. upstream-keepalive-connections: "10000"
    135. upstream-keepalive-requests: "100"
    136. upstream-keepalive-timeout: "60"
    137. allow-backend-server-header: "true"
    138. enable-underscores-in-headers: "true"
    139. generate-request-id: "true"
    140. http-redirect-code: "301"
    141. ignore-invalid-headers: "true"
    142. log-format-upstream: '{"@timestamp": "$time_iso8601","remote_addr": "$remote_addr","x-forward-for": "$proxy_add_x_forwarded_for","request_id": "$req_id","remote_user": "$remote_user","bytes_sent": $bytes_sent,"request_time": $request_time,"status": $status,"vhost": "$host","request_proto": "$server_protocol","path": "$uri","request_query": "$args","request_length": $request_length,"duration": $request_time,"method": "$request_method","http_referrer": "$http_referer","http_user_agent": "$http_user_agent","upstream-sever":"$proxy_upstream_name","proxy_alternative_upstream_name":"$proxy_alternative_upstream_name","upstream_addr":"$upstream_addr","upstream_response_length":$upstream_response_length,"upstream_response_time":$upstream_response_time,"upstream_status":$upstream_status}'
    143. max-worker-connections: "65536"
    144. worker-processes: "2"
    145. proxy-body-size: 20m
    146. proxy-connect-timeout: "10"
    147. proxy_next_upstream: error timeout http_502
    148. reuse-port: "true"
    149. server-tokens: "false"
    150. ssl-ciphers: ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-DSS-AES128-GCM-SHA256:kEDH+AESGCM:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA256:DHE-RSA-AES256-SHA256:DHE-DSS-AES256-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:AES:CAMELLIA:DES-CBC3-SHA:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!aECDH:!EDH-DSS-DES-CBC3-SHA:!EDH-RSA-DES-CBC3-SHA:!KRB5-DES-CBC3-SHA
    151. ssl-protocols: TLSv1 TLSv1.1 TLSv1.2
    152. ssl-redirect: "false"
    153. worker-cpu-affinity: auto
    154. ---
    155. kind: ConfigMap
    156. apiVersion: v1
    157. metadata:
    158. name: tcp-services
    159. namespace: ingress-nginx
    160. labels:
    161. app: ingress-nginx
    162. ---
    163. kind: ConfigMap
    164. apiVersion: v1
    165. metadata:
    166. name: udp-services
    167. namespace: ingress-nginx
    168. labels:
    169. app: ingress-nginx
    170. ---
    171. apiVersion: apps/v1
    172. kind: DaemonSet
    173. metadata:
    174. name: nginx-ingress-controller
    175. namespace: ingress-nginx
    176. labels:
    177. app: ingress-nginx
    178. annotations:
    179. component.version: "v0.30.0"
    180. component.revision: "v1"
    181. spec:
    182. # Deployment need:
    183. # ----------------
    184. # replicas: 1
    185. # ----------------
    186. selector:
    187. matchLabels:
    188. app: ingress-nginx
    189. template:
    190. metadata:
    191. labels:
    192. app: ingress-nginx
    193. annotations:
    194. prometheus.io/port: "10254"
    195. prometheus.io/scrape: "true"
    196. scheduler.alpha.kubernetes.io/critical-pod: ""
    197. spec:
    198. # DaemonSet need:
    199. # ----------------
    200. hostNetwork: true
    201. # ----------------
    202. serviceAccountName: nginx-ingress-controller
    203. priorityClassName: system-node-critical
    204. affinity:
    205. podAntiAffinity:
    206. preferredDuringSchedulingIgnoredDuringExecution:
    207. - podAffinityTerm:
    208. labelSelector:
    209. matchExpressions:
    210. - key: app
    211. operator: In
    212. values:
    213. - ingress-nginx
    214. topologyKey: kubernetes.io/hostname
    215. weight: 100
    216. nodeAffinity:
    217. requiredDuringSchedulingIgnoredDuringExecution:
    218. nodeSelectorTerms:
    219. - matchExpressions:
    220. - key: type
    221. operator: NotIn
    222. values:
    223. - virtual-kubelet
    224. containers:
    225. - name: nginx-ingress-controller
    226. image: registry.cn-beijing.aliyuncs.com/acs/aliyun-ingress-controller:v0.30.0.2-9597b3685-aliyun
    227. args:
    228. - /nginx-ingress-controller
    229. - --configmap=$(POD_NAMESPACE)/nginx-configuration
    230. - --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services
    231. - --udp-services-configmap=$(POD_NAMESPACE)/udp-services
    232. - --publish-service=$(POD_NAMESPACE)/nginx-ingress-lb
    233. - --annotations-prefix=nginx.ingress.kubernetes.io
    234. - --enable-dynamic-certificates=true
    235. - --v=2
    236. securityContext:
    237. allowPrivilegeEscalation: true
    238. capabilities:
    239. drop:
    240. - ALL
    241. add:
    242. - NET_BIND_SERVICE
    243. runAsUser: 101
    244. env:
    245. - name: POD_NAME
    246. valueFrom:
    247. fieldRef:
    248. fieldPath: metadata.name
    249. - name: POD_NAMESPACE
    250. valueFrom:
    251. fieldRef:
    252. fieldPath: metadata.namespace
    253. ports:
    254. - name: http
    255. containerPort: 80
    256. - name: https
    257. containerPort: 443
    258. livenessProbe:
    259. failureThreshold: 3
    260. httpGet:
    261. path: /healthz
    262. port: 10254
    263. scheme: HTTP
    264. initialDelaySeconds: 10
    265. periodSeconds: 10
    266. successThreshold: 1
    267. timeoutSeconds: 10
    268. readinessProbe:
    269. failureThreshold: 3
    270. httpGet:
    271. path: /healthz
    272. port: 10254
    273. scheme: HTTP
    274. periodSeconds: 10
    275. successThreshold: 1
    276. timeoutSeconds: 10
    277. # resources:
    278. # limits:
    279. # cpu: "1"
    280. # memory: 2Gi
    281. # requests:
    282. # cpu: "1"
    283. # memory: 2Gi
    284. volumeMounts:
    285. - mountPath: /etc/localtime
    286. name: localtime
    287. readOnly: true
    288. volumes:
    289. - name: localtime
    290. hostPath:
    291. path: /etc/localtime
    292. type: File
    293. nodeSelector:
    294. boge/ingress-controller-ready: "true"
    295. tolerations:
    296. - operator: Exists
    297. initContainers:
    298. - command:
    299. - /bin/sh
    300. - -c
    301. - |
    302. mount -o remount rw /proc/sys
    303. sysctl -w net.core.somaxconn=65535
    304. sysctl -w net.ipv4.ip_local_port_range="1024 65535"
    305. sysctl -w fs.file-max=1048576
    306. sysctl -w fs.inotify.max_user_instances=16384
    307. sysctl -w fs.inotify.max_user_watches=524288
    308. sysctl -w fs.inotify.max_queued_events=16384
    309. image: registry.cn-beijing.aliyuncs.com/acs/busybox:v1.29.2
    310. imagePullPolicy: Always
    311. name: init-sysctl
    312. securityContext:
    313. privileged: true
    314. procMount: Default
    315. ---
    316. ## Deployment need for aliyun'k8s:
    317. #apiVersion: v1
    318. #kind: Service
    319. #metadata:
    320. # annotations:
    321. # service.beta.kubernetes.io/alibaba-cloud-loadbalancer-id: "lb-xxxxxxxxxxxxxxxxxxx"
    322. # service.beta.kubernetes.io/alibaba-cloud-loadbalancer-force-override-listeners: "true"
    323. # labels:
    324. # app: nginx-ingress-lb
    325. # name: nginx-ingress-lb-local
    326. # namespace: ingress-nginx
    327. #spec:
    328. # externalTrafficPolicy: Local
    329. # ports:
    330. # - name: http
    331. # port: 80
    332. # protocol: TCP
    333. # targetPort: 80
    334. # - name: https
    335. # port: 443
    336. # protocol: TCP
    337. # targetPort: 443
    338. # selector:
    339. # app: ingress-nginx
    340. # type: LoadBalancer
    341. EOF

    3. Start the deployment

    kubectl apply -f aliyun-ingress-nginx.yaml

    4. If we now list the pods, we will find none at all. Why is that?

    kubectl -n ingress-nginx get pod

    Note that the YAML above uses a node selector: only nodes carrying the label I specified are allowed to have these pods scheduled onto them.
    nodeSelector:
    boge/ingress-controller-ready: "true"

    5. Label the master nodes; label whichever nodes you want the controller pods to run on (a quick verification command follows):

    kubectl label node 20.6.100.220 boge/ingress-controller-ready=true
    kubectl label node 20.6.100.221 boge/ingress-controller-ready=true
    kubectl label node 20.6.100.222 boge/ingress-controller-ready=true
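
    A quick way to confirm the labels were applied; -L prints the label value as an extra column:

    kubectl get nodes -L boge/ingress-controller-ready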

    6. The pods are now scheduled onto these three labeled nodes and start up:

    kubectl -n ingress-nginx get pod -o wide

    7. On the worker nodes, edit haproxy.cfg (an install and syntax-check sketch follows the listing):
    vi /etc/haproxy/haproxy.cfg

    1. listen ingress-http
    2. bind 0.0.0.0:80
    3. mode tcp
    4. option tcplog
    5. option dontlognull
    6. option dontlog-normal
    7. balance roundrobin
    8. server 20.6.100.220 20.6.100.220:80 check inter 2000 fall 2 rise 2 weight 1
    9. server 20.6.100.221 20.6.100.221:80 check inter 2000 fall 2 rise 2 weight 1
    10. server 20.6.100.222 20.6.100.222:80 check inter 2000 fall 2 rise 2 weight 1
    11. listen ingress-https
    12. bind 0.0.0.0:443
    13. mode tcp
    14. option tcplog
    15. option dontlognull
    16. option dontlog-normal
    17. balance roundrobin
    18. server 20.6.100.220 20.6.100.220:443 check inter 2000 fall 2 rise 2 weight 1
    19. server 20.6.100.221 20.6.100.221:443 check inter 2000 fall 2 rise 2 weight 1
    20. server 20.6.100.222 20.6.100.222:443 check inter 2000 fall 2 rise 2 weight 1
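
    If haproxy is not yet installed on the worker nodes, install it first, and validate the edited configuration before restarting; -c only checks the config file without starting the service:

    yum install -y haproxy
    haproxy -c -f /etc/haproxy/haproxy.cfg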

    8. Install keepalived on the worker nodes

    yum install -y keepalived

    9. Edit its configuration as follows:

    This is node 20.6.100.223

    1. cat > /etc/keepalived/keepalived.conf <<EOF
    2. global_defs {
    3. router_id lb-master
    4. }
    5. vrrp_script check-haproxy {
    6. script "killall -0 haproxy"
    7. interval 5
    8. weight -60
    9. }
    10. vrrp_instance VI-kube-master {
    11. state MASTER
    12. priority 120
    13. unicast_src_ip 20.6.100.223
    14. unicast_peer {
    15. 20.6.100.224
    16. }
    17. dont_track_primary
    18. interface ens160 # 注意这里的网卡名称修改成你机器真实的内网网卡名称,可用命令ip addr查看
    19. virtual_router_id 111
    20. advert_int 3
    21. track_script {
    22. check-haproxy
    23. }
    24. virtual_ipaddress {
    25. 20.6.100.226
    26. }
    27. }
    28. EOF

    10. This is node 20.6.100.224

    1. cat > /etc/keepalived/keepalived.conf <<EOF
    2. global_defs {
    3. router_id lb-master
    4. }
    5. vrrp_script check-haproxy {
    6. script "killall -0 haproxy"
    7. interval 5
    8. weight -60
    9. }
    10. vrrp_instance VI-kube-master {
    11. state MASTER
    12. priority 120
    13. unicast_src_ip 20.6.100.224
    14. unicast_peer {
    15. 20.6.100.223
    16. }
    17. dont_track_primary
    18. interface ens160
    19. virtual_router_id 111
    20. advert_int 3
    21. track_script {
    22. check-haproxy
    23. }
    24. virtual_ipaddress {
    25. 20.6.100.226
    26. }
    27. }
    28. EOF

    11. Start the services (a quick failover check follows the commands)

    1. # 重启服务
    2. systemctl restart haproxy.service
    3. systemctl restart keepalived.service
    4. # 查看运行状态
    5. systemctl status haproxy.service
    6. systemctl status keepalived.service
    7. # 添加开机自启动(haproxy默认安装好就添加了自启动)
    8. systemctl enable keepalived.service
    9. # 查看是否添加成功
    10. systemctl is-enabled keepalived.service
    11. #enabled就代表添加成功了
    12. # 同时我可查看下VIP是否已经生成
    13. ip a|grep 226
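
    Because the vrrp_script above subtracts 60 from the priority when haproxy dies, a simple failover check is to stop haproxy on the node currently holding the VIP and watch the VIP move to the peer; this is only a sketch:

    # on the node that currently has 20.6.100.226
    systemctl stop haproxy
    # on the other node the VIP should appear within a few seconds
    ip a | grep 226
    # restore the original state
    systemctl start haproxy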

    12. Next prepare the Ingress configuration for nginx, save it as nginx-ingress.yaml, and apply it

    1. apiVersion: v1
    2. kind: Service
    3. metadata:
    4. namespace: test
    5. name: nginx
    6. labels:
    7. app: nginx
    8. spec:
    9. ports:
    10. - port: 80
    11. protocol: TCP
    12. targetPort: 80
    13. selector:
    14. app: nginx
    15. ---
    16. apiVersion: apps/v1
    17. kind: Deployment
    18. metadata:
    19. namespace: test
    20. name: nginx
    21. labels:
    22. app: nginx
    23. spec:
    24. replicas: 1
    25. selector:
    26. matchLabels:
    27. app: nginx
    28. template:
    29. metadata:
    30. labels:
    31. app: nginx
    32. spec:
    33. containers:
    34. - name: nginx
    35. image: nginx
    36. ports:
    37. - containerPort: 80
    38. ---
    39. apiVersion: extensions/v1beta1
    40. kind: Ingress
    41. metadata:
    42. namespace: test
    43. name: nginx-ingress
    44. spec:
    45. rules:
    46. - host: nginx.boge.com
    47. http:
    48. paths:
    49. - backend:
    50. serviceName: nginx
    51. servicePort: 80
    52. path: /

    13. Run it

    1. kubectl apply -f nginx-ingress.yaml
    2. #查看创建的ingress资源
    3. # kubectl get ingress
    4. NAME CLASS HOSTS ADDRESS PORTS AGE
    5. nginx-ingress nginx.boge.com 80 13s
    6. # 我们在其它节点上,加下本地hosts,来测试下效果
    7. 20.6.100.226 nginx.boge.com
    8. #测试
    9. curl nginx.boge.com
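
    If you would rather not edit /etc/hosts, the same test can be done by sending the Host header straight to the keepalived VIP; a sketch using the VIP created above:

    curl -H "Host: nginx.boge.com" http://20.6.100.226/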


    14. In production one Ingress normally fronts a single Service, but in some special cases you need to reuse one Ingress to reach several services. Let's practise that now (a quick curl check of the two paths follows the block below):
    create another nginx Deployment and Service, making sure to change the names so they don't clash with the existing ones.

    1. # kubectl create deployment web --image=nginx
    2. deployment.apps/web created
    3. # kubectl expose deployment web --port=80 --target-port=80
    4. service/web exposed
    5. # 确认下创建结果
    6. # kubectl get deployments.apps
    7. NAME READY UP-TO-DATE AVAILABLE AGE
    8. nginx 1/1 1 1 16h
    9. web 1/1 1 1 45s
    10. # kubectl get pod
    11. # kubectl get svc
    12. # 接着来修改Ingress
    13. # 注意:这里可以通过两种方式来修改K8s正在运行的资源
    14. # 第一种:直接通过edit修改在线服务的资源来生效,这个通常用在测试环境,在实际生产中不建议这么用
    15. kubectl edit ingress nginx-ingress
    16. # 第二种: 通过之前创建ingress的yaml配置,在上面进行修改,再apply更新进K8s,在生产中是建议这么用的,我们这里也用这种方式来修改
    17. # vim nginx-ingress.yaml
    18. apiVersion: extensions/v1beta1
    19. kind: Ingress
    20. metadata:
    21. annotations:
    22. nginx.ingress.kubernetes.io/rewrite-target: / # 注意这里需要把进来到服务的请求重定向到/,这个和传统的nginx配置是一样的,不配会404
    23. name: nginx-ingress
    24. spec:
    25. rules:
    26. - host: nginx.boge.com
    27. http:
    28. paths:
    29. - backend:
    30. serviceName: nginx
    31. servicePort: 80
    32. path: /nginx # 注意这里的路由名称要是唯一的
    33. - backend: # 从这里开始是新增加的
    34. serviceName: web
    35. servicePort: 80
    36. path: /web # 注意这里的路由名称要是唯一的
    37. # 开始创建
    38. [root@node-1 ~]# kubectl apply -f nginx-ingress.yaml
    39. ingress.extensions/nginx-ingress configured
    40. # 同时为了更直观的看到效果,我们按前面讲到的方法来修改下nginx默认的展示页面
    41. # kubectl exec -it nginx-f89759699-6vgr8 -- bash
    42. echo "i am nginx" > /usr/share/nginx/html/index.html
    43. # kubectl exec -it web-5dcb957ccc-nr2m7 -- bash
    44. echo "i am web" > /usr/share/nginx/html/index.html
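
    With the rewrite annotation and the two path rules in place, the two backends can be told apart from any machine that has the nginx.boge.com hosts entry; the expected responses assume the index pages modified above:

    curl nginx.boge.com/nginx   # expected: i am nginx
    curl nginx.boge.com/web     # expected: i am web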

    15. HTTP transmits data in plaintext and is therefore insecure; in production we usually configure HTTPS. Let's now practise the Ingress TLS configuration

    1. # 这里我先自签一个https的证书
    2. #1. 先生成私钥key
    3. openssl genrsa -out tls.key 2048
    4. #2.再基于key生成tls证书(注意:这里我用的*.boge.com,这是生成泛域名的证书,后面所有新增加的三级域名都是可以用这个证书的)
    5. openssl req -new -x509 -key tls.key -out tls.cert -days 360 -subj /CN=*.boge.com
    6. # 看下创建结果
    7. # ll
    8. -rw-r--r-- 1 root root 1099 Nov 27 11:44 tls.cert
    9. -rw-r--r-- 1 root root 1679 Nov 27 11:43 tls.key
    10. # 在K8s上创建tls的secret(注意默认ns是default)
    11. kubectl create secret tls mytls --cert=tls.cert --key=tls.key
    12. # 然后修改先的ingress的yaml配置
    13. # cat nginx-ingress.yaml
    14. apiVersion: extensions/v1beta1
    15. kind: Ingress
    16. metadata:
    17. annotations:
    18. nginx.ingress.kubernetes.io/rewrite-target: / # 注意这里需要把进来到服务的请求重定向到/,这个和传统的nginx配置是一样的,不配会404
    19. name: nginx-ingress
    20. spec:
    21. rules:
    22. - host: nginx.boge.com
    23. http:
    24. paths:
    25. - backend:
    26. serviceName: nginx
    27. servicePort: 80
    28. path: /nginx # 注意这里的路由名称要是唯一的
    29. - backend: # 从这里开始是新增加的
    30. serviceName: web
    31. servicePort: 80
    32. path: /web # 注意这里的路由名称要是唯一的
    33. tls: # 增加下面这段,注意缩进格式
    34. - hosts:
    35. - nginx.boge.com # 这里域名和上面的对应
    36. secretName: mytls # 这是我先生成的secret
    37. # 进行更新
    38. kubectl apply -f nginx-ingress.yaml

    16. Testing: now look at the effect of accessing over HTTPS:

    https://nginx.boge.com/nginx

    https://nginx.boge.com/web

    Note: because the certificate is self-signed, the browser will warn that "your connection is not private". I am using Chrome here: click Advanced and then "Proceed to nginx.boge.com (unsafe)".
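
    The same check can be done from the command line; -k tells curl to accept the self-signed certificate, and -v prints the certificate subject and issuer:

    curl -k https://nginx.boge.com/web
    curl -kv https://nginx.boge.com/nginx 2>&1 | grep -E 'subject|issuer'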

    3.1 Installing ingress-nginx with helm3 (requires k8s 1.20 or later)

    1. Download ingress-nginx-4.2.5.tgz

    1. helm fetch ingress-nginx/ingress-nginx --version 4.2.5
    2. #或者curl -LO https://github.com/kubernetes/ingress-nginx/releases/download/helm-chart-4.2.5/ingress-nginx-4.2.5.tgz
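
    If the ingress-nginx chart repository has not been added to helm yet, the fetch above will not find it; a minimal sketch using the upstream repository URL:

    helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
    helm repo update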

    2. Unpack the chart and edit the values file

    sudo tar -xvf ingress-nginx-4.2.5.tgz && cd ingress-nginx

    # The values.yaml below has already been modified and can be used as-is

    vim values.yaml

    1. ## nginx configuration
    2. ## Ref: https://github.com/kubernetes/ingress-nginx/blob/main/docs/user-guide/nginx-configuration/index.md
    3. commonLabels: {}
    4. # scmhash: abc123
    5. # myLabel: aakkmd
    6. controller:
    7. name: controller
    8. image:
    9. chroot: false
    10. registry: registry.cn-hangzhou.aliyuncs.com
    11. image: google_containers/nginx-ingress-controller
    12. ## repository:
    13. tag: "v1.3.1"
    14. #digest: sha256:54f7fe2c6c5a9db9a0ebf1131797109bb7a4d91f56b9b362bde2abd237dd1974
    15. #digestChroot: sha256:a8466b19c621bd550b1645e27a004a5cc85009c858a9ab19490216735ac432b1
    16. pullPolicy: IfNotPresent
    17. # www-data -> uid 101
    18. runAsUser: 101
    19. allowPrivilegeEscalation: true
    20. # -- Use an existing PSP instead of creating one
    21. existingPsp: ""
    22. # -- Configures the controller container name
    23. containerName: controller
    24. # -- Configures the ports that the nginx-controller listens on
    25. containerPort:
    26. http: 80
    27. https: 443
    28. # -- Will add custom configuration options to Nginx https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/
    29. config: {}
    30. # -- Annotations to be added to the controller config configuration configmap.
    31. configAnnotations: {}
    32. # -- Will add custom headers before sending traffic to backends according to https://github.com/kubernetes/ingress-nginx/tree/main/docs/examples/customization/custom-headers
    33. proxySetHeaders: {}
    34. # -- Will add custom headers before sending response traffic to the client according to: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#add-headers
    35. addHeaders: {}
    36. # -- Optionally customize the pod dnsConfig.
    37. dnsConfig: {}
    38. # -- Optionally customize the pod hostname.
    39. hostname: {}
    40. # -- Optionally change this to ClusterFirstWithHostNet in case you have 'hostNetwork: true'.
    41. # By default, while using host network, name resolution uses the host's DNS. If you wish nginx-controller
    42. # to keep resolving names inside the k8s network, use ClusterFirstWithHostNet.
    43. dnsPolicy: ClusterFirstWithHostNet
    44. # -- Bare-metal considerations via the host network https://kubernetes.github.io/ingress-nginx/deploy/baremetal/#via-the-host-network
    45. # Ingress status was blank because there is no Service exposing the NGINX Ingress controller in a configuration using the host network, the default --publish-service flag used in standard cloud setups does not apply
    46. reportNodeInternalIp: false
    47. # -- Process Ingress objects without ingressClass annotation/ingressClassName field
    48. # Overrides value for --watch-ingress-without-class flag of the controller binary
    49. # Defaults to false
    50. watchIngressWithoutClass: false
    51. # -- Process IngressClass per name (additionally as per spec.controller).
    52. ingressClassByName: false
    53. # -- This configuration defines if Ingress Controller should allow users to set
    54. # their own *-snippet annotations, otherwise this is forbidden / dropped
    55. # when users add those annotations.
    56. # Global snippets in ConfigMap are still respected
    57. allowSnippetAnnotations: true
    58. # -- Required for use with CNI based kubernetes installations (such as ones set up by kubeadm),
    59. # since CNI and hostport don't mix yet. Can be deprecated once https://github.com/kubernetes/kubernetes/issues/23920
    60. # is merged
    61. hostNetwork: true
    62. ## Use host ports 80 and 443
    63. ## Disabled by default
    64. hostPort:
    65. # -- Enable 'hostPort' or not
    66. enabled: false
    67. ports:
    68. # -- 'hostPort' http port
    69. http: 80
    70. # -- 'hostPort' https port
    71. https: 443
    72. # -- Election ID to use for status update
    73. electionID: ingress-controller-leader
    74. ## This section refers to the creation of the IngressClass resource
    75. ## IngressClass resources are supported since k8s >= 1.18 and required since k8s >= 1.19
    76. ingressClassResource:
    77. # -- Name of the ingressClass
    78. name: nginx
    79. # -- Is this ingressClass enabled or not
    80. enabled: true
    81. # -- Is this the default ingressClass for the cluster
    82. default: false
    83. # -- Controller-value of the controller that is processing this ingressClass
    84. controllerValue: "k8s.io/ingress-nginx"
    85. # -- Parameters is a link to a custom resource containing additional
    86. # configuration for the controller. This is optional if the controller
    87. # does not require extra parameters.
    88. parameters: {}
    89. # -- For backwards compatibility with ingress.class annotation, use ingressClass.
    90. # Algorithm is as follows, first ingressClassName is considered, if not present, controller looks for ingress.class annotation
    91. ingressClass: nginx
    92. # -- Labels to add to the pod container metadata
    93. podLabels: {}
    94. # key: value
    95. # -- Security Context policies for controller pods
    96. podSecurityContext: {}
    97. # -- See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for notes on enabling and using sysctls
    98. sysctls: {}
    99. # sysctls:
    100. # "net.core.somaxconn": "8192"
    101. # -- Allows customization of the source of the IP address or FQDN to report
    102. # in the ingress status field. By default, it reads the information provided
    103. # by the service. If disable, the status field reports the IP address of the
    104. # node or nodes where an ingress controller pod is running.
    105. publishService:
    106. # -- Enable 'publishService' or not
    107. enabled: true
    108. # -- Allows overriding of the publish service to bind to
    109. # Must be /
    110. pathOverride: ""
    111. # Limit the scope of the controller to a specific namespace
    112. scope:
    113. # -- Enable 'scope' or not
    114. enabled: false
    115. # -- Namespace to limit the controller to; defaults to $(POD_NAMESPACE)
    116. namespace: ""
    117. # -- When scope.enabled == false, instead of watching all namespaces, we watching namespaces whose labels
    118. # only match with namespaceSelector. Format like foo=bar. Defaults to empty, means watching all namespaces.
    119. namespaceSelector: ""
    120. # -- Allows customization of the configmap / nginx-configmap namespace; defaults to $(POD_NAMESPACE)
    121. configMapNamespace: ""
    122. tcp:
    123. # -- Allows customization of the tcp-services-configmap; defaults to $(POD_NAMESPACE)
    124. configMapNamespace: ""
    125. # -- Annotations to be added to the tcp config configmap
    126. annotations: {}
    127. udp:
    128. # -- Allows customization of the udp-services-configmap; defaults to $(POD_NAMESPACE)
    129. configMapNamespace: ""
    130. # -- Annotations to be added to the udp config configmap
    131. annotations: {}
    132. # -- Maxmind license key to download GeoLite2 Databases.
    133. ## https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases
    134. maxmindLicenseKey: ""
    135. # -- Additional command line arguments to pass to nginx-ingress-controller
    136. # E.g. to specify the default SSL certificate you can use
    137. extraArgs: {}
    138. ## extraArgs:
    139. ## default-ssl-certificate: "/"
    140. # -- Additional environment variables to set
    141. extraEnvs: []
    142. # extraEnvs:
    143. # - name: FOO
    144. # valueFrom:
    145. # secretKeyRef:
    146. # key: FOO
    147. # name: secret-resource
    148. # -- Use a `DaemonSet` or `Deployment`
    149. kind: DaemonSet
    150. # -- Annotations to be added to the controller Deployment or DaemonSet
    151. ##
    152. annotations: {}
    153. # keel.sh/pollSchedule: "@every 60m"
    154. # -- Labels to be added to the controller Deployment or DaemonSet and other resources that do not have option to specify labels
    155. ##
    156. labels: {}
    157. # keel.sh/policy: patch
    158. # keel.sh/trigger: poll
    159. # -- The update strategy to apply to the Deployment or DaemonSet
    160. ##
    161. updateStrategy: {}
    162. # rollingUpdate:
    163. # maxUnavailable: 1
    164. # type: RollingUpdate
    165. # -- `minReadySeconds` to avoid killing pods before we are ready
    166. ##
    167. minReadySeconds: 0
    168. # -- Node tolerations for server scheduling to nodes with taints
    169. ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    170. ##
    171. tolerations: []
    172. # - key: "key"
    173. # operator: "Equal|Exists"
    174. # value: "value"
    175. # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
    176. # -- Affinity and anti-affinity rules for server scheduling to nodes
    177. ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
    178. ##
    179. affinity: {}
    180. # # An example of preferred pod anti-affinity, weight is in the range 1-100
    181. # podAntiAffinity:
    182. # preferredDuringSchedulingIgnoredDuringExecution:
    183. # - weight: 100
    184. # podAffinityTerm:
    185. # labelSelector:
    186. # matchExpressions:
    187. # - key: app.kubernetes.io/name
    188. # operator: In
    189. # values:
    190. # - ingress-nginx
    191. # - key: app.kubernetes.io/instance
    192. # operator: In
    193. # values:
    194. # - ingress-nginx
    195. # - key: app.kubernetes.io/component
    196. # operator: In
    197. # values:
    198. # - controller
    199. # topologyKey: kubernetes.io/hostname
    200. # # An example of required pod anti-affinity
    201. # podAntiAffinity:
    202. # requiredDuringSchedulingIgnoredDuringExecution:
    203. # - labelSelector:
    204. # matchExpressions:
    205. # - key: app.kubernetes.io/name
    206. # operator: In
    207. # values:
    208. # - ingress-nginx
    209. # - key: app.kubernetes.io/instance
    210. # operator: In
    211. # values:
    212. # - ingress-nginx
    213. # - key: app.kubernetes.io/component
    214. # operator: In
    215. # values:
    216. # - controller
    217. # topologyKey: "kubernetes.io/hostname"
    218. # -- Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in.
    219. ## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/
    220. ##
    221. topologySpreadConstraints: []
    222. # - maxSkew: 1
    223. # topologyKey: topology.kubernetes.io/zone
    224. # whenUnsatisfiable: DoNotSchedule
    225. # labelSelector:
    226. # matchLabels:
    227. # app.kubernetes.io/instance: ingress-nginx-internal
    228. # -- `terminationGracePeriodSeconds` to avoid killing pods before we are ready
    229. ## wait up to five minutes for the drain of connections
    230. ##
    231. terminationGracePeriodSeconds: 300
    232. # -- Node labels for controller pod assignment
    233. ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
    234. ##
    235. nodeSelector:
    236. kubernetes.io/os: linux
    237. ingress: "true"
    238. ## Liveness and readiness probe values
    239. ## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes
    240. ##
    241. ## startupProbe:
    242. ## httpGet:
    243. ## # should match container.healthCheckPath
    244. ## path: "/healthz"
    245. ## port: 10254
    246. ## scheme: HTTP
    247. ## initialDelaySeconds: 5
    248. ## periodSeconds: 5
    249. ## timeoutSeconds: 2
    250. ## successThreshold: 1
    251. ## failureThreshold: 5
    252. livenessProbe:
    253. httpGet:
    254. # should match container.healthCheckPath
    255. path: "/healthz"
    256. port: 10254
    257. scheme: HTTP
    258. initialDelaySeconds: 10
    259. periodSeconds: 10
    260. timeoutSeconds: 1
    261. successThreshold: 1
    262. failureThreshold: 5
    263. readinessProbe:
    264. httpGet:
    265. # should match container.healthCheckPath
    266. path: "/healthz"
    267. port: 10254
    268. scheme: HTTP
    269. initialDelaySeconds: 10
    270. periodSeconds: 10
    271. timeoutSeconds: 1
    272. successThreshold: 1
    273. failureThreshold: 3
    274. # -- Path of the health check endpoint. All requests received on the port defined by
    275. # the healthz-port parameter are forwarded internally to this path.
    276. healthCheckPath: "/healthz"
    277. # -- Address to bind the health check endpoint.
    278. # It is better to set this option to the internal node address
    279. # if the ingress nginx controller is running in the `hostNetwork: true` mode.
    280. healthCheckHost: ""
    281. # -- Annotations to be added to controller pods
    282. ##
    283. podAnnotations: {}
    284. replicaCount: 1
    285. minAvailable: 1
    286. ## Define requests resources to avoid probe issues due to CPU utilization in busy nodes
    287. ## ref: https://github.com/kubernetes/ingress-nginx/issues/4735#issuecomment-551204903
    288. ## Ideally, there should be no limits.
    289. ## https://engineering.indeedblog.com/blog/2019/12/cpu-throttling-regression-fix/
    290. resources:
    291. ## limits:
    292. ## cpu: 100m
    293. ## memory: 90Mi
    294. requests:
    295. cpu: 100m
    296. memory: 90Mi
    297. # Mutually exclusive with keda autoscaling
    298. autoscaling:
    299. enabled: false
    300. minReplicas: 1
    301. maxReplicas: 11
    302. targetCPUUtilizationPercentage: 50
    303. targetMemoryUtilizationPercentage: 50
    304. behavior: {}
    305. # scaleDown:
    306. # stabilizationWindowSeconds: 300
    307. # policies:
    308. # - type: Pods
    309. # value: 1
    310. # periodSeconds: 180
    311. # scaleUp:
    312. # stabilizationWindowSeconds: 300
    313. # policies:
    314. # - type: Pods
    315. # value: 2
    316. # periodSeconds: 60
    317. autoscalingTemplate: []
    318. # Custom or additional autoscaling metrics
    319. # ref: https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#support-for-custom-metrics
    320. # - type: Pods
    321. # pods:
    322. # metric:
    323. # name: nginx_ingress_controller_nginx_process_requests_total
    324. # target:
    325. # type: AverageValue
    326. # averageValue: 10000m
    327. # Mutually exclusive with hpa autoscaling
    328. keda:
    329. apiVersion: "keda.sh/v1alpha1"
    330. ## apiVersion changes with keda 1.x vs 2.x
    331. ## 2.x = keda.sh/v1alpha1
    332. ## 1.x = keda.k8s.io/v1alpha1
    333. enabled: false
    334. minReplicas: 1
    335. maxReplicas: 11
    336. pollingInterval: 30
    337. cooldownPeriod: 300
    338. restoreToOriginalReplicaCount: false
    339. scaledObject:
    340. annotations: {}
    341. # Custom annotations for ScaledObject resource
    342. # annotations:
    343. # key: value
    344. triggers: []
    345. # - type: prometheus
    346. # metadata:
    347. # serverAddress: http://:9090
    348. # metricName: http_requests_total
    349. # threshold: '100'
    350. # query: sum(rate(http_requests_total{deployment="my-deployment"}[2m]))
    351. behavior: {}
    352. # scaleDown:
    353. # stabilizationWindowSeconds: 300
    354. # policies:
    355. # - type: Pods
    356. # value: 1
    357. # periodSeconds: 180
    358. # scaleUp:
    359. # stabilizationWindowSeconds: 300
    360. # policies:
    361. # - type: Pods
    362. # value: 2
    363. # periodSeconds: 60
    364. # -- Enable mimalloc as a drop-in replacement for malloc.
    365. ## ref: https://github.com/microsoft/mimalloc
    366. ##
    367. enableMimalloc: true
    368. ## Override NGINX template
    369. customTemplate:
    370. configMapName: ""
    371. configMapKey: ""
    372. service:
    373. enabled: true
    374. # -- If enabled is adding an appProtocol option for Kubernetes service. An appProtocol field replacing annotations that were
    375. # using for setting a backend protocol. Here is an example for AWS: service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
    376. # It allows choosing the protocol for each backend specified in the Kubernetes service.
    377. # See the following GitHub issue for more details about the purpose: https://github.com/kubernetes/kubernetes/issues/40244
    378. # Will be ignored for Kubernetes versions older than 1.20
    379. ##
    380. appProtocol: true
    381. annotations: {}
    382. labels: {}
    383. # clusterIP: ""
    384. # -- List of IP addresses at which the controller services are available
    385. ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
    386. ##
    387. externalIPs: []
    388. # -- Used by cloud providers to connect the resulting `LoadBalancer` to a pre-existing static IP according to https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer
    389. loadBalancerIP: ""
    390. loadBalancerSourceRanges: []
    391. enableHttp: true
    392. enableHttps: true
    393. ## Set external traffic policy to: "Local" to preserve source IP on providers supporting it.
    394. ## Ref: https://kubernetes.io/docs/tutorials/services/source-ip/#source-ip-for-services-with-typeloadbalancer
    395. # externalTrafficPolicy: ""
    396. ## Must be either "None" or "ClientIP" if set. Kubernetes will default to "None".
    397. ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#virtual-ips-and-service-proxies
    398. # sessionAffinity: ""
    399. ## Specifies the health check node port (numeric port number) for the service. If healthCheckNodePort isn’t specified,
    400. ## the service controller allocates a port from your cluster’s NodePort range.
    401. ## Ref: https://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/#preserving-the-client-source-ip
    402. # healthCheckNodePort: 0
    403. # -- Represents the dual-stack-ness requested or required by this Service. Possible values are
    404. # SingleStack, PreferDualStack or RequireDualStack.
    405. # The ipFamilies and clusterIPs fields depend on the value of this field.
    406. ## Ref: https://kubernetes.io/docs/concepts/services-networking/dual-stack/
    407. ipFamilyPolicy: "SingleStack"
    408. # -- List of IP families (e.g. IPv4, IPv6) assigned to the service. This field is usually assigned automatically
    409. # based on cluster configuration and the ipFamilyPolicy field.
    410. ## Ref: https://kubernetes.io/docs/concepts/services-networking/dual-stack/
    411. ipFamilies:
    412. - IPv4
    413. ports:
    414. http: 80
    415. https: 443
    416. targetPorts:
    417. http: http
    418. https: https
    419. type: LoadBalancer
    420. ## type: NodePort
    421. ## nodePorts:
    422. ## http: 32080
    423. ## https: 32443
    424. ## tcp:
    425. ## 8080: 32808
    426. nodePorts:
    427. http: ""
    428. https: ""
    429. tcp: {}
    430. udp: {}
    431. external:
    432. enabled: true
    433. internal:
    434. # -- Enables an additional internal load balancer (besides the external one).
    435. enabled: false
    436. # -- Annotations are mandatory for the load balancer to come up. Varies with the cloud service.
    437. annotations: {}
    438. # loadBalancerIP: ""
    439. # -- Restrict access For LoadBalancer service. Defaults to 0.0.0.0/0.
    440. loadBalancerSourceRanges: []
    441. ## Set external traffic policy to: "Local" to preserve source IP on
    442. ## providers supporting it
    443. ## Ref: https://kubernetes.io/docs/tutorials/services/source-ip/#source-ip-for-services-with-typeloadbalancer
    444. # externalTrafficPolicy: ""
    445. # shareProcessNamespace enables process namespace sharing within the pod.
    446. # This can be used for example to signal log rotation using `kill -USR1` from a sidecar.
    447. shareProcessNamespace: false
    448. # -- Additional containers to be added to the controller pod.
    449. # See https://github.com/lemonldap-ng-controller/lemonldap-ng-controller as example.
    450. extraContainers: []
    451. # - name: my-sidecar
    452. # - name: POD_NAME
    453. # valueFrom:
    454. # fieldRef:
    455. # fieldPath: metadata.name
    456. # - name: POD_NAMESPACE
    457. # valueFrom:
    458. # fieldRef:
    459. # fieldPath: metadata.namespace
    460. # volumeMounts:
    461. # - name: copy-portal-skins
    462. # mountPath: /srv/var/lib/lemonldap-ng/portal/skins
    463. # -- Additional volumeMounts to the controller main container.
    464. extraVolumeMounts: []
    465. # - name: copy-portal-skins
    466. # mountPath: /var/lib/lemonldap-ng/portal/skins
    467. # -- Additional volumes to the controller pod.
    468. extraVolumes: []
    469. # - name: copy-portal-skins
    470. # emptyDir: {}
    471. # -- Containers, which are run before the app containers are started.
    472. extraInitContainers: []
    473. # - name: init-myservice
    474. # command: ['sh', '-c', 'until nslookup myservice; do echo waiting for myservice; sleep 2; done;']
    475. extraModules: []
    476. ## Modules, which are mounted into the core nginx image
    477. # - name: opentelemetry
    478. #
    479. # The image must contain a `/usr/local/bin/init_module.sh` executable, which
    480. # will be executed as initContainers, to move its config files within the
    481. # mounted volume.
    482. admissionWebhooks:
    483. annotations: {}
    484. # ignore-check.kube-linter.io/no-read-only-rootfs: "This deployment needs write access to root filesystem".
    485. ## Additional annotations to the admission webhooks.
    486. ## These annotations will be added to the ValidatingWebhookConfiguration and
    487. ## the Jobs Spec of the admission webhooks.
    488. enabled: true
    489. # -- Additional environment variables to set
    490. extraEnvs: []
    491. # extraEnvs:
    492. # - name: FOO
    493. # valueFrom:
    494. # secretKeyRef:
    495. # key: FOO
    496. # name: secret-resource
    497. # -- Admission Webhook failure policy to use
    498. failurePolicy: Fail
    499. # timeoutSeconds: 10
    500. port: 8443
    501. certificate: "/usr/local/certificates/cert"
    502. key: "/usr/local/certificates/key"
    503. namespaceSelector: {}
    504. objectSelector: {}
    505. # -- Labels to be added to admission webhooks
    506. labels: {}
    507. # -- Use an existing PSP instead of creating one
    508. existingPsp: ""
    509. networkPolicyEnabled: false
    510. service:
    511. annotations: {}
    512. # clusterIP: ""
    513. externalIPs: []
    514. # loadBalancerIP: ""
    515. loadBalancerSourceRanges: []
    516. servicePort: 443
    517. type: ClusterIP
    518. createSecretJob:
    519. resources: {}
    520. # limits:
    521. # cpu: 10m
    522. # memory: 20Mi
    523. # requests:
    524. # cpu: 10m
    525. # memory: 20Mi
    526. patchWebhookJob:
    527. resources: {}
    528. patch:
    529. enabled: true
    530. image:
    531. registry: registry.cn-hangzhou.aliyuncs.com
    532. image: google_containers/kube-webhook-certgen
    533. ## for backwards compatibility consider setting the full image url via the repository value below
    534. ## use *either* current default registry/image or repository format or installing chart by providing the values.yaml will fail
    535. ## repository:
    536. tag: v1.3.0
    537. # digest: sha256:549e71a6ca248c5abd51cdb73dbc3083df62cf92ed5e6147c780e30f7e007a47
    538. pullPolicy: IfNotPresent
    539. # -- Provide a priority class name to the webhook patching job
    540. ##
    541. priorityClassName: ""
    542. podAnnotations: {}
    543. nodeSelector:
    544. kubernetes.io/os: linux
    545. tolerations: []
    546. # -- Labels to be added to patch job resources
    547. labels: {}
    548. securityContext:
    549. runAsNonRoot: true
    550. runAsUser: 2000
    551. fsGroup: 2000
    552. metrics:
    553. port: 10254
    554. # if this port is changed, change healthz-port: in extraArgs: accordingly
    555. enabled: false
    556. service:
    557. annotations: {}
    558. # prometheus.io/scrape: "true"
    559. # prometheus.io/port: "10254"
    560. # clusterIP: ""
    561. # -- List of IP addresses at which the stats-exporter service is available
    562. ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
    563. ##
    564. externalIPs: []
    565. # loadBalancerIP: ""
    566. loadBalancerSourceRanges: []
    567. servicePort: 10254
    568. type: ClusterIP
    569. # externalTrafficPolicy: ""
    570. # nodePort: ""
    571. serviceMonitor:
    572. enabled: false
    573. additionalLabels: {}
    574. ## The label to use to retrieve the job name from.
    575. ## jobLabel: "app.kubernetes.io/name"
    576. namespace: ""
    577. namespaceSelector: {}
    578. ## Default: scrape .Release.Namespace only
    579. ## To scrape all, use the following:
    580. ## namespaceSelector:
    581. ## any: true
    582. scrapeInterval: 30s
    583. # honorLabels: true
    584. targetLabels: []
    585. relabelings: []
    586. metricRelabelings: []
    587. prometheusRule:
    588. enabled: false
    589. additionalLabels: {}
    590. # namespace: ""
    591. rules: []
    592. # # These are just examples rules, please adapt them to your needs
    593. # - alert: NGINXConfigFailed
    594. # expr: count(nginx_ingress_controller_config_last_reload_successful == 0) > 0
    595. # for: 1s
    596. # labels:
    597. # severity: critical
    598. # annotations:
    599. # description: bad ingress config - nginx config test failed
    600. # summary: uninstall the latest ingress changes to allow config reloads to resume
    601. # - alert: NGINXCertificateExpiry
    602. # expr: (avg(nginx_ingress_controller_ssl_expire_time_seconds) by (host) - time()) < 604800
    603. # for: 1s
    604. # labels:
    605. # severity: critical
    606. # annotations:
    607. # description: ssl certificate(s) will expire in less then a week
    608. # summary: renew expiring certificates to avoid downtime
    609. # - alert: NGINXTooMany500s
    610. # expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"5.+"} ) / sum(nginx_ingress_controller_requests) ) > 5
    611. # for: 1m
    612. # labels:
    613. # severity: warning
    614. # annotations:
    615. # description: Too many 5XXs
    616. # summary: More than 5% of all requests returned 5XX, this requires your attention
    617. # - alert: NGINXTooMany400s
    618. # expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"4.+"} ) / sum(nginx_ingress_controller_requests) ) > 5
    619. # for: 1m
    620. # labels:
    621. # severity: warning
    622. # annotations:
    623. # description: Too many 4XXs
    624. # summary: More than 5% of all requests returned 4XX, this requires your attention
    625. # -- Improve connection draining when ingress controller pod is deleted using a lifecycle hook:
    626. # With this new hook, we increased the default terminationGracePeriodSeconds from 30 seconds
    627. # to 300, allowing the draining of connections up to five minutes.
    628. # If the active connections end before that, the pod will terminate gracefully at that time.
    629. # To effectively take advantage of this feature, the Configmap feature
    630. # worker-shutdown-timeout new value is 240s instead of 10s.
    631. ##
    632. lifecycle:
    633. preStop:
    634. exec:
    635. command:
    636. - /wait-shutdown
    637. priorityClassName: ""
    638. # -- Rollback limit
    639. ##
    640. revisionHistoryLimit: 10
    641. ## Default 404 backend
    642. ##
    643. defaultBackend:
    644. ##
    645. enabled: false
    646. name: defaultbackend
    647. image:
    648. registry: k8s.gcr.io
    649. image: defaultbackend-amd64
    650. ## for backwards compatibility consider setting the full image url via the repository value below
    651. ## use *either* current default registry/image or repository format or installing chart by providing the values.yaml will fail
    652. ## repository:
    653. tag: "1.5"
    654. pullPolicy: IfNotPresent
    655. # nobody user -> uid 65534
    656. runAsUser: 65534
    657. runAsNonRoot: true
    658. readOnlyRootFilesystem: true
    659. allowPrivilegeEscalation: false
    660. # -- Use an existing PSP instead of creating one
    661. existingPsp: ""
    662. extraArgs: {}
    663. serviceAccount:
    664. create: true
    665. name: ""
    666. automountServiceAccountToken: true
    667. # -- Additional environment variables to set for defaultBackend pods
    668. extraEnvs: []
    669. port: 8080
    670. ## Readiness and liveness probes for default backend
    671. ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/
    672. ##
    673. livenessProbe:
    674. failureThreshold: 3
    675. initialDelaySeconds: 30
    676. periodSeconds: 10
    677. successThreshold: 1
    678. timeoutSeconds: 5
    679. readinessProbe:
    680. failureThreshold: 6
    681. initialDelaySeconds: 0
    682. periodSeconds: 5
    683. successThreshold: 1
    684. timeoutSeconds: 5
    685. # -- Node tolerations for server scheduling to nodes with taints
    686. ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    687. ##
    688. tolerations: []
    689. # - key: "key"
    690. # operator: "Equal|Exists"
    691. # value: "value"
    692. # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
    693. affinity: {}
    694. # -- Security Context policies for controller pods
    695. # See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for
    696. # notes on enabling and using sysctls
    697. ##
    698. podSecurityContext: {}
    699. # -- Security Context policies for controller main container.
    700. # See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for
    701. # notes on enabling and using sysctls
    702. ##
    703. containerSecurityContext: {}
    704. # -- Labels to add to the pod container metadata
    705. podLabels: {}
    706. # key: value
    707. # -- Node labels for default backend pod assignment
    708. ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
    709. ##
    710. nodeSelector:
    711. kubernetes.io/os: linux
    712. # -- Annotations to be added to default backend pods
    713. ##
    714. podAnnotations: {}
    715. replicaCount: 1
    716. minAvailable: 1
    717. resources: {}
    718. # limits:
    719. # cpu: 10m
    720. # memory: 20Mi
    721. # requests:
    722. # cpu: 10m
    723. # memory: 20Mi
    724. extraVolumeMounts: []
    725. ## Additional volumeMounts to the default backend container.
    726. # - name: copy-portal-skins
    727. # mountPath: /var/lib/lemonldap-ng/portal/skins
    728. extraVolumes: []
    729. ## Additional volumes to the default backend pod.
    730. # - name: copy-portal-skins
    731. # emptyDir: {}
    732. autoscaling:
    733. annotations: {}
    734. enabled: false
    735. minReplicas: 1
    736. maxReplicas: 2
    737. targetCPUUtilizationPercentage: 50
    738. targetMemoryUtilizationPercentage: 50
    739. service:
    740. annotations: {}
    741. # clusterIP: ""
    742. # -- List of IP addresses at which the default backend service is available
    743. ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
    744. ##
    745. externalIPs: []
    746. # loadBalancerIP: ""
    747. loadBalancerSourceRanges: []
    748. servicePort: 80
    749. type: ClusterIP
    750. priorityClassName: ""
    751. # -- Labels to be added to the default backend resources
    752. labels: {}
    753. ## Enable RBAC as per https://github.com/kubernetes/ingress-nginx/blob/main/docs/deploy/rbac.md and https://github.com/kubernetes/ingress-nginx/issues/266
    754. rbac:
    755. create: true
    756. scope: false
    757. ## If true, create & use Pod Security Policy resources
    758. ## https://kubernetes.io/docs/concepts/policy/pod-security-policy/
    759. podSecurityPolicy:
    760. enabled: false
    761. serviceAccount:
    762. create: true
    763. name: ""
    764. automountServiceAccountToken: true
    765. # -- Annotations for the controller service account
    766. annotations: {}
    767. # -- Optional array of imagePullSecrets containing private registry credentials
    768. ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
    769. imagePullSecrets: []
    770. # - name: secretName
    771. # -- TCP service key-value pairs
    772. ## Ref: https://github.com/kubernetes/ingress-nginx/blob/main/docs/user-guide/exposing-tcp-udp-services.md
    773. ##
    774. tcp: {}
    775. # 8080: "default/example-tcp-svc:9000"
    776. # -- UDP service key-value pairs
    777. ## Ref: https://github.com/kubernetes/ingress-nginx/blob/main/docs/user-guide/exposing-tcp-udp-services.md
    778. ##
    779. udp: {}
    780. # 53: "kube-system/kube-dns:53"
    781. # -- Prefix for TCP and UDP ports names in ingress controller service
    782. ## Some cloud providers, like Yandex Cloud may have a requirements for a port name regex to support cloud load balancer integration
    783. portNamePrefix: ""
    784. # -- (string) A base64-encoded Diffie-Hellman parameter.
    785. # This can be generated with: `openssl dhparam 4096 2> /dev/null | base64`
    786. ## Ref: https://github.com/kubernetes/ingress-nginx/tree/main/docs/examples/customization/ssl-dh-param
    787. dhParam:

    3.安装ingress

    1. # 选择节点打label
    2. kubectl label node k8s-node01 ingress=true # k8s-node01是自己自定义的node节点名称
    3. kubectl get node --show-labels
    4. #创建命名空间
    5. kubectl create ns ingress-nginx
    6. # 使用helm进行安装
    7. helm install ingress-nginx -f values.yaml -n ingress-nginx .
    8. helm list -n ingress-nginx
    9. kubectl -n ingress-nginx get pods -o wide
    10. kubectl -n ingress-nginx get svc -o wide
    11. # 删除ingress-nginx
    12. helm delete ingress-nginx -n ingress-nginx
    13. # 更新ingress-nginx
    14. helm upgrade ingress-nginx -f values.yaml -n ingress-nginx .
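
    安装完成后,建议先确认IngressClass已经生成、controller是否按预期调度到了打标签的节点上,下面的检查命令仅作参考:

    # 确认IngressClass已创建,后面Ingress资源里的ingressClassName要与它对应
    kubectl get ingressclass
    # 确认controller的pod是否调度到了打了ingress=true标签的节点上
    kubectl -n ingress-nginx get pods -o wide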

    4.测试网页

    vim test-nginx.yaml

    1. ---
    2. apiVersion: apps/v1
    3. kind: Deployment
    4. metadata:
    5. name: my-deploy-nginx
    6. spec:
    7. replicas: 2
    8. selector:
    9. matchLabels:
    10. app: mynginx
    11. template:
    12. metadata:
    13. labels:
    14. app: mynginx
    15. spec:
    16. containers:
    17. - name: mynginx
    18. image: nginx
    19. ports:
    20. - containerPort: 80
    21. ---
    22. kind: Service
    23. apiVersion: v1
    24. metadata:
    25. name: myservice
    26. spec:
    27. ports:
    28. - protocol: TCP
    29. port: 80
    30. targetPort: 80
    31. selector:
    32. app: mynginx
    33. type: ClusterIP
    34. ---
    35. apiVersion: networking.k8s.io/v1
    36. kind: Ingress
    37. metadata:
    38. name: mynginx
    39. spec:
    40. ingressClassName: "nginx" #在部署ingress-nginx时,values.yaml文件中定义的
    41. rules:
    42. - host: nginx.rw.com
    43. http:
    44. paths:
    45. - path: /
    46. pathType: Prefix
    47. backend:
    48. service:
    49. name: myservice
    50. port:
    51. number: 80

    kubectl apply -f test-nginx.yaml

    kubectl  get all

    kubectl get ingress
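
    如果暂时没有外部LoadBalancer,可以在访问机器上绑定hosts或直接带Host头来验证(这里假设controller调度在k8s-node01即20.6.100.223上,且该节点80端口可以直接访问,实际IP和访问方式请按自己的部署替换):

    # 方式一:在访问机器的/etc/hosts里把测试域名指向ingress所在节点的IP
    echo '20.6.100.223 nginx.rw.com' >> /etc/hosts
    curl http://nginx.rw.com
    # 方式二:不改hosts,直接带Host头访问
    curl -H 'Host: nginx.rw.com' http://20.6.100.223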

    四、k8sHPA 自动水平伸缩pod

    1.pod内资源分配的配置格式如下:

    默认可以只配置requests,但根据生产中的经验,建议把limits资源限制也加上,并且和requests的值保持一致。因为对K8s来说,只有requests和limits都配置且数值相同时,这个pod的QoS等级(Guaranteed)才是最高的;在node资源不够需要驱逐pod时,K8s会优先驱逐没有任何资源配置的pod,其次是只配置了requests(或requests小于limits)的pod,最后才是两者都配置且相等的pod,仔细品品
     

          resources:
            limits:   # 限制单个pod最多能使用1核(1000m 毫核)cpu以及2G内存
              cpu: "1"
              memory: 2Gi
            requests: # 保证这个pod初始就能分配这么多资源
              cpu: "1"
              memory: 2Gi
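
    requests和limits都配置且相等的pod,QoS等级是Guaranteed,可以用下面的命令确认一下(pod名称换成自己的):

    kubectl get pod <pod-name> -o jsonpath='{.status.qosClass}'
    # 输出为Guaranteed / Burstable / BestEffort三者之一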

    2.我们现在以上面创建的deployment资源web来实践下hpa的效果,首先用我们学到的方法导出web的yaml配置,并增加资源分配配置

    apiVersion: v1
    kind: Service
    metadata:
      labels:
        app: web
      name: web
    spec:
      ports:
      - port: 80
        protocol: TCP
        targetPort: 80
      selector:
        app: web
    ---
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      labels:
        app: web
      name: web
      namespace: default
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: web
      template:
        metadata:
          labels:
            app: web
        spec:
          containers:
          - image: nginx
            name: web
            resources:
              limits:   # 因为我这里是测试环境,所以这里CPU只分配50毫核(0.05核CPU)和20M的内存
                cpu: "50m"
                memory: 20Mi
              requests: # 保证这个pod初始就能分配这么多资源
                cpu: "50m"
                memory: 20Mi

    3.运行

    kubectl  apply -f web.yaml

    4.第一种:为deployment资源web创建hpa,pod数量上限3个,最低1个,在pod平均CPU达到50%后开始扩容

    kubectl  autoscale deployment web --max=3 --min=1 --cpu-percent=50

    5.第二种创建hpa

    cat hpa-web.yaml

    1. apiVersion: autoscaling/v2beta1 # v2beta1版本
    2. #apiVersion: apps/v1
    3. kind: HorizontalPodAutoscaler
    4. metadata:
    5. name: web
    6. spec:
    7. maxReplicas: 10
    8. minReplicas: 1 # 1-10个pod范围内扩容与裁剪
    9. scaleTargetRef:
    10. apiVersion: apps/v1
    11. kind: Deployment
    12. name: web
    13. metrics:
    14. - type: Resource
    15. resource:
    16. name: memory
    17. targetAverageUtilization: 50 # 50%内存利用
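
    注意autoscaling/v2beta1在较新的K8s版本中已经被废弃并最终移除,如果集群支持autoscaling/v2(大约1.23及以后),可以参考下面这个等价写法,具体以自己集群实际支持的API版本为准:

    apiVersion: autoscaling/v2
    kind: HorizontalPodAutoscaler
    metadata:
      name: web
    spec:
      scaleTargetRef:
        apiVersion: apps/v1
        kind: Deployment
        name: web
      minReplicas: 1
      maxReplicas: 10
      metrics:
      - type: Resource
        resource:
          name: memory
          target:
            type: Utilization
            averageUtilization: 50   # 50%内存利用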

    6.执行

    kubectl apply -f hpa-web.yaml

    7.我们启动一个临时pod,来模拟大量请求

    kubectl run -it --rm busybox --image=busybox -- sh
    / # while :;do wget -q -O- http://web;done

    #等待2 ~ 3分钟,注意k8s为了避免频繁增删pod,对副本的增加速度有限制

    kubectl get hpa web -w

    236eee9b4ba2418da17427aceae4a9a9.png

    五、k8s存储

    5.1 k8s持久化存储02pv pvc

    开始部署NFS-SERVER

    1. # 我们这里在10.0.1.201上安装(在生产中,大家要提前做好NFS-SERVER环境的规划)
    2. yum -y install nfs-utils
    3. # 创建NFS挂载目录
    4. mkdir /nfs_dir
    5. chown nobody.nobody /nfs_dir
    6. # 修改NFS-SERVER配置
    7. echo '/nfs_dir *(rw,sync,no_root_squash)' > /etc/exports
    8. # 重启服务
    9. systemctl restart rpcbind.service
    10. systemctl restart nfs-utils.service
    11. systemctl restart nfs-server.service
    12. # 增加NFS-SERVER开机自启动
    13. systemctl enable rpcbind.service
    14. systemctl enable nfs-utils.service
    15. systemctl enable nfs-server.service
    16. # 验证NFS-SERVER是否能正常访问
    17. showmount -e 10.0.1.201
    18. #需要挂载的服务器执行
    19. yum install nfs-utils -y
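
    在正式创建pv之前,建议先在任意一台node上手动验证NFS能否正常挂载(NFS服务端仍以上面的10.0.1.201为例,测试完记得卸载):

    mkdir -p /mnt/nfs_test
    mount -t nfs 10.0.1.201:/nfs_dir /mnt/nfs_test
    df -h /mnt/nfs_test
    umount /mnt/nfs_test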

    接着准备好pv的yaml配置,保存为pv1.yaml

    1. # cat pv1.yaml
    2. apiVersion: v1
    3. kind: PersistentVolume
    4. metadata:
    5. name: pv1
    6. labels:
    7. type: test-claim # 这里建议打上一个独有的标签,方便在多个pv的时候方便提供pvc选择挂载
    8. spec:
    9. capacity:
    10. storage: 1Gi # <---------- 1
    11. accessModes:
    12. - ReadWriteOnce # <---------- 2
    13. persistentVolumeReclaimPolicy: Recycle # <---------- 3
    14. storageClassName: nfs # <---------- 4
    15. nfs:
    16. path: /nfs_dir/pv1 # <---------- 5
    17. server: 10.0.1.201

    1. capacity 指定 PV 的容量为 1G。
    2. accessModes 指定访问模式为 ReadWriteOnce,支持的访问模式有: ReadWriteOnce – PV 能以 read-write 模式 mount 到单个节点。 ReadOnlyMany – PV 能以 read-only 模式 mount 到多个节点。 ReadWriteMany – PV 能以 read-write 模式 mount 到多个节点。
    3. persistentVolumeReclaimPolicy 指定 PV 的回收策略为 Recycle,支持的策略有: Retain – 需要管理员手工回收。 Recycle – 清除 PV 中的数据,效果相当于执行 rm -rf /thevolume/*。 Delete – 删除 Storage Provider 上的对应存储资源,例如 AWS EBS、GCE PD、Azure Disk、OpenStack Cinder Volume 等。
    4. storageClassName 指定 PV 的 class 为 nfs。相当于为 PV 设置了一个分类,PVC 可以指定 class 申请相应 class 的 PV。
    5. 指定 PV 在 NFS 服务器上对应的目录,这里注意,我测试的时候,需要手动先创建好这个目录并授权好,不然后面挂载会提示目录不存在: mkdir -p /nfs_dir/pv1 && chown -R nobody.nobody /nfs_dir/pv1 。

    创建这个pv

    1. # kubectl apply -f pv1.yaml
    2. persistentvolume/pv1 created
    3. # STATUS 为 Available,表示 pv1 就绪,可以被 PVC 申请
    4. # kubectl get pv
    5. NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
    6. pv1 1Gi RWO Recycle Available nfs 4m45s

    接着准备PVC的yaml,保存为pvc1.yaml

    cat pvc1.yaml

    1. apiVersion: v1
    2. kind: PersistentVolumeClaim
    3. metadata:
    4. name: pvc1
    5. spec:
    6. accessModes:
    7. - ReadWriteOnce
    8. resources:
    9. requests:
    10. storage: 1Gi
    11. storageClassName: nfs
    12. selector:
    13. matchLabels:
    14. type: test-claim

    创建这个pvc

    1. # kubectl apply -f pvc1.yaml
    2. persistentvolumeclaim/pvc1 created
    3. # 看下pvc的STATUS为Bound代表成功挂载到pv了
    4. # kubectl get pvc
    5. NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
    6. pvc1 Bound pv1 1Gi RWO nfs 2s
    7. # 这个时候再看下pv,STATUS也是Bound了,同时CLAIM提示被default/pvc1消费
    8. # kubectl get pv
    9. NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
    10. pv1 1Gi RWO Recycle Bound default/pvc1 nfs
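
    下面回收演示里提到的nginx pod,就是前面章节创建过的一个挂载了pvc1的Deployment,这里给出一个示意性的最小配置方便大家复现(镜像和挂载路径仅作参考,非原文原样):

    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: nginx
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: nginx
      template:
        metadata:
          labels:
            app: nginx
        spec:
          containers:
          - name: nginx
            image: nginx
            volumeMounts:
            - name: html-files
              mountPath: /usr/share/nginx/html
          volumes:
          - name: html-files
            persistentVolumeClaim:
              claimName: pvc1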

    下面讲下如何回收PVC以及PV

    1. # 这里删除时会一直卡着,我们按ctrl+c看看怎么回事
    2. # kubectl delete pvc pvc1
    3. persistentvolumeclaim "pvc1" deleted
    4. ^C
    5. # 看下pvc发现STATUS是Terminating删除中的状态,我分析是因为服务pod还在占用这个pvc使用中
    6. # kubectl get pvc
    7. NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
    8. pvc1 Terminating pv1 1Gi RWO nfs 21m
    9. # 先删除这个pod
    10. # kubectl delete pod nginx-569546db98-99qpq
    11. pod "nginx-569546db98-99qpq" deleted
    12. # 再看先删除的pvc已经没有了
    13. # kubectl get pvc
    14. No resources found in default namespace.
    15. # 根据先前创建pv时的数据回收策略为Recycle – 清除 PV 中的数据,这时果然先创建的index.html已经被删除了,在生产中要尤其注意这里的模式,注意及时备份数据,注意及时备份数据,注意及时备份数据
    16. # ll /nfs_dir/pv1/
    17. total 0
    18. # 虽然此时pv是可以再次被pvc来消费的,但根据生产的经验,建议在删除pvc时,也同时把它消费的pv一并删除,然后再重启创建都是可以的
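
    按上面的建议,删除时把pvc和它消费的pv一并删掉,需要时再重新创建即可,大致操作如下:

    kubectl delete pvc pvc1
    kubectl delete pv pv1
    # 需要时重新创建
    kubectl apply -f pv1.yaml -f pvc1.yaml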

    5.2 StorageClass

    1.k8s持久化存储的第三节,给大家带来 StorageClass动态存储的讲解。

    我们上节课提到了K8s对于存储解耦的设计是,pv交给存储管理员来管理,我们只管用pvc来消费就好,但这里我们实际还是得一起管理pv和pvc,在实际工作中,我们(存储管理员)可以提前配置好pv的动态供给StorageClass,来根据pvc的消费动态生成pv。
    StorageClass

    我这是直接拿生产中用的实例来作演示,利用nfs-client-provisioner来生成一个基于nfs的StorageClass,部署配置yaml配置如下,保存为nfs-sc.yaml:

    1. apiVersion: v1
    2. kind: ServiceAccount
    3. metadata:
    4. name: nfs-client-provisioner
    5. namespace: kube-system
    6. ---
    7. kind: ClusterRole
    8. apiVersion: rbac.authorization.k8s.io/v1
    9. metadata:
    10. name: nfs-client-provisioner-runner
    11. rules:
    12. - apiGroups: [""]
    13. resources: ["persistentvolumes"]
    14. verbs: ["get", "list", "watch", "create", "delete"]
    15. - apiGroups: [""]
    16. resources: ["persistentvolumeclaims"]
    17. verbs: ["get", "list", "watch", "update"]
    18. - apiGroups: ["storage.k8s.io"]
    19. resources: ["storageclasses"]
    20. verbs: ["get", "list", "watch"]
    21. - apiGroups: [""]
    22. resources: ["events"]
    23. verbs: ["list", "watch", "create", "update", "patch"]
    24. - apiGroups: [""]
    25. resources: ["endpoints"]
    26. verbs: ["get", "list", "watch", "create", "update", "patch"]
    27. ---
    28. kind: ClusterRoleBinding
    29. apiVersion: rbac.authorization.k8s.io/v1
    30. metadata:
    31. name: run-nfs-client-provisioner
    32. subjects:
    33. - kind: ServiceAccount
    34. name: nfs-client-provisioner
    35. namespace: kube-system
    36. roleRef:
    37. kind: ClusterRole
    38. name: nfs-client-provisioner-runner
    39. apiGroup: rbac.authorization.k8s.io
    40. ---
    41. kind: Deployment
    42. apiVersion: apps/v1
    43. metadata:
    44. name: nfs-provisioner-01
    45. namespace: kube-system
    46. spec:
    47. replicas: 1
    48. strategy:
    49. type: Recreate
    50. selector:
    51. matchLabels:
    52. app: nfs-provisioner-01
    53. template:
    54. metadata:
    55. labels:
    56. app: nfs-provisioner-01
    57. spec:
    58. serviceAccountName: nfs-client-provisioner
    59. containers:
    60. - name: nfs-client-provisioner
    61. #老版本插件使用jmgao1983/nfs-client-provisioner:latest
    62. # image: jmgao1983/nfs-client-provisioner:latest
    63. image: vbouchaud/nfs-client-provisioner:latest
    64. imagePullPolicy: IfNotPresent
    65. volumeMounts:
    66. - name: nfs-client-root
    67. mountPath: /persistentvolumes
    68. env:
    69. - name: PROVISIONER_NAME
    70. value: nfs-provisioner-01 # 此处供应者名字供storageclass调用
    71. - name: NFS_SERVER
    72. value: 10.0.1.201 # 填入NFS的地址
    73. - name: NFS_PATH
    74. value: /nfs_dir # 填入NFS挂载的目录
    75. volumes:
    76. - name: nfs-client-root
    77. nfs:
    78. server: 10.0.1.201 # 填入NFS的地址
    79. path: /nfs_dir # 填入NFS挂载的目录
    80. ---
    81. apiVersion: storage.k8s.io/v1
    82. kind: StorageClass
    83. metadata:
    84. name: nfs-boge
    85. provisioner: nfs-provisioner-01
    86. # Supported policies: Delete、 Retain , default is Delete
    87. reclaimPolicy: Retain

    2.开始创建这个StorageClass

    1. # kubectl apply -f nfs-sc.yaml
    2. serviceaccount/nfs-client-provisioner created
    3. clusterrole.rbac.authorization.k8s.io/nfs-client-provisioner-runner created
    4. clusterrolebinding.rbac.authorization.k8s.io/run-nfs-client-provisioner created
    5. deployment.apps/nfs-provisioner-01 created
    6. storageclass.storage.k8s.io/nfs-boge created
    7. # 注意这个是放在kube-system的namespace下面,这里面放置的一般是偏系统类的服务
    8. # kubectl -n kube-system get pod -w
    9. NAME READY STATUS RESTARTS AGE
    10. calico-kube-controllers-7fdc86d8ff-dpdm5 1/1 Running 1 24h
    11. calico-node-8jcp5 1/1 Running 1 24h
    12. calico-node-m92rn 1/1 Running 1 24h
    13. calico-node-xg5n4 1/1 Running 1 24h
    14. calico-node-xrfqq 1/1 Running 1 24h
    15. coredns-d9b6857b5-5zwgf 1/1 Running 1 24h
    16. metrics-server-869ffc99cd-wfj44 1/1 Running 2 24h
    17. nfs-provisioner-01-5db96d9cc9-qxlgk 0/1 ContainerCreating 0 9s
    18. nfs-provisioner-01-5db96d9cc9-qxlgk 1/1 Running 0 21s
    19. # StorageClass已经创建好了
    20. # kubectl get sc
    21. NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
    22. nfs-boge nfs-provisioner-01 Retain Immediate false 37s
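
    (可选)如果想让pvc在不写storageClassName时也默认使用nfs-boge,可以把它设置为集群的默认StorageClass:

    kubectl patch storageclass nfs-boge -p '{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
    # 再看NAME一列会显示 nfs-boge (default)
    kubectl get sc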

    3.我们来基于StorageClass创建一个pvc,看看动态生成的pv是什么效果

    1. # vim pvc-sc.yaml
    2. kind: PersistentVolumeClaim
    3. apiVersion: v1
    4. metadata:
    5. name: pvc-sc
    6. spec:
    7. storageClassName: nfs-boge
    8. accessModes:
    9. - ReadWriteMany
    10. resources:
    11. requests:
    12. storage: 1Mi
    13. # kubectl apply -f pvc-sc.yaml
    14. persistentvolumeclaim/pvc-sc created
    15. # kubectl get pvc
    16. NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
    17. pvc-sc Bound pvc-63eee4c7-90fd-4c7e-abf9-d803c3204623 1Mi RWX nfs-boge 3s
    18. pvc1 Bound pv1 1Gi RWO nfs 24m
    19. # kubectl get pv
    20. NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
    21. pv1 1Gi RWO Recycle Bound default/pvc1 nfs 49m
    22. pvc-63eee4c7-90fd-4c7e-abf9-d803c3204623 1Mi RWX Retain Bound default/pvc-sc nfs-boge 7s

    4.我们修改下nginx的yaml配置,将pvc的名称换成上面的pvc-sc:

    1. # vim nginx.yaml
    2. ---
    3. apiVersion: apps/v1
    4. kind: Deployment
    5. metadata:
    6. labels:
    7. app: nginx
    8. name: nginx
    9. spec:
    10. replicas: 1
    11. selector:
    12. matchLabels:
    13. app: nginx
    14. template:
    15. metadata:
    16. labels:
    17. app: nginx
    18. spec:
    19. containers:
    20. - image: nginx
    21. name: nginx
    22. volumeMounts: # 我们这里将nginx容器默认的页面目录挂载
    23. - name: html-files
    24. mountPath: "/usr/share/nginx/html"
    25. volumes:
    26. - name: html-files
    27. persistentVolumeClaim:
    28. claimName: pvc-sc
    29. # kubectl apply -f nginx.yaml
    30. service/nginx unchanged
    31. deployment.apps/nginx configured
    32. # 这里注意下,因为是动态生成的pv,所以它的目录名是基于一串随机字符串生成的,这时我们直接进到pod内来创建访问页面
    33. # kubectl exec -it nginx-57cdc6d9b4-n497g -- bash
    34. root@nginx-57cdc6d9b4-n497g:/# echo 'storageClass used' > /usr/share/nginx/html/index.html
    35. root@nginx-57cdc6d9b4-n497g:/# exit
    36. # curl 10.68.238.54
    37. storageClass used
    38. # 我们看下NFS挂载的目录
    39. # ll /nfs_dir/
    40. total 0
    41. drwxrwxrwx 2 root root 24 Nov 27 17:52 default-pvc-sc-pvc-63eee4c7-90fd-4c7e-abf9-d803c3204623
    42. drwxr-xr-x 2 root root 6 Nov 27 17:25 pv1

    5.ubuntu20.04系统

    1. #1安装nfs服务端
    2. sudo apt install nfs-kernel-server -y
    3. #2. 创建目录
    4. sudo mkdir -p /nfs_dir/
    5. #3. 使任何客户端均可访问
    6. sudo chown nobody:nogroup /nfs_dir/
    7. #sudo chmod 755 /nfs_dir/
    8. sudo chmod 777 /nfs_dir/
    9. #4. 配置/etc/exports文件, 使任何ip均可访问(加入以下语句)
    10. vi /etc/exports
    11. /nfs_dir/ *(rw,sync,no_subtree_check)
    12.   
    13. #5. 检查nfs服务的目录
    14. # (重新加载配置)
    15. sudo exportfs -ra
    16. #(查看共享的目录和允许访问的ip段)
    17. sudo showmount -e
    18. #6. 重启nfs服务使以上配置生效
    19. sudo systemctl restart nfs-kernel-server
    20. #sudo /etc/init.d/nfs-kernel-server restart
    21. #查看nfs服务的状态是否为active状态:active(exited)或active(runing)
    22. systemctl status nfs-kernel-server
    23. #7. 测试nfs服务是否成功启动
    24. #安装nfs 客户端
    25. sudo apt-get install nfs-common
    26. #创建挂载目录
    27. sudo mkdir /nfs_dir/
    28. #7.4 在主机上的Linux中测试是否正常
    29. sudo mount -t nfs -o nolock -o tcp 192.168.100.11:/nfs_dir/ /nfs_dir/
    30. #错误 mount.nfs: access denied by server while mounting
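
    如果出现上面的 mount.nfs: access denied 报错,一般按下面的思路排查(服务端IP仍以上面的192.168.100.11为例):

    # 1) 确认/etc/exports里的目录和网段写法正确,修改后重新导出并查看
    sudo exportfs -ra
    sudo exportfs -v
    # 2) 在客户端确认服务端实际导出了哪些目录
    showmount -e 192.168.100.11
    # 3) 确认共享目录权限足够,且防火墙没有拦截NFS的2049端口
    sudo chmod 777 /nfs_dir/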

    六、k8s架构师课程之有状态服务StatefulSet

    1.StatefulSet

    前面我们讲到了Deployment、DaemonSet都只适合用来跑无状态的服务pod,那么这里的StatefulSet(简写sts)就是用来跑有状态服务pod的。

    那怎么理解有状态服务和无状态服务呢?简单快速地理解为:无状态服务最典型的是WEB服务器的每次http请求,它的每次请求都是全新的,和之前的没有关系;那么有状态服务用网游服务器来举例比较恰当了,每个用户的登陆请求,服务端都是先根据这个用户之前注册过的帐号密码等信息来判断这次登陆请求是否正常。

    无状态服务因为相互之间都是独立的,很适合用横向扩充来增加服务的资源量

    还有一个很形象的比喻,在K8s的无状态服务的pod有点类似于农村圈养的牲畜,饲养它们的人不会给它们每个都单独取个名字(pod都是随机名称,IP每次发生重启也是变化的),当其中一只病了或被卖了,带来的感观只是数量上的减少,这时再买些相应数量的牲畜回来就可以回到之前的状态了(当一个pod因为某些原来被删除掉的时候,K8s会启动一个新的pod来代替它);而有状态服务的pod就像养的一只只宠物,主人对待自己喜欢的宠物都会给它们取一个比较有特色的名字(在K8s上运行的有状态服务的pod,都会被给予一个独立的固定名称),并且每只宠物都有它独特的外貌和性格,如果万一这只宠物丢失了,那么需要到宠物店再买一只同样品种同样毛色的宠物来代替了(当有状态服务的pod删除时,K8s会启动一个和先前一模一样名称的pod来代替它)。

    有状态服务sts比较常见的有mongo复制集、redis cluster、rabbitmq cluster等等,这些服务基本都会用StatefulSet模式来运行;当然除此之外,它们内部集群的关系还需要一系列脚本或controller来维系状态,这些会在后面进阶课程专门来讲。现在为了让大家先更好地理解StatefulSet,我这里还是直接用nginx服务来实战演示

    1. 1、创建pv
    2. -------------------------------------------
    3. root@node1:~# cat web-pv.yaml
    4. # mkdir -p /nfs_dir/{web-pv0,web-pv1}
    5. apiVersion: v1
    6. kind: PersistentVolume
    7. metadata:
    8. name: web-pv0
    9. labels:
    10. type: web-pv0
    11. spec:
    12. capacity:
    13. storage: 1Gi
    14. accessModes:
    15. - ReadWriteOnce
    16. persistentVolumeReclaimPolicy: Retain
    17. storageClassName: my-storage-class
    18. nfs:
    19. path: /nfs_dir/web-pv0
    20. server: 10.0.1.201
    21. ---
    22. apiVersion: v1
    23. kind: PersistentVolume
    24. metadata:
    25. name: web-pv1
    26. labels:
    27. type: web-pv1
    28. spec:
    29. capacity:
    30. storage: 1Gi
    31. accessModes:
    32. - ReadWriteOnce
    33. persistentVolumeReclaimPolicy: Retain
    34. storageClassName: my-storage-class
    35. nfs:
    36. path: /nfs_dir/web-pv1
    37. server: 10.0.1.201
    38. 2、创建pvc(这一步可以省去让其自动创建,这里手动创建是为了让大家能更清楚在sts里面pvc的创建过程)
    39. -------------------------------------------
    40. 这一步非常非常的关键,因为如果创建的PVC的名称和StatefulSet中的名称没有对应上,
    41. 那么StatefulSet中的Pod就肯定创建不成功.
    42. 我们在这里创建了一个叫做www-web-0和www-web-1的PVC,这个名字是不是很奇怪,
    43. 而且在这个yaml里并没有提到PV的名字,所以PV和PVC是怎么bound起来的呢?
    44. 是通过labels标签下的key:value键值对来进行匹配的,
    45. 我们在创建PV时指定了label的键值对,在PVC里通过selector可以指定label。
    46. 然后再回到这个PVC的名称定义:www-web-0,为什么叫这样一个看似有规律的名字呢,
    47. 这里需要看看下面创建StatefulSet中的yaml,
    48. 首先我们看到StatefulSet的name叫web,设置的replicas为2个,
    49. volumeMounts和volumeClaimTemplates的name必须相同,为www,
    50. 所以StatefulSet创建的第一个Pod的name应该为web-0,第二个为web-1。
    51. 这里StatefulSet中的Pod与PVC之间的绑定关系是通过名称来匹配的,即:
    52. PVC_name = volumeClaimTemplates_name + "-" + pod_name
    53. www-web-0 = www + "-" + web-0
    54. www-web-1 = www + "-" + web-1
    55. root@node1:~# cat web-pvc.yaml
    56. kind: PersistentVolumeClaim
    57. apiVersion: v1
    58. metadata:
    59. name: www-web-0
    60. spec:
    61. accessModes:
    62. - ReadWriteOnce
    63. resources:
    64. requests:
    65. storage: 1Gi
    66. storageClassName: my-storage-class
    67. selector:
    68. matchLabels:
    69. type: web-pv0
    70. ---
    71. kind: PersistentVolumeClaim
    72. apiVersion: v1
    73. metadata:
    74. name: www-web-1
    75. spec:
    76. accessModes:
    77. - ReadWriteOnce
    78. resources:
    79. requests:
    80. storage: 1Gi
    81. storageClassName: my-storage-class
    82. selector:
    83. matchLabels:
    84. type: web-pv1
    85. 3、创建Service 和 StatefulSet
    86. -------------------------------------------
    87. 在上一步中我们已经创建了名为www-web-0的PVC了,接下来创建一个service和statefulset,
    88. service的名称可以随意取,但是statefulset的名称已经定死了,为web,
    89. 并且statefulset中的volumeClaimTemplates_name必须为www,volumeMounts_name也必须为www。
    90. 只有这样,statefulset中的pod才能通过命名来匹配到PVC,否则会创建失败。
    91. root@node1:~# cat web.yaml
    92. apiVersion: v1
    93. kind: Service
    94. metadata:
    95. name: web-headless
    96. labels:
    97. app: nginx
    98. spec:
    99. ports:
    100. - port: 80
    101. name: web
    102. clusterIP: None
    103. selector:
    104. app: nginx
    105. ---
    106. apiVersion: v1
    107. kind: Service
    108. metadata:
    109. name: web
    110. labels:
    111. app: nginx
    112. spec:
    113. ports:
    114. - port: 80
    115. name: web
    116. selector:
    117. app: nginx
    118. ---
    119. apiVersion: apps/v1
    120. kind: StatefulSet
    121. metadata:
    122. name: web
    123. spec:
    124. selector:
    125. matchLabels:
    126. app: nginx # has to match .spec.template.metadata.labels
    127. serviceName: "web-headless" # 需要和上面headless Service的name(web-headless)一致
    128. replicas: 2 # by default is 1
    129. template:
    130. metadata:
    131. labels:
    132. app: nginx # has to match .spec.selector.matchLabels
    133. spec:
    134. terminationGracePeriodSeconds: 10
    135. containers:
    136. - name: nginx
    137. image: nginx
    138. imagePullPolicy: IfNotPresent
    139. ports:
    140. - containerPort: 80
    141. name: web
    142. volumeMounts:
    143. - name: www
    144. mountPath: /usr/share/nginx/html
    145. volumeClaimTemplates:
    146. - metadata:
    147. name: www
    148. spec:
    149. accessModes: [ "ReadWriteOnce" ]
    150. storageClassName: "my-storage-class"
    151. resources:
    152. requests:
    153. storage: 1Gi
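
    上面的pv、pvc、service和StatefulSet都准备好后,按下面的顺序创建并观察pod命名(是固定的web-0、web-1),命令仅作参考:

    # 先在NFS服务器上建好pv对应的目录
    mkdir -p /nfs_dir/{web-pv0,web-pv1}
    kubectl apply -f web-pv.yaml
    kubectl apply -f web-pvc.yaml
    kubectl apply -f web.yaml
    kubectl get pvc
    # 预期看到名称固定的web-0、web-1两个pod
    kubectl get pod -l app=nginx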

    2.动态存储创建sts-web.yaml

    1. cat sts-web.yaml
    2. apiVersion: v1
    3. kind: Service
    4. metadata:
    5. name: web-headless
    6. labels:
    7. app: nginx
    8. spec:
    9. ports:
    10. - port: 80
    11. name: web
    12. clusterIP: None
    13. selector:
    14. app: nginx
    15. ---
    16. apiVersion: v1
    17. kind: Service
    18. metadata:
    19. name: web
    20. labels:
    21. app: nginx
    22. spec:
    23. ports:
    24. - port: 80
    25. name: web
    26. selector:
    27. app: nginx
    28. ---
    29. apiVersion: apps/v1
    30. kind: StatefulSet
    31. metadata:
    32. name: web
    33. spec:
    34. selector:
    35. matchLabels:
    36. app: nginx # has to match .spec.template.metadata.labels
    37. serviceName: "nginx"
    38. replicas: 2 # by default is 1
    39. template:
    40. metadata:
    41. labels:
    42. app: nginx # has to match .spec.selector.matchLabels
    43. spec:
    44. terminationGracePeriodSeconds: 10
    45. containers:
    46. - name: nginx
    47. image: nginx
    48. imagePullPolicy: IfNotPresent
    49. ports:
    50. - containerPort: 80
    51. name: web
    52. volumeMounts:
    53. - name: www
    54. mountPath: /usr/share/nginx/html
    55. volumeClaimTemplates:
    56. - metadata:
    57. name: www
    58. spec:
    59. accessModes: [ "ReadWriteOnce" ]
    60. storageClassName: "nfs-boge" #之前创建的sc
    61. resources:
    62. requests:
    63. storage: 1Gi
    64. 执行测试
    65. [root@k8s-m01 nfs_dir]# kubectl apply -f sts-web.yaml -n renwei
    66. 编辑测试页面
    67. [root@k8s-m01 nfs_dir]# echo 00000 > renwei-www-web-0-pvc-477af122-468d-43f2-b935-5b1bd653fca5/index.html
    68. [root@k8s-m01 nfs_dir]# echo 11111 >renwei-www-web-1-pvc-f3e720ec-98bf-4d3e-85bf-44eae02fa1c7/index.html
    69. 启动临时程序,测试页面
    70. [root@k8s-m01 nfs_dir]# kubectl get pod,svc -n renwei
    71. [root@k8s-m01 nfs_dir]# curl 10.68.186.15   # 换成上一步查到的service ClusterIP
    72. [root@k8s-m01 nfs_dir]# kubectl run --rm -it busybox --image=busybox -- sh
    73. If you don't see a command prompt, try pressing enter.
    74. / #
    75. / #
    76. / # wget www-web-0.web-headless.renwei
    77. / # cat index.html
    78. / # exit
    79. [root@k8s-m01 nfs_dir]# kubectl delete -f sts-web.yaml -n renwei
    80. #删除pv,pvc
    81. [root@k8s-m01 nfs_dir]# kubectl get pvc -n renwei
    82. [root@k8s-m01 nfs_dir]# kubectl -n renwei delete pvc www-web-0
    83. [root@k8s-m01 nfs_dir]# kubectl -n renwei delete pvc www-web-1
    84. [root@k8s-m01 nfs_dir]# kubectl get pv
    85. [root@k8s-m01 nfs_dir]# kubectl delete pv pvc-477af122-468d-43f2-b935-5b1bd653fca5
    86. [root@k8s-m01 nfs_dir]# kubectl delete pv pvc-f3e720ec-98bf-4d3e-85bf-44eae02fa1c7
    87. [root@k8s-m01 nfs_dir]# kubectl get pvc -n renwei
    88. [root@k8s-m01 nfs_dir]# kubectl get pv

    七、k8s一次性和定时任务

    1.首先讲下一次性任务,在K8s中它叫job,直接来实战一番,先准备下yaml配置

    这里我们不知道yaml怎么写,可以直接kubectl create job -h就能看到命令行创建示例了,然后可以根据创建出来的服务资源来导出它的yaml配置为my-job.yaml

    1. apiVersion: batch/v1 # 1. batch/v1 是当前 Job 的 apiVersion
    2. kind: Job # 2. 指明当前资源的类型为 Job
    3. metadata:
    4. name: my-job
    5. spec:
    6. template:
    7. metadata:
    8. spec:
    9. containers:
    10. - image: busybox
    11. name: my-job
    12. command: ["echo","Hello, boge."]
    13. restartPolicy: Never # 3. restartPolicy 指定什么情况下需要重启容器。对于 Job,只能设置为 Never 或者 OnFailure

    2.创建它并查看结果

    1. # kubectl apply -f my-job.yaml
    2. job.batch/my-job created
    3. # kubectl get jobs.batch
    4. NAME COMPLETIONS DURATION AGE
    5. my-job 1/1 2s 73s
    6. # COMPLETIONS 已完成的
    7. # DURATION 这个job运行所花费的时间
    8. # AGE 这个job资源已经从创建到目前为止的时间
    9. # job会生成一个pod,当完成任务后会是Completed的状态
    10. # kubectl get pod
    11. NAME READY STATUS RESTARTS AGE
    12. my-job-7h6fb 0/1 Completed 0 31s
    13. # 看下这个job生成的pod日志
    14. # kubectl logs my-job-7h6fb
    15. Hello, boge.

    3. job失败了会有什么现象出现呢?

    我们编辑这个job的yaml,把执行的命令改成一个不存在的命令看看会发生什么

    1. apiVersion: batch/v1 # 1. batch/v1 是当前 Job 的 apiVersion
    2. kind: Job # 2. 指明当前资源的类型为 Job
    3. metadata:
    4. name: my-job
    5. spec:
    6. template:
    7. metadata:
    8. spec:
    9. containers:
    10. - image: busybox
    11. name: my-job
    12. command: ["echoaaa","Hello, boge."]
    13. restartPolicy: Never # 3. restartPolicy 指定什么情况下需要重启容器。对于 Job,只能设置为 Never 或者 OnFailure

    4.创建它

    1. # kubectl apply -f my-job.yaml
    2. # 可以观察到这个job因为不成功,并且restartPolicy重启模式是Never不会被重启,但它的job状态始终未完成,所以它会一直不停的创建新的pod,直到COMPLETIONS为1/1,对于我们这个示例,它显然永远都不会成功
    3. # kubectl get pod
    4. NAME READY STATUS RESTARTS AGE
    5. my-job-9fcbm 0/1 StartError 0 47s
    6. my-job-bt2kd 0/1 StartError 0 54s
    7. my-job-mlnzz 0/1 StartError 0 37s
    8. my-job-mntdp 0/1 StartError 0 17s
    9. # kubectl get job
    10. NAME COMPLETIONS DURATION AGE
    11. my-job 0/1 15s 15s
    12. # 找一个pod看下事件描述,会很清晰地指出命令不存在
    13. # kubectl describe pod my-job-9fcbm
    14. Name: my-job-9fcbm
    15. Namespace: default
    16. ......
    17. Events:
    18. Type Reason Age From Message
    19. ---- ------ ---- ---- -------
    20. Normal Scheduled 44s default-scheduler Successfully assigned default/my-job-9fcbm to 10.0.0.204
    21. Normal Pulling 43s kubelet Pulling image "busybox"
    22. Normal Pulled 36s kubelet Successfully pulled image "busybox" in 7.299038719s
    23. Normal Created 36s kubelet Created container my-job
    24. Warning Failed 36s kubelet Error: failed to create containerd task: OCI runtime create failed: container_linux.go:370: starting container process caused: exec: "echoaaa": executable file not found in $PATH: unknown
    25. # 删除掉这个job,不然那创建的pod数量可有够多的了
    26. # kubectl delete job my-job
    27. # 试试把restartPolicy重启模式换成OnFailure观察看看
    28. # kubectl get pod
    29. NAME READY STATUS RESTARTS AGE
    30. my-job-gs95h 0/1 CrashLoopBackOff 3 84s
    31. # 可以看到它不会不停地创建新的pod,而是尝试重启自身以期望恢复正常,这里看到已经重启了3次,重启次数还会继续增加,达到重试上限后这个失败的pod会被K8s清理掉;因为这里只是job而不是deployment,它不会再自动拉起新的pod,所以这个job就以失败告终了。这说明OnFailure是生效的,至少不会出现那么多错误的pod了
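
    补充一点:如果既想用Never又不想让失败的pod无限增长,可以给Job的spec加上backoffLimit字段来限制重试次数(K8s标准字段,默认是6),示例如下:

    apiVersion: batch/v1
    kind: Job
    metadata:
      name: my-job
    spec:
      backoffLimit: 3   # 失败重试3次后Job就标记为Failed,不再创建新的pod
      template:
        spec:
          containers:
          - image: busybox
            name: my-job
            command: ["echoaaa","Hello, boge."]
          restartPolicy: Never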

    5.并行执行job

    准备好yaml配置

    1. apiVersion: batch/v1
    2. kind: Job
    3. metadata:
    4. name: my-job
    5. spec:
    6. parallelism: 2 # 并行执行2个job
    7. template:
    8. metadata:
    9. name: my-job
    10. spec:
    11. containers:
    12. - image: busybox
    13. name: my-job
    14. command: ["echo","Hello, boge."]
    15. restartPolicy: OnFailure

    6.创建并查看结果

    1. # kubectl apply -f my-job.yaml
    2. job.batch/my-job created
    3. # job一共启动了2个pod,并且它们的AGE一样,可见是并行创建的
    4. # kubectl get pod
    5. NAME READY STATUS RESTARTS AGE
    6. my-job-fwf8l 0/1 Completed 0 7s
    7. my-job-w2fxd 0/1 Completed 0 7s

    7.再来个组合测试下并行完成定制的总任务数量

    1. apiVersion: batch/v1
    2. kind: Job
    3. metadata:
    4. name: myjob
    5. spec:
    6. completions: 6 # 此job完成pod的总数量
    7. parallelism: 2 # 每次并发跑2个job
    8. template:
    9. metadata:
    10. name: myjob
    11. spec:
    12. containers:
    13. - name: hello
    14. image: busybox
    15. command: ["echo"," hello boge! "]
    16. restartPolicy: OnFailure

    8.创建并查看结果

    1. # 可以看到是每次并发2个job,完成6个总量即停止
    2. # kubectl get pod
    3. NAME READY STATUS RESTARTS AGE
    4. myjob-54wmk 0/1 Completed 0 11s
    5. myjob-fgtmj 0/1 Completed 0 15s
    6. myjob-fkj5l 0/1 Completed 0 7s
    7. myjob-hsccm 0/1 Completed 0 7s
    8. myjob-lrpsr 0/1 Completed 0 15s
    9. myjob-ppfns 0/1 Completed 0 11s
    10. # 符合预期
    11. # kubectl get job
    12. NAME COMPLETIONS DURATION AGE
    13. myjob 6/6 14s 34s
    14. # 测试完成后删掉这个资源
    15. kubectl delete job myjob

    9 到此,job的内容就讲完了,在生产中,job比较适合用在CI/CD流水线中,作完一次性任务使用,我在生产中基本没怎么用这个资源。
    cronjob

    上面的job是一次性任务,那我们需要定时循环来执行一个任务可以吗?答案肯定是可以的,就像我们在linux系统上面用crontab一样,在K8s上用cronjob的另一个好处就是它是分布式的,执行的pod可以是在集群中的任意一台NODE上面(这点和cronsun有点类似)

    让我们开始实战吧,先准备一下cronjob的yaml配置为my-cronjob.yaml

    1. apiVersion: batch/v1beta1 # <--------- 当前 CronJob 的 apiVersion
    2. kind: CronJob # <--------- 当前资源的类型
    3. metadata:
    4. name: hello
    5. spec:
    6. schedule: "* * * * *" # <--------- schedule 指定什么时候运行 Job,其格式与 Linux crontab 一致,这里 * * * * * 的含义是每一分钟启动一次
    7. jobTemplate: # <--------- 定义 Job 的模板,格式与前面 Job 一致
    8. spec:
    9. template:
    10. spec:
    11. containers:
    12. - name: hello
    13. image: busybox
    14. command: ["echo","boge like cronjob."]
    15. restartPolicy: OnFailure
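
    注意batch/v1beta1的CronJob在新版本K8s中已被移除(1.21及以后可以直接用batch/v1),生产中同时建议加上并发策略和历史记录数量的限制,下面是一个参考写法,以自己集群支持的API版本为准:

    apiVersion: batch/v1
    kind: CronJob
    metadata:
      name: hello
    spec:
      schedule: "* * * * *"
      concurrencyPolicy: Forbid      # 上一次任务还没结束就不启动新的
      successfulJobsHistoryLimit: 3  # 只保留最近3条成功的Job记录
      failedJobsHistoryLimit: 1
      jobTemplate:
        spec:
          template:
            spec:
              containers:
              - name: hello
                image: busybox
                command: ["echo","boge like cronjob."]
              restartPolicy: OnFailure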

    10.正常创建后,我们过几分钟来看看运行结果

    1. # 这里会显示cronjob的综合信息
    2. # kubectl get cronjobs.batch
    3. NAME SCHEDULE SUSPEND ACTIVE LAST SCHEDULE AGE
    4. hello * * * * * False 0 66s 2m20s
    5. # 可以看到它每隔一分钟就会创建一个pod来执行job任务
    6. # kubectl get pod
    7. NAME READY STATUS RESTARTS AGE
    8. hello-1610267460-9b6hp 0/1 Completed 0 2m5s
    9. hello-1610267520-fm427 0/1 Completed 0 65s
    10. hello-1610267580-v8g4h 0/1 ContainerCreating 0 5s
    11. # 测试完成后删掉这个资源
    12. # kubectl delete cronjobs.batch hello
    13. cronjob.batch "hello" deleted

    cronjob定时任务在生产中的用处很多,这也是为什么上面job我说用得很少的缘故,我们可以把一些需要定时定期运行的任务,在K8s上以cronjob运行,依托K8s强大的资源调度以及服务自愈能力,我们可以放心的把定时任务交给它执行。

    八、k8sRBAC角色访问控制

    1.RBAC

    在k8s上我们如何控制访问权限呢,答案就是Role-based access control (RBAC) - 基于角色(Role)的访问控制,(RBAC)是一种基于组织中用户的角色来调节控制对 计算机或网络资源的访问的方法。

    在早期的K8s版本,RBAC还未出现的时候,整个K8s的安全是较为薄弱的,有了RBAC后,我们可以对K8s集群的访问人员作非常明细化的控制,控制他们能访问什么资源,以只读还是可以读写的形式来访问。目前RBAC是K8s默认的安全授权标准,所以我们非常有必要来掌握RBAC的使用,这样才能更有力地保障我们K8s集群的安全使用,下面我们将以生产中的实际使用来帮助大家了解及掌握RBAC的生产应用。

    RBAC里面的几种资源关系图,下面将用下面的资源来演示生产中经典的RBAC应用

                      |--- Role --- RoleBinding                只在指定namespace中生效
    ServiceAccount ---|
                      |--- ClusterRole --- ClusterRoleBinding  不受namespace限制,在整个K8s集群中生效

    2.在我看来,RBAC在K8s上的用途主要分为两大类:

    第一类是保证在K8s上运行的pod服务具有相应的集群权限,如gitlab的CI/CD,它需要能访问除自身以外其他pod,比如gitlab-runner的pod的权限,再比如gitlab-runner的pod需要拥有创建新的临时pod的权限,用以来构建CI/CD自动化流水线,这里大家没用过不懂没关系,先简单了解下就可以了,在本课程后面基于K8s及gitlab的生产实战CI/CD内容会给大家作详细实战讲解;

    第二类是创建能访问K8s相应资源、拥有对应权限的kube-config配置给到使用K8s的人员,来作为连接K8s的授权凭证

    第一类的实战这里先暂时以早期的helm2来作下讲解,helm是一个快捷安装K8s各类资源的管理工具。结合之前给大家讲解的内容,一个较为完整的服务可能会用到deployment,service,configmap,secret,ingress等资源来组合使用,大家在用的过程中可能会觉得配置使用较为麻烦,这时候helm就出现了,它把这些资源都打包封装成它自己能识别的内容,我们在安装一个服务的时候,就只需要作下简单的配置,一条命令即可完成上述众多资源的配置安装。tiller相当于helm的服务端,它是需要有权限在K8s中创建各类资源的,在初始安装使用时,如果没有配置RBAC权限,我们会看到如下报错:

    root@node1:~# helm install stable/mysql
    Error: no available release name found

    3.这时,我们可以来快速解决这个问题,创建sa关联K8s自带的最高权限的ClusterRole(生产中建议不要这样做,权限太高有安全隐患,这个就和linux的root管理帐号一样,一般都是建议通过sudo来控制帐号权限)

    1. kubectl create serviceaccount --namespace kube-system tiller
    2. kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
    3. kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'

    4.第二类,我这里就直接以我在生产中实施的完整脚本来做讲解及实战,相信会给大家带来一个全新的学习感受,并能很快掌握它们:

    1.创建对指定namespace有所有权限的kube-config

    1. #!/bin/bash
    2. #
    3. # This Script based on https://jeremievallee.com/2018/05/28/kubernetes-rbac-namespace-user.html
    4. # K8s'RBAC doc: https://kubernetes.io/docs/reference/access-authn-authz/rbac
    5. # Gitlab'CI/CD doc: https://docs.gitlab.com/ee/user/permissions.html#running-pipelines-on-protected-branches
    6. #
    7. # In honor of the remarkable Windson
    8. BASEDIR="$(dirname "$0")"
    9. folder="$BASEDIR/kube_config"
    10. echo -e "All namespaces is here: \n$(kubectl get ns|awk 'NR!=1{print $1}')"
    11. echo "endpoint server if local network you can use $(kubectl cluster-info |awk '/Kubernetes/{print $NF}')"
    12. namespace=$1
    13. endpoint=$(echo "$2" | sed -e 's,https\?://,,g')
    14. if [[ -z "$endpoint" || -z "$namespace" ]]; then
    15. echo "Use "$(basename "$0")" NAMESPACE ENDPOINT";
    16. exit 1;
    17. fi
    18. if ! kubectl get ns|awk 'NR!=1{print $1}'|grep -w "$namespace";then kubectl create ns "$namespace";else echo "namespace: $namespace was exist."; exit 1 ;fi
    19. echo "---
    20. apiVersion: v1
    21. kind: ServiceAccount
    22. metadata:
    23. name: $namespace-user
    24. namespace: $namespace
    25. ---
    26. kind: Role
    27. apiVersion: rbac.authorization.k8s.io/v1beta1
    28. metadata:
    29. name: $namespace-user-full-access
    30. namespace: $namespace
    31. rules:
    32. - apiGroups: ['', 'extensions', 'apps', 'metrics.k8s.io']
    33. resources: ['*']
    34. verbs: ['*']
    35. - apiGroups: ['batch']
    36. resources:
    37. - jobs
    38. - cronjobs
    39. verbs: ['*']
    40. ---
    41. kind: RoleBinding
    42. apiVersion: rbac.authorization.k8s.io/v1beta1
    43. metadata:
    44. name: $namespace-user-view
    45. namespace: $namespace
    46. subjects:
    47. - kind: ServiceAccount
    48. name: $namespace-user
    49. namespace: $namespace
    50. roleRef:
    51. apiGroup: rbac.authorization.k8s.io
    52. kind: Role
    53. name: $namespace-user-full-access
    54. ---
    55. # https://kubernetes.io/zh/docs/concepts/policy/resource-quotas/
    56. apiVersion: v1
    57. kind: ResourceQuota
    58. metadata:
    59. name: $namespace-compute-resources
    60. namespace: $namespace
    61. spec:
    62. hard:
    63. pods: "10"
    64. services: "10"
    65. persistentvolumeclaims: "5"
    66. requests.cpu: "1"
    67. requests.memory: 2Gi
    68. limits.cpu: "2"
    69. limits.memory: 4Gi" | kubectl apply -f -
    70. kubectl -n $namespace describe quota $namespace-compute-resources
    71. mkdir -p $folder
    72. tokenName=$(kubectl get sa $namespace-user -n $namespace -o "jsonpath={.secrets[0].name}")
    73. token=$(kubectl get secret $tokenName -n $namespace -o "jsonpath={.data.token}" | base64 --decode)
    74. certificate=$(kubectl get secret $tokenName -n $namespace -o "jsonpath={.data['ca\.crt']}")
    75. echo "apiVersion: v1
    76. kind: Config
    77. preferences: {}
    78. clusters:
    79. - cluster:
    80. certificate-authority-data: $certificate
    81. server: https://$endpoint
    82. name: $namespace-cluster
    83. users:
    84. - name: $namespace-user
    85. user:
    86. as-user-extra: {}
    87. client-key-data: $certificate
    88. token: $token
    89. contexts:
    90. - context:
    91. cluster: $namespace-cluster
    92. namespace: $namespace
    93. user: $namespace-user
    94. name: $namespace
    95. current-context: $namespace" > $folder/$namespace.kube.conf
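
    这个脚本的用法是传入namespace和apiserver地址两个参数,下面是一个使用示例(脚本文件名和namespace均为示意,请换成自己的):

    # 生成名为dev-ns的namespace及对应的kube-config
    bash create_ns_kubeconfig.sh dev-ns 10.0.1.201:6443
    # 用生成的配置验证:指定namespace内可以正常操作
    kubectl --kubeconfig ./kube_config/dev-ns.kube.conf -n dev-ns get pods
    # 访问集群级别资源会被拒绝,因为只绑定了该namespace的Role
    kubectl --kubeconfig ./kube_config/dev-ns.kube.conf get nodes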

    2.创建对指定namespace有所有权限的kube-config(在已有的namespace中创建)

    1. #!/bin/bash
    2. BASEDIR="$(dirname "$0")"
    3. folder="$BASEDIR/kube_config"
    4. echo -e "All namespaces is here: \n$(kubectl get ns|awk 'NR!=1{print $1}')"
    5. echo "endpoint server if local network you can use $(kubectl cluster-info |awk '/Kubernetes/{print $NF}')"
    6. namespace=$1
    7. endpoint=$(echo "$2" | sed -e 's,https\?://,,g')
    8. if [[ -z "$endpoint" || -z "$namespace" ]]; then
    9. echo "Use "$(basename "$0")" NAMESPACE ENDPOINT";
    10. exit 1;
    11. fi
    12. echo "---
    13. apiVersion: v1
    14. kind: ServiceAccount
    15. metadata:
    16. name: $namespace-user
    17. namespace: $namespace
    18. ---
    19. kind: Role
    20. apiVersion: rbac.authorization.k8s.io/v1beta1
    21. metadata:
    22. name: $namespace-user-full-access
    23. namespace: $namespace
    24. rules:
    25. - apiGroups: ['', 'extensions', 'apps', 'metrics.k8s.io']
    26. resources: ['*']
    27. verbs: ['*']
    28. - apiGroups: ['batch']
    29. resources:
    30. - jobs
    31. - cronjobs
    32. verbs: ['*']
    33. ---
    34. kind: RoleBinding
    35. apiVersion: rbac.authorization.k8s.io/v1beta1
    36. metadata:
    37. name: $namespace-user-view
    38. namespace: $namespace
    39. subjects:
    40. - kind: ServiceAccount
    41. name: $namespace-user
    42. namespace: $namespace
    43. roleRef:
    44. apiGroup: rbac.authorization.k8s.io
    45. kind: Role
    46. name: $namespace-user-full-access" | kubectl apply -f -
    47. mkdir -p $folder
    48. tokenName=$(kubectl get sa $namespace-user -n $namespace -o "jsonpath={.secrets[0].name}")
    49. token=$(kubectl get secret $tokenName -n $namespace -o "jsonpath={.data.token}" | base64 --decode)
    50. certificate=$(kubectl get secret $tokenName -n $namespace -o "jsonpath={.data['ca\.crt']}")
    51. echo "apiVersion: v1
    52. kind: Config
    53. preferences: {}
    54. clusters:
    55. - cluster:
    56. certificate-authority-data: $certificate
    57. server: https://$endpoint
    58. name: $namespace-cluster
    59. users:
    60. - name: $namespace-user
    61. user:
    62. as-user-extra: {}
    63. client-key-data: $certificate
    64. token: $token
    65. contexts:
    66. - context:
    67. cluster: $namespace-cluster
    68. namespace: $namespace
    69. user: $namespace-user
    70. name: $namespace
    71. current-context: $namespace" > $folder/$namespace.kube.conf

    3.同上,创建只读权限的

    1. #!/bin/bash
    2. BASEDIR="$(dirname "$0")"
    3. folder="$BASEDIR/kube_config"
    4. echo -e "All namespaces is here: \n$(kubectl get ns|awk 'NR!=1{print $1}')"
    5. echo "endpoint server if local network you can use $(kubectl cluster-info |awk '/Kubernetes/{print $NF}')"
    6. namespace=$1
    7. endpoint=$(echo "$2" | sed -e 's,https\?://,,g')
    8. if [[ -z "$endpoint" || -z "$namespace" ]]; then
    9. echo "Use "$(basename "$0")" NAMESPACE ENDPOINT";
    10. exit 1;
    11. fi
    12. echo "---
    13. apiVersion: v1
    14. kind: ServiceAccount
    15. metadata:
    16. name: $namespace-user-readonly
    17. namespace: $namespace
    18. ---
    19. kind: Role
    20. apiVersion: rbac.authorization.k8s.io/v1beta1
    21. metadata:
    22. name: $namespace-user-readonly-access
    23. namespace: $namespace
    24. rules:
    25. - apiGroups: ['', 'extensions', 'apps', 'metrics.k8s.io']
    26. resources: ['pods', 'pods/log']
    27. verbs: ['get', 'list', 'watch']
    28. - apiGroups: ['batch']
    29. resources: ['jobs', 'cronjobs']
    30. verbs: ['get', 'list', 'watch']
    31. ---
    32. kind: RoleBinding
    33. apiVersion: rbac.authorization.k8s.io/v1beta1
    34. metadata:
    35. name: $namespace-user-view-readonly
    36. namespace: $namespace
    37. subjects:
    38. - kind: ServiceAccount
    39. name: $namespace-user-readonly
    40. namespace: $namespace
    41. roleRef:
    42. apiGroup: rbac.authorization.k8s.io
    43. kind: Role
    44. name: $namespace-user-readonly-access" | kubectl apply -f -
    45. mkdir -p $folder
    46. tokenName=$(kubectl get sa $namespace-user-readonly -n $namespace -o "jsonpath={.secrets[0].name}")
    47. token=$(kubectl get secret $tokenName -n $namespace -o "jsonpath={.data.token}" | base64 --decode)
    48. certificate=$(kubectl get secret $tokenName -n $namespace -o "jsonpath={.data['ca\.crt']}")
    49. echo "apiVersion: v1
    50. kind: Config
    51. preferences: {}
    52. clusters:
    53. - cluster:
    54. certificate-authority-data: $certificate
    55. server: https://$endpoint
    56. name: $namespace-cluster-readonly
    57. users:
    58. - name: $namespace-user-readonly
    59. user:
    60. as-user-extra: {}
    61. client-key-data: $certificate
    62. token: $token
    63. contexts:
    64. - context:
    65. cluster: $namespace-cluster-readonly
    66. namespace: $namespace
    67. user: $namespace-user-readonly
    68. name: $namespace
    69. current-context: $namespace" > $folder/$namespace-readonly.kube.conf

    4.最后,来一个多个集群配置融合的创建,这个在多集群管理方面非常有用,这里只以创建只读权限配置作为演示

    1. #!/bin/bash
    2. # describe: create k8s cluster all namespaces resources with readonly clusterrole, no exec 、delete ...
    3. # look system default to study:
    4. # kubectl describe clusterrole view
    5. # restore all change:
    6. #kubectl -n kube-system delete sa all-readonly-${clustername}
    7. #kubectl delete clusterrolebinding all-readonly-${clustername}
    8. #kubectl delete clusterrole all-readonly-${clustername}
    9. clustername=$1
    10. Help(){
    11. echo "Use "$(basename "$0")" ClusterName(example: k8s1|k8s2|k8s3|delk8s1|delk8s2|delk8s3|3in1)";
    12. exit 1;
    13. }
    14. if [[ -z "${clustername}" ]]; then
    15. Help
    16. fi
    17. case ${clustername} in
    18. k8s1)
    19. endpoint="https://x.x.x.x:123456"
    20. ;;
    21. k8s2)
    22. endpoint="https://x.x.x.x:123456"
    23. ;;
    24. k8s3)
    25. endpoint="https://x.x.x.x:123456"
    26. ;;
    27. delk8s1)
    28. kubectl -n kube-system delete sa all-readonly-k8s1
    29. kubectl delete clusterrolebinding all-readonly-k8s1
    30. kubectl delete clusterrole all-readonly-k8s1
    31. echo "${clustername} successful."
    32. exit 0
    33. ;;
    34. delk8s2)
    35. kubectl -n kube-system delete sa all-readonly-k8s2
    36. kubectl delete clusterrolebinding all-readonly-k8s2
    37. kubectl delete clusterrole all-readonly-k8s2
    38. echo "${clustername} successful."
    39. exit 0
    40. ;;
    41. delk8s3)
    42. kubectl -n kube-system delete sa all-readonly-k8s3
    43. kubectl delete clusterrolebinding all-readonly-k8s3
    44. kubectl delete clusterrole all-readonly-k8s3
    45. echo "${clustername} successful."
    46. exit 0
    47. ;;
    48. 3in1)
    49. KUBECONFIG=./all-readonly-k8s1.conf:all-readonly-k8s2.conf:all-readonly-k8s3.conf kubectl config view --flatten > ./all-readonly-3in1.conf
    50. kubectl --kubeconfig=./all-readonly-3in1.conf config use-context "k8s3"
    51. kubectl --kubeconfig=./all-readonly-3in1.conf config set-context "k8s3" --namespace="default"
    52. kubectl --kubeconfig=./all-readonly-3in1.conf config get-contexts
    53. echo -e "\n\n\n"
    54. cat ./all-readonly-3in1.conf |base64 -w 0
    55. exit 0
    56. ;;
    57. *)
    58. Help
    59. esac
    60. echo "---
    61. apiVersion: rbac.authorization.k8s.io/v1
    62. kind: ClusterRole
    63. metadata:
    64. name: all-readonly-${clustername}
    65. rules:
    66. - apiGroups:
    67. - ''
    68. resources:
    69. - configmaps
    70. - endpoints
    71. - persistentvolumes
    72. - persistentvolumeclaims
    73. - pods
    74. - replicationcontrollers
    75. - replicationcontrollers/scale
    76. - serviceaccounts
    77. - services
    78. - nodes
    79. verbs:
    80. - get
    81. - list
    82. - watch
    83. - apiGroups:
    84. - ''
    85. resources:
    86. - bindings
    87. - events
    88. - limitranges
    89. - namespaces/status
    90. - pods/log
    91. - pods/status
    92. - replicationcontrollers/status
    93. - resourcequotas
    94. - resourcequotas/status
    95. verbs:
    96. - get
    97. - list
    98. - watch
    99. - apiGroups:
    100. - ''
    101. resources:
    102. - namespaces
    103. verbs:
    104. - get
    105. - list
    106. - watch
    107. - apiGroups:
    108. - apps
    109. resources:
    110. - controllerrevisions
    111. - daemonsets
    112. - deployments
    113. - deployments/scale
    114. - replicasets
    115. - replicasets/scale
    116. - statefulsets
    117. - statefulsets/scale
    118. verbs:
    119. - get
    120. - list
    121. - watch
    122. - apiGroups:
    123. - autoscaling
    124. resources:
    125. - horizontalpodautoscalers
    126. verbs:
    127. - get
    128. - list
    129. - watch
    130. - apiGroups:
    131. - batch
    132. resources:
    133. - cronjobs
    134. - jobs
    135. verbs:
    136. - get
    137. - list
    138. - watch
    139. - apiGroups:
    140. - extensions
    141. resources:
    142. - daemonsets
    143. - deployments
    144. - deployments/scale
    145. - ingresses
    146. - networkpolicies
    147. - replicasets
    148. - replicasets/scale
    149. - replicationcontrollers/scale
    150. verbs:
    151. - get
    152. - list
    153. - watch
    154. - apiGroups:
    155. - policy
    156. resources:
    157. - poddisruptionbudgets
    158. verbs:
    159. - get
    160. - list
    161. - watch
    162. - apiGroups:
    163. - networking.k8s.io
    164. resources:
    165. - networkpolicies
    166. verbs:
    167. - get
    168. - list
    169. - watch
    170. - apiGroups:
    171. - metrics.k8s.io
    172. resources:
    173. - pods
    174. verbs:
    175. - get
    176. - list
    177. - watch" | kubectl apply -f -
    178. kubectl -n kube-system create sa all-readonly-${clustername}
    179. kubectl create clusterrolebinding all-readonly-${clustername} --clusterrole=all-readonly-${clustername} --serviceaccount=kube-system:all-readonly-${clustername}
    180. tokenName=$(kubectl -n kube-system get sa all-readonly-${clustername} -o "jsonpath={.secrets[0].name}")
    181. token=$(kubectl -n kube-system get secret $tokenName -o "jsonpath={.data.token}" | base64 --decode)
    182. certificate=$(kubectl -n kube-system get secret $tokenName -o "jsonpath={.data['ca\.crt']}")
    183. echo "apiVersion: v1
    184. kind: Config
    185. preferences: {}
    186. clusters:
    187. - cluster:
    188. certificate-authority-data: $certificate
    189. server: $endpoint
    190. name: all-readonly-${clustername}-cluster
    191. users:
    192. - name: all-readonly-${clustername}
    193. user:
    194. as-user-extra: {}
    195. client-key-data: $certificate
    196. token: $token
    197. contexts:
    198. - context:
    199. cluster: all-readonly-${clustername}-cluster
    200. user: all-readonly-${clustername}
    201. name: ${clustername}
    202. current-context: ${clustername}" > ./all-readonly-${clustername}.conf

    5.命令测试

    1. [root@k8s-m01 rbac]# kubectl --kubeconfig=./kube_config/web.kube.conf get pod
    2. No resources found in web namespace.
    3. [root@k8s-m01 rbac]#
    4. [root@k8s-m01 rbac]#
    5. [root@k8s-m01 rbac]# kubectl --kubeconfig=./kube_config/web.kube.conf get pod -A
    6. Error from server (Forbidden): pods is forbidden: User "system:serviceaccount:web:web-user" cannot list resource "pods" in API group "" at the cluster scope
    7. [root@k8s-m01 rbac]# kubectl -n web get sa
    8. NAME SECRETS AGE
    9. default 1 119m
    10. web-user 1 42m
    11. [root@k8s-m01 rbac]#
    12. [root@k8s-m01 rbac]# kubectl -n web get role
    13. NAME CREATED AT
    14. web-user-full-access 2022-07-28T15:09:05Z
    15. [root@k8s-m01 rbac]#
    16. [root@k8s-m01 rbac]# kubectl -n web get rolebindings.rbac.authorization.k8s.io
    17. NAME ROLE AGE
    18. web-user-view Role/web-user-full-access 42m
    19. [root@k8s-m01 rbac]#

    6.rules 中的参数说明如下:


    apiGroups: 资源所属的API组列表,例如 ''(核心组,对应pod、service等)、'apps'、'batch'、'extensions'、'metrics.k8s.io' 等。
    resources: 支持的资源对象列表,例如 pods、deployments、secrets、jobs、configmaps、endpoints、persistentvolumeclaims、replicationcontrollers、statefulsets、namespaces 等。
    verbs: 对资源对象的操作方法列表,例如 get获取、list列表、watch监视、patch修补、delete删除、update更新、create创建、deletecollection批量删除、replace替换等。三者的组合方式可参考下面的最小示例。
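    下面给出一个最小的 Role 示意,把这三个参数放在一起看(内容取自上文的只读 Role,名称、namespace 仅为示例):

    kind: Role
    apiVersion: rbac.authorization.k8s.io/v1
    metadata:
      name: demo-readonly              # 示例名称,可按需调整
      namespace: web                   # 示例命名空间
    rules:
    - apiGroups: ['']                  # 核心API组(pod、service等)
      resources: ['pods', 'pods/log']
      verbs: ['get', 'list', 'watch']
    - apiGroups: ['batch']             # batch组(job、cronjob)
      resources: ['jobs', 'cronjobs']
      verbs: ['get', 'list', 'watch']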

    九、k8s业务日志收集上节介绍、下节实战

    日志收集

    现在市面上大多数课程都是以EFK来作为K8s项目的日志解决方案,它包括三个组件:Elasticsearch、Fluentd(或filebeat)、Kibana;Elasticsearch 是日志存储和日志搜索引擎,Fluentd 负责把k8s集群的日志发送给 Elasticsearch,Kibana 则提供可视化界面,用来查看和检索存储在 Elasticsearch 中的数据。

    但根据生产中实际使用情况来看,它有以下弊端:

    1、日志收集系统 EFK 是在每个kubernetes的NODE节点上以daemonset的形式启动一个fluentd的pod,来收集NODE节点上的日志,如容器日志(/var/log/containers/*.log),但无法做细分,想要的和不想要的都会被收集进来,带来的后果就是磁盘IO压力比较大,日志过滤也麻烦。

    2、无法收集对应POD里面的业务日志。上面第1点只能收集pod的stdout日志,如果pod内有需要收集的业务日志,比如pod内的/tmp/datalog/*.log,那EFK是无能为力的,只能在pod内再启动一个容器(filebeat)去收集容器内日志,但这又会带来pod多容器的性能损耗,这个接下来会详细讲到。

    3、fluentd的采集速率较低,性能不到filebeat的1/10。

    基于此,我通过调研发现了阿里开源的智能容器采集工具 Log-Pilot,github地址:
    https://github.com/AliyunContainerService/log-pilot

    下面以sidecar 模式和log-pilot这两种方式的日志收集形式做个详细对比说明:

    第一种模式是 sidecar 模式,这种需要我们在每个 Pod 中都附带一个 logging 容器来进行本 Pod 内部容器的日志采集,一般采用共享卷的方式,但是对于这一种模式来说,很明显的一个问题就是占用的资源比较多,尤其是在集群规模比较大的情况下,或者说单个节点上容器特别多的情况下,它会占用过多的系统资源,同时也对日志存储后端占用过多的连接数。当我们的集群规模越大,这种部署模式引发的潜在问题就越大。

    92a2c4d4a5d641a89ef349eb1385e82e.png
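    为了更直观,下面给出一个 sidecar 模式的最小示意(假设用 filebeat 容器作为日志收集 sidecar,通过 emptyDir 共享业务日志目录;镜像、路径均为示例,filebeat 实际还需要另外挂载自己的采集配置):

    apiVersion: v1
    kind: Pod
    metadata:
      name: app-with-log-sidecar           # 示例名称
    spec:
      containers:
      - name: app                          # 业务容器,把日志写到共享目录
        image: tomcat:7.0
        volumeMounts:
        - name: applog
          mountPath: /usr/local/tomcat/logs
      - name: log-sidecar                  # 日志收集sidecar,读取同一个目录
        image: elastic/filebeat:6.8.13     # 示例镜像,实际按需选择
        volumeMounts:
        - name: applog
          mountPath: /logs
          readOnly: true
      volumes:
      - name: applog
        emptyDir: {}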

    另一种模式是 Node 模式,这种模式是在每个 Node 节点上仅部署一个 logging 容器,来进行本 Node 上所有容器的日志采集。跟前面的模式相比,最明显的优势就是占用资源比较少,集群规模越大优势越明显,这也是社区推荐的一种模式。

    495965e2bcb94165ba0a7ad9e879c2e7.png

    经过多方面测试,log-pilot对现有业务pod的侵入性很小,只需要在原有pod内传入几行env环境变量,即可对该pod相关的日志进行收集。已经测试过的后端接收工具有logstash、elasticsearch、kafka、redis、file,均OK,下面开始部署整个日志收集环境。

    我们这里用一个tomcat服务来模拟业务服务,用log-pilot分别收集它的stdout以及容器内的业务数据日志文件到指定后端存储(这里分别以elasticsearch、kafka的这两种企业常用的接收工具来做示例)

    准备好相应的yaml配置

    vi tomcat-test.yaml

    1. apiVersion: apps/v1
    2. kind: Deployment
    3. metadata:
    4. labels:
    5. app: tomcat
    6. name: tomcat
    7. spec:
    8. replicas: 1
    9. selector:
    10. matchLabels:
    11. app: tomcat
    12. template:
    13. metadata:
    14. labels:
    15. app: tomcat
    16. spec:
    17. tolerations:
    18. - key: "node-role.kubernetes.io/master"
    19. effect: "NoSchedule"
    20. containers:
    21. - name: tomcat
    22. image: "tomcat:7.0"
    23. env: # 注意点一,添加相应的环境变量(下面收集了两块日志1、stdout 2、/usr/local/tomcat/logs/catalina.*.log)
    24. - name: aliyun_logs_tomcat-syslog # 如日志发送到es,那index名称为 tomcat-syslog
    25. value: "stdout"
    26. - name: aliyun_logs_tomcat-access # 如日志发送到es,那index名称为 tomcat-access
    27. value: "/usr/local/tomcat/logs/catalina.*.log"
    28. volumeMounts: # 注意点二,对pod内要收集的业务日志目录需要进行共享,可以收集多个目录下的日志文件
    29. - name: tomcat-log
    30. mountPath: /usr/local/tomcat/logs
    31. volumes:
    32. - name: tomcat-log
    33. emptyDir: {}

    #运行yaml
    kubectl apply -f tomcat-test.yaml

    #查看pod
    kubectl get pod

    #查看日志
    kubectl logs tomcat-7cd6957667-bjx9r

    #进入容器看看日志文件
    kubectl exec -it tomcat-7cd6957667-bjx9r -- /bin/bash

    ls -l /usr/local/tomcat/logs/
    exit

    vi elasticsearch.6.8.13-statefulset.yaml

    1. apiVersion: apps/v1
    2. kind: StatefulSet
    3. metadata:
    4. labels:
    5. addonmanager.kubernetes.io/mode: Reconcile
    6. k8s-app: elasticsearch-logging
    7. version: v6.8.13
    8. name: elasticsearch-logging
    9. # namespace: logging
    10. spec:
    11. replicas: 1
    12. revisionHistoryLimit: 10
    13. selector:
    14. matchLabels:
    15. k8s-app: elasticsearch-logging
    16. version: v6.8.13
    17. serviceName: elasticsearch-logging
    18. template:
    19. metadata:
    20. labels:
    21. k8s-app: elasticsearch-logging
    22. version: v6.8.13
    23. spec:
    24. # nodeSelector:
    25. # esnode: "true" ## 注意给想要运行到的node打上相应labels
    26. containers:
    27. - env:
    28. - name: NAMESPACE
    29. valueFrom:
    30. fieldRef:
    31. apiVersion: v1
    32. fieldPath: metadata.namespace
    33. - name: cluster.name
    34. value: elasticsearch-logging-0
    35. - name: ES_JAVA_OPTS
    36. value: "-Xms512m -Xmx512m"
    37. image: elastic/elasticsearch:6.8.13
    38. name: elasticsearch-logging
    39. ports:
    40. - containerPort: 9200
    41. name: db
    42. protocol: TCP
    43. - containerPort: 9300
    44. name: transport
    45. protocol: TCP
    46. volumeMounts:
    47. - mountPath: /usr/share/elasticsearch/data
    48. name: elasticsearch-logging
    49. dnsConfig:
    50. options:
    51. - name: single-request-reopen
    52. initContainers:
    53. - command:
    54. - /bin/sysctl
    55. - -w
    56. - vm.max_map_count=262144
    57. image: busybox
    58. imagePullPolicy: IfNotPresent
    59. name: elasticsearch-logging-init
    60. resources: {}
    61. securityContext:
    62. privileged: true
    63. - name: fix-permissions
    64. image: busybox
    65. command: ["sh", "-c", "chown -R 1000:1000 /usr/share/elasticsearch/data"]
    66. securityContext:
    67. privileged: true
    68. volumeMounts:
    69. - name: elasticsearch-logging
    70. mountPath: /usr/share/elasticsearch/data
    71. volumes:
    72. - name: elasticsearch-logging
    73. hostPath:
    74. path: /esdata
    75. ---
    76. apiVersion: v1
    77. kind: Service
    78. metadata:
    79. labels:
    80. k8s-app: elasticsearch-logging
    81. name: elasticsearch
    82. # namespace: logging
    83. spec:
    84. ports:
    85. - port: 9200
    86. protocol: TCP
    87. targetPort: db
    88. selector:
    89. k8s-app: elasticsearch-logging
    90. type: ClusterIP

    kubectl apply -f elasticsearch.6.8.13-statefulset.yaml

    kubectl get pod

    vim kibana.6.8.13.yaml

    1. apiVersion: apps/v1
    2. kind: Deployment
    3. metadata:
    4. name: kibana
    5. # namespace: logging
    6. labels:
    7. app: kibana
    8. spec:
    9. selector:
    10. matchLabels:
    11. app: kibana
    12. template:
    13. metadata:
    14. labels:
    15. app: kibana
    16. spec:
    17. containers:
    18. - name: kibana
    19. image: elastic/kibana:6.8.13
    20. resources:
    21. limits:
    22. cpu: 1000m
    23. requests:
    24. cpu: 100m
    25. env:
    26. - name: ELASTICSEARCH_URL
    27. value: http://elasticsearch:9200
    28. ports:
    29. - containerPort: 5601
    30. ---
    31. apiVersion: v1
    32. kind: Service
    33. metadata:
    34. name: kibana
    35. # namespace: logging
    36. labels:
    37. app: kibana
    38. spec:
    39. ports:
    40. - port: 5601
    41. protocol: TCP
    42. targetPort: 5601
    43. type: ClusterIP
    44. selector:
    45. app: kibana
    46. ---
    47. apiVersion: extensions/v1beta1
    48. kind: Ingress
    49. metadata:
    50. name: kibana
    51. # namespace: logging
    52. spec:
    53. rules:
    54. - host: kibana.boge.com
    55. http:
    56. paths:
    57. - path: /
    58. backend:
    59. serviceName: kibana
    60. servicePort: 5601

    vi log-pilot.yml        # 后端输出到elasticsearch

    1. apiVersion: apps/v1
    2. kind: DaemonSet
    3. metadata:
    4. name: log-pilot
    5. labels:
    6. app: log-pilot
    7. # 设置期望部署的namespace
    8. # namespace: ns-elastic
    9. spec:
    10. selector:
    11. matchLabels:
    12. app: log-pilot
    13. updateStrategy:
    14. type: RollingUpdate
    15. template:
    16. metadata:
    17. labels:
    18. app: log-pilot
    19. annotations:
    20. scheduler.alpha.kubernetes.io/critical-pod: ''
    21. spec:
    22. # 是否允许部署到Master节点上
    23. #tolerations:
    24. #- key: node-role.kubernetes.io/master
    25. # effect: NoSchedule
    26. containers:
    27. - name: log-pilot
    28. # 版本请参考https://github.com/AliyunContainerService/log-pilot/releases
    29. image: registry.cn-hangzhou.aliyuncs.com/acs/log-pilot:0.9.7-filebeat
    30. resources:
    31. limits:
    32. memory: 500Mi
    33. requests:
    34. cpu: 200m
    35. memory: 200Mi
    36. env:
    37. - name: "NODE_NAME"
    38. valueFrom:
    39. fieldRef:
    40. fieldPath: spec.nodeName
    41. ##--------------------------------
    42. # - name: "LOGGING_OUTPUT"
    43. # value: "logstash"
    44. # - name: "LOGSTASH_HOST"
    45. # value: "logstash-g1"
    46. # - name: "LOGSTASH_PORT"
    47. # value: "5044"
    48. ##--------------------------------
    49. - name: "LOGGING_OUTPUT"
    50. value: "elasticsearch"
    51. ## 请确保集群到ES网络可达
    52. - name: "ELASTICSEARCH_HOSTS"
    53. value: "elasticsearch:9200"
    54. ## 配置ES访问权限
    55. #- name: "ELASTICSEARCH_USER"
    56. # value: "{es_username}"
    57. #- name: "ELASTICSEARCH_PASSWORD"
    58. # value: "{es_password}"
    59. ##--------------------------------
    60. ## https://github.com/AliyunContainerService/log-pilot/blob/master/docs/filebeat/docs.md
    61. ## to file need configure 1
    62. # - name: LOGGING_OUTPUT
    63. # value: file
    64. # - name: FILE_PATH
    65. # value: /tmp
    66. # - name: FILE_NAME
    67. # value: filebeat.log
    68. volumeMounts:
    69. - name: sock
    70. mountPath: /var/run/docker.sock
    71. - name: root
    72. mountPath: /host
    73. readOnly: true
    74. - name: varlib
    75. mountPath: /var/lib/filebeat
    76. - name: varlog
    77. mountPath: /var/log/filebeat
    78. - name: localtime
    79. mountPath: /etc/localtime
    80. readOnly: true
    81. ## to file need configure 2
    82. # - mountPath: /tmp
    83. # name: mylog
    84. livenessProbe:
    85. failureThreshold: 3
    86. exec:
    87. command:
    88. - /pilot/healthz
    89. initialDelaySeconds: 10
    90. periodSeconds: 10
    91. successThreshold: 1
    92. timeoutSeconds: 2
    93. securityContext:
    94. capabilities:
    95. add:
    96. - SYS_ADMIN
    97. terminationGracePeriodSeconds: 30
    98. volumes:
    99. - name: sock
    100. hostPath:
    101. path: /var/run/docker.sock
    102. - name: root
    103. hostPath:
    104. path: /
    105. - name: varlib
    106. hostPath:
    107. path: /var/lib/filebeat
    108. type: DirectoryOrCreate
    109. - name: varlog
    110. hostPath:
    111. path: /var/log/filebeat
    112. type: DirectoryOrCreate
    113. - name: localtime
    114. hostPath:
    115. path: /etc/localtime
    116. ## to file need configure 3
    117. # - hostPath:
    118. # path: /tmp/mylog
    119. # type: ""
    120. # name: mylog

    kubectl apply -f log-pilot.yml

    kubectl get pod

    节点加入hosts解析
    echo "20.6.100.226 kibana.boge.com" >> /etc/hosts

    eb3c20a1545f4a0ea0d95b4a63c87d2b.png

    4.进入页面

    f1a429ddb6754b138b2396535b816094.png

    5.定义日志名称

    9e9cb9383e234dd5abee33f4edca3784.png

    6.创建日期

    85634154b59142358f80e3bc81b82b23.png

     7.查看日志展示
    d0a1b35756604c8181f39c232d5e3793.png

    删除测试环境
    kubectl delete -f log-pilot.yml
    kubectl delete -f kibana.6.8.13.yaml
    kubectl delete -f elasticsearch.6.8.13-statefulset.yaml
    kubectl delete -f tomcat-test.yaml

    8.第2种:后端输出到kafka

    kubectl apply -f tomcat-test.yaml

    vi log-pilot2-kafka.yaml

    1. ---
    2. apiVersion: v1
    3. kind: ConfigMap
    4. metadata:
    5. name: log-pilot2-configuration
    6. #namespace: ns-elastic
    7. data:
    8. logging_output: "kafka"
    9. kafka_brokers: "10.0.1.204:9092"
    10. kafka_version: "0.10.0"
    11. # configure all valid topics in kafka
    12. # when disable auto-create topic
    13. kafka_topics: "tomcat-syslog,tomcat-access"
    14. ---
    15. apiVersion: apps/v1
    16. kind: DaemonSet
    17. metadata:
    18. name: log-pilot2
    19. #namespace: ns-elastic
    20. labels:
    21. k8s-app: log-pilot2
    22. spec:
    23. selector:
    24. matchLabels:
    25. k8s-app: log-pilot2
    26. updateStrategy:
    27. type: RollingUpdate
    28. template:
    29. metadata:
    30. labels:
    31. k8s-app: log-pilot2
    32. spec:
    33. tolerations:
    34. - key: node-role.kubernetes.io/master
    35. effect: NoSchedule
    36. containers:
    37. - name: log-pilot2
    38. #
    39. # wget https://github.com/AliyunContainerService/log-pilot/archive/v0.9.7.zip
    40. # unzip log-pilot-0.9.7.zip
    41. # vim ./log-pilot-0.9.7/assets/filebeat/config.filebeat
    42. # ...
    43. # output.kafka:
    44. # hosts: [$KAFKA_BROKERS]
    45. # topic: '%{[topic]}'
    46. # codec.format:
    47. # string: '%{[message]}'
    48. # ...
    49. image: registry.cn-hangzhou.aliyuncs.com/acs/log-pilot:0.9.7-filebeat
    50. env:
    51. - name: "LOGGING_OUTPUT"
    52. valueFrom:
    53. configMapKeyRef:
    54. name: log-pilot2-configuration
    55. key: logging_output
    56. - name: "KAFKA_BROKERS"
    57. valueFrom:
    58. configMapKeyRef:
    59. name: log-pilot2-configuration
    60. key: kafka_brokers
    61. - name: "KAFKA_VERSION"
    62. valueFrom:
    63. configMapKeyRef:
    64. name: log-pilot2-configuration
    65. key: kafka_version
    66. - name: "NODE_NAME"
    67. valueFrom:
    68. fieldRef:
    69. fieldPath: spec.nodeName
    70. volumeMounts:
    71. - name: sock
    72. mountPath: /var/run/docker.sock
    73. - name: logs
    74. mountPath: /var/log/filebeat
    75. - name: state
    76. mountPath: /var/lib/filebeat
    77. - name: root
    78. mountPath: /host
    79. readOnly: true
    80. - name: localtime
    81. mountPath: /etc/localtime
    82. # configure all valid topics in kafka
    83. # when disable auto-create topic
    84. - name: config-volume
    85. mountPath: /etc/filebeat/config
    86. securityContext:
    87. capabilities:
    88. add:
    89. - SYS_ADMIN
    90. terminationGracePeriodSeconds: 30
    91. volumes:
    92. - name: sock
    93. hostPath:
    94. path: /var/run/docker.sock
    95. type: Socket
    96. - name: logs
    97. hostPath:
    98. path: /var/log/filebeat
    99. type: DirectoryOrCreate
    100. - name: state
    101. hostPath:
    102. path: /var/lib/filebeat
    103. type: DirectoryOrCreate
    104. - name: root
    105. hostPath:
    106. path: /
    107. type: Directory
    108. - name: localtime
    109. hostPath:
    110. path: /etc/localtime
    111. type: File
    112. # kubelet sync period
    113. - name: config-volume
    114. configMap:
    115. name: log-pilot2-configuration
    116. items:
    117. - key: kafka_topics
    118. path: kafka_topics

    9.准备一个测试用的kafka服务

    1. # 部署前准备
    2. # 0. 先把代码pull到本地
    3. # https://github.com/wurstmeister/kafka-docker
    4. # 修改docker-compose.yml为:
    5. #——------------------------------
    6. version: '2'
    7. services:
    8. zookeeper:
    9. image: wurstmeister/zookeeper
    10. ports:
    11. - "2181:2181"
    12. kafka:
    13. #build: .
    14. image: wurstmeister/kafka
    15. ports:
    16. - "9092:9092"
    17. environment:
    18. KAFKA_ADVERTISED_HOST_NAME: 10.0.1.204 # docker运行的机器IP
    19. KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    20. volumes:
    21. - /var/run/docker.sock:/var/run/docker.sock
    22. - /nfs_storageclass/kafka:/kafka
    23. #——------------------------------
    24. #启动
    25. # docker-compose up -d
    26. #删除
    27. #docker-compose down -v
    28. #重启
    29. #docker-compose up -d
    30. # 2. result look:
    31. #查看启动进程
    32. # docker ps |grep zook
    33. #docker ps |grep ka
    34. #进入kafka
    35. bash-4.4# docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -e HOST_IP=10.0.1.204 -e ZK=10.0.1.204:2181 -i -t wurstmeister/kafka /bin/bash
    36. # 4. list topic
    37. bash-4.4# kafka-topics.sh --zookeeper 10.0.1.204:2181 --list
    38. tomcat-access
    39. tomcat-syslog
    40. # 5. consumer topic data:
    41. bash-4.4# kafka-console-consumer.sh --bootstrap-server 10.0.1.204:9092 --topic tomcat-access --from-beginning

    十、k8s的Prometheus监控实战

    1. 链接:https://pan.baidu.com/s/1O-RQd3n2HJrOdRbElidnEA?pwd=hoqv
    2. 提取码:hoqv

    1.导入上传tar包

    sudo docker load -i alertmanager-v0.21.0.tar
    sudo docker load -i grafana-7.3.4.tar
    sudo docker load -i k8s-prometheus-adapter-v0.8.2.tar
    sudo docker load -i kube-rbac-proxy-v0.8.0.tar
    sudo docker load -i kube-state-metrics-v1.9.7.tar
    sudo docker load -i node-exporter-v1.0.1.tar
    sudo docker load -i prometheus-config-reloader-v0.43.2.tar
    sudo docker load -i prometheus_demo_service.tar
    sudo docker load -i prometheus-operator-v0.43.2.tar
    sudo docker load -i prometheus-v2.22.1.tar

    2.解压下载的代码包

    sudo unzip kube-prometheus-master.zip
    sudo rm -f kube-prometheus-master.zip && cd kube-prometheus-master

    3.这里建议先看下有哪些镜像,便于在下载镜像快的节点上先收集好所有需要的离线docker镜像

    1. find ./ -type f |xargs grep 'image: '|sort|uniq|awk '{print $3}'|grep ^[a-zA-Z]|grep -Evw 'error|kubeRbacProxy'|sort -rn|uniq

    4.开始创建所有服务

    1. kubectl create -f manifests/setup
    2. kubectl create -f manifests/
    3. #过一会查看创建结果:
    4. kubectl -n monitoring get all
    5. # 附:清空上面部署的prometheus所有服务:
    6. # kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup

    5.访问下prometheus的UI

    1. # 修改下prometheus UI的service模式,便于我们访问
    2. # kubectl -n monitoring patch svc prometheus-k8s -p '{"spec":{"type":"NodePort"}}'
    3. service/prometheus-k8s patched
    4. # kubectl -n monitoring get svc prometheus-k8s
    5. NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
    6. prometheus-k8s NodePort 10.68.23.79 9090:22129/TCP 7m43s

    6.然后因为K8s的kube-controller-manager和kube-scheduler这两个核心组件我们是以二进制形式部署的,为了能让K8s上的prometheus发现它们,我们还需要创建相应的service和endpoints将其关联起来
    注意:需要将endpoints里面的master IP换成实际环境的IP

    1. apiVersion: v1
    2. kind: Service
    3. metadata:
    4. namespace: kube-system
    5. name: kube-controller-manager
    6. labels:
    7. k8s-app: kube-controller-manager
    8. spec:
    9. type: ClusterIP
    10. clusterIP: None
    11. ports:
    12. - name: http-metrics
    13. port: 10252
    14. targetPort: 10252
    15. protocol: TCP
    16. ---
    17. apiVersion: v1
    18. kind: Endpoints
    19. metadata:
    20. labels:
    21. k8s-app: kube-controller-manager
    22. name: kube-controller-manager
    23. namespace: kube-system
    24. subsets:
    25. - addresses:
    26. - ip: 10.0.1.201
    27. - ip: 10.0.1.202
    28. ports:
    29. - name: http-metrics
    30. port: 10252
    31. protocol: TCP
    32. ---
    33. apiVersion: v1
    34. kind: Service
    35. metadata:
    36. namespace: kube-system
    37. name: kube-scheduler
    38. labels:
    39. k8s-app: kube-scheduler
    40. spec:
    41. type: ClusterIP
    42. clusterIP: None
    43. ports:
    44. - name: http-metrics
    45. port: 10251
    46. targetPort: 10251
    47. protocol: TCP
    48. ---
    49. apiVersion: v1
    50. kind: Endpoints
    51. metadata:
    52. labels:
    53. k8s-app: kube-scheduler
    54. name: kube-scheduler
    55. namespace: kube-system
    56. subsets:
    57. - addresses:
    58. - ip: 10.0.1.201
    59. - ip: 10.0.1.202
    60. ports:
    61. - name: http-metrics
    62. port: 10251
    63. protocol: TCP

    7.将上面的yaml配置保存为repair-prometheus.yaml,然后创建它

    kubectl apply -f repair-prometheus.yaml

    8.记得还要修改一个地方

    # kubectl -n monitoring edit servicemonitors.monitoring.coreos.com kube-scheduler
    # 将下面两个地方的https换成http
        port: https-metrics
        scheme: https

    # kubectl -n monitoring edit servicemonitors.monitoring.coreos.com kube-controller-manager
    # 将下面两个地方的https换成http
        port: https-metrics
        scheme: https
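
    改完之后,这两个 ServiceMonitor 的 endpoints 部分大致如下(示意,其余字段保持不变):

    spec:
      endpoints:
      - port: http-metrics    # 原来是 https-metrics
        scheme: http          # 原来是 https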

    9.然后再返回prometheus UI处,耐心等待几分钟,就能看到已经被发现了

    monitoring/kube-controller-manager/0 (2/2 up)
    monitoring/kube-scheduler/0 (2/2 up)

    10.使用prometheus来监控ingress-nginx

    我们前面部署过ingress-nginx,它是整个K8s上所有服务的流量入口组件,非常关键,因此把它的metrics指标收集到prometheus、做好相关监控至关重要。因为前面的ingress-nginx服务是以daemonset形式部署的,并且把自己的端口映射到了宿主机上,所以可以直接用运行pod的NODE的IP来看下metrics:

    curl 10.0.1.201:10254/metrics
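
    下一步的 ServiceMonitor 是通过 Service 来发现抓取目标的,如果 ingress-nginx 命名空间里还没有暴露 10254 metrics 端口的 Service,可以先参考下面的示意创建一个(名称为示例,标签和端口名需与实际部署及下面的 ServiceMonitor 保持一致):

    apiVersion: v1
    kind: Service
    metadata:
      name: ingress-nginx-metrics        # 示例名称
      namespace: ingress-nginx
      labels:
        app: ingress-nginx               # 需与 ServiceMonitor 的 selector 匹配
    spec:
      selector:
        app: ingress-nginx               # 需与 ingress-nginx pod 的标签一致
      ports:
      - name: metrics                    # 端口名需与 ServiceMonitor 里的 port 字段一致
        port: 10254
        targetPort: 10254
        protocol: TCP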

    11.创建 servicemonitor配置让prometheus能发现ingress-nginx的metrics

    1. # vim servicemonitor.yaml
    2. apiVersion: monitoring.coreos.com/v1
    3. kind: ServiceMonitor
    4. metadata:
    5. labels:
    6. app: ingress-nginx
    7. name: nginx-ingress-scraping
    8. namespace: ingress-nginx
    9. spec:
    10. endpoints:
    11. - interval: 30s
    12. path: /metrics
    13. port: metrics
    14. jobLabel: app
    15. namespaceSelector:
    16. matchNames:
    17. - ingress-nginx
    18. selector:
    19. matchLabels:
    20. app: ingress-nginx

    12.创建它

    # kubectl apply -f servicemonitor.yaml
    servicemonitor.monitoring.coreos.com/nginx-ingress-scraping created
    # kubectl -n ingress-nginx get servicemonitors.monitoring.coreos.com
    NAME                     AGE
    nginx-ingress-scraping   8s

    13.指标一直没收集上来,看看prometheus服务的日志,发现报错如下:

    # kubectl -n monitoring logs prometheus-k8s-0 -c prometheus |grep error

    level=error ts=2020-12-13T09:52:35.565Z caller=klog.go:96 component=k8s_client_runtime func=ErrorDepth msg="/app/discovery/kubernetes/kubernetes.go:426: Failed to watch *v1.Endpoints: failed to list *v1.Endpoints: endpoints is forbidden: User \"system:serviceaccount:monitoring:prometheus-k8s\" cannot list resource \"endpoints\" in API group \"\" in the namespace \"ingress-nginx\""

    14.需要修改prometheus的clusterrole

    #   kubectl edit clusterrole prometheus-k8s
    #------ 原始的rules -------
    rules:
    - apiGroups:
      - ""
      resources:
      - nodes/metrics
      verbs:
      - get
    - nonResourceURLs:
      - /metrics
      verbs:
      - get
    #---------------------------

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
      name: prometheus-k8s
    rules:
    - apiGroups:
      - ""
      resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
      verbs:
      - get
      - list
      - watch
    - apiGroups:
      - ""
      resources:
      - configmaps
      - nodes/metrics
      verbs:
      - get
    - nonResourceURLs:
      - /metrics
      verbs:
      - get
     

    15.再到prometheus UI上看下,发现已经有了

    ingress-nginx/nginx-ingress-scraping/0 (1/1 up)

    16.使用Prometheus来监控二进制部署的ETCD集群

    作为K8s所有资源存储的关键服务ETCD,我们也有必要把它给监控起来,正好借这个机会,完整的演示一次利用Prometheus来监控非K8s集群服务的步骤

    在前面部署K8s集群的时候,我们是用二进制的方式部署的ETCD集群,并且利用自签证书来配置访问ETCD,正如前面所说,现在关键的服务基本都会留有指标metrics接口支持prometheus的监控,利用下面命令,我们可以看到ETCD都暴露出了哪些监控指标出来

    curl --cacert /etc/kubernetes/ssl/ca.pem --cert /etc/kubeasz/clusters/test/ssl/etcd.pem  --key /etc/kubeasz/clusters/test/ssl/etcd-key.pem https://10.0.1.201:2379/metrics

    17.上面查看没问题后,接下来我们开始进行配置使ETCD能被prometheus发现并监控

    # 首先把ETCD的证书创建为secret
    kubectl -n monitoring create secret generic etcd-certs --from-file=/etc/kubeasz/clusters/test/ssl/etcd.pem   --from-file=/etc/kubeasz/clusters/test/ssl/etcd-key.pem   --from-file=/etc/kubeasz/clusters/test/ssl/ca.pem

    # 接着在prometheus里面引用这个secrets
    kubectl -n monitoring edit prometheus k8s

    spec:
    ...
      secrets:
      - etcd-certs

    # 保存退出后,prometheus会自动重启服务pod以加载这个secret配置,过一会,我们进pod来查看下是不是已经加载到ETCD的证书了
    # kubectl -n monitoring exec -it prometheus-k8s-0 -c prometheus  -- sh
    /prometheus $ ls /etc/prometheus/secrets/etcd-certs/
    ca.pem        etcd-key.pem  etcd.pem

    18.接下来准备创建service、endpoints以及ServiceMonitor的yaml配置

    注意替换下面的NODE节点IP为实际ETCD所在NODE内网IP

    # vim prometheus-etcd.yaml
    apiVersion: v1
    kind: Service
    metadata:
      name: etcd-k8s
      namespace: monitoring
      labels:
        k8s-app: etcd
    spec:
      type: ClusterIP
      clusterIP: None
      ports:
      - name: api
        port: 2379
        protocol: TCP
    ---
    apiVersion: v1
    kind: Endpoints
    metadata:
      name: etcd-k8s
      namespace: monitoring
      labels:
        k8s-app: etcd
    subsets:
    - addresses:
      - ip: 10.0.1.201
      - ip: 10.0.1.202
      - ip: 10.0.1.203
      ports:
      - name: api
        port: 2379
        protocol: TCP
    ---
    apiVersion: monitoring.coreos.com/v1
    kind: ServiceMonitor
    metadata:
      name: etcd-k8s
      namespace: monitoring
      labels:
        k8s-app: etcd-k8s
    spec:
      jobLabel: k8s-app
      endpoints:
      - port: api
        interval: 30s
        scheme: https
        tlsConfig:
          caFile: /etc/prometheus/secrets/etcd-certs/ca.pem
          certFile: /etc/prometheus/secrets/etcd-certs/etcd.pem
          keyFile: /etc/prometheus/secrets/etcd-certs/etcd-key.pem
          #use insecureSkipVerify only if you cannot use a Subject Alternative Name
          insecureSkipVerify: true
      selector:
        matchLabels:
          k8s-app: etcd
      namespaceSelector:
        matchNames:
        - monitoring

    19.开始创建上面的资源

    kubectl apply -f prometheus-etcd.yaml

    #过一会,就可以在prometheus UI上面看到ETCD集群被监控了

    monitoring/etcd-k8s/0 (3/3 up)

    20.接下来我们用grafana来展示被监控的ETCD指标


    查看grafana端口
    kubectl -n monitoring get svc |grep grafana

    1. 在grafana官网模板中心搜索etcd,下载这个json格式的模板文件
    https://grafana.com/dashboards/3070

    2.然后打开自己先部署的grafana首页,
    点击左边菜单栏四个小正方形方块HOME --- Manage
    再点击右边 Import dashboard ---
    点击Upload .json File 按钮,上传上面下载好的json文件 etcd_rev3.json,
    然后在prometheus选择数据来源
    点击Import,即可显示etcd集群的图形监控信息

    21.prometheus监控数据以及grafana配置持久化存储配置

    这节实战课给大家讲解下如何配置prometheus以及grafana的数据持久化。

    prometheus数据持久化配置

    # 注意这下面的statefulset服务就是我们需要做数据持久化的地方
    # kubectl -n monitoring get statefulset,pod|grep prometheus-k8s
    statefulset.apps/prometheus-k8s      2/2     5h41m
    pod/prometheus-k8s-0                       2/2     Running   1          19m
    pod/prometheus-k8s-1                       2/2     Running   1          19m

    # 看下我们之前准备的StorageClass动态存储
    # kubectl get sc
    NAME       PROVISIONER          RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
    nfs-boge   nfs-provisioner-01   Retain          Immediate           false                  4d

    # 准备prometheus持久化的pvc配置
    # kubectl -n monitoring edit prometheus k8s

    spec:
    ......
      storage:
        volumeClaimTemplate:
          spec:
            accessModes: [ "ReadWriteOnce" ]
            storageClassName: "nfs-boge"
            resources:
              requests:
                storage: 1Gi

    # 上面修改保存退出后,过一会我们查看下pvc创建情况,以及pod内的数据挂载情况
    # kubectl -n monitoring get pvc
    NAME                                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
    prometheus-k8s-db-prometheus-k8s-0   Bound    pvc-055e6b11-31b7-4503-ba2b-4f292ba7bd06   1Gi        RWO            nfs-boge       17s
    prometheus-k8s-db-prometheus-k8s-1   Bound    pvc-249c344b-3ef8-4a5d-8003-b8ce8e282d32   1Gi        RWO            nfs-boge       17s


    # kubectl -n monitoring exec -it prometheus-k8s-0 -c prometheus -- sh
    /prometheus $ df -Th
    ......
    10.0.1.201:/nfs_dir/monitoring-prometheus-k8s-db-prometheus-k8s-0-pvc-055e6b11-31b7-4503-ba2b-4f292ba7bd06/prometheus-db
                         nfs4           97.7G      9.4G     88.2G  10% /prometheus

    22.grafana配置持久化存储配置

    # 保存pvc为grafana-pvc.yaml
    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: grafana
      namespace: monitoring
    spec:
      storageClassName: nfs-boge
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 1Gi

    # 开始创建pvc
    # kubectl apply -f grafana-pvc.yaml

    # 看下创建的pvc
    # kubectl -n monitoring get pvc
    NAME                                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
    grafana                              Bound    pvc-394a26e1-3274-4458-906e-e601a3cde50d   1Gi        RWX            nfs-boge       3s
    prometheus-k8s-db-prometheus-k8s-0   Bound    pvc-055e6b11-31b7-4503-ba2b-4f292ba7bd06   1Gi        RWO            nfs-boge       6m46s
    prometheus-k8s-db-prometheus-k8s-1   Bound    pvc-249c344b-3ef8-4a5d-8003-b8ce8e282d32   1Gi        RWO            nfs-boge       6m46s


    # 编辑grafana的deployment资源配置
    # kubectl -n monitoring edit deployments.apps grafana

    # 旧的配置
          volumes:
          - emptyDir: {}
            name: grafana-storage
    # 替换成新的配置
          volumes:
          - name: grafana-storage
            persistentVolumeClaim:
              claimName: grafana

    # 同时加入下面的env环境变量,将登陆密码进行固定修改
        spec:
          containers:
            - containerPort: 3000
              name: http
              protocol: TCP
          ......
            env:
            - name: GF_SECURITY_ADMIN_USER
              value: admin
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: admin321

    # 过一会,等grafana重启完成后,用上面的新密码进行登陆
    # kubectl -n monitoring get pod -w|grep grafana
    grafana-5698bf94f4-prbr2               0/1     Running   0          3s
    grafana-5698bf94f4-prbr2               1/1     Running   0          4s

    # 因为先前的数据并未持久化,所以会发现先导入的ETCD模板已消失,这时重新再导入一次,后面重启也不会丢了

    23.prometheus发送报警

    早期我们经常用邮箱接收报警邮件,但是报警不及时,而且目前各云平台对邮件发送限制还比较严格,所以目前在生产中用得更为多的是基于webhook来转发报警内容到企业中用的聊天工具中,比如钉钉、企业微信、飞书等。

    prometheus的报警组件是Alertmanager,它支持自定义webhook的方式来接受它发出的报警,它发出的日志json字段比较多,我们需要根据需要接收的app来做相应的日志清洗转发

    这里博哥将用golang结合Gin网络框架来编写一个日志清洗转发工具,分别对这几种常用的报警方式作详细的说明及实战

    下载boge-webhook.zip
    https://cloud.189.cn/t/B3EFZvnuMvuu (访问码:h1wx)

    首先看下报警规则及报警发送配置是什么样的
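
    报警发送配置方面,Alertmanager 是通过 alertmanager.yaml 里的 route 和 receivers(webhook_configs)来决定把报警发到哪里的,下面是一个最小示意(url 为假设的地址,需替换成后面部署的 webhook 转发服务的实际 svc 地址和路径):

    global:
      resolve_timeout: 5m
    route:
      receiver: 'webhook'                # 默认接收者
      group_by: ['alertname']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 12h
    receivers:
    - name: 'webhook'
      webhook_configs:
      - url: 'http://alertmanaer-dingtalk-svc/xxxx/prometheus/dingtalk'   # 假设的地址,按实际svc和路径填写
        send_resolved: true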

    prometheus-operator的规则非常齐全,基本属于开箱即用类型,大家可以根据日常收到的报警,对里面的rules报警规则作针对性的调整,比如把报警观察时长缩短一点等
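
    以"把报警观察时长缩短"为例,规则里对应的是 for 字段,下面是 prometheus-rules.yaml 中一条规则的示意写法(规则名、表达式和阈值均为示例,实际以文件里已有的规则为准):

    - alert: NodeMemoryHigh                # 示例规则名
      expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 > 90
      for: 2m                              # 观察时长,如原来是15m,可按需缩短
      labels:
        severity: warning
      annotations:
        description: '节点内存使用率超过90%,已持续2分钟'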

    #进入解压后的webhook目录
    cd /data/k8s/prometheus/webhook

    cat  Dockerfile
    #打包镜像
    docker build -t dockerck.e21.cn/product/alertmanaer-webhook:1.0 .

    #推送镜像
    docker push dockerck.e21.cn/product/alertmanaer-webhook:1.0

    kubectl apply -f alertmanaer-webhook.yaml

    find / -name "prometheus-rules.yaml"

    监控报警规则修改   vim /data/k8s/prometheus/kube-prometheus-master/manifests/prometheus-rules.yaml
    修改完成记得更新   kubectl apply -f /data/k8s/prometheus/kube-prometheus-master/manifests/prometheus-rules.yaml
    # 通过这里可以获取需要创建的报警配置secret名称
    # kubectl -n monitoring edit statefulsets.apps alertmanager-main
    ...
          volumes:
          - name: config-volume
            secret:
              defaultMode: 420
              secretName: alertmanager-main
    ...

    # 注意事先在配置文件 alertmanager.yaml 里面编辑好收件人等信息 ,再执行下面的命令


    kubectl -n monitoring get secrets alertmanager-main
    kubectl -n monitoring delete secrets alertmanager-main
    kubectl create secret generic  alertmanager-main --from-file=alertmanager.yaml -n monitoring


    kubectl -n monitoring delete pod alertmanager-main-0
    kubectl -n monitoring delete pod alertmanager-main-1
    kubectl -n monitoring delete pod alertmanager-main-2

    #查看svc
    kubectl get svc |grep alertmanaer-dingtalk-svc

    #测试发送报警
    curl -X POST -H 'Content-type: application/json' -d '{"name": "boge","titlea": "'"$(id)"'", "texta": "'"$(whoami)--$(hostname)"'"}' 10.68.64.119/5e00fc1a/prometheus/weixin

    十一、k8s安装kuboard图形化界面

    1.安装-获取部署 Kuboard 所需的 YAML 文件:

    curl -o kuboard-v3.yaml https://addons.kuboard.cn/kuboard/kuboard-v3-storage-class.yaml

    编辑 kuboard-v3.yaml 文件中的配置,该部署文件中,有1处配置必须修改:

    storageClassName

      volumeClaimTemplates:
      - metadata:
          name: data
        spec:
          # 请填写一个有效的 StorageClass name
          storageClassName: please-provide-a-valid-StorageClass-name-here
          accessModes: [ "ReadWriteMany" ]
          resources:
            requests:
              storage: 5Gi

    2.部署到 Kubernetes 集群

    kubectl create -f kuboard-v3.yaml

    访问 Kuboard

    在浏览器中打开链接 http://your-node-ip-address:30080

    输入初始用户名和密码,并登录
        用户名: admin
        密码: Kuboard123

    3.卸载

    执行 Kuboard v3 的卸载

    kubectl delete -f https://addons.kuboard.cn/kuboard/kuboard-v3-storage-class.yaml

    4.进入页面,导入集群
    4.1、名称和描述,自己定义
    4.2、将 cat /root/.kube/config 的内容复制到 kubeconfig 中

    151c1b3b4e49472f9fa00aa10580aa9e.png

    11.离线安装下载:

    https://cloud.189.cn/t/qiaMzyvU3Aj2 (访问码:6ngs)

    十二、 k8s架构师课程基于gitlab的CICD自动化

    1.这节课我们先来部署gitlab私有代码仓库所需要的数据库postgresql和redis。

    需要注意的是,如果大家的nfs-server的地址和挂载目录不是按博哥前面课程讲得来定义的话,那么下面的yaml配置中需要记得替换。

    mkdir -p /nfs_dir/{gitlab_etc_ver130806,gitlab_log_ver130806,gitlab_opt_ver130806,gitlab_postgresql_data_ver130806}

    #创建命名空间
    kubectl create namespace gitlab-ver130806

    kubectl get ns

    2.部署postgresql

    vi 3postgres.yaml

    1. # pv 3postgres.yaml
    2. ---
    3. apiVersion: v1
    4. kind: PersistentVolume
    5. metadata:
    6. name: gitlab-postgresql-data-ver130806
    7. labels:
    8. type: gitlab-postgresql-data-ver130806
    9. spec:
    10. capacity:
    11. storage: 10Gi
    12. accessModes:
    13. - ReadWriteOnce
    14. persistentVolumeReclaimPolicy: Retain
    15. storageClassName: nfs
    16. nfs:
    17. path: /nfs_dir/gitlab_postgresql_data_ver130806
    18. server: 10.0.1.201
    19. # pvc
    20. ---
    21. kind: PersistentVolumeClaim
    22. apiVersion: v1
    23. metadata:
    24. name: gitlab-postgresql-data-ver130806-pvc
    25. spec:
    26. accessModes:
    27. - ReadWriteOnce
    28. resources:
    29. requests:
    30. storage: 10Gi
    31. storageClassName: nfs
    32. selector:
    33. matchLabels:
    34. type: gitlab-postgresql-data-ver130806
    35. ---
    36. apiVersion: v1
    37. kind: Service
    38. metadata:
    39. name: postgresql
    40. labels:
    41. app: gitlab
    42. tier: postgreSQL
    43. spec:
    44. ports:
    45. - port: 5432
    46. selector:
    47. app: gitlab
    48. tier: postgreSQL
    49. ---
    50. apiVersion: apps/v1
    51. kind: Deployment
    52. metadata:
    53. name: postgresql
    54. labels:
    55. app: gitlab
    56. tier: postgreSQL
    57. spec:
    58. replicas: 1
    59. selector:
    60. matchLabels:
    61. app: gitlab
    62. tier: postgreSQL
    63. strategy:
    64. type: Recreate
    65. template:
    66. metadata:
    67. labels:
    68. app: gitlab
    69. tier: postgreSQL
    70. spec:
    71. #nodeSelector:
    72. # gee/disk: "500g"
    73. containers:
    74. - image: postgres:12.6-alpine
    75. #- image: harbor.boge.com/library/postgres:12.6-alpine
    76. name: postgresql
    77. env:
    78. - name: POSTGRES_USER
    79. value: gitlab
    80. - name: POSTGRES_DB
    81. value: gitlabhq_production
    82. - name: POSTGRES_PASSWORD
    83. value: bogeusepg
    84. - name: TZ
    85. value: Asia/Shanghai
    86. ports:
    87. - containerPort: 5432
    88. name: postgresql
    89. livenessProbe:
    90. exec:
    91. command:
    92. - sh
    93. - -c
    94. - exec pg_isready -U gitlab -h 127.0.0.1 -p 5432 -d gitlabhq_production
    95. initialDelaySeconds: 110
    96. timeoutSeconds: 5
    97. failureThreshold: 6
    98. readinessProbe:
    99. exec:
    100. command:
    101. - sh
    102. - -c
    103. - exec pg_isready -U gitlab -h 127.0.0.1 -p 5432 -d gitlabhq_production
    104. initialDelaySeconds: 20
    105. timeoutSeconds: 3
    106. periodSeconds: 5
    107. # resources:
    108. # requests:
    109. # cpu: 100m
    110. # memory: 512Mi
    111. # limits:
    112. # cpu: "1"
    113. # memory: 1Gi
    114. volumeMounts:
    115. - name: postgresql
    116. mountPath: /var/lib/postgresql/data
    117. volumes:
    118. - name: postgresql
    119. persistentVolumeClaim:
    120. claimName: gitlab-postgresql-data-ver130806-pvc

    3,创建pod

    kubectl -n gitlab-ver130806 apply -f 3postgres.yaml


    #检查pod
    kubectl -n gitlab-ver130806 get pod

    4.部署redis

    vi 4redis.yaml

    1. ---
    2. apiVersion: v1
    3. kind: Service
    4. metadata:
    5. name: redis
    6. labels:
    7. app: gitlab
    8. tier: backend
    9. spec:
    10. ports:
    11. - port: 6379
    12. targetPort: 6379
    13. selector:
    14. app: gitlab
    15. tier: backend
    16. ---
    17. apiVersion: apps/v1
    18. kind: Deployment
    19. metadata:
    20. name: redis
    21. labels:
    22. app: gitlab
    23. tier: backend
    24. spec:
    25. replicas: 1
    26. selector:
    27. matchLabels:
    28. app: gitlab
    29. tier: backend
    30. strategy:
    31. type: Recreate
    32. template:
    33. metadata:
    34. labels:
    35. app: gitlab
    36. tier: backend
    37. spec:
    38. #nodeSelector:
    39. # gee/disk: "500g"
    40. containers:
    41. - image: redis:6.2.0-alpine3.13
    42. #- image: harbor.boge.com/library/redis:6.2.0-alpine3.13
    43. name: redis
    44. command:
    45. - "redis-server"
    46. args:
    47. - "--requirepass"
    48. - "bogeuseredis"
    49. # resources:
    50. # requests:
    51. # cpu: "1"
    52. # memory: 2Gi
    53. # limits:
    54. # cpu: "1"
    55. # memory: 2Gi
    56. ports:
    57. - containerPort: 6379
    58. name: redis
    59. livenessProbe:
    60. exec:
    61. command:
    62. - sh
    63. - -c
    64. - "redis-cli ping"
    65. initialDelaySeconds: 30
    66. periodSeconds: 10
    67. timeoutSeconds: 5
    68. successThreshold: 1
    69. failureThreshold: 3
    70. readinessProbe:
    71. exec:
    72. command:
    73. - sh
    74. - -c
    75. - "redis-cli ping"
    76. initialDelaySeconds: 5
    77. periodSeconds: 10
    78. timeoutSeconds: 1
    79. successThreshold: 1
    80. failureThreshold: 3
    81. initContainers:
    82. - command:
    83. - /bin/sh
    84. - -c
    85. - |
    86. ulimit -n 65536
    87. mount -o remount rw /sys
    88. echo never > /sys/kernel/mm/transparent_hugepage/enabled
    89. mount -o remount rw /proc/sys
    90. echo 2000 > /proc/sys/net/core/somaxconn
    91. echo 1 > /proc/sys/vm/overcommit_memory
    92. image: registry.cn-beijing.aliyuncs.com/acs/busybox:v1.29.2
    93. imagePullPolicy: IfNotPresent
    94. name: init-redis
    95. resources: {}
    96. securityContext:
    97. privileged: true
    98. procMount: Default

    #创建pod

    kubectl -n gitlab-ver130806 apply -f 4redis.yaml
    #检查pod
    kubectl -n gitlab-ver130806 get pod

    5.开始部署gitlab服务。

    先定制一下镜像

    Dockerfile

    1. FROM gitlab/gitlab-ce:13.8.6-ce.0
    2. RUN rm /etc/apt/sources.list \
    3. && echo 'deb http://apt.postgresql.org/pub/repos/apt/ xenial-pgdg main' > /etc/apt/sources.list.d/pgdg.list \
    4. && wget --no-check-certificate -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
    5. COPY sources.list /etc/apt/sources.list
    6. RUN apt-get update -yq && \
    7. apt-get install -y vim iproute2 net-tools iputils-ping curl wget software-properties-common unzip postgresql-client-12 && \
    8. rm -rf /var/cache/apt/archives/*
    9. RUN ln -svf /usr/bin/pg_dump /opt/gitlab/embedded/bin/pg_dump
    10. #---------------------------------------------------------------
    11. # docker build -t gitlab/gitlab-ce:13.8.6-ce.1 .

    sources.list

    1. deb http://mirrors.aliyun.com/ubuntu/ xenial main
    2. deb-src http://mirrors.aliyun.com/ubuntu/ xenial main
    3. deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main
    4. deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main
    5. deb http://mirrors.aliyun.com/ubuntu/ xenial universe
    6. deb-src http://mirrors.aliyun.com/ubuntu/ xenial universe
    7. deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
    8. deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
    9. deb http://mirrors.aliyun.com/ubuntu xenial-security main
    10. deb-src http://mirrors.aliyun.com/ubuntu xenial-security main
    11. deb http://mirrors.aliyun.com/ubuntu xenial-security universe
    12. deb-src http://mirrors.aliyun.com/ubuntu xenial-security universe

    6.打包推送镜像

    docker tag gitlab/gitlab-ce:13.8.6-ce.1 dockerck.e21.cn/library/gitlab-ce:13.8.6-ce.1
    docker push dockerck.e21.cn/library/gitlab-ce:13.8.6-ce.1

    7.开始部署5gitlab.yaml

    vi 5gitlab.yaml

    1. # restore gitlab data command example:
    2. # kubectl -n gitlab-ver130806 exec -it $(kubectl -n gitlab-ver130806 get pod|grep -v runner|grep gitlab|awk '{print $1}') -- gitlab-rake gitlab:backup:restore BACKUP=1602889879_2020_10_17_12.9.2
    3. # kubectl -n gitlab-ver130806 exec -it $(kubectl -n gitlab-ver130806 get pod|grep -v runner|grep gitlab|awk '{print $1}') -- gitlab-ctl reconfigure
    4. # kubectl -n gitlab-ver130806 exec -it $(kubectl -n gitlab-ver130806 get pod|grep -v runner|grep gitlab|awk '{print $1}') -- gitlab-ctl status
    5. # pv
    6. ---
    7. apiVersion: v1
    8. kind: PersistentVolume
    9. metadata:
    10. name: gitlab-etc-ver130806
    11. labels:
    12. type: gitlab-etc-ver130806
    13. spec:
    14. capacity:
    15. storage: 1Gi
    16. accessModes:
    17. - ReadWriteOnce
    18. persistentVolumeReclaimPolicy: Retain
    19. storageClassName: nfs
    20. nfs:
    21. path: /nfs_dir/gitlab_etc_ver130806
    22. server: 10.0.1.201
    23. # pvc
    24. ---
    25. kind: PersistentVolumeClaim
    26. apiVersion: v1
    27. metadata:
    28. name: gitlab-etc-ver130806-pvc
    29. spec:
    30. accessModes:
    31. - ReadWriteOnce
    32. resources:
    33. requests:
    34. storage: 1Gi
    35. storageClassName: nfs
    36. selector:
    37. matchLabels:
    38. type: gitlab-etc-ver130806
    39. # pv
    40. ---
    41. apiVersion: v1
    42. kind: PersistentVolume
    43. metadata:
    44. name: gitlab-log-ver130806
    45. labels:
    46. type: gitlab-log-ver130806
    47. spec:
    48. capacity:
    49. storage: 1Gi
    50. accessModes:
    51. - ReadWriteOnce
    52. persistentVolumeReclaimPolicy: Retain
    53. storageClassName: nfs
    54. nfs:
    55. path: /nfs_dir/gitlab_log_ver130806
    56. server: 10.0.1.201
    57. # pvc
    58. ---
    59. kind: PersistentVolumeClaim
    60. apiVersion: v1
    61. metadata:
    62. name: gitlab-log-ver130806-pvc
    63. spec:
    64. accessModes:
    65. - ReadWriteOnce
    66. resources:
    67. requests:
    68. storage: 1Gi
    69. storageClassName: nfs
    70. selector:
    71. matchLabels:
    72. type: gitlab-log-ver130806
    73. # pv
    74. ---
    75. apiVersion: v1
    76. kind: PersistentVolume
    77. metadata:
    78. name: gitlab-opt-ver130806
    79. labels:
    80. type: gitlab-opt-ver130806
    81. spec:
    82. capacity:
    83. storage: 1Gi
    84. accessModes:
    85. - ReadWriteOnce
    86. persistentVolumeReclaimPolicy: Retain
    87. storageClassName: nfs
    88. nfs:
    89. path: /nfs_dir/gitlab_opt_ver130806
    90. server: 10.0.1.201
    91. # pvc
    92. ---
    93. kind: PersistentVolumeClaim
    94. apiVersion: v1
    95. metadata:
    96. name: gitlab-opt-ver130806-pvc
    97. spec:
    98. accessModes:
    99. - ReadWriteOnce
    100. resources:
    101. requests:
    102. storage: 1Gi
    103. storageClassName: nfs
    104. selector:
    105. matchLabels:
    106. type: gitlab-opt-ver130806
    107. ---
    108. apiVersion: v1
    109. kind: Service
    110. metadata:
    111. name: gitlab
    112. labels:
    113. app: gitlab
    114. tier: frontend
    115. spec:
    116. ports:
    117. - name: gitlab-ui
    118. port: 80
    119. protocol: TCP
    120. targetPort: 80
    121. - name: gitlab-ssh
    122. port: 22
    123. protocol: TCP
    124. targetPort: 22
    125. nodePort: 32155
    126. selector:
    127. app: gitlab
    128. tier: frontend
    129. type: NodePort
    130. ---
    131. apiVersion: v1
    132. kind: ServiceAccount
    133. metadata:
    134. name: gitlab
    135. ---
    136. apiVersion: rbac.authorization.k8s.io/v1
    137. kind: ClusterRoleBinding
    138. metadata:
    139. name: gitlab-cb-ver130806
    140. roleRef:
    141. apiGroup: rbac.authorization.k8s.io
    142. kind: ClusterRole
    143. name: cluster-admin
    144. subjects:
    145. - kind: ServiceAccount
    146. name: gitlab
    147. namespace: gitlab-ver130806
    148. ---
    149. apiVersion: apps/v1
    150. kind: Deployment
    151. metadata:
    152. name: gitlab
    153. labels:
    154. app: gitlab
    155. tier: frontend
    156. spec:
    157. replicas: 1
    158. selector:
    159. matchLabels:
    160. app: gitlab
    161. tier: frontend
    162. strategy:
    163. type: Recreate
    164. template:
    165. metadata:
    166. labels:
    167. app: gitlab
    168. tier: frontend
    169. spec:
    170. serviceAccountName: gitlab
    171. containers:
    172. - image: harbor.boge.com/library/gitlab-ce:13.8.6-ce.1
    173. name: gitlab
    174. # resources:
    175. # requests:
    176. # cpu: 400m
    177. # memory: 4Gi
    178. # limits:
    179. # cpu: "800m"
    180. # memory: 8Gi
    181. securityContext:
    182. privileged: true
    183. env:
    184. - name: TZ
    185. value: Asia/Shanghai
    186. - name: GITLAB_OMNIBUS_CONFIG
    187. value: |
    188. postgresql['enable'] = false
    189. gitlab_rails['db_username'] = "gitlab"
    190. gitlab_rails['db_password'] = "bogeusepg"
    191. gitlab_rails['db_host'] = "postgresql"
    192. gitlab_rails['db_port'] = "5432"
    193. gitlab_rails['db_database'] = "gitlabhq_production"
    194. gitlab_rails['db_adapter'] = 'postgresql'
    195. gitlab_rails['db_encoding'] = 'utf8'
    196. redis['enable'] = false
    197. gitlab_rails['redis_host'] = 'redis'
    198. gitlab_rails['redis_port'] = '6379'
    199. gitlab_rails['redis_password'] = 'bogeuseredis'
    200. gitlab_rails['gitlab_shell_ssh_port'] = 22
    201. external_url 'http://git.boge.com/'
    202. nginx['listen_port'] = 80
    203. nginx['listen_https'] = false
    204. #-------------------------------------------
    205. gitlab_rails['gitlab_email_enabled'] = true
    206. gitlab_rails['gitlab_email_from'] = 'admin@boge.com'
    207. gitlab_rails['gitlab_email_display_name'] = 'boge'
    208. gitlab_rails['gitlab_email_reply_to'] = 'gitlab@boge.com'
    209. gitlab_rails['gitlab_default_can_create_group'] = true
    210. gitlab_rails['gitlab_username_changing_enabled'] = true
    211. gitlab_rails['smtp_enable'] = true
    212. gitlab_rails['smtp_address'] = "smtp.exmail.qq.com"
    213. gitlab_rails['smtp_port'] = 465
    214. gitlab_rails['smtp_user_name'] = "gitlab@boge.com"
    215. gitlab_rails['smtp_password'] = "bogesendmail"
    216. gitlab_rails['smtp_domain'] = "exmail.qq.com"
    217. gitlab_rails['smtp_authentication'] = "login"
    218. gitlab_rails['smtp_enable_starttls_auto'] = true
    219. gitlab_rails['smtp_tls'] = true
    220. #-------------------------------------------
    221. # 关闭 promethues
    222. prometheus['enable'] = false
    223. # 关闭 grafana
    224. grafana['enable'] = false
    225. # 减少内存占用
    226. unicorn['worker_memory_limit_min'] = "200 * 1 << 20"
    227. unicorn['worker_memory_limit_max'] = "300 * 1 << 20"
    228. # 减少 sidekiq 的并发数
    229. sidekiq['concurrency'] = 16
    230. # 减少 postgresql 数据库缓存
    231. postgresql['shared_buffers'] = "256MB"
    232. # 减少 postgresql 数据库并发数量
    233. postgresql['max_connections'] = 8
    234. # 减少进程数 worker=CPU核数+1
    235. unicorn['worker_processes'] = 2
    236. nginx['worker_processes'] = 2
    237. puma['worker_processes'] = 2
    238. # puma['per_worker_max_memory_mb'] = 850
    239. # 保留3天备份的数据文件
    240. gitlab_rails['backup_keep_time'] = 259200
    241. #-------------------------------------------
    242. ports:
    243. - containerPort: 80
    244. name: gitlab
    245. livenessProbe:
    246. exec:
    247. command:
    248. - sh
    249. - -c
    250. - "curl -s http://127.0.0.1/-/health|grep -w 'GitLab OK'"
    251. initialDelaySeconds: 120
    252. periodSeconds: 10
    253. timeoutSeconds: 5
    254. successThreshold: 1
    255. failureThreshold: 3
    256. readinessProbe:
    257. exec:
    258. command:
    259. - sh
    260. - -c
    261. - "curl -s http://127.0.0.1/-/health|grep -w 'GitLab OK'"
    262. initialDelaySeconds: 120
    263. periodSeconds: 10
    264. timeoutSeconds: 5
    265. successThreshold: 1
    266. failureThreshold: 3
    267. volumeMounts:
    268. - mountPath: /etc/gitlab
    269. name: gitlab1
    270. - mountPath: /var/log/gitlab
    271. name: gitlab2
    272. - mountPath: /var/opt/gitlab
    273. name: gitlab3
    274. - mountPath: /etc/localtime
    275. name: tz-config
    276. volumes:
    277. - name: gitlab1
    278. persistentVolumeClaim:
    279. claimName: gitlab-etc-ver130806-pvc
    280. - name: gitlab2
    281. persistentVolumeClaim:
    282. claimName: gitlab-log-ver130806-pvc
    283. - name: gitlab3
    284. persistentVolumeClaim:
    285. claimName: gitlab-opt-ver130806-pvc
    286. - name: tz-config
    287. hostPath:
    288. path: /usr/share/zoneinfo/Asia/Shanghai
    289. securityContext:
    290. runAsUser: 0
    291. fsGroup: 0

    8.创建pod
     

    kubectl -n gitlab-ver130806 apply -f 5gitlab.yaml

    kubectl -n gitlab-ver130806 get pod

    9.部署gitlab-tls

    vi 6gitlab-tls.yaml

    1. # old version
    2. #apiVersion: extensions/v1beta1
    3. #kind: Ingress
    4. #metadata:
    5. # name: gitlab
    6. # annotations:
    7. # nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    8. # nginx.ingress.kubernetes.io/proxy-body-size: "20m"
    9. #spec:
    10. # tls:
    11. # - hosts:
    12. # - git.boge.com
    13. # secretName: mytls
    14. # rules:
    15. # - host: git.boge.com
    16. # http:
    17. # paths:
    18. # - path: /
    19. # backend:
    20. # serviceName: gitlab
    21. # servicePort: 80
    22. # Add tls
    23. # openssl genrsa -out tls.key 2048
    24. # openssl req -new -x509 -key tls.key -out tls.cert -days 360 -subj /CN=*.boge.com
    25. # kubectl -n gitlab-ver130806 create secret tls mytls --cert=tls.cert --key=tls.key
    26. # new version
    27. ## https://kubernetes.io/docs/concepts/services-networking/ingress/
    28. apiVersion: networking.k8s.io/v1
    29. kind: Ingress
    30. metadata:
    31. name: gitlab
    32. annotations:
    33. nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
    34. nginx.ingress.kubernetes.io/proxy-body-size: "20m"
    35. spec:
    36. tls:
    37. - hosts:
    38. - git.boge.com
    39. secretName: mytls
    40. rules:
    41. - host: git.boge.com
    42. http:
    43. paths:
    44. - path: /
    45. pathType: Prefix
    46. backend:
    47. service:
    48. name: gitlab
    49. port:
    50. number: 80
    51. ---

    10.创建pod


    kubectl -n gitlab-ver130806 apply -f 6gitlab-tls.yaml

    kubectl -n gitlab-ver130806 get pod

    11.在安装服务器上增加ssh端口转发

    我们要保持所有开发人员能使用默认的22端口来通过ssh拉取代码,那么就需要做如下端口转发配置

    vim /etc/ssh/sshd_config

    Port 10022


    systemctl restart sshd

    # 注意配置此转发前,需要先将对应NODE本身的ssh连接端口修改掉,以防后面登录不了该机器
    # 下面的目标端口请替换为 gitlab-ssh 服务实际的 NodePort(可用下面命令查询,本文yaml中指定为32155)
    #kubectl get svc -n gitlab-ver130806 |grep git
    iptables -t nat -A PREROUTING -d 10.0.1.204 -p tcp --dport 22 -j DNAT --to-destination 10.0.1.204:31755

    #↑ 删除上面创建的这一条规则,将-A换成-D即可
    #iptables -t nat -D PREROUTING -d 10.0.1.204 -p tcp --dport 22 -j DNAT --to-destination 10.0.1.204:31755


    iptables -t nat  -nvL PREROUTING


    #将需要拉取仓库代码的服务器密钥(cat /root/.ssh/id_rsa.pub)加入git页面(SSH Keys)

    12.这节课我们来讲gitlab里面的runner,gitlab的CI/CD自动化,都是由gitlab下发指令,依靠runner这个组件去执行的,我们这里也是把runner运行在k8s上面。

    runner按字面意思就是奔跑者,它在整个自动化流程里面的角色相当于一个外卖小哥:接收gitlab下发的自动化指令,去做相应的操作,从而实现整个CI/CD的效果。
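    runner 接活的依据是代码仓库根目录下的 .gitlab-ci.yml,下面先给出一个最小的流水线配置示意(stage 和脚本内容均为示例,实际按项目需要编写):

    # .gitlab-ci.yml 最小示意
    stages:
      - build
      - deploy

    build-job:
      stage: build
      tags:
        - docker             # 与注册runner时打的tag对应(示例)
      script:
        - echo "在这里执行构建,例如 docker build ..."

    deploy-job:
      stage: deploy
      tags:
        - docker
      script:
        - echo "在这里执行部署,例如 kubectl apply ..."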
    部署gitlab-runner

    mkdir -p /nfs_dir/{gitlab-runner1-ver130806-docker,gitlab-runner2-ver130806-share}

    # gitlab-ci-multi-runner register
    # Active  √                 Paused Runners don't accept new jobs
    # Protected                 This runner will only run on pipelines triggered on protected branches
    # Run untagged jobs         Indicates whether this runner can pick jobs without tags
    # Lock to current projects  When a runner is locked, it cannot be assigned to other projects

    # pv
    ---
    apiVersion: v1
    kind: PersistentVolume
    metadata:
      name: gitlab-runner1-ver130806-docker
      labels:
        type: gitlab-runner1-ver130806-docker
    spec:
      capacity:
        storage: 0.1Gi
      accessModes:
        - ReadWriteMany
      persistentVolumeReclaimPolicy: Retain
      storageClassName: nfs
      nfs:
        path: /nfs_dir/gitlab-runner1-ver130806-docker
        server: 10.0.1.201

    # pvc
    ---
    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: gitlab-runner1-ver130806-docker
      namespace: gitlab-ver130806
    spec:
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 0.1Gi
      storageClassName: nfs
      selector:
        matchLabels:
          type: gitlab-runner1-ver130806-docker

    ---
    # https://docs.gitlab.com/runner/executors
    #concurrent = 30
    #check_interval = 0
    #[session_server]
    #  session_timeout = 1800
    #[[runners]]
    #  name = "gitlab-runner1-ver130806-docker"
    #  url = "http://git.boge.com"
    #  token = "xxxxxxxxxxxxxxxxxxxxxx"
    #  executor = "kubernetes"
    #  [runners.kubernetes]
    #    namespace = "gitlab-ver130806"
    #    image = "docker:stable"
    #    helper_image = "gitlab/gitlab-runner-helper:x86_64-9fc34d48-pwsh"
    #    privileged = true
    #    [[runners.kubernetes.volumes.pvc]]
    #      name = "gitlab-runner1-ver130806-docker"
    #      mount_path = "/mnt"
    ---
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: gitlab-runner1-ver130806-docker
      namespace: gitlab-ver130806
    spec:
      replicas: 1
      selector:
        matchLabels:
          name: gitlab-runner1-ver130806-docker
      template:
        metadata:
          labels:
            name: gitlab-runner1-ver130806-docker
        spec:
          hostAliases:
          #kubectl -n gitlab-ver130806 get svc |grep git
          - ip: "10.68.140.109"
            hostnames:
            - "git.boge.com"
          serviceAccountName: gitlab
          containers:
          - args:
            - run
            image: gitlab/gitlab-runner:v13.10.0
            name: gitlab-runner1-ver130806-docker
            volumeMounts:
            - mountPath: /etc/gitlab-runner
              name: config
            - mountPath: /etc/ssl/certs
              name: cacerts
              readOnly: true
          restartPolicy: Always
          volumes:
          - persistentVolumeClaim:
              claimName: gitlab-runner1-ver130806-docker
            name: config
          - hostPath:
              path: /usr/share/ca-certificates/mozilla
            name: cacerts

    8aa8696ef17f4709907a1d306aec557a.png

    13. Deploy the runner

    kubectl -n gitlab-ver130806 apply -f 7gitlab-runner-docker.yaml

    kubectl -n gitlab-ver130806 exec -it gitlab-runner1-ver130806-docker-7cc8c bash

    # register the runner from inside the pod
    root@gitlab-runner1-ver130806-docker-7cc8cc7595-nqcmc:/# gitlab-ci-multi-runner register
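    The interactive prompts above can also be answered on the command line. A hedged sketch of an equivalent non-interactive registration (the registration token is a placeholder you copy from the GitLab admin Runners page):

    gitlab-ci-multi-runner register \
      --non-interactive \
      --url "http://git.boge.com/" \
      --registration-token "REPLACE_WITH_REGISTRATION_TOKEN" \
      --executor "kubernetes" \
      --description "gitlab-runner1-ver130806-docker" \
      --run-untagged="true" \
      --locked="false"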

    07a79a982aa04d2d8c4b72f45c8ca2ee.png

    14. vi /nfs_dir/gitlab-runner1-ver130806-docker/config.toml

    concurrent = 30
    check_interval = 0
    [session_server]
      session_timeout = 1800
    [[runners]]
      name = "gitlab-runner1-ver130806-docker"
      url = "http://git.boge.com/"
      token = "2sU_GyKpbgVisPNmp-Fb"   # auto-generated at registration; do not change it
      executor = "kubernetes"
      [runners.kubernetes]
        namespace = "gitlab-ver130806"
        image = "docker:stable"
        helper_image = "gitlab/gitlab-runner-helper:x86_64-9fc34d48-pwsh"
        privileged = true
        [[runners.kubernetes.volumes.pvc]]
          name = "gitlab-runner1-ver130806-docker"
          mount_path = "/mnt"

    15. Check the pods

    kubectl -n gitlab-ver130806 get pod
    # delete the runner pod and let the Deployment recreate it, so the new config.toml is picked up
    kubectl -n gitlab-ver130806 delete pod gitlab-runner1-ver130806-docker-7cc8cc7595-nqcmc

    In the GitLab runner settings page, untick the 4th checkbox (Lock to current projects), keep the 1st one (Active) ticked, then save and exit.

    0aede3a26f5e4464b107a78b21356924.png

    The second runner (share):

    # gitlab-ci-multi-runner register
    # Active  √                 Paused Runners don't accept new jobs
    # Protected                 This runner will only run on pipelines triggered on protected branches
    # Run untagged jobs  √      Indicates whether this runner can pick jobs without tags
    # Lock to current projects  When a runner is locked, it cannot be assigned to other projects

    # pv
    ---
    apiVersion: v1
    kind: PersistentVolume
    metadata:
      name: gitlab-runner2-ver130806-share
      labels:
        type: gitlab-runner2-ver130806-share
    spec:
      capacity:
        storage: 0.1Gi
      accessModes:
        - ReadWriteMany
      persistentVolumeReclaimPolicy: Retain
      storageClassName: nfs
      nfs:
        path: /nfs_dir/gitlab-runner2-ver130806-share
        server: 10.0.1.201

    # pvc
    ---
    kind: PersistentVolumeClaim
    apiVersion: v1
    metadata:
      name: gitlab-runner2-ver130806-share
      namespace: gitlab-ver130806
    spec:
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 0.1Gi
      storageClassName: nfs
      selector:
        matchLabels:
          type: gitlab-runner2-ver130806-share

    ---
    # https://docs.gitlab.com/runner/executors
    #concurrent = 30
    #check_interval = 0
    #[session_server]
    #  session_timeout = 1800
    #[[runners]]
    #  name = "gitlab-runner2-ver130806-share"
    #  url = "http://git.boge.com"
    #  token = "xxxxxxxxxxxxxxxx"
    #  executor = "kubernetes"
    #  [runners.kubernetes]
    #    namespace = "gitlab-ver130806"
    #    image = "registry.cn-beijing.aliyuncs.com/acs/busybox/busybox:v1.29.2"
    #    helper_image = "gitlab/gitlab-runner-helper:x86_64-9fc34d48-pwsh"
    #    privileged = false
    #    [[runners.kubernetes.volumes.pvc]]
    #      name = "gitlab-runner2-ver130806-share"
    #      mount_path = "/mnt"
    ---
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: gitlab-runner2-ver130806-share
      namespace: gitlab-ver130806
    spec:
      replicas: 1
      selector:
        matchLabels:
          name: gitlab-runner2-ver130806-share
      template:
        metadata:
          labels:
            name: gitlab-runner2-ver130806-share
        spec:
          hostAliases:
          #kubectl -n gitlab-ver130806 get svc |grep git
          - ip: "10.68.140.109"
            hostnames:
            - "git.boge.com"
          serviceAccountName: gitlab
          containers:
          - args:
            - run
            image: gitlab/gitlab-runner:v13.10.0
            name: gitlab-runner2-ver130806-share
            volumeMounts:
            - mountPath: /etc/gitlab-runner
              name: config
            - mountPath: /etc/ssl/certs
              name: cacerts
              readOnly: true
          restartPolicy: Always
          volumes:
          - persistentVolumeClaim:
              claimName: gitlab-runner2-ver130806-share
            name: config
          - hostPath:
              path: /usr/share/ca-certificates/mozilla
            name: cacerts

    16. Apply the yaml

    kubectl -n gitlab-ver130806 apply -f 8gitlab-runner-share.yaml

    kubectl -n gitlab-ver130806 get pod

    # enter the pod
    kubectl -n gitlab-ver130806 exec -it gitlab-runner2-ver130806-share-555695cf9 bash


    #gitlab-ci-multi-runner register

    f4ef152fa8fc424d990f93bd329b51f4.png

    17. Modify the configuration
    vi /nfs_dir/gitlab-runner2-ver130806-share/config.toml

    concurrent = 30
    check_interval = 0
    [session_server]
      session_timeout = 1800
    [[runners]]
      name = "gitlab-runner2-ver130806-share"
      url = "http://git.boge.com/"
      token = "yj5tWzuaAB8xjP4kfbKQ"   # auto-generated at registration; do not change it
      executor = "kubernetes"
      [runners.kubernetes]
        namespace = "gitlab-ver130806"
        image = "registry.cn-beijing.aliyuncs.com/acs/busybox/busybox:v1.29.2"
        helper_image = "gitlab/gitlab-runner-helper:x86_64-9fc34d48-pwsh"
        privileged = false
        [[runners.kubernetes.volumes.pvc]]
          name = "gitlab-runner2-ver130806-share"
          mount_path = "/mnt"

    18. Apply the change

    kubectl -n gitlab-ver130806 get pod

    kubectl -n gitlab-ver130806 delete pod gitlab-runner2-ver130806-share-555695cf99-7nhp2

    In the GitLab runner settings page, keep the 1st checkbox (Active) and the 3rd one (Run untagged jobs) ticked, then save and exit.

    19. In this lesson we continue configuring the GitLab-related services.
    Add an in-cluster DNS rewrite for gitlab

    Why do this? Two reasons:

    It optimizes gitlab traffic: when the runners call the gitlab service, going straight to the internal address is faster.
    If you run gitlab on k8s on Alibaba Cloud, in-cluster services such as the runners cannot reach it through the cluster's own public SLB entry point (a limitation of Alibaba Cloud's network architecture); the configuration below solves this cleanly.

    # edit the coredns ConfigMap
    kubectl -n kube-system edit configmaps coredns

    # kubectl -n kube-system get configmaps coredns  -o yaml
    apiVersion: v1
    data:
      Corefile: |
        .:53 {
            errors
            health
            ready
            log
            rewrite stop {
              name regex git.boge.com gitlab.gitlab-ver130806.svc.cluster.local
              answer name gitlab.gitlab-ver130806.svc.cluster.local git.boge.com
            }

            kubernetes cluster.local in-addr.arpa ip6.arpa {

              pods verified
              fallthrough in-addr.arpa ip6.arpa
            }
            autopath @kubernetes
            prometheus :9153
            forward . /etc/resolv.conf
            cache 30
            loop
            reload
            loadbalance
        }
    kind: ConfigMap
    metadata:
      name: coredns
      namespace: kube-system

    20. Restart coredns so the change takes effect

    # check the pods
    kubectl -n kube-system get pod |grep  coredns

    # delete the pod; the Deployment recreates it with the new Corefile
    kubectl -n kube-system delete pod coredns-5787695b7f-vzfm6
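    A quick way to verify the rewrite (a hedged sketch; busybox:1.28 is just a convenient image whose nslookup behaves well): git.boge.com should now resolve to the gitlab Service's ClusterIP instead of an external address.

    kubectl run dns-test --rm -it --restart=Never --image=busybox:1.28 -- nslookup git.boge.com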

    21. Now we deploy a dind (docker in docker) service on k8s to provide the CI (continuous integration) part of the pipeline.

    Look at the output of docker version: Docker uses a client/server architecture. By default the Docker daemon does not listen on any TCP port; it creates a socket (/var/run/docker.sock) for local inter-process communication. Client and daemon talk over a REST API, so we can make the daemon listen on a TCP port, which lets a docker client call a remote daemon to build images.

    ![docker in docker](https://img-blog.csdnimg.cn/8a6d8ec893c344fb99c6cc72e69e5b88.png#pic_center)

    > docker in docker
    # dind pip install status: kill -9 code 137 (128+9) may mean the resource limits (cpu, memory) need to be raised
    # a job that only has the docker client can use dind as its docker daemon
    #dindSvc=$(kubectl -n kube-system get svc dind |awk 'NR==2{print $3}')
    #export DOCKER_HOST="tcp://${dindSvc}:2375/"
    #export DOCKER_DRIVER=overlay2
    #export DOCKER_TLS_CERTDIR=""

    ---
    # SVC
    kind: Service
    apiVersion: v1
    metadata:
      name: dind
      namespace: kube-system
    spec:
      selector:
        app: dind
      ports:
        - name: tcp-port
          port: 2375
          protocol: TCP
          targetPort: 2375

    ---
    # Deployment
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: dind
      namespace: kube-system
      labels:
        app: dind
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: dind
      template:
        metadata:
          labels:
            app: dind
        spec:
          hostNetwork: true
          containers:
            - name: dind
              #image: docker:19-dind
              image: harbor.boge.com/library/docker:19-dind
              lifecycle:
                postStart:
                  exec:
                    command: ["/bin/sh", "-c", "docker login harbor.boge.com -u 'admin' -p 'boge666'"]
                # when this pod is deleted, the sleep gives kube-proxy time to flush its rules
                preStop:
                  exec:
                    command: ["/bin/sh", "-c", "sleep 5"]
              ports:
                - containerPort: 2375
    #          resources:
    #            requests:
    #              cpu: 200m
    #              memory: 256Mi
    #            limits:
    #              cpu: 0.5
    #              memory: 1Gi
              readinessProbe:
                tcpSocket:
                  port: 2375
                initialDelaySeconds: 10
                periodSeconds: 30
              livenessProbe:
                tcpSocket:
                  port: 2375
                initialDelaySeconds: 10
                periodSeconds: 30
              securityContext:
                privileged: true
              env:
                - name: DOCKER_HOST
                  value: tcp://localhost:2375
                - name: DOCKER_DRIVER
                  value: overlay2
                - name: DOCKER_TLS_CERTDIR
                  value: ''
              volumeMounts:
                - name: docker-graph-storage
                  mountPath: /var/lib/docker
                - name: tz-config
                  mountPath: /etc/localtime
                # kubectl -n kube-system create secret generic harbor-ca --from-file=harbor-ca=/data/harbor/ssl/tls.cert
                - name: harbor-ca
                  mountPath: /etc/docker/certs.d/harbor.boge.com/ca.crt
                  subPath: harbor-ca
          # kubectl create secret docker-registry boge-secret --docker-server=harbor.boge.com --docker-username=admin --docker-password=boge666 --docker-email=admin@boge.com
          hostAliases:
            - hostnames:
                - harbor.boge.com
              ip: 10.0.1.204
          imagePullSecrets:
            - name: bogeharbor
          volumes:
    #        - emptyDir:
    #            medium: ""
    #            sizeLimit: 10Gi
            - hostPath:
                path: /var/lib/container/docker
              name: docker-graph-storage
            - hostPath:
                path: /usr/share/zoneinfo/Asia/Shanghai
              name: tz-config
            - name: harbor-ca
              secret:
                secretName: harbor-ca
                defaultMode: 0600
          # kubectl taint node 10.0.1.201 Ingress=:NoExecute
          # kubectl describe node 10.0.1.201 |grep -i taint
          # kubectl taint node 10.0.1.201 Ingress:NoExecute-
          nodeSelector:
            kubernetes.io/hostname: "10.0.1.201"
          tolerations:
          - operator: Exists
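    To illustrate how this dind Service gets used, here is a minimal usage sketch based on the commented hints at the top of the manifest: any pod or CI job that only has the docker client can point DOCKER_HOST at the dind Service and delegate builds to it (the image tag below is just a placeholder).

    dindSvc=$(kubectl -n kube-system get svc dind -o jsonpath='{.spec.clusterIP}')
    export DOCKER_HOST="tcp://${dindSvc}:2375/"
    export DOCKER_DRIVER=overlay2
    export DOCKER_TLS_CERTDIR=""
    docker info                                              # should show the daemon running inside the dind pod
    docker build -t harbor.boge.com/library/demo:test .      # placeholder image tag
    docker push harbor.boge.com/library/demo:test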

    22. Create the harbor CA secret


    kubectl -n kube-system create secret generic harbor-ca --from-file=harbor-ca=/data/harbor/ssl/tls.cert

    # apply the yaml
    kubectl apply -f 10bind.yaml
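    Check that the dind pod is up and running on the selected node:

    kubectl -n kube-system get pod -o wide | grep dind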

    # add the SSH public key to GitLab
    cat /root/.ssh/id_rsa.pub

    024983237bc148b7aafc309a5ec11c6c.png

    # Git global setup
    git config --global user.name "Administrator"
    git config --global user.email "admin@example.com"

    # Create a new repository
    git clone git@git.boge.com:root/test.git
    cd test
    touch README.md
    # stage the file
    git add README.md
    # commit with a message
    git commit -m "add README"
    # push
    git push -u origin master

    # Push an existing folder
    cd existing_folder
    git init
    git remote add origin git@git.boge.com:root/test.git
    git add .
    git commit -m "Initial commit"
    git push -u origin master

    # Push an existing Git repository
    cd existing_repo
    git remote rename origin old-origin
    git remote add origin git@git.boge.com:root/test.git
    git push -u origin --all
    git push -u origin --tags

    Next, pick a machine (here we use 10.0.1.201), add a local hosts entry 10.0.1.204 git.boge.com, and try pushing to the gitlab repository to confirm everything works. The detailed steps are in the video lesson of the same name; it is worth doing this by hand so you understand what each piece of configuration above does. The same pattern can then be reused for other services on k8s to get better access paths.
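    For reference, the hosts entry mentioned above looks like this on the test machine (10.0.1.201):

    echo '10.0.1.204 git.boge.com' >> /etc/hosts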

    Add an SSH key:

    On the git client (here a Linux server) run:

    # ssh-keygen -t ed25519 -C "123123@qq.com"   # press Enter through all the prompts
    # cd ~/.ssh/
    # cat id_ed25519.pub

    Clone from the git client; note the port number. No password should be required:

    #git clone ssh://git@git.boge.com:32155/zoujiaojiao/it.git
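    If you prefer not to type the NodePort in every URL, a hedged convenience sketch is to pin it in the client's ~/.ssh/config so a plain git@git.boge.com address works:

    cat >> ~/.ssh/config <<'EOF'
    Host git.boge.com
        Port 32155
        User git
        IdentityFile ~/.ssh/id_ed25519
    EOF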

    十三、k8s安装kubesphere3.3

    1. Deploying KubeSphere requires a default StorageClass

    kubectl edit sc nfs-boge

      metadata:
        annotations:
          storageclass.beta.kubernetes.io/is-default-class: "true"
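    The same annotation can be applied without an interactive editor; a sketch using kubectl patch, followed by a check that nfs-boge is now marked (default):

    kubectl patch sc nfs-boge -p '{"metadata":{"annotations":{"storageclass.beta.kubernetes.io/is-default-class":"true"}}}'
    kubectl get sc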

    2. Download the yaml files

    wget https://github.com/kubesphere/ks-installer/releases/download/v3.3.0/kubesphere-installer.yaml

    wget https://github.com/kubesphere/ks-installer/releases/download/v3.3.0/cluster-configuration.yaml
     

    # In cluster-configuration.yaml, change endpointIps under etcd to your master node's private IP address.

    endpointIps: 20.120.100.220

    # apply the yaml

    kubectl apply -f kubesphere-installer.yaml

    kubectl apply -f cluster-configuration.yaml

    3. Watch the installer logs

    kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-install, ks-installer)' -o jsonpath='{.items[0].metadata.name}') -f

    # then access port 30880 on any node
    # account:  admin
    # password: P@88w0rd

    4. Fix the missing etcd monitoring certificates

    kubectl -n kubesphere-monitoring-system create secret generic kube-etcd-client-certs  --from-file=etcd-client-ca.crt=/etc/kubernetes/pki/etcd/ca.crt  --from-file=etcd-client.crt=/etc/kubernetes/pki/apiserver-etcd-client.crt  --from-file=etcd-client.key=/etc/kubernetes/pki/apiserver-etcd-client.key
     

    5. Leftovers from a previous install were not cleaned up

    failed: [localhost] (item={'ns': 'kubesphere-system', 'kind': 'users.iam.kubesphere.io', 'resource': 'admin', 'release': 'ks-core'}) => {"ansible_loop_var": "item", "changed": true, "cmd": "/usr/local/bin/kubectl -n kubesphere-system annotate --overwrite users.iam.kubesphere.io admin meta.helm.sh/release-name=ks-core && /usr/local/bin/kubectl -n kubesphere-system annotate --overwrite users.iam.kubesphere.io admin meta.helm.sh/release-namespace=kubesphere-system && /usr/local/bin/kubectl -n kubesphere-system label --overwrite users.iam.kubesphere.io admin app.kubernetes.io/managed-by=Helm\n", "delta": "0:00:00.675675", "end": "2022-02-10 04:53:09.022419", "failed_when_result": true, "item": {"kind": "users.iam.kubesphere.io", "ns": "kubesphere-system", "release": "ks-core", "resource": "admin"}, "msg": "non-zero return code", "rc": 1, "start": "2022-02-10 04:53:08.346744", "stderr": "Error from server (InternalError): Internal error occurred: failed calling webhook \"users.iam.kubesphere.io\": Post \"https://ks-controller-manager.kubesphere-system.svc:443/validate-email-iam-kubesphere-io-v1alpha2?timeout=30s\": service \"ks-controller-manager\" not found", "stderr_lines": ["Error from server (InternalError): Internal error occurred: failed calling webhook \"users.iam.kubesphere.io\": Post \"https://ks-controller-manager.kubesphere-system.svc:443/validate-email-iam-kubesphere-io-v1alpha2?timeout=30s\": service \"ks-controller-manager\" not found"], "stdout": "", "stdout_lines": []}

    Refer to https://github.com/kubesphere/ks-installer/blob/master/scripts/kubesphere-delete.sh : download the script to the master node, run it to remove the leftovers, then reinstall.

    del.sh

    #!/usr/bin/env bash

    function delete_sure(){
      cat << eof
    $(echo -e "\033[1;36mNote:\033[0m")
    Delete the KubeSphere cluster, including the module kubesphere-system kubesphere-devops-system kubesphere-devops-worker kubesphere-monitoring-system kubesphere-logging-system openpitrix-system.
    eof

    read -p "Please reconfirm that you want to delete the KubeSphere cluster. (yes/no) " ans
    while [[ "x"$ans != "xyes" && "x"$ans != "xno" ]]; do
      read -p "Please reconfirm that you want to delete the KubeSphere cluster. (yes/no) " ans
    done

    if [[ "x"$ans == "xno" ]]; then
      exit
    fi
    }

    delete_sure

    # delete ks-installer
    kubectl delete deploy ks-installer -n kubesphere-system 2>/dev/null

    # delete helm
    for namespaces in kubesphere-system kubesphere-devops-system kubesphere-monitoring-system kubesphere-logging-system openpitrix-system kubesphere-monitoring-federated
    do
      helm list -n $namespaces | grep -v NAME | awk '{print $1}' | sort -u | xargs -r -L1 helm uninstall -n $namespaces 2>/dev/null
    done

    # delete kubefed
    kubectl get cc -n kubesphere-system ks-installer -o jsonpath="{.status.multicluster}" | grep enable
    if [[ $? -eq 0 ]]; then
      # delete kubefed types resources
      for kubefed in `kubectl api-resources --namespaced=true --api-group=types.kubefed.io -o name`
      do
        kubectl delete -n kube-federation-system $kubefed --all 2>/dev/null
      done
      for kubefed in `kubectl api-resources --namespaced=false --api-group=types.kubefed.io -o name`
      do
        kubectl delete $kubefed --all 2>/dev/null
      done
      # delete kubefed core resouces
      for kubefed in `kubectl api-resources --namespaced=true --api-group=core.kubefed.io -o name`
      do
        kubectl delete -n kube-federation-system $kubefed --all 2>/dev/null
      done
      for kubefed in `kubectl api-resources --namespaced=false --api-group=core.kubefed.io -o name`
      do
        kubectl delete $kubefed --all 2>/dev/null
      done
      # uninstall kubefed chart
      helm uninstall -n kube-federation-system kubefed 2>/dev/null
    fi

    helm uninstall -n kube-system snapshot-controller 2>/dev/null

    # delete kubesphere deployment & statefulset
    kubectl delete deployment -n kubesphere-system `kubectl get deployment -n kubesphere-system -o jsonpath="{.items[*].metadata.name}"` 2>/dev/null
    kubectl delete statefulset -n kubesphere-system `kubectl get statefulset -n kubesphere-system -o jsonpath="{.items[*].metadata.name}"` 2>/dev/null

    # delete monitor resources
    kubectl delete prometheus -n kubesphere-monitoring-system k8s 2>/dev/null
    kubectl delete Alertmanager -n kubesphere-monitoring-system main 2>/dev/null
    kubectl delete DaemonSet -n kubesphere-monitoring-system node-exporter 2>/dev/null
    kubectl delete statefulset -n kubesphere-monitoring-system `kubectl get statefulset -n kubesphere-monitoring-system -o jsonpath="{.items[*].metadata.name}"` 2>/dev/null

    # delete grafana
    kubectl delete deployment -n kubesphere-monitoring-system grafana 2>/dev/null
    kubectl --no-headers=true get pvc -n kubesphere-monitoring-system -o custom-columns=:metadata.namespace,:metadata.name | grep -E kubesphere-monitoring-system | xargs -n2 kubectl delete pvc -n 2>/dev/null

    # delete pvc
    pvcs="kubesphere-system|openpitrix-system|kubesphere-devops-system|kubesphere-logging-system"
    kubectl --no-headers=true get pvc --all-namespaces -o custom-columns=:metadata.namespace,:metadata.name | grep -E $pvcs | xargs -n2 kubectl delete pvc -n 2>/dev/null

    # delete rolebindings
    delete_role_bindings() {
      for rolebinding in `kubectl -n $1 get rolebindings -l iam.kubesphere.io/user-ref -o jsonpath="{.items[*].metadata.name}"`
      do
        kubectl -n $1 delete rolebinding $rolebinding 2>/dev/null
      done
    }

    # delete roles
    delete_roles() {
      kubectl -n $1 delete role admin 2>/dev/null
      kubectl -n $1 delete role operator 2>/dev/null
      kubectl -n $1 delete role viewer 2>/dev/null
      for role in `kubectl -n $1 get roles -l iam.kubesphere.io/role-template -o jsonpath="{.items[*].metadata.name}"`
      do
        kubectl -n $1 delete role $role 2>/dev/null
      done
    }

    # remove useless labels and finalizers
    for ns in `kubectl get ns -o jsonpath="{.items[*].metadata.name}"`
    do
      kubectl label ns $ns kubesphere.io/workspace-
      kubectl label ns $ns kubesphere.io/namespace-
      kubectl patch ns $ns -p '{"metadata":{"finalizers":null,"ownerReferences":null}}'
      delete_role_bindings $ns
      delete_roles $ns
    done

    # delete clusterroles
    delete_cluster_roles() {
      for role in `kubectl get clusterrole -l iam.kubesphere.io/role-template -o jsonpath="{.items[*].metadata.name}"`
      do
        kubectl delete clusterrole $role 2>/dev/null
      done
      for role in `kubectl get clusterroles | grep "kubesphere" | awk '{print $1}'| paste -sd " "`
      do
        kubectl delete clusterrole $role 2>/dev/null
      done
    }
    delete_cluster_roles

    # delete clusterrolebindings
    delete_cluster_role_bindings() {
      for rolebinding in `kubectl get clusterrolebindings -l iam.kubesphere.io/role-template -o jsonpath="{.items[*].metadata.name}"`
      do
        kubectl delete clusterrolebindings $rolebinding 2>/dev/null
      done
      for rolebinding in `kubectl get clusterrolebindings | grep "kubesphere" | awk '{print $1}'| paste -sd " "`
      do
        kubectl delete clusterrolebindings $rolebinding 2>/dev/null
      done
    }
    delete_cluster_role_bindings

    # delete clusters
    for cluster in `kubectl get clusters -o jsonpath="{.items[*].metadata.name}"`
    do
      kubectl patch cluster $cluster -p '{"metadata":{"finalizers":null}}' --type=merge
    done
    kubectl delete clusters --all 2>/dev/null

    # delete workspaces
    for ws in `kubectl get workspaces -o jsonpath="{.items[*].metadata.name}"`
    do
      kubectl patch workspace $ws -p '{"metadata":{"finalizers":null}}' --type=merge
    done
    kubectl delete workspaces --all 2>/dev/null

    # make DevOps CRs deletable
    for devops_crd in $(kubectl get crd -o=jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep "devops.kubesphere.io"); do
      for ns in $(kubectl get ns -ojsonpath='{.items..metadata.name}'); do
        for devops_res in $(kubectl get $devops_crd -n $ns -oname); do
          kubectl patch $devops_res -n $ns -p '{"metadata":{"finalizers":[]}}' --type=merge
        done
      done
    done

    # delete validatingwebhookconfigurations
    for webhook in ks-events-admission-validate users.iam.kubesphere.io network.kubesphere.io validating-webhook-configuration resourcesquotas.quota.kubesphere.io
    do
      kubectl delete validatingwebhookconfigurations.admissionregistration.k8s.io $webhook 2>/dev/null
    done

    # delete mutatingwebhookconfigurations
    for webhook in ks-events-admission-mutate logsidecar-injector-admission-mutate mutating-webhook-configuration
    do
      kubectl delete mutatingwebhookconfigurations.admissionregistration.k8s.io $webhook 2>/dev/null
    done

    # delete users
    for user in `kubectl get users -o jsonpath="{.items[*].metadata.name}"`
    do
      kubectl patch user $user -p '{"metadata":{"finalizers":null}}' --type=merge
    done
    kubectl delete users --all 2>/dev/null

    # delete helm resources
    for resource_type in `echo helmcategories helmapplications helmapplicationversions helmrepos helmreleases`; do
      for resource_name in `kubectl get ${resource_type}.application.kubesphere.io -o jsonpath="{.items[*].metadata.name}"`; do
        kubectl patch ${resource_type}.application.kubesphere.io ${resource_name} -p '{"metadata":{"finalizers":null}}' --type=merge
      done
      kubectl delete ${resource_type}.application.kubesphere.io --all 2>/dev/null
    done

    # delete workspacetemplates
    for workspacetemplate in `kubectl get workspacetemplates.tenant.kubesphere.io -o jsonpath="{.items[*].metadata.name}"`
    do
      kubectl patch workspacetemplates.tenant.kubesphere.io $workspacetemplate -p '{"metadata":{"finalizers":null}}' --type=merge
    done
    kubectl delete workspacetemplates.tenant.kubesphere.io --all 2>/dev/null

    # delete federatednamespaces in namespace kubesphere-monitoring-federated
    for resource in $(kubectl get federatednamespaces.types.kubefed.io -n kubesphere-monitoring-federated -oname); do
      kubectl patch "${resource}" -p '{"metadata":{"finalizers":null}}' --type=merge -n kubesphere-monitoring-federated
    done

    # delete crds
    for crd in `kubectl get crds -o jsonpath="{.items[*].metadata.name}"`
    do
      if [[ $crd == *kubesphere.io ]] || [[ $crd == *kubefed.io ]] ; then kubectl delete crd $crd 2>/dev/null; fi
    done

    # delete relevance ns
    for ns in kube-federation-system kubesphere-alerting-system kubesphere-controls-system kubesphere-devops-system kubesphere-devops-worker kubesphere-logging-system kubesphere-monitoring-system kubesphere-monitoring-federated openpitrix-system kubesphere-system
    do
      kubectl delete ns $ns 2>/dev/null
    done

    6. Change the image pull policy
     

    kubectl -n kubesphere-system edit deployments.apps ks-apiserver

    kubectl -n kubesphere-system edit deployments.apps ks-console

    kubectl -n kubesphere-system edit deployments.apps ks-controller-manager

    Set imagePullPolicy to IfNotPresent, which pulls the image only when it is not already present on the node.
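    If you prefer not to open an editor for each Deployment, here is a hedged sketch of the same change with kubectl patch (it assumes each Deployment has a single container at index 0; repeat for ks-console and ks-controller-manager):

    kubectl -n kubesphere-system patch deployment ks-apiserver --type='json' \
      -p='[{"op":"replace","path":"/spec/template/spec/containers/0/imagePullPolicy","value":"IfNotPresent"}]'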

    Reference blog: https://www.toutiao.com/c/user/token/MS4wLjABAAAA0YFomuMNm87NNysXeUsQdI0Tt3gOgz8WG_0B3MzxsmI/?is_new_connect=0&is_new_user=0&tab=article&wid=1649086635305

  • Original article: https://blog.csdn.net/qq_35583325/article/details/126950754