主机名 | 系统 | 挂载情况 | 网卡ens33,Lnet的IP地址 | Lustre集群名 | 内存 |
---|---|---|---|---|---|
mds005 | CentOS 7.9 | (共享磁盘)1个MGS,2个MDT,2个OST | 192.168.10.25/192.168.209.25 | global | 2G |
mds006 | CentOS 7.9 | (共享磁盘)1个MGS,2个MDT,2个OST | 192.168.10.26/192.168.209.26 | global | 2G |
manager | Centos7.9 | 无 | 192.168.10.5 | 无 | 2G |
注意:自动化脚本lustre_auto.sh在manager节点上运行;两个集群节点共享五块4G磁盘(/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde,/dev/sdf),并且各有两个网卡(分别用于管理IP地址和Lnet通信IP地址)。
- # Management IP addresses of the cluster nodes (the addresses on ens33)
- host_address=(192.168.10.25 192.168.10.26)
- # LNet communication IP addresses (on ens38; another NIC also works as long as both nodes use the same NIC name)
- lnet_address=(192.168.209.25 192.168.209.26)
- # Host names of the cluster nodes, used index-aligned with host_address
- host_hostname=(mds005 mds006)
- # Password for the root ssh logins; also set as the hacluster password for cluster authentication
- # NOTE(review): the credential is hard-coded in plain text — consider prompting for it or reading it from the environment
- host_passwd=110119
- # Make sure the expect command is available; install it with yum when it is missing
- if ! expect -v &> /dev/null; then
-   echo "没有expect,安装expect命令"
-   yum install -y expect
- fi
- # Passwordless login setup: generate the local RSA key pair unless a public key already exists
- echo "########################## 本地开始配置ssh ##########################"
- if [ -e ~/.ssh/id_rsa.pub ]; then
-   echo "ssh公钥已创建"
- else
-   echo "ssh公钥未创建,开始创建"
-   /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- spawn ssh-keygen -t rsa -b 1024
- # Keep answering prompts until ssh-keygen finishes
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- ".ssh/id_rsa)" { send "\n"; exp_continue }
- "Overwrite (y/n)?" { send "y\n"; exp_continue }
- "no passphrase):" { send "\n"; exp_continue }
- "passphrase again:" { send "\n"; exp_continue }
- }
- eof
- fi
-
-
-
- # 本地的密钥开始加入被控制主机
- for ((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}正在被添加公钥 ##########################"
- /usr/bin/expect << eof
- # 设置捕获字符串后,期待回复的超时时间
- set timeout 10
-
- spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[j]}
-
- ## 开始进连续捕获
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- }
- eof
- echo "############# ${host_address[j]}配置完毕 #############"
- done
-
-
-
-
- # Create an RSA key pair on every managed host that does not have one yet
- for ((j=0;j<2;j++));do
-   echo "########################## ${host_address[j]}开始创建密钥 ##########################"
-   # Test the exit status of the remote command directly: the old form compared the
-   # unquoted output of a command substitution, which turned into a "[" syntax error
-   # whenever ssh printed nothing (e.g. the host was unreachable)
-   if ssh "root@${host_address[j]}" 'test -e ~/.ssh/id_rsa.pub';then
-     echo "ssh公钥已创建"
-   else
-     echo "ssh公钥未创建,开始创建"
-     /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- spawn ssh root@${host_address[j]} "ssh-keygen -t rsa -b 1024"
- # Keep answering prompts until the remote ssh-keygen finishes
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- ".ssh/id_rsa)" { send "\n"; exp_continue }
- "Overwrite (y/n)?" { send "y\n"; exp_continue }
- "no passphrase):" { send "\n"; exp_continue }
- "passphrase again:" { send "\n"; exp_continue }
- }
- eof
-   fi
-   echo "############# ${host_address[j]}配置完毕 #############"
- done
-
-
-
- # Distribute the public key of each managed host to every managed host (itself included)
- for ((j=0;j<2;j++));do
-   for ((k=0;k<2;k++));do
-     echo "########################## ${host_address[j]}开始分配公钥给${host_address[k]} ##########################"
-     /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- # Only one process is spawned: an extra interactive "ssh host ssh peer" spawn used to
- # precede this one, but it was never driven by the expect block and was left orphaned
- spawn ssh -t root@${host_address[j]} "ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[k]}"
- # Accept the host key and type the password whenever asked
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- }
- eof
-     echo "############# ${host_address[j]}配置完毕 #############"
-   done
- done
- # Optional step: set the host names and rewrite /etc/hosts on every managed host
- for ((i=0;i<100;i++));do
-   read -p "修改主机名和配置域名映射?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始配置主机名和域名映射 ##########################"
-       for j in 0 1; do
-         # Rename the host only when its current name differs from the wanted one
-         if [ $(ssh root@${host_address[j]} "hostname") != "${host_hostname[j]}" ];then
-           ssh root@${host_address[j]} "hostnamectl set-hostname ${host_hostname[j]}"
-         fi
-         # Start from a clean /etc/hosts that only holds the loopback entries
-         ssh root@${host_address[j]} "cat << eof > /etc/hosts
- 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
- ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
- eof"
-         # Then append one "address name" line per cluster node
-         for k in 0 1; do
-           ssh root@${host_address[j]} "echo '${host_address[k]} ${host_hostname[k]}' >> /etc/hosts"
-         done
-       done
-       echo "############# 配置完毕 #############"
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-修改主机名和配置域名映射 #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
-
- # Verify that every managed host can log in to every managed host without a password
- for ((j=0;j<2;j++));do
-   echo "########################## ${host_address[j]}开始测试 ##########################"
-   for ((k=0;k<2;k++));do
-     /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- spawn ssh -t root@${host_address[j]} "ssh root@${host_address[k]} 'exit'"
- # A password prompt or a timeout means passwordless login is not working
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { exit 1 }
- timeout { exit 1 }
- eof { }
- }
- # Hand the exit status of the spawned ssh back to the shell; without this expect
- # always exits 0 and the check below can never fail
- set result [wait]
- exit [lindex \$result 3]
- eof
-     if [ $? -ne 0 ];then
-       echo "${host_hostname[j]}主机无法免密登录${host_hostname[k]}"
-       exit 1
-     fi
-   done
-   echo "############# ${host_address[j]}测试完毕 #############"
- done
- # Optional step: stop and disable firewalld and set SELINUX=disabled in /etc/selinux/config
- for ((i=0;i<100;i++));do
-   read -p "配置防火墙和selinux?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始配置防火墙和selinux ##########################"
-       for j in 0 1; do
-         ssh root@${host_address[j]} "systemctl stop firewalld;systemctl disable firewalld"
-         ssh root@${host_address[j]} "sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
-       done
-       echo "############# 配置完毕 #############"
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-配置防火墙和selinux #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
- # Optional step: configure the local (cdrom), EPEL and Aliyun yum repositories on every managed host
- for ((i=0;i<100;i++));do
-   read -p "配置yum源?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     echo "########################## 开始配置yum源 ##########################"
-     for ((j=0;j<2;j++));do
-       echo "########################## 配置${host_address[j]}的本地yum源 ##########################"
-       ssh root@${host_address[j]} "mkdir /mnt/cdrom &> /dev/null;mount /dev/cdrom /mnt/cdrom"
-       # Add the cdrom to /etc/fstab only once
-       if [ -z "$(ssh root@${host_address[j]} "grep '^\/dev\/cdrom' /etc/fstab")" ];then
-         ssh root@${host_address[j]} "cat << eof >> /etc/fstab
- /dev/cdrom /mnt/cdrom iso9660 defaults 0 0
- eof"
-       fi
-       ssh root@${host_address[j]} "cat << eof > /etc/yum.repos.d/centos-local.repo
- [centos7.9]
- name=centos7.9
- baseurl=file:///mnt/cdrom
- enabled=1
- gpgcheck=0
- eof"
-       echo "############# ${host_address[j]}配置完毕 #############"
-       echo "########################## 配置${host_address[j]}的扩展源 ##########################"
-       ssh root@${host_address[j]} "yum install epel-release -y"
-       echo "############# ${host_address[j]}配置完毕 #############"
-       echo "########################## 配置${host_address[j]}的阿里yum源 ##########################"
-       ssh root@${host_address[j]} "yum install -y wget"
-       # Replace CentOS-Base.repo with the Aliyun mirror definition when the file exists
-       if ssh root@${host_address[j]} 'test -e /etc/yum.repos.d/CentOS-Base.repo';then
-         ssh root@${host_address[j]} "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"
-       fi
-       # Check the status of the yum command itself. The old check ran ssh host "echo $?",
-       # where $? was expanded locally and was always 0, so failures went unnoticed
-       if ! ssh root@${host_address[j]} "yum clean all && yum repolist";then
-         echo "yum源配置有误,退出执行脚本"
-         exit 1
-       fi
-       echo "############# ${host_address[j]}配置完毕 #############"
-     done
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-配置yum源 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
- # Optional step: make the first node the chrony time server and point every node at it
- for ((i=0;i<100;i++));do
-   read -p "配置chrony时间服务器?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     echo "########################## 开始配置chrony ##########################"
-     for ((j=0;j<2;j++));do
-       # Install chrony when the service cannot be restarted. The old test embedded $? in a
-       # double-quoted ssh command, so it was expanded locally instead of on the remote host
-       if ! ssh root@${host_address[j]} "systemctl restart chronyd";then
-         echo "${host_address[j]} 安装chrony"
-         if ! ssh root@${host_address[j]} "yum install -y chrony && systemctl restart chronyd";then
-           echo "安装失败,请排错!"
-           exit 1
-         fi
-       fi
-       echo "${host_address[j]}配置chrony"
-       # Drop the default server lines and insert the first node as the only time source
-       ssh root@${host_address[j]} "sed -i '/^server/d' /etc/chrony.conf"
-       ssh root@${host_address[j]} "sed -i '2a server ${host_address[0]} iburst' /etc/chrony.conf"
-       if [ "${host_address[j]}" == "${host_address[0]}" ];then
-         # The first node additionally serves time to the cluster network
-         # NOTE(review): 192.168.10.0/16 mixes a /24-style address with a /16 prefix — confirm the intended subnet
-         ssh root@${host_address[j]} "sed -i 's/#allow 192.168.0.0\/16/allow 192.168.10.0\/16/' /etc/chrony.conf"
-         ssh root@${host_address[j]} "sed -i 's/#local stratum 10/local stratum 10/' /etc/chrony.conf"
-         sleep 2
-       fi
-       ssh root@${host_address[j]} "systemctl restart chronyd && systemctl enable chronyd &> /dev/null"
-       # Give chronyd a moment to select its source before inspecting it
-       sleep 5
-       ssh root@${host_address[j]} "timedatectl set-ntp true && chronyc sources -v | sed -n '/^\^\*/p'"
-       # A line starting with "^*" marks the currently selected source; none means no sync
-       if [ -z "$(ssh root@${host_address[j]} "chronyc sources -v | sed -n '/^\^\*/p'")" ];then
-         echo -e "\e[31m此节点${host_address[j]}的chrony配置有误,请手动调试\e[0m"
-         exit 1
-       fi
-       echo "############# ${host_address[j]}配置完毕 #############"
-     done
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-配置chrony时间服务器 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
- # Optional step: download the Whamcloud e2fsprogs 1.44.5 RPMs and install them on every managed host
- for ((i=0;i<100;i++));do
-   read -p "安装e2fsprogs?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始安装e2fsprogs ##########################"
-       for j in 0 1; do
-         echo "########################## ${host_address[j]}开始安装e2fsprogs ##########################"
-         # Fresh download directory, recursive fetch, then remove the index and checksum leftovers
-         ssh root@${host_address[j]} "rm -rf ~/e2fsprogs1.44.5 && mkdir ~/e2fsprogs1.44.5"
-         ssh root@${host_address[j]} "wget -c -r -nd https://downloads.whamcloud.com/public/e2fsprogs/1.44.5.wc1/el7/RPMS/x86_64/ -P ~/e2fsprogs1.44.5"
-         ssh root@${host_address[j]} "rm -rf ~/e2fsprogs1.44.5/index.html* ~/e2fsprogs1.44.5/unknown.gif ~/e2fsprogs1.44.5/*.gif ~/e2fsprogs1.44.5/sha256sum"
-         ssh root@${host_address[j]} "rpm -Uvh ~/e2fsprogs1.44.5/* --force"
-         # The step counts as failed when no e2fsprogs package is listed afterwards
-         if ! ssh root@${host_address[j]} "rpm -qa | grep e2fsprogs";then
-           echo "安装失败,请排错!"
-           exit
-         fi
-         echo "############# ${host_address[j]}配置完毕 #############"
-       done
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-安装e2fsprogs #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
- # Optional step: download the Lustre 2.12.1 server RPMs (patched kernel included) and install them on every managed host
- for ((i=0;i<100;i++));do
-   read -p "安装Lustre软件?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始安装lustre ##########################"
-       for j in 0 1; do
-         echo "########################## ${host_address[j]}开始安装lustre ##########################"
-         ssh root@${host_address[j]} "yum install -y linux-firmware dracut selinux-policy-targeted kexec-tools libyaml perl"
-         ssh root@${host_address[j]} "rm -rf ~/lustre2.12.1 && mkdir ~/lustre2.12.1"
-         # Fetch the packages one by one, in the same order as before
-         rpm_url="https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64"
-         for pkg in \
-           kernel-3.10.0-957.10.1.el7_lustre.x86_64.rpm \
-           kmod-lustre-2.12.1-1.el7.x86_64.rpm \
-           kmod-lustre-osd-ldiskfs-2.12.1-1.el7.x86_64.rpm \
-           lustre-2.12.1-1.el7.x86_64.rpm \
-           lustre-osd-ldiskfs-mount-2.12.1-1.el7.x86_64.rpm; do
-           ssh root@${host_address[j]} "wget ${rpm_url}/${pkg} -P ~/lustre2.12.1"
-         done
-         ssh root@${host_address[j]} "rpm -Uvh ~/lustre2.12.1/*.rpm --force"
-         # The step counts as failed when no lustre package is listed afterwards
-         if ! ssh root@${host_address[j]} "rpm -qa | grep lustre";then
-           echo "安装失败,请排错!"
-           exit
-         fi
-         echo "############# ${host_address[j]}配置完毕 #############"
-       done
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-安装lustre #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
- # Optional step: reboot both nodes (needed to boot into the newly installed kernel) and wait until they are back
- for ((i=0;i<100;i++));do
-   read -p "是否重启集群主机(只有重启kernel内核才能更换生效)?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     for((j=0;j<2;j++));do
-       ssh root@${host_address[j]} "reboot"
-     done
-     # Let the hosts actually go down first; polling immediately could still reach the
-     # old sshd and report a finished reboot before it has started
-     sleep 10
-     for ((k=0;k<100;k++));do
-       if [ ${k} -eq 99 ];then
-         echo "############# 设备连接超时.... #############"
-         exit 1
-       fi
-       # Both nodes must accept an ssh connection again
-       if ssh -o ConnectTimeout=5 root@${host_address[0]} "exit" && ssh -o ConnectTimeout=5 root@${host_address[1]} "exit";then
-         echo "############# 设备已重启 #############"
-         break
-       fi
-       echo "############# 设备正在重启 #############"
-       # Pause between attempts so a refused connection does not turn this into a busy loop
-       sleep 5
-     done
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-重启 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
- # 检查lustre
- for((i=0;i<2;i++));do
- echo "########################## ${host_address[i]}加载Lustre模块,查看Lustre版本 ##########################"
- ssh root@${host_address[i]} "modprobe lustre && lsmod | grep lustre"
- ssh root@${host_address[i]} "modinfo lustre"
- echo "############# ${host_address[i]}配置完毕 #############"
- done
- # Optional step: assign the LNet IP address to a NIC chosen by the operator and configure the LNet networks
- for ((i=0;i<100;i++));do
-   read -p "是否继续配置Lnet网卡的IP地址?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     for((j=0;j<2;j++));do
-       echo "########################## ${host_address[j]}配置Lnet网卡的IP地址 ##########################"
-       # The prompt below refers to "the output above", so list the NICs of this host first
-       ssh root@${host_address[j]} "ip addr"
-       read -p "请根据以上输出显示中,输入你要配置的LnNet网卡名称:" network_card
-       # Reject empty input and require an exact interface name (the old substring grep
-       # accepted partial names such as "ens3")
-       if [ -z "${network_card}" ] || ! ssh root@${host_address[j]} "ip link show dev '${network_card}' &> /dev/null";then
-         echo "网卡不存在,请重试"
-         exit 1
-       fi
-       # Recreate the NetworkManager connection with the static LNet address
-       ssh root@${host_address[j]} "nmcli connection delete ${network_card} &> /dev/null"
-       ssh root@${host_address[j]} "nmcli connection add type ethernet con-name ${network_card} ifname ${network_card} ipv4.method manual ipv4.addresses '${lnet_address[j]}/24' autoconnect yes"
-       ssh root@${host_address[j]} "nmcli connection up ${network_card}"
-       echo "############# ${host_address[j]}配置完毕 #############"
-       echo "########################## ${host_address[j]}配置Lnet网络 ##########################"
-       # tcp runs over ens33, tcp2 over the NIC that was just configured
-       ssh root@${host_address[j]} "echo options lnet networks='tcp(ens33),tcp2(${network_card})' > /etc/modprobe.d/lustre.conf"
-       # Reload the modules so the new network option takes effect
-       ssh root@${host_address[j]} "lustre_rmmod && modprobe -v lustre"
-       echo "############# ${host_address[j]}配置完毕 #############"
-       echo "############# ${host_address[j]}查看Lnet网络 #############"
-       ssh root@${host_address[j]} "lctl list_nids"
-       echo "##########################################################"
-     done
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-配置Lnet网卡的IP地址 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
- # Optional step: format the five shared disks from the first node (MGS, two MDTs, two OSTs) and create the mount points
- for ((i=0;i<100;i++));do
-   read -p "是否继续格式化lustre?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始格式化lustre ##########################"
-       ssh root@${host_address[0]} "lsblk"
-       # Both nodes are listed as service nodes and as MGS nodes on the tcp2 network
-       svc_opts="--servicenode=${lnet_address[0]}@tcp2 --servicenode=${lnet_address[1]}@tcp2"
-       mgs_opts="--mgsnode=${lnet_address[0]}@tcp2 --mgsnode=${lnet_address[1]}@tcp2"
-       fs_tail="--backfstype=ldiskfs --reformat"
-       ssh root@${host_address[0]} "mkfs.lustre --mgs ${svc_opts} ${fs_tail} /dev/sdb"
-       ssh root@${host_address[0]} "mkfs.lustre --fsname global --mdt --index=0 ${svc_opts} ${mgs_opts} ${fs_tail} /dev/sdc"
-       ssh root@${host_address[0]} "mkfs.lustre --fsname global --mdt --index=1 ${svc_opts} ${mgs_opts} ${fs_tail} /dev/sdd"
-       ssh root@${host_address[0]} "mkfs.lustre --fsname global --ost --index=0 ${svc_opts} ${mgs_opts} ${fs_tail} /dev/sde"
-       ssh root@${host_address[0]} "mkfs.lustre --fsname global --ost --index=1 ${svc_opts} ${mgs_opts} ${fs_tail} /dev/sdf"
-       echo "############# 格式化完毕 #############"
-       echo "########################## 创建挂载点目录 ##########################"
-       for j in 0 1; do
-         ssh root@${host_address[j]} "mkdir /mnt/mgs &> /dev/null;mkdir /mnt/mdt1 &> /dev/null;mkdir /mnt/mdt2 &> /dev/null;mkdir /mnt/ost1 &> /dev/null;mkdir /mnt/ost2 &> /dev/null"
-       done
-       echo "############# 创建完毕 #############"
-       # Inspect the result from the second node to confirm it sees the shared disks as formatted
-       echo "############# ${host_address[1]}查看格式化 #############"
-       ssh root@${host_address[1]} "blkid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf"
-       echo "##########################################################"
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-格式化lustre #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
- # Optional step: on each node in turn, mount all five Lustre targets and unmount them again
- for ((i=0;i<100;i++));do
-   read -p "是否继续测试lustre的挂载?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       # Targets in mount order, with the device backing each of them
-       mnt_names=(mgs mdt1 mdt2 ost1 ost2)
-       mnt_devs=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
-       for j in 0 1; do
-         echo "########################## ${host_address[j]}测试挂载 ##########################"
-         # Unmount command for everything mounted so far, most recent target first
-         undo_cmd=""
-         for t in 0 1 2 3 4; do
-           echo "########################## ${mnt_names[t]}测试挂载 ##########################"
-           ssh root@${host_address[j]} "umount /mnt/${mnt_names[t]} &> /dev/null"
-           if ! ssh root@${host_address[j]} "mount -t lustre ${mnt_devs[t]} /mnt/${mnt_names[t]}";then
-             echo "############# ${mnt_names[t]}测试有误,请手动查看! #############"
-             # Release whatever was mounted before the failing target, then stop
-             if [ -n "${undo_cmd}" ];then
-               ssh root@${host_address[j]} "${undo_cmd}"
-             fi
-             exit
-           fi
-           undo_cmd="umount /mnt/${mnt_names[t]}${undo_cmd:+;${undo_cmd}}"
-         done
-         # All targets mounted fine: unmount them in reverse order
-         ssh root@${host_address[j]} "${undo_cmd}"
-       done
-       echo "############# 测试完毕 #############"
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-测试挂载 #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
-
- # Optional step: install pacemaker/pcs, authenticate the nodes, create the cluster and start it
- for ((i=0;i<100;i++));do
-   read -p "是否继续安装packemaker和corosync软件和创建集群?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     # One name for both the existence check and the setup. The check used to look for
-     # "mycluster" while the cluster was created as "mylustre", so it never matched and
-     # the installation and setup were repeated on every run
-     cluster_name=mylustre
-     if [ -z "$(ssh root@${host_address[0]} 'pcs status' | grep "${cluster_name}")" ];then
-       for((j=0;j<2;j++));do
-         echo "########################## ${host_address[j]}开始安装 ##########################"
-         ssh root@${host_address[j]} "yum install pacemaker pcs policycoreutils-python -y"
-         echo "############# ${host_address[j]}安装完毕 #############"
-         echo "########################## ${host_address[j]}开始配置 ##########################"
-         ssh root@${host_address[j]} "systemctl enable pcsd;systemctl restart pcsd"
-         # The hacluster account is the one pcs uses for node authentication
-         ssh root@${host_address[j]} "echo '${host_passwd}' |passwd --stdin hacluster"
-         echo "############# ${host_address[j]}配置完毕 #############"
-       done
-       /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- spawn ssh root@${host_address[0]} "pcs cluster auth ${host_hostname[*]}"
- # Answer the credential prompts of pcs cluster auth
- expect {
- "Username:" { send "hacluster\n"; exp_continue }
- "Password:" { send "${host_passwd}\n"; exp_continue }
- }
- eof
-       echo "########################## 开始创建集群 ##########################"
-       ssh root@${host_address[0]} "pcs cluster setup --name ${cluster_name} ${host_hostname[*]}"
-       echo "############# 创建完毕 #############"
-     fi
-     echo "########################## 启动集群 ##########################"
-     ssh root@${host_address[0]} "pcs cluster start --all"
-     echo "############# 启动完毕 #############"
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-安装packemaker和corosync软件和配置 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
-
-
- # Optional step: install the fence agents and create one fence_heuristics_ping stonith resource per node
- for ((i=0;i<100;i++));do
-   read -p "是否继续配置资源防护?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     echo "########################## 开始配置资源防护 ##########################"
-     for((j=0;j<2;j++));do
-       ssh root@${host_address[j]} "yum install -y fence-agents-all"
-     done
-     ssh root@${host_address[0]} "pcs property set stonith-enabled=true"
-     # Query the first node explicitly: the old code used host_address[j] after the loop
-     # above had finished (j == 2), i.e. an empty host name
-     stonith_lines=$(ssh root@${host_address[0]} "pcs status" | grep "stonith:fence_heuristics_ping")
-     started_count=$(grep -c "Started" <<< "${stonith_lines}")
-     if [ "${started_count}" -eq 2 ];then
-       echo "############# stonith已创建,并且正常运行,跳过配置stonith #############"
-       break
-     fi
-     # Stonith resources exist but are not all running: remove both before recreating them.
-     # The old code compared grep text with -eq and deleted the first resource twice
-     if [ -n "${stonith_lines}" ];then
-       for((j=0;j<2;j++));do
-         ssh root@${host_address[0]} "pcs stonith delete stonith-ping-${host_hostname[j]} &> /dev/null"
-       done
-     fi
-     ssh root@${host_address[0]} "pcs stonith create stonith-ping-${host_hostname[0]} fence_heuristics_ping ping_targets=${host_address[0]}"
-     ssh root@${host_address[0]} "pcs stonith create stonith-ping-${host_hostname[1]} fence_heuristics_ping ping_targets=${host_address[1]}"
-     ssh root@${host_address[0]} "pcs status"
-     echo "############# 配置完毕 #############"
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-配置资源防护 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
- # Optional step: (re)create the Lustre resources, their constraints and the health monitors in Pacemaker
- for ((i=0;i<100;i++));do
-   read -p "是否继续创建lustre资源(可删除后再创建)?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     for((j=0;j<2;j++));do
-       # Install the resource agent package when it is missing (exit status instead of an unquoted -z test)
-       if ! ssh root@${host_address[j]} "rpm -qa | grep -q lustre-resource-agents-2.12.1-1";then
-         echo "########################## ${host_address[j]}安装ocf:lustre:Lustre包 ##########################"
-         ssh root@${host_address[j]} "wget https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64/lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
-         ssh root@${host_address[j]} "rpm -ivh lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
-         echo "############# 安装完毕 #############"
-       fi
-     done
-     echo "########################## 开始删除lustre资源 ##########################"
-     ssh root@${host_address[0]} "pcs resource delete global-ost2 &> /dev/null;pcs resource delete global-ost1 &> /dev/null"
-     ssh root@${host_address[0]} "pcs resource delete global-mdt1 &> /dev/null;pcs resource delete global-mdt2 &> /dev/null"
-     ssh root@${host_address[0]} "pcs resource delete global-mgs &> /dev/null"
-     echo "############# 删除完毕 #############"
-     echo "########################## 开始创建lustre资源 ##########################"
-     # Parallel arrays: resource suffix, backing device, index of the preferred node in host_hostname
-     res_names=(mgs mdt1 mdt2 ost1 ost2)
-     res_devs=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
-     res_nodes=(0 0 1 0 1)
-     for ((r=0;r<5;r++));do
-       if ! ssh root@${host_address[0]} "pcs resource create global-${res_names[r]} ocf:lustre:Lustre target=${res_devs[r]} mountpoint=/mnt/${res_names[r]}";then
-         echo "############# ${res_names[r]}资源创建有误,请手动查看! #############"
-         exit 1
-       fi
-     done
-     # Track the status of every constraint command; previously only the last one was checked
-     constraint_failed=0
-     for ((r=0;r<5;r++));do
-       ssh root@${host_address[0]} "pcs constraint location add global-constraint-${res_names[r]} global-${res_names[r]} ${host_hostname[${res_nodes[r]}]} 10" || constraint_failed=1
-     done
-     # Every MDT/OST may only start after the MGS
-     for ((r=1;r<5;r++));do
-       ssh root@${host_address[0]} "pcs constraint order start global-mgs then start global-${res_names[r]}" || constraint_failed=1
-     done
-     if [ ${constraint_failed} -ne 0 ];then
-       echo "############# 资源约束创建有误,请手动查看! #############"
-       exit 1
-     fi
-     # Read the NIC name of the tcp2 network back from the lnet module options. The awk
-     # field reference is escaped so that it reaches the remote awk instead of being
-     # expanded by the local shell
-     lnet_name1=$(ssh root@${host_address[0]} "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
-     # NOTE(review): lnet_name2 is not used anywhere in this step — kept in case later code relies on it
-     lnet_name2=$(ssh root@${host_address[1]} "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
-     ssh root@${host_address[0]} "pcs resource delete ping-lnet &>/dev/null;pcs resource delete global-healthLUSTRE &> /dev/null"
-     # Fail when either health monitor cannot be created (only the second one was checked before)
-     if ! ssh root@${host_address[0]} "pcs resource create ping-lnet ocf:lustre:healthLNET lctl=true multiplier=1001 device=${lnet_name1} host_list='${lnet_address[0]}@tcp2 ${lnet_address[1]}@tcp2' --clone" \
-       || ! ssh root@${host_address[0]} "pcs resource create global-healthLUSTRE ocf:lustre:healthLUSTRE --clone";then
-       echo "############# 资源监听创建有误,请手动查看! #############"
-       exit 1
-     fi
-     echo "############# 创建完毕 #############"
-     echo "########################## 查看集群状态 ##########################"
-     echo "########################################################################"
-     echo "########################################################################"
-     ssh root@${host_address[0]} "pcs status"
-     echo "########################################################################"
-     echo "########################################################################"
-     echo "################################################## 脚本到此全部执行完毕 ##################################################"
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-创建lustre资源 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
- # Automated deployment script for a two-node high-availability Lustre cluster. Prerequisites: the shared disks (/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde,/dev/sdf) are configured, and each node has two NICs, one of which already has an IP address
- # Management IP addresses of the two nodes
- host_address=(192.168.10.25 192.168.10.26)
- # LNet communication IP addresses of the two nodes
- lnet_address=(192.168.209.25 192.168.209.26)
- # Host names of the two nodes, used index-aligned with host_address
- host_hostname=(mds005 mds006)
- # Password sent to the ssh password prompts below
- # NOTE(review): the credential is hard-coded in plain text — consider prompting for it or reading it from the environment
- host_passwd=110119
-
-
- # Make sure the expect command is available; install it with yum when it is missing
- if ! expect -v &> /dev/null; then
-   echo "没有expect,安装expect命令"
-   yum install -y expect
- fi
-
- # Passwordless login setup: generate the local RSA key pair unless a public key already exists
- echo "########################## 本地开始配置ssh ##########################"
- if [ -e ~/.ssh/id_rsa.pub ]; then
-   echo "ssh公钥已创建"
- else
-   echo "ssh公钥未创建,开始创建"
-   /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- spawn ssh-keygen -t rsa -b 1024
- # Keep answering prompts until ssh-keygen finishes
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- ".ssh/id_rsa)" { send "\n"; exp_continue }
- "Overwrite (y/n)?" { send "y\n"; exp_continue }
- "no passphrase):" { send "\n"; exp_continue }
- "passphrase again:" { send "\n"; exp_continue }
- }
- eof
- fi
-
-
-
- # 本地的密钥开始加入被控制主机
- for ((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}正在被添加公钥 ##########################"
- /usr/bin/expect << eof
- # 设置捕获字符串后,期待回复的超时时间
- set timeout 10
-
- spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[j]}
-
- ## 开始进连续捕获
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- }
- eof
- echo "############# ${host_address[j]}配置完毕 #############"
- done
-
-
-
- # Create an RSA key pair on every managed host that does not have one yet
- for ((j=0;j<2;j++));do
-   echo "########################## ${host_address[j]}开始创建密钥 ##########################"
-   # Test the exit status of the remote command directly: the old form compared the
-   # unquoted output of a command substitution, which turned into a "[" syntax error
-   # whenever ssh printed nothing (e.g. the host was unreachable)
-   if ssh "root@${host_address[j]}" 'test -e ~/.ssh/id_rsa.pub';then
-     echo "ssh公钥已创建"
-   else
-     echo "ssh公钥未创建,开始创建"
-     /usr/bin/expect << eof
- # Wait at most 10 seconds for each expected prompt
- set timeout 10
- spawn ssh root@${host_address[j]} "ssh-keygen -t rsa -b 1024"
- # Keep answering prompts until the remote ssh-keygen finishes
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- ".ssh/id_rsa)" { send "\n"; exp_continue }
- "Overwrite (y/n)?" { send "y\n"; exp_continue }
- "no passphrase):" { send "\n"; exp_continue }
- "passphrase again:" { send "\n"; exp_continue }
- }
- eof
-   fi
-   echo "############# ${host_address[j]}配置完毕 #############"
- done
-
-
-
- # 被控制主机开始分配密钥
- for ((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}开始分配公钥 ##########################"
- for ((k=0;k<2;k++));do
-
- /usr/bin/expect << eof
- # 设置捕获字符串后,期待回复的超时时间
- set timeout 10
-
- spawn ssh root@${host_address[j]} "ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[k]}"
-
- ## 开始进连续捕获
- expect {
- "connecting (yes/no)?" { send "yes\n"; exp_continue }
- "s password:" { send "${host_passwd}\n"; exp_continue }
- }
- eof
- done
- echo "############# ${host_address[j]}分配完毕 #############"
- done
-
-
-
-
-
- # Optional step: set the host names and rewrite /etc/hosts on every managed host
- for ((i=0;i<100;i++));do
-   read -p "修改主机名和配置域名映射?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始配置主机名和域名映射 ##########################"
-       for j in 0 1; do
-         # Rename the host only when its current name differs from the wanted one
-         if [ $(ssh root@${host_address[j]} "hostname") != "${host_hostname[j]}" ];then
-           ssh root@${host_address[j]} "hostnamectl set-hostname ${host_hostname[j]}"
-         fi
-         # Start from a clean /etc/hosts that only holds the loopback entries
-         ssh root@${host_address[j]} "cat << eof > /etc/hosts
- 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
- ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
- eof"
-         # Then append one "address name" line per cluster node
-         for k in 0 1; do
-           ssh root@${host_address[j]} "echo '${host_address[k]} ${host_hostname[k]}' >> /etc/hosts"
-         done
-       done
-       echo "############# 配置完毕 #############"
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-修改主机名和配置域名映射 #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
-
-
-
-
-
-
- # Optional step: stop and disable firewalld and set SELINUX=disabled in /etc/selinux/config
- for ((i=0;i<100;i++));do
-   read -p "配置防火墙和selinux?(Y/n): " flag
-   case "${flag}" in
-     Y)
-       sleep 3
-       echo "########################## 开始配置防火墙和selinux ##########################"
-       for j in 0 1; do
-         ssh root@${host_address[j]} "systemctl stop firewalld;systemctl disable firewalld"
-         ssh root@${host_address[j]} "sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
-       done
-       echo "############# 配置完毕 #############"
-       break
-       ;;
-     n)
-       echo "############# 已跳过步骤-配置防火墙和selinux #############"
-       break
-       ;;
-     *)
-       # Any other answer asks again; give up after the 100th attempt
-       if [ ${i} -eq 99 ];then
-         echo "############# 已退出 #############"
-         exit
-       fi
-       ;;
-   esac
- done
-
-
-
- # Optional step: configure the local (cdrom), EPEL and Aliyun yum repositories on every managed host
- for ((i=0;i<100;i++));do
-   read -p "配置yum源?(Y/n): " flag
-   if [ "${flag}" == "Y" ];then
-     sleep 3
-     # Banner fixed: it used to announce the ssh step although this is the yum step
-     echo "########################## 开始配置yum源 ##########################"
-     for ((j=0;j<2;j++));do
-       echo "########################## 配置${host_address[j]}的本地yum源 ##########################"
-       ssh root@${host_address[j]} "mkdir /mnt/cdrom &> /dev/null;mount /dev/cdrom /mnt/cdrom"
-       # Add the cdrom to /etc/fstab only once
-       if [ -z "$(ssh root@${host_address[j]} "grep '^\/dev\/cdrom' /etc/fstab")" ];then
-         ssh root@${host_address[j]} "cat << eof >> /etc/fstab
- /dev/cdrom /mnt/cdrom iso9660 defaults 0 0
- eof"
-       fi
-       ssh root@${host_address[j]} "cat << eof > /etc/yum.repos.d/centos-local.repo
- [centos7.9]
- name=centos7.9
- baseurl=file:///mnt/cdrom
- enabled=1
- gpgcheck=0
- eof"
-       echo "############# ${host_address[j]}配置完毕 #############"
-       echo "########################## 配置${host_address[j]}的扩展源 ##########################"
-       ssh root@${host_address[j]} "yum install epel-release -y"
-       echo "############# ${host_address[j]}配置完毕 #############"
-       echo "########################## 配置${host_address[j]}的阿里yum源 ##########################"
-       ssh root@${host_address[j]} "yum install -y wget"
-       # Replace CentOS-Base.repo with the Aliyun mirror definition when the file exists
-       if ssh root@${host_address[j]} 'test -e /etc/yum.repos.d/CentOS-Base.repo';then
-         ssh root@${host_address[j]} "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"
-       fi
-       # Check the status of the yum command itself. The old check ran ssh host "echo $?",
-       # where $? was expanded locally and was always 0, so failures went unnoticed
-       if ! ssh root@${host_address[j]} "yum clean all && yum repolist";then
-         echo "yum源配置有误,退出执行脚本"
-         exit 1
-       fi
-       echo "############# ${host_address[j]}配置完毕 #############"
-     done
-     break
-   elif [ "${flag}" == "n" ];then
-     echo "############# 已跳过步骤-配置yum源 #############"
-     break
-   elif [ ${i} -eq 99 ];then
-     # Give up after the 100th unrecognized answer
-     echo "############# 已退出 #############"
-     exit
-   else continue;fi
- done
-
-
- # Step: configure chrony time synchronisation. Every node gets a "server" line that
- # points at the first node (host_address[0]); the first node additionally enables
- # "allow" and "local stratum 10" so that it can serve time. The script aborts
- # when a node shows no selected ("^*") source after the restart.
- for ((i=0;i<100;i++));do
- read -r -p "配置chrony时间服务器?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- echo "########################## 开始配置chrony ##########################"
- for ((j=0;j<2;j++));do
- # Use the ssh exit status to detect a missing chronyd. The previous form embedded
- # "echo $?" in a double-quoted string, so $? was expanded locally and the
- # install fallback below could never run.
- if ! ssh root@${host_address[j]} "systemctl restart chronyd";then
- echo "${host_address[j]} 安装chrony"
- if ! ssh root@${host_address[j]} "yum install -y chrony && systemctl restart chronyd";then
- echo "安装失败,请排错!"
- exit 1
- fi
- fi
- echo "${host_address[j]}配置chrony"
- # Drop the stock server lines, then insert the cluster time source after line 2.
- ssh root@${host_address[j]} "sed -i '/^server/d' /etc/chrony.conf"
- ssh root@${host_address[j]} "sed -i '2a\server '"${host_address[0]}"' iburst\' /etc/chrony.conf"
- if [[ ${j} -eq 0 ]];then
- # First node only: uncomment the allow / local stratum directives.
- ssh root@${host_address[j]} "sed -i 's/#allow 192.168.0.0\/16/allow 192.168.10.0\/16/' /etc/chrony.conf"
- ssh root@${host_address[j]} "sed -i 's/#local stratum 10/local stratum 10/' /etc/chrony.conf"
- sleep 2
- fi
- ssh root@${host_address[j]} "systemctl restart chronyd && systemctl enable chronyd &> /dev/null"
- sleep 5
- ssh root@${host_address[j]} "timedatectl set-ntp true && chronyc sources -v | sed -n '/^\^\*/p'"
- # No line starting with "^*" means chrony has not selected a source.
- if [[ -z "$(ssh root@${host_address[j]} "chronyc sources -v | sed -n '/^\^\*/p'")" ]];then
- echo -e "\e[31m此节点${host_address[j]}的chrony配置有误,请手动调试\e[0m"
- exit 1
- fi
- echo "############# ${host_address[j]}配置完毕 #############"
- done
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-配置chrony时间服务器 #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done
-
-
-
-
-
-
- # Step: download the Whamcloud e2fsprogs 1.44.5.wc1 RPMs into ~/e2fsprogs1.44.5 on
- # each node, install them with rpm, and abort when the 1.44.5 build is not installed.
- for ((i=0;i<100;i++));do
- read -r -p "安装e2fsprogs?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- echo "########################## 开始安装e2fsprogs ##########################"
- for ((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}开始安装e2fsprogs ##########################"
- ssh root@${host_address[j]} "rm -rf ~/e2fsprogs1.44.5 && mkdir ~/e2fsprogs1.44.5"
- ssh root@${host_address[j]} "wget -c -r -nd https://downloads.whamcloud.com/public/e2fsprogs/1.44.5.wc1/el7/RPMS/x86_64/ -P ~/e2fsprogs1.44.5"
- # The recursive download also fetches index pages, icons and the checksum file;
- # remove them so that only RPMs are handed to rpm.
- ssh root@${host_address[j]} "rm -rf ~/e2fsprogs1.44.5/index.html* ~/e2fsprogs1.44.5/unknown.gif ~/e2fsprogs1.44.5/*.gif ~/e2fsprogs1.44.5/sha256sum"
- ssh root@${host_address[j]} "rpm -Uvh ~/e2fsprogs1.44.5/* --force"
- # Check for the 1.44.5 build specifically: a plain "grep e2fsprogs" also matches
- # the distribution's stock package and therefore passes after a failed install.
- if ! ssh root@${host_address[j]} "rpm -qa | grep -F 'e2fsprogs-1.44.5'";then
- echo "安装失败,请排错!"
- exit 1
- fi
- echo "############# ${host_address[j]}配置完毕 #############"
- done
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-安装e2fsprogs #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done
-
-
-
-
- # Step: install the Lustre 2.12.1 server packages on both nodes. The prerequisite
- # packages come from yum; the patched kernel and the Lustre RPMs are downloaded one
- # by one into ~/lustre2.12.1 and installed with rpm. The step stops the script when
- # no installed package name contains "lustre" afterwards.
- for ((i=0;i<100;i++));do
- read -p "安装Lustre软件?(Y/n): " flag
- case "${flag}" in
- Y)
- sleep 3
- echo "########################## 开始安装lustre ##########################"
- lustre_rpm_url="https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64"
- lustre_rpm_files=(
- kernel-3.10.0-957.10.1.el7_lustre.x86_64.rpm
- kmod-lustre-2.12.1-1.el7.x86_64.rpm
- kmod-lustre-osd-ldiskfs-2.12.1-1.el7.x86_64.rpm
- lustre-2.12.1-1.el7.x86_64.rpm
- lustre-osd-ldiskfs-mount-2.12.1-1.el7.x86_64.rpm
- )
- for ((j=0;j<2;j++));do
- node_ip=${host_address[j]}
- echo "########################## ${node_ip}开始安装lustre ##########################"
- ssh root@${node_ip} "yum install -y linux-firmware dracut selinux-policy-targeted kexec-tools libyaml perl"
- ssh root@${node_ip} "rm -rf ~/lustre2.12.1 && mkdir ~/lustre2.12.1"
- # One download per package, in the listed order.
- for rpm_file in "${lustre_rpm_files[@]}";do
- ssh root@${node_ip} "wget ${lustre_rpm_url}/${rpm_file} -P ~/lustre2.12.1"
- done
- ssh root@${node_ip} "rpm -Uvh ~/lustre2.12.1/*.rpm --force"
- # The remote grep doubles as the verification: non-zero means nothing matched.
- if ! ssh root@${node_ip} "rpm -qa | grep lustre";then
- echo "安装失败,请排错!"
- exit
- fi
- echo "############# ${node_ip}配置完毕 #############"
- done
- break
- ;;
- n)
- echo "############# 已跳过步骤-安装lustre #############"
- break
- ;;
- *)
- # Any other answer re-prompts; the 100th invalid answer ends the script.
- if [ ${i} -eq 99 ];then
- echo "############# 已退出 #############"
- exit
- fi
- ;;
- esac
- done
-
-
-
- # Step: reboot both nodes (the newly installed kernel only takes effect after a
- # reboot) and wait until both accept ssh connections again. Gives up and exits
- # after 99 polling rounds.
- for ((i=0;i<100;i++));do
- read -r -p "是否重启集群主机(只有重启kernel内核才能更换生效)?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- for((j=0;j<2;j++));do
- ssh root@${host_address[j]} "reboot"
- done
- # Give the nodes time to actually go down; polling immediately can still reach
- # the old sshd and report the reboot as finished before it has started.
- sleep 10
- for ((k=0;k<100;k++));do
- if [[ ${k} -eq 99 ]];then
- echo "############# 设备连接超时.... #############"
- exit 1
- fi
- # Both nodes must accept an ssh session for the reboot to count as complete.
- if ssh -o ConnectTimeout=5 root@${host_address[0]} "exit" && ssh -o ConnectTimeout=5 root@${host_address[1]} "exit";then
- echo "############# 设备已重启 #############"
- break
- else
- echo "############# 设备正在重启 #############"
- # Pause between attempts: a refused connection returns instantly and would
- # otherwise burn through all retries within a few seconds.
- sleep 3
- fi
- done
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-重启 #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done
-
- sleep 5
-
-
- # 检查lustre
- for((i=0;i<2;i++));do
- echo "########################## ${host_address[i]}加载Lustre模块,查看Lustre版本 ##########################"
- ssh root@${host_address[i]} "modprobe lustre && lsmod | grep lustre"
- ssh root@${host_address[i]} "modinfo lustre"
- echo "############# ${host_address[i]}配置完毕 #############"
- done
-
-
-
- # Step: assign the LNet IP address (lnet_address[j]/24) to an operator-chosen network
- # interface on each node through NetworkManager, write the lnet module options
- # (tcp on ens33, tcp2 on the chosen interface), reload the Lustre modules and
- # print the resulting NIDs.
- for ((i=0;i<100;i++));do
- read -r -p "是否继续配置Lnet网卡的IP地址?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- for((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}配置Lnet网卡的IP地址 ##########################"
- # The prompt refers to "the output above", so list the node's interfaces first.
- ssh root@${host_address[j]} "ip addr"
- read -r -p "请根据以上输出显示中,输入你要配置的LnNet网卡名称:" network_card
- # Require an exact interface name; the former substring grep accepted partial
- # names (e.g. "ens3" on a node that only has "ens33").
- if ! ssh root@${host_address[j]} "ip link show dev '${network_card}'" &> /dev/null;then
- echo "网卡不存在,请重试"
- exit 1
- fi
- # Recreate the connection profile from scratch with a static address.
- ssh root@${host_address[j]} "nmcli connection delete ${network_card} &> /dev/null"
- ssh root@${host_address[j]} "nmcli connection add type ethernet con-name ${network_card} ifname ${network_card} ipv4.method manual ipv4.addresses '${lnet_address[j]}/24' autoconnect yes"
- ssh root@${host_address[j]} "nmcli connection up ${network_card}"
- echo "############# ${host_address[j]}配置完毕 #############"
- echo "########################## ${host_address[j]}配置Lnet网络 ##########################"
- ssh root@${host_address[j]} "echo options lnet networks='tcp(ens33),tcp2(${network_card})' > /etc/modprobe.d/lustre.conf"
- # Reload the modules so the new lnet options are picked up.
- ssh root@${host_address[j]} "lustre_rmmod && modprobe -v lustre"
- echo "############# ${host_address[j]}配置完毕 #############"
- echo "############# ${host_address[j]}查看Lnet网络 #############"
- ssh root@${host_address[j]} "lctl list_nids"
- echo "##########################################################"
- done
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-配置Lnet网卡的IP地址 #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done
-
-
-
-
-
- # Step: format the shared disks from the first node as Lustre targets of the
- # "global" filesystem: /dev/sdb as MGS, /dev/sdc and /dev/sdd as MDT 0/1, /dev/sde
- # and /dev/sdf as OST 0/1. Every target lists both nodes' tcp2 NIDs as service
- # nodes. Afterwards the mount point directories are created on both nodes and
- # the second node prints blkid output for the five disks.
- for ((i=0;i<100;i++));do
- read -p "是否继续格式化lustre?(Y/n): " flag
- case "${flag}" in
- Y)
- sleep 3
- echo "########################## 开始格式化lustre ##########################"
- ssh root@${host_address[0]} "lsblk"
- # Option groups shared by the mkfs.lustre invocations.
- service_opts="--servicenode=${lnet_address[0]}@tcp2 --servicenode=${lnet_address[1]}@tcp2"
- mgsnode_opts="--mgsnode=${lnet_address[0]}@tcp2 --mgsnode=${lnet_address[1]}@tcp2"
- ssh root@${host_address[0]} "mkfs.lustre --mgs ${service_opts} --backfstype=ldiskfs --reformat /dev/sdb"
- # Target table: kind, index and backing disk, in formatting order.
- target_kinds=(mdt mdt ost ost)
- target_indexes=(0 1 0 1)
- target_disks=(/dev/sdc /dev/sdd /dev/sde /dev/sdf)
- for ((t=0;t<4;t++));do
- ssh root@${host_address[0]} "mkfs.lustre --fsname global --${target_kinds[t]} --index=${target_indexes[t]} ${service_opts} ${mgsnode_opts} --backfstype=ldiskfs --reformat ${target_disks[t]}"
- done
- echo "############# 格式化完毕 #############"
- echo "########################## 创建挂载点目录 ##########################"
- for((j=0;j<2;j++));do
- ssh root@${host_address[j]} "mkdir /mnt/mgs &> /dev/null;mkdir /mnt/mdt1 &> /dev/null;mkdir /mnt/mdt2 &> /dev/null;mkdir /mnt/ost1 &> /dev/null;mkdir /mnt/ost2 &> /dev/null"
- done
- echo "############# 创建完毕 #############"
- echo "############# ${host_address[1]}查看格式化 #############"
- ssh root@${host_address[1]} "blkid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf"
- echo "##########################################################"
- break
- ;;
- n)
- echo "############# 已跳过步骤-格式化lustre #############"
- break
- ;;
- *)
- # Any other answer re-prompts; the 100th invalid answer ends the script.
- if [ ${i} -eq 99 ];then
- echo "############# 已退出 #############"
- exit
- fi
- ;;
- esac
- done
-
-
-
- # Step: trial-mount every Lustre target on each node in turn (mgs, mdt1, mdt2, ost1,
- # ost2). When a mount fails, the targets already mounted on that node are unmounted
- # in reverse order and the script exits; after a full pass all five are unmounted.
- for ((i=0;i<100;i++));do
- read -p "是否继续测试lustre的挂载?(Y/n): " flag
- case "${flag}" in
- Y)
- sleep 3
- mount_names=(mgs mdt1 mdt2 ost1 ost2)
- mount_disks=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
- for((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}测试挂载 ##########################"
- # Unmount commands for what is mounted so far on this node, newest first.
- rollback_cmd=""
- for ((t=0;t<5;t++));do
- mount_name=${mount_names[t]}
- echo "########################## ${mount_name}测试挂载 ##########################"
- ssh root@${host_address[j]} "umount /mnt/${mount_name} &> /dev/null"
- if ! ssh root@${host_address[j]} "mount -t lustre ${mount_disks[t]} /mnt/${mount_name}";then
- echo "############# ${mount_name}测试有误,请手动查看! #############"
- if [ -n "${rollback_cmd}" ];then
- ssh root@${host_address[j]} "${rollback_cmd}"
- fi
- exit
- fi
- # Prepend so that the rollback runs in reverse mount order.
- rollback_cmd="umount /mnt/${mount_name}${rollback_cmd:+;${rollback_cmd}}"
- done
- ssh root@${host_address[j]} "${rollback_cmd}"
- done
- echo "############# 测试完毕 #############"
- break
- ;;
- n)
- echo "############# 已跳过步骤-测试挂载 #############"
- break
- ;;
- *)
- # Any other answer re-prompts; the 100th invalid answer ends the script.
- if [ ${i} -eq 99 ];then
- echo "############# 已退出 #############"
- exit
- fi
- ;;
- esac
- done
-
-
-
-
- # Step: install pacemaker/pcs on both nodes, set the hacluster password, authenticate
- # the nodes to each other, create the cluster, and start it on all nodes.
- # Installation and cluster creation are skipped when "pcs status" on the first node
- # already mentions the cluster name.
- for ((i=0;i<100;i++));do
- read -r -p "是否继续安装packemaker和corosync软件和创建集群?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- # Single source for the cluster name. The existence check used to grep for
- # "mycluster" while the cluster was created as "mylustre", so the check never
- # matched and setup was re-run on every invocation.
- cluster_name="mylustre"
- if [[ -z "$(ssh root@${host_address[0]} 'pcs status' | grep "${cluster_name}")" ]];then
- for((j=0;j<2;j++));do
- echo "########################## ${host_address[j]}开始安装 ##########################"
- ssh root@${host_address[j]} "yum install pacemaker pcs policycoreutils-python -y"
- echo "############# ${host_address[j]}安装完毕 #############"
- echo "########################## ${host_address[j]}开始配置 ##########################"
- ssh root@${host_address[j]} "systemctl enable pcsd;systemctl restart pcsd"
- ssh root@${host_address[j]} "echo '${host_passwd}' |passwd --stdin hacluster"
- echo "############# ${host_address[j]}配置完毕 #############"
- done
- # Answer the interactive "pcs cluster auth" prompts with expect.
- /usr/bin/expect << eof
- # 设置捕获字符串后,期待回复的超时时间
- set timeout 10
-
- spawn ssh root@${host_address[0]} "pcs cluster auth ${host_hostname[*]}"
-
- ## 开始进连续捕获
- expect {
- "Username:" { send "hacluster\n"; exp_continue }
- "Password:" { send "${host_passwd}\n"; exp_continue }
- }
- eof
- echo "########################## 开始创建集群 ##########################"
- ssh root@${host_address[0]} "pcs cluster setup --name ${cluster_name} ${host_hostname[*]}"
- echo "############# 创建完毕 #############"
- fi
- echo "########################## 启动集群 ##########################"
- ssh root@${host_address[0]} "pcs cluster start --all"
- echo "############# 启动完毕 #############"
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-安装packemaker和corosync软件和配置 #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done
-
-
-
- # Step: configure fencing. Installs fence-agents-all on both nodes, enables stonith,
- # and creates one fence_heuristics_ping resource per node. When two such resources
- # are already "Started" nothing is changed; leftover resources in any other state
- # are deleted and recreated.
- for ((i=0;i<100;i++));do
- read -r -p "是否继续配置资源防护?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- echo "########################## 开始配置资源防护 ##########################"
- for((j=0;j<2;j++));do
- ssh root@${host_address[j]} "yum install -y fence-agents-all"
- done
- ssh root@${host_address[0]} "pcs property set stonith-enabled=true"
- # Query the first node explicitly: after the loop above j equals 2, so
- # host_address[j] is empty and the former "ssh root@" call could not work.
- stonith_lines="$(ssh root@${host_address[0]} "pcs status" | grep "stonith:fence_heuristics_ping")"
- started_count=$(grep -c "Started" <<< "${stonith_lines}")
- if [[ ${started_count} -eq 2 ]];then
- echo "############# stonith已创建,并且正常运行,跳过配置stonith #############"
- break
- fi
- # Resources exist but are not both running: remove both before recreating.
- # (Previously grep text was compared with -eq and the first node's resource was
- # deleted twice while the second node's was never removed.)
- if [[ -n "${stonith_lines}" ]];then
- ssh root@${host_address[0]} "pcs stonith delete stonith-ping-${host_hostname[0]} &> /dev/null"
- ssh root@${host_address[0]} "pcs stonith delete stonith-ping-${host_hostname[1]} &> /dev/null"
- fi
- ssh root@${host_address[0]} "pcs stonith create stonith-ping-${host_hostname[0]} fence_heuristics_ping ping_targets=${host_address[0]}"
- ssh root@${host_address[0]} "pcs stonith create stonith-ping-${host_hostname[1]} fence_heuristics_ping ping_targets=${host_address[1]}"
- ssh root@${host_address[0]} "pcs status"
- echo "############# 配置完毕 #############"
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-配置资源防护 #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done
-
-
- # Step: (re)create the Pacemaker resources for the Lustre targets. Installs the
- # lustre-resource-agents package where missing, deletes any existing target
- # resources, creates one ocf:lustre:Lustre resource per target, adds location
- # preferences (score 10) and "mgs first" ordering constraints, then creates the
- # cloned healthLNET / healthLUSTRE monitors and prints the cluster status.
- # Any failed creation aborts the script.
- for ((i=0;i<100;i++));do
- read -r -p "是否继续创建lustre资源(可删除后再创建)?(Y/n): " flag
- if [[ "${flag}" == "Y" ]];then
- sleep 3
- for((j=0;j<2;j++));do
- # Quoted so the test stays well-formed whatever rpm/grep prints.
- if [[ -z "$(ssh root@${host_address[j]} 'rpm -qa | grep lustre-resource-agents-2.12.1-1')" ]];then
- echo "########################## ${host_address[j]}安装ocf:lustre:Lustre包 ##########################"
- ssh root@${host_address[j]} "wget https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64/lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
- ssh root@${host_address[j]} "rpm -ivh lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
- echo "############# 安装完毕 #############"
- fi
- done
- echo "########################## 开始删除lustre资源 ##########################"
- ssh root@${host_address[0]} "pcs resource delete global-ost2 &> /dev/null;pcs resource delete global-ost1 &> /dev/null"
- ssh root@${host_address[0]} "pcs resource delete global-mdt1 &> /dev/null;pcs resource delete global-mdt2 &> /dev/null"
- ssh root@${host_address[0]} "pcs resource delete global-mgs &> /dev/null"
- echo "############# 删除完毕 #############"
- echo "########################## 开始创建lustre资源 ##########################"
- # Resource table: name suffix (also the mount point name), backing disk, and the
- # index of the node the resource prefers.
- res_names=(mgs mdt1 mdt2 ost1 ost2)
- res_disks=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
- res_owner=(0 0 1 0 1)
- for ((r=0;r<5;r++));do
- if ! ssh root@${host_address[0]} "pcs resource create global-${res_names[r]} ocf:lustre:Lustre target=${res_disks[r]} mountpoint=/mnt/${res_names[r]}";then
- echo "############# ${res_names[r]}资源创建有误,请手动查看! #############"
- exit 1
- fi
- done
- # Track every constraint command; previously only the status of the last one
- # was inspected.
- constraint_failed=0
- for ((r=0;r<5;r++));do
- ssh root@${host_address[0]} "pcs constraint location add global-constraint-${res_names[r]} global-${res_names[r]} ${host_hostname[${res_owner[r]}]} 10" || constraint_failed=1
- done
- # The MGS must be started before every MDT and OST.
- for ((r=1;r<5;r++));do
- ssh root@${host_address[0]} "pcs constraint order start global-mgs then start global-${res_names[r]}" || constraint_failed=1
- done
- if [[ ${constraint_failed} -ne 0 ]];then
- echo "############# 资源约束创建有误,请手动查看! #############"
- exit 1
- fi
- # Read the tcp2 interface name back from the lnet module options. \$3 is escaped
- # so that awk on the remote side receives it; unescaped, the local shell
- # substituted its own third positional parameter.
- lnet_name1=$(ssh root@${host_address[0]} "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
- # Kept for parity with the first node; not referenced by this step.
- lnet_name2=$(ssh root@${host_address[1]} "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
- ssh root@${host_address[0]} "pcs resource delete ping-lnet &>/dev/null;pcs resource delete global-healthLUSTRE &> /dev/null"
- monitor_failed=0
- ssh root@${host_address[0]} "pcs resource create ping-lnet ocf:lustre:healthLNET lctl=true multiplier=1001 device=${lnet_name1} host_list='${lnet_address[0]}@tcp2 ${lnet_address[1]}@tcp2' --clone" || monitor_failed=1
- ssh root@${host_address[0]} "pcs resource create global-healthLUSTRE ocf:lustre:healthLUSTRE --clone" || monitor_failed=1
- if [[ ${monitor_failed} -ne 0 ]];then
- echo "############# 资源监听创建有误,请手动查看! #############"
- exit 1
- fi
- echo "############# 创建完毕 #############"
- echo "########################## 查看集群状态 ##########################"
- echo "########################################################################"
- echo "########################################################################"
- ssh root@${host_address[0]} "pcs status"
- echo "########################################################################"
- echo "########################################################################"
- echo "################################################## 脚本到此全部执行完毕 ##################################################"
- break
- elif [[ "${flag}" == "n" ]];then
- echo "############# 已跳过步骤-创建lustre资源 #############"
- break
- elif [[ ${i} -eq 99 ]];then
- echo "############# 已退出 #############"
- exit
- else continue;fi
- done