1. OS Configuration
1.1 Environment Planning
OS: CentOS 7
Database: PostgreSQL 11.5
IP addresses:
node1 10.122.166.120
node2 10.122.166.127
node3 10.122.166.128
VIPs:
10.122.166.123 (master VIP)
10.122.166.133 (slave VIP)
1.2 System Configuration
Disable SELinux ----> /etc/selinux/config
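For example (a minimal sketch; setenforce 0 applies immediately, the config edit persists across reboots):
- [root@node1 ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
- [root@node1 ~]# setenforce 0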
Disable the firewall
systemctl stop firewalld.service
systemctl disable firewalld.service
Set the hostname
hostnamectl set-hostname nodeX
Edit the /etc/hosts file
10.122.166.120 node1
10.122.166.127 node2
10.122.166.128 node3
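Optionally verify that the names resolve from every node, e.g. on node1:
- [root@node1 ~]# ping -c 1 node2
- [root@node1 ~]# ping -c 1 node3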
2. Database Installation
2.1 Install the PG software on all nodes (node1 shown as the example)
1) Install dependency packages
[root@node1 ~]# yum install gcc gcc-c++ flex bison readline-devel zlib-devel -y
2) Create the postgres user
- [root@node1 ~]# groupadd postgres
- [root@node1 ~]# useradd postgres -g postgres
- [root@node1 ~]# echo "pg123"|passwd postgres --stdin
3) Build and install the PG software from source
- [root@node1 ~]# tar -xvf postgresql-11.5.tar.gz
- [root@node1 ~]# cd postgresql-11.5/
- [root@node1 postgresql-11.5]# ./configure --with-segsize=20 --prefix=/data/postgres/
- [root@node1 postgresql-11.5]# gmake && gmake install
- [root@node1 postgresql-11.5]# cd contrib/
- [root@node1 contrib]# gmake && gmake install
4) Create the data and archive directories
- [root@node1 ~]# mkdir -p /data/postgres/data
- [root@node1 ~]# mkdir -p /data/postgres/archive_log
- [root@node1 ~]# chown -R postgres:postgres /data/postgres
- [root@node1 ~]# chmod 0700 /data/postgres/data
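Optionally confirm the build landed in the prefix before initializing:
- [root@node1 ~]# /data/postgres/bin/pg_ctl --version    # should report 11.5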
2.2 Initialize the database on node1
1) Initialize the database
- [postgres@node1 ~]$ cd /data/postgres/bin/
- [postgres@node1 bin]$ initdb -D /data/postgres/data
2) Edit the configuration files
- [postgres@node1 data]$ vim postgresql.conf
-
- listen_addresses = '*'
- port = '5432'
- max_connections = '1000'
- log_connections = 'yes'
- shared_buffers = '4000MB'
- effective_cache_size = '1000MB'
- temp_buffers = '64MB'
- work_mem = '40MB'
- max_worker_processes = '32'
- max_parallel_workers = '16'
- max_parallel_workers_per_gather = '4'
- wal_compression = 'on'
- log_destination = 'csvlog'
- logging_collector = 'on'
- log_directory = 'log'
- log_truncate_on_rotation = 'on'
- log_rotation_size = '30MB'
- log_lock_waits = 'on'
- lock_timeout = '300000'
- wal_level = replica
- synchronous_commit = on
- archive_mode = on
- archive_command = 'cp %p /data/postgres/archive_log/%f'
- max_wal_senders=5
- wal_keep_segments = 32
- hot_standby = on
- wal_log_hints = 'on'
- log_statement = 'ddl'
- [postgres@node1 data]$ vim pg_hba.conf
-
- host all all 10.122.166.0/24 trust
- host replication all 10.122.166.0/24 trust
3) Add environment variables
- [postgres@node1 ~]$ vi ~/.bash_profile
-
- export PATH=/data/postgres/bin/:$PATH
- export PGDATA=/data/postgres/data
- export LD_LIBRARY_PATH=/data/postgres/lib:${LD_LIBRARY_PATH}
-
- [postgres@node1 ~]$ source ~/.bash_profile
4) Start the database
[postgres@node1 ~]$ pg_ctl start
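Optionally verify the instance is up and accepting connections:
- [postgres@node1 ~]$ pg_ctl status
- [postgres@node1 ~]$ psql -c "select version();"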
2.3 Create the standbys on node2/node3
1) Take a base backup (node2 shown as the example)
[postgres@node2 data]$ pg_basebackup -h 10.122.166.120 -p 5432 -U postgres -D /data/postgres/data
2) Create the recovery.conf file
- [postgres@node2 data]$ vim /data/postgres/data/recovery.conf
- standby_mode = on
- primary_conninfo = 'host=10.122.166.120 port=5432 user=postgres password=postgres application_name=node2'
- recovery_target_timeline = 'latest'
(application_name makes each standby identifiable in pg_stat_replication, as shown in 2.4; on node3 use application_name=node3.)
3) Start the standby
[postgres@node2 ~]$ pg_ctl start
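Optionally confirm the standby is in recovery (the query should return t):
- [postgres@node2 ~]$ psql -c "select pg_is_in_recovery();"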
2.4 Check replication status on node1
- postgres=# select application_name,client_addr,sync_state from pg_stat_replication;
- application_name | client_addr | sync_state
- ------------------+----------------+------------
- node2 | 10.122.166.127 | async
- node3 | 10.122.166.128 | async
- (2 rows)
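Both standbys attach as async at this point; synchronous replication is handed over to the pgsql resource agent in section 4. To also see replication lag in bytes, a query along these lines works on PG 11:
- postgres=# select application_name, client_addr,
- pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) as replay_lag_bytes
- from pg_stat_replication;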
2.5 Stop the PG service on all nodes
Stop the standbys first, then the primary:
pg_ctl stop -m fast
3. Configure corosync
1) Install the software on all nodes
yum install -y pacemaker corosync pcs
2) Create the corosync.conf file
- [root@node1 ~]# cd /etc/corosync
- [root@node1 corosync]# vim corosync.conf
- totem {
-     version: 2
-     cluster_name: cluster
-     transport: udp
-     ip_version: ipv4
-     secauth: on
-     crypto_cipher: none
-     crypto_hash: none
-     clear_node_high_bit: yes
-     rrp_mode: none
-     vsftype: none
-     token: 5000
-     token_retransmits_before_loss_const: 10
-     consensus: 6000
-     join: 60
-     max_messages: 20
-     heartbeat_failures_allowed: 3
-     interface {
-         ringnumber: 0
-         bindnetaddr: 10.122.166.0
-         mcastaddr: 226.94.61.22
-         mcastport: 5405
-         ttl: 1
-     }
- }
- logging {
-     fileline: off
-     to_stderr: no
-     to_logfile: yes
-     logfile: /var/log/corosync.log
-     to_syslog: yes
-     syslog_facility: daemon
-     debug: off
-     timestamp: on
-     logger_subsys {
-         subsys: QUORUM
-         debug: off
-     }
- }
- quorum {
-     provider: corosync_votequorum
-     expected_votes: 3
-     two_node: 0
- }
-
- nodelist {
-     node {
-         ring0_addr: node1
-         nodeid: 1
-     }
-     node {
-         ring0_addr: node2
-         nodeid: 2
-     }
-     node {
-         ring0_addr: node3
-         nodeid: 3
-     }
- }
3) Copy the file to the other two nodes
- [root@node1 corosync]# scp corosync.conf 10.122.166.127:/etc/corosync
- [root@node1 corosync]# scp corosync.conf 10.122.166.128:/etc/corosync
4) Start the corosync service on all nodes
service corosync start
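Optionally verify ring status and quorum before moving on to pacemaker:
- [root@node1 ~]# corosync-cfgtool -s       # ring status should show "no faults"
- [root@node1 ~]# corosync-quorumtool -s    # expect 3 total votes and Quorate: Yes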
4. Configure pacemaker
1) Start the pacemaker service on all nodes
service pacemaker start
2) Check the status as root on any node
- [root@node3 cib]# crm_mon -Afr -1
- Stack: corosync
- Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
- Last updated: Thu Sep 29 13:59:07 2022
- Last change: Thu Sep 29 13:58:59 2022 by hacluster via crmd on node2
-
- 3 nodes configured
- 0 resource instances configured
-
- Online: [ node1 node2 node3 ]
-
- No resources
-
-
- Node Attributes:
- * Node node1:
- * Node node2:
- * Node node3:
-
- Migration Summary:
- * Node node2:
- * Node node1:
- * Node node3:
3) Create the config.pcs script on node1
- [root@node1 ~]# cd /var/lib/pacemaker/
- [root@node1 pacemaker]# vim config.pcs
-
- pcs cluster cib pgsql_cfg
-
- pcs -f pgsql_cfg property set no-quorum-policy="ignore"
- pcs -f pgsql_cfg property set stonith-enabled="false"
- pcs -f pgsql_cfg resource defaults resource-stickiness="INFINITY"
- pcs -f pgsql_cfg resource defaults migration-threshold="1"
-
- pcs -f pgsql_cfg resource create vip-master IPaddr2 \
- ip="10.122.166.123" \
- nic="eth0" \
- cidr_netmask="24" \
- op start timeout="60s" interval="0s" on-fail="restart" \
- op monitor timeout="60s" interval="10s" on-fail="restart" \
- op stop timeout="60s" interval="0s" on-fail="block"
-
- pcs -f pgsql_cfg resource create vip-slave IPaddr2 \
- ip="10.122.166.133" \
- nic="eth0" \
- cidr_netmask="24" \
- meta migration-threshold="0" \
- op start timeout="60s" interval="0s" on-fail="stop" \
- op monitor timeout="60s" interval="10s" on-fail="restart" \
- op stop timeout="60s" interval="0s" on-fail="ignore"
-
- pcs -f pgsql_cfg resource create pgsql pgsql \
- pgctl="/data/postgres/bin/pg_ctl" \
- psql="/data/postgres/bin/psql" \
- pgdata="/data/postgres/data/" \
- rep_mode="sync" \
- node_list="node1 node2 node3" \
- restore_command="cp %p /data/postgres/archive_log/%f" \
- primary_conninfo_opt="keepalives_idle=60 keepalives_interval=5 keepalives_count=5" \
- master_ip="10.122.166.123" \
- restart_on_promote='true' \
- op start timeout="60s" interval="0s" on-fail="restart" \
- op monitor timeout="60s" interval="4s" on-fail="restart" \
- op monitor timeout="60s" interval="3s" on-fail="restart" role="Master" \
- op promote timeout="60s" interval="0s" on-fail="restart" \
- op demote timeout="60s" interval="0s" on-fail="stop" \
- op stop timeout="60s" interval="0s" on-fail="block" \
- op notify timeout="60s" interval="0s"
-
- pcs -f pgsql_cfg resource master pgsql-cluster pgsql \
- master-max=1 master-node-max=1 clone-max=3 clone-node-max=1 notify=true
-
- pcs -f pgsql_cfg resource group add master-group vip-master
- pcs -f pgsql_cfg resource group add slave-group vip-slave
-
- pcs -f pgsql_cfg constraint colocation add master-group with master pgsql-cluster INFINITY
- pcs -f pgsql_cfg constraint order promote pgsql-cluster then start master-group symmetrical=false score=INFINITY
- pcs -f pgsql_cfg constraint order demote pgsql-cluster then stop master-group symmetrical=false score=0
-
- pcs -f pgsql_cfg constraint colocation add slave-group with slave pgsql-cluster INFINITY
- pcs -f pgsql_cfg constraint order promote pgsql-cluster then start slave-group symmetrical=false score=INFINITY
- pcs -f pgsql_cfg constraint order demote pgsql-cluster then stop slave-group symmetrical=false score=0
-
- pcs cluster cib-push pgsql_cfg
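Every command above writes to the pgsql_cfg shadow file via -f, so nothing touches the live cluster until the final cib-push. To review the staged configuration before pushing, pcs can read from the same file (optional check):
- [root@node1 pacemaker]# pcs -f pgsql_cfg config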
4) Add the PG bin directory to root's PATH on all nodes
- [root@node1 ~]# vim .bash_profile
- export PATH=/data/postgres/bin/:$PATH
- [root@node1 ~]# source .bash_profile
5) Load the configuration on node1
- [root@node1 pacemaker]# sh config.pcs
- Warning: Defaults do not apply to resources which override them with their own defined values
- Warning: Defaults do not apply to resources which override them with their own defined values
- Assumed agent name 'ocf:heartbeat:IPaddr2' (deduced from 'IPaddr2')
- Assumed agent name 'ocf:heartbeat:IPaddr2' (deduced from 'IPaddr2')
- Assumed agent name 'ocf:heartbeat:pgsql' (deduced from 'pgsql')
- Adding pgsql-cluster master-group (score: INFINITY) (Options: first-action=promote then-action=start symmetrical=false)
- Adding pgsql-cluster master-group (score: 0) (Options: first-action=demote then-action=stop symmetrical=false)
- Adding pgsql-cluster slave-group (score: INFINITY) (Options: first-action=promote then-action=start symmetrical=false)
- Adding pgsql-cluster slave-group (score: 0) (Options: first-action=demote then-action=stop symmetrical=false)
- CIB updated
6) Wait a moment, then check the status again
- [root@node1 ~]# crm_mon -Afr -1
- Stack: corosync
- Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
- Last updated: Thu Sep 29 15:14:27 2022
- Last change: Thu Sep 29 14:15:00 2022 by root via crm_attribute on node1
-
- 3 nodes configured
- 5 resource instances configured
-
- Online: [ node1 node2 node3 ]
-
- Full list of resources:
-
- Master/Slave Set: pgsql-cluster [pgsql]
- Masters: [ node1 ]
- Slaves: [ node2 node3 ]
- Resource Group: master-group
- vip-master (ocf::heartbeat:IPaddr2): Started node1
- Resource Group: slave-group
- vip-slave (ocf::heartbeat:IPaddr2): Started node2
-
- Node Attributes:
- * Node node1:
- + master-pgsql : 1000
- + pgsql-data-status : LATEST
- + pgsql-master-baseline : 000000002D000098
- + pgsql-status : PRI
- * Node node2:
- + master-pgsql : 100
- + pgsql-data-status : STREAMING|SYNC
- + pgsql-status : HS:sync
- * Node node3:
- + master-pgsql : -INFINITY
- + pgsql-data-status : STREAMING|ASYNC
- + pgsql-status : HS:async
-
- Migration Summary:
- * Node node2:
- * Node node1:
- * Node node3:
7) Check the status with pcs on any node
- [root@node1 ~]# pcs status
- Cluster name: cluster
- Stack: corosync
- Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
- Last updated: Thu Sep 29 15:15:08 2022
- Last change: Thu Sep 29 14:15:00 2022 by root via crm_attribute on node1
-
- 3 nodes configured
- 5 resource instances configured
-
- Online: [ node1 node2 node3 ]
-
- Full list of resources:
-
- Master/Slave Set: pgsql-cluster [pgsql]
- Masters: [ node1 ]
- Slaves: [ node2 node3 ]
- Resource Group: master-group
- vip-master (ocf::heartbeat:IPaddr2): Started node1
- Resource Group: slave-group
- vip-slave (ocf::heartbeat:IPaddr2): Started node2
-
- Daemon Status:
- corosync: active/disabled
- pacemaker: active/disabled
- pcsd: inactive/disabled
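The Daemon Status block shows corosync and pacemaker are running but not enabled at boot; if the stack should come back after a reboot, enable them:
- [root@node1 ~]# systemctl enable corosync pacemaker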
8) Check replication status on node1 (the primary)
- postgres=# select application_name,client_addr,sync_state from pg_stat_replication;
- application_name | client_addr | sync_state
- ------------------+----------------+------------
- node2 | 10.122.166.127 | sync
- node3 | 10.122.166.128 | async
- (2 rows)
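node2 is now the synchronous standby, matching the HS:sync attribute shown by crm_mon. With rep_mode="sync" the pgsql resource agent maintains this itself; you can cross-check the value it wrote:
- postgres=# show synchronous_standby_names;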
Install crmsh (optional)
- [root@node1 ~]# wget -P /etc/yum.repos.d/ http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/network:ha-clustering:Stable.repo
- [root@node1 ~]# yum install -y crmsh
- [root@node1 ~]# crm configure property stonith-enabled=false
Use the crm command
- [root@node1 ~]# crm status
- Stack: corosync
- Current DC: node2 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
- Last updated: Thu Sep 29 15:18:04 2022
- Last change: Thu Sep 29 14:15:00 2022 by root via crm_attribute on node1
-
- 3 nodes configured
- 5 resource instances configured
-
- Online: [ node1 node2 node3 ]
-
- Full list of resources:
-
- Master/Slave Set: pgsql-cluster [pgsql]
- Masters: [ node1 ]
- Slaves: [ node2 node3 ]
- Resource Group: master-group
- vip-master (ocf::heartbeat:IPaddr2): Started node1
- Resource Group: slave-group
- vip-slave (ocf::heartbeat:IPaddr2): Started node2
Common commands
- Stop the cluster service: service pacemaker stop (PG is stopped automatically)
- Start the cluster service: service pacemaker start (PG is started automatically)
- Check cluster status once: crm_mon -Afr -1
- Check cluster status with live refresh: crm_mon -Afr
- Refresh the cluster state: crm resource cleanup pgsql-cluster
- Refresh the state of a single node: crm resource cleanup pgsql nodename
- Clear historical alerts: crm resource cleanup
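Finally, an optional failover drill using only the commands above (note: the pgsql resource agent may leave a PGSQL.lock file under its tmpdir on the failed master, which must be removed before that node can rejoin):
- [root@node1 ~]# service pacemaker stop      # simulate loss of the current master
- [root@node2 ~]# crm_mon -Afr -1             # a standby is promoted and vip-master moves with it
- [root@node1 ~]# service pacemaker start     # bring node1 back as a standby
- [root@node1 ~]# crm resource cleanup pgsql-cluster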