
能执行 PITR(point in time recovery)是很重要的,pg 原生提供的 pg_basebackup 只能全备,所以当数据量太大时产生痛点。而 pgbackrest 有如下优点:参考
官方监控暴露方法,可写一个shell脚本暴露指标,并用node_exporter 的 prom 功能收集。或者用 app_exporter 实现。最终都配合 prometheus + alertmanager + grafana 实现,思路如下:
可以在数据库的机器上写一个 exporter,查这两个值
app_pg_pgbackrest_last_successful_backup_ts = 1684739932000(job = node; ip=192.168.2.99)
app_pg_pgbackrest_last_archived_wal = 000000010000000000000006(job = node; ip=192.168.2.99)
prometheus 每天 pull 一次这个exporter的数据
然后 prometheus 配置一个 rule,如果 「最近的两次app_pg_pgbackrest_last_successful_backup_ts 不一样」or 「最近的两次app_pg_pgbackrest_last_archived_wal 不一样」 则 飞书报警
然后在 grafana 把 app_pg_pgbackrest_last_successful_backup_ts 展示成一个时序图
然后在 grafana 把 app_pg_pgbackrest_last_archived_wal 展示成一个表格(样式得查查)
安装包见https://github.com/pgbackrest/pgbackrest/releases/tag/release/2.37, 如果下载不下来, 隔壁文件有离线安装包
一般建议在A机器(会产生很多在生产环境无用的东西)源码编译,再在生产环境pg-primary机器运行.
mkdir -p /build
wget -q -O - \
https://github.com/pgbackrest/pgbackrest/archive/release/2.37.tar.gz | \
tar zx -C /build
apt-get install make gcc libpq-dev libssl-dev libxml2-dev pkg-config \
liblz4-dev libzstd-dev libbz2-dev libz-dev libyaml-dev
cd /build/pgbackrest-release-2.37/src && ./configure && make
这里就是在生产环境的pg-primary机器运行
sudo apt-get install postgresql-client libxml2
cd /build/pgbackrest-release-2.37/src
chown postgres:postgres pgbackrest
chmod 777 pgbackrest
cp pgbackrest /usr/bin/
mkdir -p -m 755 /var/log/pgbackrest
chown -R postgres:postgres /var/log/pgbackrest
mkdir -p /etc/pgbackrest/conf.d
touch /etc/pgbackrest/pgbackrest.conf
chmod 640 /etc/pgbackrest/pgbackrest.conf
chown -R postgres:postgres /etc/pgbackrest
(base) root@k8s-master-133:/build/pgbackrest-release-2.37/src# pgbackrest
pgBackRest 2.37 - General help
Usage:
pgbackrest [options] [command]
Commands:
archive-get Get a WAL segment from the archive.
archive-push Push a WAL segment to the archive.
backup Backup a database cluster.
check Check the configuration.
expire Expire backups that exceed retention.
help Get help.
info Retrieve information about backups.
repo-get Get a file from a repository.
repo-ls List files in a repository.
restore Restore a database cluster.
server pgBackRest server.
server-ping Ping pgBackRest server.
stanza-create Create the required stanza data.
stanza-delete Delete a stanza.
stanza-upgrade Upgrade a stanza.
start Allow pgBackRest processes to run.
stop Stop pgBackRest processes from running.
version Get version.
Use 'pgbackrest help [command]' for more information.
sudo -u postgres /usr/lib/postgresql/12/bin/initdb \
-D /var/lib/postgresql/12/demo -k -A peer
sudo pg_createcluster 12 demo
postgresql.conf配置文件里的, listen_addresses = '*'
[demo]pgbackrest的pgbackrest.conf设置数据路径如下, 其必须和给pg服务的postgresql.conf中的data_directory相同.vim /etc/pgbackrest/pgbackrest.conf
[demo]
pg1-path=/home/ubuntu/data_gas/postgresql/11/main
repo1-path), 添加到配置文件中mkdir -p /data_gas/pgbackrest_fake_backups/
chmod 755 /data_gas/pgbackrest_fake_backups/
chown postgres:postgres -R /data_gas/pgbackrest_fake_backups/
mkdir -p /tmp/pgbackrest/
chown -R postgres:postgres /tmp/pgbackrest/
vim /etc/pgbackrest/pgbackrest.conf
[demo]
pg1-path=/home/ubuntu/data_gas/postgresql/11/main
[global]
repo1-path=/data_gas/pgbackrest_fake_backups
archive_command = 'pgbackrest --stanza=demo archive-push %p'
archive_mode = on
listen_addresses = '*'
log_line_prefix = ''
max_wal_senders = 3
wal_level = replica
logging_collector = on # 打开日志, 必要条件
log_destination = 'csvlog' # 格式csvlog
log_directory = 'pg_log' # 日志目录 /u01/app/pglog
log_rotation_size = 50MB # 日志最大size 50MB
log_rotation_age = 1d #每天切一个日志
log_truncate_on_rotation = on #同名日志覆盖
全备[demo]
pg1-path=/home/ubuntu/data_gas/postgresql/11/main
[global]
repo1-path=/data_gas/pgbackrest_fake_backups
repo1-retention-full=10
repo1-retention-diff=10
[global:archive-push]
compress-level=9
sudo -u postgres pgbackrest --stanza=demo --log-level-console=info stanza-create
2022-02-19 17:57:20.580 P00 INFO: stanza-create command begin 2.37: --exec-id=13114-780b1514 --log-level-console=info --pg1-path=/home/ubuntu/data_gas/postgresql/11/main --repo1-path=/data_gas/pgbackrest_fake_backups --stanza=demo
2022-02-19 17:57:21.201 P00 INFO: stanza-create for stanza 'demo' on repo1
2022-02-19 17:57:21.225 P00 INFO: stanza-create command end: completed successfully (646ms)
postgres=# select pg_switch_wal();
pg_switch_wal
---------------
193/D55137F0
(1 row)
sudo -u postgres pgbackrest --stanza=demo --log-level-console=debug check
2022-02-19 18:36:37.128 P00 INFO: check command begin 2.37: --exec-id=33904-f7ab2a46 --log-level-console=info --pg1-path=/home/ubuntu/data_gas/postgresql/11/main --stanza=demo
ERROR: [068]: archive_command 'date' must contain pgbackrest
2022-02-19 18:36:37.535 P00 INFO: check command end: aborted with exception [068]
root@k8s-master-163:/home/ubuntu/data_gas/postgresql/11/main# sudo -u postgres pgbackrest --stanza=demo --log-level-console=info check
2022-02-19 19:50:11.961 P00 INFO: check command begin 2.37: --exec-id=4078-5deaa3d8 --log-level-console=info --pg1-path=/home/ubuntu/data_gas/postgresql/11/main --repo1-path=/data_gas/pgbackrest_fake_backups --stanza=demo
2022-02-19 19:50:12.566 P00 INFO: check repo1 configuration (primary)
2022-02-19 19:50:12.869 P00 INFO: check repo1 archive for WAL (primary)
2022-02-19 19:50:13.271 P00 INFO: WAL segment 0000000300000193000000DB successfully archived to '/data_gas/pgbackrest_fake_backups/archive/demo/11-1/0000000300000193/0000000300000193000000DB-1dec1269bc76816e19781a67eb62498b9da74ad8.gz' on repo1
2022-02-19 19:50:13.271 P00 INFO: check command end: completed successfully (1312ms)
-- 备份数据路径如下
root@k8s-master-163:/data_gas/pgbackrest_fake_backups# tree
.
├── archive
│ └── demo
│ ├── 11-1
│ │ └── 0000000300000193
│ │ ├── 0000000300000193000000DA-14a9ba9ebb821655e85ad380ecb4cffad1a1ae6c.gz
│ │ └── 0000000300000193000000DB-1dec1269bc76816e19781a67eb62498b9da74ad8.gz
│ ├── archive.info
│ └── archive.info.copy
└── backup
└── demo
├── backup.info
└── backup.info.copy
6 directories, 6 files
root@k8s-master-163:/data_gas/pgbackrest_fake_backups/archive/demo/11-1/0000000300000193# ll -h
total 12M
drwxr-x--- 2 postgres postgres 4.0K Feb 19 19:50 ./
drwxr-x--- 3 postgres postgres 4.0K Feb 19 19:49 ../
-rw-r----- 1 postgres postgres 11M Feb 19 19:49 0000000300000193000000DA-14a9ba9ebb821655e85ad380ecb4cffad1a1ae6c.gz
-rw-r----- 1 postgres postgres 575K Feb 19 19:50 0000000300000193000000DB-1dec1269bc76816e19781a67eb62498b9da74ad8.gz
全备开始的那一刻, 最新的wal1文件为0000000300000193000000DE, 追上其上一个点(0000000300000193000000DD)即视为全备完成.
root@k8s-master-163:/data# sudo -u postgres pgbackrest --stanza=demo --log-level-console=debug --start-fast --type=full backup
2022-02-19 20:09:01.827 P00 INFO: backup command begin 2.37: --exec-id=35713-a52b3add --log-level-console=info --pg1-path=/home/ubuntu/data_gas/postgresql/11/main --repo1-path=/data_gas/pgbackrest_fake_backups --repo1-retention-full=2 --stanza=demo --start-fast
WARN: no prior backup exists, incr backup has been changed to full
2022-02-19 20:09:02.835 P00 INFO: execute non-exclusive pg_start_backup(): backup begins after the requested immediate checkpoint completes
2022-02-19 20:09:03.637 P00 INFO: backup start archive = 0000000300000193000000DE, lsn = 193/DE000028
2022-02-19 20:09:03.637 P00 INFO: check archive for prior segment 0000000300000193000000DD
root@k8s-master-163:/data_gas/pgbackrest_fake_backups# tree
.
├── archive
│ └── demo
│ ├── 11-1
│ │ └── 0000000300000193
│ │ ├── 0000000300000193000000DA-14a9ba9ebb821655e85ad380ecb4cffad1a1ae6c.gz
│ │ ├── 0000000300000193000000DB-1dec1269bc76816e19781a67eb62498b9da74ad8.gz
│ │ ├── 0000000300000193000000DC-694f52d330abfa30137ac07b4da71bbab42c2a1a.gz
│ │ └── 0000000300000193000000DD-d5456d8434f115fc1da083e2f6c988650d7b1751.gz
│ ├── archive.info
│ └── archive.info.copy
└── backup
└── demo
├── 20220219-200902F
│ ├── backup.manifest.copy
│ └── pg_data
│ ├── base
│ │ ├── 1
│ │ ├── 117450
│ │ ├── 13052
│ │ ├── 13053
│ │ ├── 16384
│ │ │ ├── 20012.2.gz
│ │ │ ├── 20012.3.gz
│ │ │ ├── 316826.1.gz
│ │ │ ├── 316826.gz
│ │ │ ├── 512001.gz
│ │ │ ├── 512009.1.gz
│ │ │ ├── 512009.gz
│ │ │ ├── 512017.1.gz
│ │ │ └── 512017.gz
│ │ └── pgsql_job_cache
│ ├── global
│ ├── pg_commit_ts
│ ├── pg_dynshmem
│ ├── pg_foreign_file
│ │ └── cached
│ ├── pg_log
│ ├── pg_logical
│ │ ├── mappings
│ │ └── snapshots
│ ├── pg_multixact
│ │ ├── members
│ │ └── offsets
│ ├── pg_notify
│ ├── pg_replslot
│ ├── pg_serial
│ ├── pg_snapshots
│ ├── pg_stat
│ ├── pg_stat_tmp
│ ├── pg_subtrans
│ ├── pg_tblspc
│ ├── pg_twophase
│ ├── pg_wal
│ │ └── archive_status
│ ├── pg_xact
│ └── pipeline
│ └── zmq
├── backup.info
└── backup.info.copy
41 directories, 18 files
sudo -u postgres pgbackrest --stanza=demo --type=diff \
--log-level-console=info backup
[filtered 7 lines of output]
P00 INFO: check archive for segment(s) 000000010000000000000004:000000010000000000000005
P00 INFO: new backup label = 20211231-195532F_20211231-195538D
P00 INFO: diff backup size = 8.3KB, file total = 976
P00 INFO: backup command end: completed successfully
P00 INFO: expire command begin 2.37: --exec-id=1357-bbff90c0 --log-level-console=info --log-level-stderr=off --no-log-timestamp --repo1-cipher-pass= --repo1-cipher-type=aes-256-cbc --repo1-path=/var/lib/pgbackrest --repo1-retention-full=2 --stanza=demo
select pg_current_wal_lsn(),pg_walfile_name(pg_current_wal_lsn()),pg_walfile_name_offset(pg_current_wal_lsn());
pg_current_wal_lsn | pg_walfile_name | pg_walfile_name_offset
--------------------+--------------------------+------------------------------------
1A0/831A56C0 | 00000003000001A000000083 | (00000003000001A000000083,1726144)
cd /home/ubuntu/data_gas/postgresql/11/main/pg_wal
ll -rt
-rw------- 1 postgres postgres 16777216 Feb 21 06:51 00000003000001A00000009F
drwx------ 22 postgres postgres 4096 Feb 21 10:14 ../
drwx------ 3 postgres postgres 53248 Feb 21 10:17 ./
-rw------- 1 postgres postgres 16777216 Feb 21 10:26 00000003000001A000000082
drwx------ 2 postgres postgres 65536 Feb 21 10:26 archive_status/
-rw------- 1 postgres postgres 16777216 Feb 21 10:26 00000003000001A000000083
root@k8s-master-163:~# sudo -u postgres pgbackrest info
stanza: demo
status: error (no valid backups)
cipher: none
db (current)
wal archive min/max (11): 0000000300000198000000B1/00000003000001A000000082
backup命令的debug等级的日志, 会打印completed successfully.
2022-02-21 15:46:58.700 P00 DEBUG: storage/storage::storageInfoList: => true
2022-02-21 15:46:58.700 P00 DEBUG: storage/storage::storageList: => {["20220221-150458F"]}
2022-02-21 15:46:58.701 P00 DEBUG: command/expire/expire::removeExpiredBackup: => void
2022-02-21 15:46:58.701 P00 DEBUG: command/expire/expire::removeExpiredArchive: (infoBackup: {InfoBackup}, timeBasedFullRetention: false, repoIdx: 0)
2022-02-21 15:46:58.701 P00 DEBUG: command/expire/expire::removeExpiredArchive: => void
2022-02-21 15:46:58.701 P00 DEBUG: command/expire/expire::removeExpiredHistory: (infoBackup: {InfoBackup}, repoIdx: 0)
2022-02-21 15:46:58.701 P00 DEBUG: command/expire/expire::removeExpiredHistory: => void
2022-02-21 15:46:58.701 P00 DEBUG: command/expire/expire::cmdExpire: => void
2022-02-21 15:46:58.701 P00 DEBUG: common/exit::exitSafe: (result: 0, error: false, signalType: 0)
2022-02-21 15:46:58.701 P00 INFO: expire command end: completed successfully (116ms)
2022-02-21 15:46:58.701 P00 DEBUG: common/lock::lockRelease: (failOnNoLock: false)
2022-02-21 15:46:58.701 P00 DEBUG: common/lock::lockRelease: => true
2022-02-21 15:46:58.701 P00 DEBUG: common/exit::exitSafe: => 0
2022-02-21 15:46:58.701 P00 DEBUG: main::main: => 0
postgres@k8s-master-163:~$ pgbackrest info
stanza: demo
status: ok
cipher: none
db (current)
wal archive min/max (11): 00000003000001A000000096/00000003000001A00000009A
full backup: 20220221-134139F
timestamp start/stop: 2022-02-21 13:41:39 / 2022-02-21 14:10:02
wal start/stop: 00000003000001A000000099 / 00000003000001A00000009A
database size: 48.8GB, database backup size: 48.8GB
repo1: backup set size: 10.7GB, backup size: 10.7GB
pgbackrest --stanza=postgres --log-level-console=debug backup
pgbackrest@k8s-master-133:/data_gas$ pgbackrest info
stanza: postgres
status: ok
cipher: none
db (current)
wal archive min/max (11): 00000003000001A00000009C/00000003000001A0000000B7
full backup: 20220221-150458F
timestamp start/stop: 2022-02-21 15:04:58 / 2022-02-21 15:46:56
wal start/stop: 00000003000001A0000000AD / 00000003000001A0000000AF
database size: 46.9GB, database backup size: 46.9GB
repo1: backup set size: 10.5GB, backup size: 10.5GB
diff backup: 20220221-150458F_20220221-162757D
timestamp start/stop: 2022-02-21 16:27:57 / 2022-02-21 16:34:27
wal start/stop: 00000003000001A0000000B7 / 00000003000001A0000000B7
database size: 46.9GB, database backup size: 7.4GB
repo1: backup set size: 10.5GB, backup size: 1.6GB
backup reference list: 20220221-150458F
全备与增量备份可以使用 cron 等实用程序安排备份。
在以下示例中,配置了两个 cron 作业以运行;完整备份计划在每周日上午 6:30 进行,差异备份计划在周一至周六上午 6:30 进行。如果这个 crontab 是在周中第一次安装,那么 pgBackRest 将在第一次执行差异作业时运行完整备份,然后在第二天执行差异备份。
#m h dom mon dow command
30 06 * * 0 pgbackrest --type=full --stanza=demo backup
30 06 * * 1-6 pgbackrest --type=diff --stanza=demo backup
计划备份后,配置保留很重要,以便备份定期过期,请参阅保留。
root@k8s-master-163:/home/ubuntu/data_gas/postgresql/11/main# sudo -u postgres pgbackrest info
stanza: demo
status: error (no valid backups, backup/expire running)
cipher: none
db (current)
wal archive min/max (11): 0000000300000193000000DA/0000000300000193000000DD
使用–output=json。JSON 输出包含比文本输出更多的信息
wal archive min/max 显示当前存储在存档中的最小和最大 WAL
备份从最旧到最新显示。最旧的备份将始终是完整备份(由标签末尾的F指示),但最新备份可以是完整备份、差异备份(以D结尾)或增量备份(以I结尾)。
“ timestamp start/stop ”定义了备份运行的时间段。’ timestamp stop ’ 可用于确定执行时间点恢复时要使用的备份。有关时间点恢复的更多信息,请参见时间点恢复部分。
’ wal start/stop ’ 定义了在恢复时使数据库保持一致所需的 WAL 范围。备份命令将确保此 WAL 范围在完成之前位于存档中。
“database size”是数据库的完整未压缩大小,而“数据库备份大小”是数据库中实际备份的数据量(对于完整备份,这些数据量相同)。
’ repo ’ 指示此备份驻留在哪个存储库中。‘备份集大小’ 包括此备份中的所有文件以及存储库中从此备份还原数据库所需的任何引用备份,而 ’ backup size ’ 仅包括此备份中的文件(对于完整备份,这些文件也是相同的)。如果在pgBackRest或文件系统中启用了压缩,存储库大小反映了压缩文件的大小。
'backup reference list’包含恢复此备份所需的其他备份
要恢复PostgreSQL集群的备份,请使用restore命令运行pgBackRest 。需要停止集群(在这种情况下它已经停止)并且必须从PostgreSQL数据目录中删除所有文件。
sudo -u postgres find /home/ubuntu/data_gas/postgresql/11/main -mindepth 1 -delete
sudo -u postgres pgbackrest --stanza=demo restore
sudo pg_ctlcluster 12 demo start
PostgreSQL COPY命令允许 将pgBackRest信息加载到表中。以下示例将该逻辑包装在可用于执行实时查询的函数中。
sudo -u postgres cat /build/pgbackrest-release-2.37/doc/example/pgsql-pgbackrest-info.sql
sudo -u postgres psql -f /build/pgbackrest-release-2.37/doc/example/pgsql-pgbackrest-info.sql
现在可以使用monitor.pgbackrest_info()函数来确定最后一次成功的备份时间和一个节的归档 WAL。
root@k8s-master-163:/build/pgbackrest-release-2.37/doc/example# sudo -u postgres psql -f /build/pgbackrest-release-2.37/doc/example/pgsql-pgbackrest-query.sql
name | last_successful_backup | last_archived_wal
--------+------------------------+--------------------------
"demo" | | 0000000300000193000000E1
(1 row)
通常最好保留尽可能多的备份,以便为时间点恢复提供更大的窗口,但也必须考虑磁盘空间等实际问题。保留选项会在不再需要旧备份时将其删除。
pgBackRest根据保留类型(可以是计数或时间段)执行完整备份轮换。当指定计数时,过期与创建备份的时间无关,而是必须保留多少。差异备份和增量备份是基于计数的,但是当它们所依赖的备份过期时,它们总是会过期。有关详细信息和示例,请参阅完整备份保留和差异备份保留部分。默认情况下,未过期的备份会保留已归档 WAL,但是,尽管不推荐,但可以使用保留归档选项为每个存储库修改此计划。有关详细信息和示例,请参阅存档保留部分。过期
_命令在每次成功备份后自动运行,也可以由用户运行。当由用户运行时,将按照每个已配置存储库的保留设置的定义发生过期。如果提供了–repo选项,则仅在指定的存储库上发生过期。用户也可以使用–set选项将过期时间限制为特定的备份集,除非指定了–repo选项,否则将搜索所有存储库,并且任何匹配设置条件的内容都将过期。应该注意的是,存档保留计划将在运行expire命令时检查并执行。
repo1 -retention-full-type确定选项repo1-retention-full的解释方式;作为要保留的完整备份的计数或保留完整备份的天数。新备份必须在到期前完成——这意味着如果repo1-retention-full-type=count和repo1-retention-full=2则在最旧的备份到期之前将存储三个完整备份,或者如果repo1-retention -full-type=time和repo1-retention-full=20则必须有一个完整备份至少 20 天才能到期。
pg-primary:/etc/pgbackrest/pgbackrest.conf **⇒** Configure repo1-retention-full
[demo]
pg1-path=/var/lib/postgresql/12/demo
[global]
repo1-cipher-pass=zWaf6XtpjIVZC5444yXB+cgFDFl7MxGlgkZSaoPvTGirhPygu4jOKOXf9LO4vjfO
repo1-cipher-type=aes-256-cbc
repo1-path=/var/lib/pgbackrest
repo1-retention-full=2
start-fast=y
[global:archive-push]
compress-level=9
Backup repo1-retention-full=2 but currently there is only one full backup so the next full backup to run will not expire any full backups.
pg-primary ⇒ Perform a full backup
sudo -u postgres pgbackrest --stanza=demo --type=full \
--log-level-console=detail backup
[filtered 985 lines of output]
P00 INFO: backup command end: completed successfully
P00 INFO: expire command begin 2.37: --exec-id=1625-0be930da --log-level-console=detail --log-level-stderr=off --no-log-timestamp --repo1-cipher-pass= --repo1-cipher-type=aes-256-cbc --repo1-path=/var/lib/pgbackrest --repo1-retention-full=2 --stanza=demo
P00 DETAIL: repo1: 12-1 archive retention on backup 20211231-195532F, start = 000000010000000000000002
P00 INFO: repo1: 12-1 remove archive, start = 000000010000000000000001, stop = 000000010000000000000001
P00 INFO: expire command end: completed successfully
Archive is expired because WAL segments were generated before the oldest backup. These are not useful for recovery — only WAL segments generated after a backup can be used to recover that backup.
pg-primary ⇒ Perform a full backup
sudo -u postgres pgbackrest --stanza=demo --type=full \
--log-level-console=info backup
[filtered 9 lines of output]
P00 INFO: backup command end: completed successfully
P00 INFO: expire command begin 2.37: --exec-id=1652-b7ac48aa --log-level-console=info --log-level-stderr=off --no-log-timestamp --repo1-cipher-pass= --repo1-cipher-type=aes-256-cbc --repo1-path=/var/lib/pgbackrest --repo1-retention-full=2 --stanza=demo
P00 INFO: repo1: expire full backup set 20211231-195532F, 20211231-195532F_20211231-195538D
P00 INFO: repo1: remove expired backup 20211231-195532F_20211231-195538D
P00 INFO: repo1: remove expired backup 20211231-195532F
P00 INFO: repo1: 12-1 remove archive, start = 0000000100000000, stop = 000000020000000000000006
P00 INFO: expire command end: completed successfully
The 20211231-195532F full backup is expired and archive retention is based on the 20211231-195604F which is now the oldest full backup.
将repo1-retention-diff设置为所需的差异备份数。差异仅依赖于先前的完整备份,因此可以为最后一天或更长时间创建一组滚动差异。这允许快速恢复到最近的时间点,但减少了整体空间消耗。
pg-primary : /etc/pgbackrest/pgbackrest.conf **⇒**配置repo1-retention-diff
[demo]
pg1-path=/var/lib/postgresql/12/demo
[global]
repo1-cipher-pass=zWaf6XtpjIVZC5444yXB+cgFDFl7MxGlgkZSaoPvTGirhPygu4jOKOXf9LO4vjfO
repo1-cipher-type=aes-256-cbc
repo1-path=/var/lib/pgbackrest
repo1-retention-diff=2
repo1-retention-full=2
start-fast=y
[global:archive-push]
compress-level=9
备份repo1-retention-diff=1因此需要在一个过期之前执行两个差异。添加增量备份以演示增量过期。增量备份不能单独过期——它们总是与相关的完整备份或差异备份一起过期。
pg-primary ⇒执行差异和增量备份
sudo -u postgres pgbackrest --stanza=demo --type=diff backup
sudo -u postgres pgbackrest --stanza=demo --type=incr backup
现在执行差异备份将使之前的差异备份和增量备份失效,只留下一个差异备份。
sudo -u postgres pgbackrest --stanza=demo --type=diff \
--log-level-console=info backup
[filtered 10 lines of output]
P00 INFO: backup command end: completed successfully
P00 INFO: expire command begin 2.37: --exec-id=1729-c585e461 --log-level-console=info --log-level-stderr=off --no-log-timestamp --repo1-cipher-pass= --repo1-cipher-type=aes-256-cbc --repo1-path=/var/lib/pgbackrest --repo1-retention-diff=1 --repo1-retention-full=2 --stanza=demo
P00 INFO: repo1: expire diff backup set 20211231-195609F_20211231-195615D, 20211231-195609F_20211231-195619I
P00 INFO: repo1: remove expired backup 20211231-195609F_20211231-195619I
P00 INFO: repo1: remove expired backup 20211231-195609F_20211231-195615D
P00 INFO: expire command end: completed successfully
尽管pgBackRest会在备份过期时自动删除归档的 WAL 段(基于repo1-retention-full选项的完整备份默认使 WAL 过期),但更积极地使归档过期以节省磁盘空间可能很有用。请注意,出于差异存档保留的目的,完整备份被视为差异备份。
过期存档永远不会删除使备份一致所需的 WAL 段。但是,由于时间点恢复 (PITR) 仅适用于连续的 WAL 流,因此在正常备份过期过程之外主动过期存档时应小心。要确定什么会过期而不实际过期,可以在命令行中使用expire命令提供空运行选项。
pg-primary : /etc/pgbackrest/pgbackrest.conf ⇒配置repo1-retention-diff
[demo]
pg1-path=/var/lib/postgresql/12/demo
[global]
repo1-cipher-pass=zWaf6XtpjIVZC5444yXB+cgFDFl7MxGlgkZSaoPvTGirhPygu4jOKOXf9LO4vjfO
repo1-cipher-type=aes-256-cbc
repo1-path=/var/lib/pgbackrest
repo1-retention-diff=2
repo1-retention-full=2
start-fast=y
[global:archive-push]
compress-level=9
pg-primary ⇒执行差异备份
sudo -u postgres pgbackrest --stanza=demo --type=diff \
--log-level-console=info backup
[filtered 6 lines of output]
P00 INFO: backup stop archive = 000000020000000000000013, lsn = 0/13000050
P00 INFO: check archive for segment(s) 000000020000000000000012:000000020000000000000013
P00 INFO: new backup label = 20211231-195609F_20211231-195627D
P00 INFO: diff backup size = 8.3KB, file total = 976
P00 INFO: backup command end: completed successfully
[filtered 2 lines of output]
pg-primary ⇒过期存档
sudo -u postgres pgbackrest --stanza=demo --log-level-console=detail \
--repo1-retention-archive-type=diff --repo1-retention-archive=1 expire
P00 INFO: expire command begin 2.37: --exec-id=1812-3bf5f31c --log-level-console=detail --log-level-stderr=off --no-log-timestamp --repo1-cipher-pass= --repo1-cipher-type=aes-256-cbc --repo1-path=/var/lib/pgbackrest --repo1-retention-archive=1 --repo1-retention-archive-type=diff --repo1-retention-diff=2 --repo1-retention-full=2 --stanza=demo
P00 DETAIL: repo1: 12-1 archive retention on backup 20211231-195604F, start = 000000020000000000000007, stop = 000000020000000000000007
P00 DETAIL: repo1: 12-1 archive retention on backup 20211231-195609F, start = 000000020000000000000008, stop = 000000020000000000000009
P00 DETAIL: repo1: 12-1 archive retention on backup 20211231-195609F_20211231-195621D, start = 00000002000000000000000E, stop = 00000002000000000000000F
P00 DETAIL: repo1: 12-1 archive retention on backup 20211231-195609F_20211231-195627D, start = 000000020000000000000012
P00 INFO: repo1: 12-1 remove archive, start = 00000002000000000000000A, stop = 00000002000000000000000D
P00 INFO: repo1: 12-1 remove archive, start = 000000020000000000000010, stop = 000000020000000000000011
P00 INFO: expire command end: completed successfully
20211231-195609F_20211231-195621D差异备份具有存档的 WAL 段,必须保留 这些段以使旧备份保持一致,即使它们无法通过 PITR 进一步播放。删除在20211231-195609F_20211231-195621D之后但在20211231-195609F_20211231-195627D之前生成的 WAL 段。新备份20211231-195609F_20211231-195627D后生成的 WAL 段保留并可用于 PITR。
由于出于差异存档保留的目的,完整备份被视为差异备份,因此如果现在使用相同的设置执行完整备份,则仅为 PITR 保留该完整备份的存档。
restore 命令自动默认为从存在备份的第一个存储库中选择最新的备份(请参阅快速入门 - 恢复备份)。检查存储库的顺序由pgbackrest.conf规定(例如,repo1 将在 repo2 之前检查)。要从特定存储库中进行选择,可以传递–repo选项(例如–repo=1)。如果需要不是最新的备份,则可以传递–set选项。
如果指定了–type=time的 PITR ,则必须使用–target选项指定目标时间。
如果未通过–set指定备份选项,然后将按顺序检查配置的存储库以获取包含请求时间的备份。如果找不到备份,将使用第一个包含备份的存储库中的最新备份。
对于其他类型的 PITR,例如xid,如果目标在最新备份之前,则必须提供–set选项。有关更多详细信息和示例,请参阅时间点恢复。
以下部分介绍了其他恢复命令功能。
如果还原以非 root 用户身份运行(典型场景),则所有还原的文件都将属于执行pgBackRest的用户/组。如果现有文件不属于执行用户/组,则如果无法将所有权更新到执行用户/组,则会导致错误。在这种情况下,需要由特权用户更新文件所有权,然后才能重试还原。
如果以root用户身份运行还原,则pgBackRest将尝试重新创建在进行备份时记录在清单中的所有权。清单中仅存储用户/组名称,因此还原主机上必须存在相同的名称才能正常工作。如果在本地找不到用户/组名称,则将使用PostgreSQL数据目录的用户/组,如果数据目录用户/组无法映射到名称 ,则最后使用root 。
在快速启动中恢复备份需要在执行恢复之前清理数据库集群目录。delta选项允许pgBackRest自动确定可以保留数据库集群目录中的哪些文件以及需要从备份中恢复哪些文件——它还删除了备份清单中不存在的文件,因此它将处理不同的更改。这是通过为数据库集群目录中的每个文件计算SHA-1加密哈希来实现的。如果SHA-1hash 与备份中存储的 hash 不匹配,则该文件将被恢复。当与process-max选项结合使用时,此操作非常有效。由于PostgreSQL服务器在还原期间关闭,因此可以使用比在PostgreSQL服务器运行时备份期间可能需要的更多数量的进程。
sudo pg_ctlcluster 12 demo stop
sudo -u postgres pgbackrest --stanza=demo --delta \
--log-level-console=detail restore
[filtered 2 lines of output]
P00 DETAIL: check '/var/lib/postgresql/12/demo' exists
P00 DETAIL: remove 'global/pg_control' so cluster will not start if restore does not complete
P00 INFO: remove invalid files/links/paths from '/var/lib/postgresql/12/demo'
P00 DETAIL: remove invalid file '/var/lib/postgresql/12/demo/backup_label.old'
P00 DETAIL: remove invalid file '/var/lib/postgresql/12/demo/base/1/pg_internal.init'
[filtered 816 lines of output]
P01 DETAIL: restore file /var/lib/postgresql/12/demo/base/13397/PG_VERSION - exists and matches backup (3B, 99%) checksum ad552e6dc057d1d825bf49df79d6b98eba846ebe
P01 DETAIL: restore file /var/lib/postgresql/12/demo/base/1/PG_VERSION - exists and matches backup (3B, 99%) checksum ad552e6dc057d1d825bf49df79d6b98eba846ebe
P01 DETAIL: restore file /var/lib/postgresql/12/demo/PG_VERSION - exists and matches backup (3B, 100%) checksum ad552e6dc057d1d825bf49df79d6b98eba846ebe
P01 DETAIL: restore file /var/lib/postgresql/12/demo/global/6100 - exists and is zero size (0B, 100%)
P01 DETAIL: restore file /var/lib/postgresql/12/demo/global/6000 - exists and is zero size (0B, 100%)
[filtered 202 lines of output]
sudo pg_ctlcluster 12 demo start
在某些情况下,可能需要有选择地从集群备份中恢复特定数据库。这可以出于性能原因或将选定的数据库移动到没有足够空间来恢复整个集群备份的机器上。
sudo -u postgres pgbackrest --stanza=demo \
--set=20211231-195609F_20211231-195642I info
[filtered 11 lines of output]
repo1: backup set size: 4.7MB, backup size: 1.9MB
backup reference list: 20211231-195609F, 20211231-195609F_20211231-195627D
database list: postgres (13398), test1 (24576), test2 (24577)
--db-include可指定恢复哪个数据库sudo pg_ctlcluster 12 demo stop
sudo -u postgres pgbackrest --stanza=demo --delta \
--db-include=test2 --type=immediate --target-action=promote restore
sudo pg_ctlcluster 12 demo start
之后再执行drop 未被恢复的database即可
-- 先mock insert数据, 并手动把数据落盘到wal里
deepface_data=# create table important_table (message text);
CREATE TABLE
deepface_data=# insert into important_table values ('Important Data');
INSERT 0 1
deepface_data=# commit;
WARNING: there is no transaction in progress
COMMIT
deepface_data=# select * from important_table;
message
----------------
Important Data
(1 row)
deepface_data=# select pg_switch_wal();
pg_switch_wal
---------------
1A0/BE5733B8
(1 row)
-- 记录需要PITR的时间点
postgres@k8s-master-163:/home/ubuntu/data_gas/postgresql/11/main$ psql -Atc "select current_timestamp"
2022-02-21 20:22:10.671781+08
-- 再mock delete数据, 并手动把数据落盘到wal里(可选)
deepface_data=# drop table important_table;
deepface_data=# select * from important_table;
ERROR: relation "important_table" does not exist
LINE 1: select * from important_table;
deepface_data=# select pg_switch_wal();
pg_switch_wal
---------------
1A0/BF22FD98
(1 row)
-- 最后停pg
pg_ctl stop -D /home/ubuntu/data_gas/postgresql/11/main
20220221-150458F_20220221-172726Dpostgres@k8s-master-163:/home/ubuntu/data_gas/postgresql/11/main_bak/pg_log$ pgbackrest info
stanza: postgres
status: ok
cipher: none
db (current)
wal archive min/max (11): 00000003000001A00000009C/00000004000001A0000000C3
full backup: 20220221-150458F
timestamp start/stop: 2022-02-21 15:04:58 / 2022-02-21 15:46:56
wal start/stop: 00000003000001A0000000AD / 00000003000001A0000000AF
database size: 46.9GB, database backup size: 46.9GB
repo1: backup set size: 10.5GB, backup size: 10.5GB
diff backup: 20220221-150458F_20220221-162757D
timestamp start/stop: 2022-02-21 16:27:57 / 2022-02-21 16:34:27
wal start/stop: 00000003000001A0000000B7 / 00000003000001A0000000B7
database size: 46.9GB, database backup size: 7.4GB
repo1: backup set size: 10.5GB, backup size: 1.6GB
backup reference list: 20220221-150458F
diff backup: 20220221-150458F_20220221-172726D
timestamp start/stop: 2022-02-21 17:27:26 / 2022-02-21 17:34:32
wal start/stop: 00000003000001A0000000BB / 00000003000001A0000000BB
database size: 46.9GB, database backup size: 8.7GB
repo1: backup set size: 10.5GB, backup size: 1.9GB
backup reference list: 20220221-150458F
-- 然后开始备份
pgbackrest --stanza=postgres --delta \
--set=20220221-150458F_20220221-172726D \
--type=time "--target=2022-02-21 20:22:10.671781+08" \
--target-action=promote restore
/var/log/pgbackrest/postgres-restore.log里看到如下日志, 这期间因为主库pg已被stop所以并不会有``pg_log目录`的日志产生-------------------PROCESS START-------------------
2022-02-21 20:34:09.593 P00 INFO: restore command begin 2.37: --delta --exec-id=4021-a4c337f9 --log-level-file=detail --pg1-path=/home/ubuntu/data_gas/postgresql/11/main --repo1-host=Pgbackrest1 --set=20220221-150458F_20220221-172726D --stanza=postgres --target="2022-02-21 20:22:10.671781+08" --target-action=promote --type=time
2022-02-21 20:34:10.268 P00 INFO: repo1: restore backup set 20220221-150458F_20220221-172726D, recovery will start at 2022-02-21 17:27:26
2022-02-21 20:34:10.269 P00 DETAIL: check '/home/ubuntu/data_gas/postgresql/11/main' exists
2022-02-21 20:34:10.269 P00 DETAIL: remove 'global/pg_control' so cluster will not start if restore does not complete
2022-02-21 20:34:10.288 P00 INFO: remove invalid files/links/paths from '/home/ubuntu/data_gas/postgresql/11/main'
2022-02-21 20:34:10.290 P00 DETAIL: remove invalid file '/home/ubuntu/data_gas/postgresql/11/main/backup_label.old'
2022-02-21 20:34:10.350 P00 DETAIL: remove invalid file '/home/ubuntu/data_gas/postgresql/11/main/base/13053/pg_internal.init'
-- 漫长地: 把主库, 和备库, 比较文件的checksum: 若文件已存在则跳过, 反之则scp过来并解压
2022-02-21 20:34:10.796 P00 DETAIL: remove invalid file '/home/ubuntu/data_gas/postgresql/11/main/base/16384/3494052'
2022-02-21 20:34:13.051 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/512017.1 - exists and matches backup (1GB, 2%) checksum ef2e9483c4c11a53d25428d4bb0f9bf51a8dc9a1
2022-02-21 20:34:18.880 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/512009 - exists and matches backup (1GB, 8%) checksum 9e8cdd58580a69cb4c0f31cb5cfe60058f914a1f
2022-02-21 20:34:20.828 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/512001 - exists and matches backup (1GB, 10%) checksum 27fbe592661e7c24e3c611a66fa34aacebe785dd
2022-02-21 20:39:57.077 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/23097 - exists and matches backup (920KB, 98%) checksum fe382a7243896112f88ba354d049477212326878
2022-02-21 20:39:58.614 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/655658 - exists and matches backup (512KB, 99%) checksum 38b57a6c92df04afec5f991648385646a322d64c
2022-02-21 20:40:03.784 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/881355 - exists and matches backup (96KB, 99%) checksum f4c6540168f231b17ab020e7020de0adb9a50656
2022-02-21 20:40:12.147 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/16384/1723579_fsm - exists and matches backup (24KB, 99%) checksum 0c77e1e9be7b6ce5f67d0a1cc4654e7c360e7a71
2022-02-21 20:40:40.364 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/13052/12890 - exists and is zero size (0B, 100%)
2022-02-21 20:40:40.365 P01 DETAIL: restore file /home/ubuntu/data_gas/postgresql/11/main/base/1/826_vm - exists and is zero size (0B, 100%)
-- 移除无用的文件
2022-02-21 20:40:40.596 P00 INFO: write /home/ubuntu/data_gas/postgresql/11/main/recovery.conf
2022-02-21 20:40:40.626 P00 DETAIL: sync path '/home/ubuntu/data_gas/postgresql/11/main'
2022-02-21 20:40:40.642 P00 DETAIL: sync path '/home/ubuntu/data_gas/postgresql/11/main/base'
2022-02-21 20:40:40.642 P00 DETAIL: sync path '/home/ubuntu/data_gas/postgresql/11/main/base/1'
2022-02-21 20:40:40.642 P00 DETAIL: sync path '/home/ubuntu/data_gas/postgresql/11/main/base/13052'
-- 然后就成功了
2022-02-21 20:40:40.643 P00 INFO: restore global/pg_control (performed last to ensure aborted restores cannot be started)
2022-02-21 20:40:40.643 P00 DETAIL: sync path '/home/ubuntu/data_gas/postgresql/11/main/global'
2022-02-21 20:40:40.659 P00 INFO: restore size = 46.9GB, file total = 12000
2022-02-21 20:40:40.663 P00 INFO: restore command end: completed successfully (391071ms)
restore_command = 'pgbackrest --stanza=postgres archive-get %f "%p"'
recovery_target_time = '2022-02-21 20:22:10.671781+08'
recovery_target_action = 'promote'
-- 先在PGDATA目录里创建一个postmaster.opts文件, 只有如下一行
/usr/lib/postgresql/11/bin/postgres "-D" "/home/ubuntu/data_gas/postgresql/11/main"
-- 然后启动主库
postgres@k8s-master-163:/home/ubuntu/data_gas/postgresql/11/main$ pg_ctl restart -D /home/ubuntu/data_gas/postgresql/11/main
pg_ctl: PID file "/home/ubuntu/data_gas/postgresql/11/main/postmaster.pid" does not exist
Is server running?
trying to start server anyway
waiting for server to start....LOG: listening on IPv4 address "0.0.0.0", port 5432
LOG: listening on IPv6 address "::", port 5432
LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432"
LOG: redirecting log output to logging collector process
HINT: Future log output will appear in directory "pg_log".
.... done
server started
-- 启动后, 主库的pg_log如下, 此时是只读状态
LOG: database system was interrupted; last known up at 2022-02-21 17:32:34 CST
FATAL: the database system is starting up
FATAL: the database system is starting up
LOG: starting point-in-time recovery to 2022-02-21 20:22:10.671781+08
LOG: restored log file "00000003000001A0000000BB" from archive
LOG: redo starts at 1A0/BB000028
LOG: consistent recovery state reached at 1A0/BB5919C8
LOG: database system is ready to accept read only connections
LOG: restored log file "00000003000001A0000000BC" from archive
LOG: starting maintenance daemon on database 16384 user 10
CONTEXT: Citus maintenance daemon for database 16384 user 10
200648 LOG: restored log file "00000003000001A0000000BD" from archive
200649 LOG: redo done at 1A0/BD2810F0
200650 LOG: last completed transaction was at log time 2022-02-21 17:42:30.690344+08
200651 LOG: restored log file "00000003000001A0000000BD" from archive
200652 LOG: restored log file "00000004.history" from archive
200653 LOG: selected new timeline ID: 5
200654 LOG: archive recovery complete
200655 LOG: database system is ready to accept connections
假设两台服务器,分别命名为Pg1和Pgbackrest1,其中:
Pg1:数据库服务器
Pgbackrest1:远端仓库服务器
在Pgbackrest1上创建用户,并设置密码。
useradd pgbackrest
passwd pgbackrest
useradd postgres
passwd postgres
vim /etc/hosts
192.168.2.133 Pgbackrest1
192.168.2.163 Pg1
vim /etc/ssh/sshd_config
PubkeyAuthentication yes
StrictHostKeyChecking no
mkdir -p -m 750 /home/pgbackrest/.ssh
chown -R pgbackrest.pgbackrest /home/pgbackrest/
su - pgbackrest
ssh-keygen -f /home/pgbackrest/.ssh/id_rsa -t rsa -b 4096 -N ""
su - postgres
mkdir -m 750 -p /home/postgres/.ssh
ssh-keygen -f /home/postgres/.ssh/id_rsa -t rsa -b 4096 -N ""
cd ~/.ssh
ssh-copy-id -i ./id_rsa.pub postgres@Pg1
cd ~/.ssh
ssh-copy-id -i ./id_rsa.pub pgbackrest@Pgbackrest1

repox-host必须放在[global]里, 不能放在[demo]里.repox-host-user是pgbackrest, 可按需更改才能让主备网络能通: 例如我就是用主的postgres用户和备的postgres用户建立的ssh免密, 所以我在此处变设置为postgres/etc/pgbackrest里的pgbackrest.conf和conf.d, 其中conf.d中还是放若干.conf文件, 会被组合起来.在Pgbackrest1上修改文件执行权限,(如何编译pgbackrest参见单机使用部分)。
chmod 755 /usr/bin/pgbackrest
chown postgres:postgres /usr/bin/pgbackrest
在Pgbackrest1上创建配置文件和路径。
mkdir -p -m 770 /var/log/pgbackrest
chown pgbackrest:pgbackrest /var/log/pgbackrest
mkdir -p /etc/pgbackrest
mkdir -p /etc/pgbackrest/conf.d
touch /etc/pgbackrest/pgbackrest.conf
chmod 640 /etc/pgbackrest/pgbackrest.conf
chown pgbackrest:pgbackrest /etc/pgbackrest/pgbackrest.conf
chown -R pgbackrest:pgbackrest /etc/pgbackrest/
mkdir -p /var/lib/pgbackrest
chmod 750 /var/lib/pgbackrest
chown pgbackrest:pgbackrest /var/lib/pgbackrest
vim /etc/pgbackrest/pgbackrest.conf
内容如下:
[postgres]
pg1-host=Pg1
pg1-path=/home/ubuntu/data_gas/postgresql/11/main
pg1-host-user=postgres
pg1-user=postgres
[global]
repo1-path=/data_gas/pgbackrest_fake_backups
repo1-retention-full=2
start-fast=y
chmod 755 /usr/bin/pgbackrest
mkdir -p -m 770 /var/log/pgbackrest
chown postgres:postgres /var/log/pgbackrest
mkdir -p /etc/pgbackrest
mkdir -p /etc/pgbackrest/conf.d
touch /etc/pgbackrest/pgbackrest.conf
chmod 640 /etc/pgbackrest/pgbackrest.conf
chown postgres:postgres /etc/pgbackrest/pgbackrest.conf
chown -R postgres:postgres /etc/pgbackrest/
vim /etc/pgbackrest/pgbackrest.conf
[postgres]
pg1-path=/home/ubuntu/data_gas/postgresql/11/main
pg1-user=postgres
[global]
log-level-file=detail
repo1-host=Pgbackrest1
su - postgres
cd ~
vim .pgpass
localhost:5432:postgres:postgres:Zstvgcs@9102
localhost:5432:deepface_data:postgres:Zstvgcs@9102
chmod 0600 .pgpass
vim ~/.bashrc
export PGPASSFILE=~/.pgpass
archive_command = 'pgbackrest --stanza=postgres archive-push %p'
archive_mode = on
listen_addresses = '*'
log_line_prefix = ''
max_wal_senders = 3
wal_level = replica
/var/lib/postgresql/.ssh下创建id_rsa私钥, 和id_rsa.pub公钥, 通过sshpass配合ssh-copy-id来把自己的公钥传递给对方(即将id_rsa.pub放入对方的authorized_keys中, 并在client第一次连接server时在client端创建known_host文件)-rw------- 1 postgres postgres 749 Feb 28 09:06 authorized_keys
-rw------- 1 postgres postgres 3.4K Feb 28 08:35 id_rsa
-rw-r--r-- 1 postgres postgres 749 Feb 28 08:35 id_rsa.pub
-rw------- 1 postgres postgres 806 Feb 28 09:06 known_hosts
-rw-r--r-- 1 postgres postgres 142 Feb 28 09:05 known_hosts.old
#!/bin/bash
RED="\033[31m"
YELLOW="\033[33m"
COLOR_END="\033[0m"
echo -e "${YELLOW}[开始] ${COLOR_END}"
if [ $UID != 0 ]; then
echo -e "${RED}请使用root安装!${COLOR_END}" && exit 500
fi
# postgres
echo -e "${YELLOW}开始创建postgres用户 ${COLOR_END}"
POSTGRES_HOME_PATH="/var/lib/postgresql"
mkdir -p -m 750 ${POSTGRES_HOME_PATH}/.ssh
chown -R postgres:postgres ${POSTGRES_HOME_PATH}/.ssh
if [ ! -f "${POSTGRES_HOME_PATH}/.ssh/id_rsa" ] || [ ! -f "${POSTGRES_HOME_PATH}/.ssh/id_rsa.pub" ]; then
echo -e "${YELLOW}开始创建新秘钥${COLOR_END}" && su - postgres -c "ssh-keygen -f ${POSTGRES_HOME_PATH}/.ssh/id_rsa -t rsa -b 4096 -N ''"
else
echo -e "${YELLOW}秘钥已存在, 一切正常${COLOR_END}"
fi
(echo "postgres" && echo "postgres") | sudo passwd postgres
echo -e "${YELLOW}[结束] ${COLOR_END}"
#!/bin/bash
# [主机打包] tar -Pzcvf bin/primary.tgz /etc/pgbackrest/ /tmp/pgbackrest/ /usr/bin/pgbackrest /usr/bin/sshpass /var/log/pgbackrest/
# [备机打包] tar -Pzcvf bin/standby.tgz /etc/pgbackrest/ /tmp/pgbackrest/ /usr/bin/pgbackrest /usr/bin/sshpass /var/spool/cron/crontabs/postgres /var/log/pgbackrest/
check_dependency_dir() {
FUNC="检查依赖目录"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
tar zxvfP bin/"${NODE_TYPE}.tgz"
case ${NODE_TYPE} in
${NODE_TYPE_STANDBY})
[[ ! -d /backup ]] && echo -e "${RED}未挂载/backup目录, 请挂载或软链接到大容量磁盘(>=2TB), 防止磁盘被冷备写满!${COLOR_END}" && exit 500
mkdir -p ${STANDY_BACKUP_PATH}
chown -R postgres:postgres /backup
chmod 77
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
create_ssh_passwordless() {
FUNC="建立ssh免密登录"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
PRIMARY_POSTGRES_HOME_PATH="/var/lib/postgresql"
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
echo -e "${YELLOW}开始拷贝公钥${COLOR_END}" && sshpass -p postgres ssh-copy-id -o StrictHostKeyChecking=no -f -i ${PRIMARY_POSTGRES_HOME_PATH}/.ssh/id_rsa.pub postgres@${STANDBY_IP}
echo -e "${YELLOW}开始建立第一次ssh连接${COLOR_END}" && su - postgres -c "ssh -o StrictHostKeyChecking=no postgres@${STANDBY_IP} hostname"
;;
${NODE_TYPE_STANDBY})
sshpass -p postgres ssh-copy-id -o StrictHostKeyChecking=no -f -i ${PRIMARY_POSTGRES_HOME_PATH}/.ssh/id_rsa.pub postgres@${PRIMARY_IP}
su - postgres -c "ssh -o StrictHostKeyChecking=no postgres@${PRIMARY_IP} hostname"
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
# 指定[主机][备机]pgbackrest的STANZA_NAME
change_pgbackrest_conf() {
FUNC="更改pgbackrest.conf配置并生效"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
PGBACKREST_TEMPLATE_CONF_PATH="/etc/pgbackrest/${NODE_TYPE}.asgard.template"
PGBACKREST_CONF_PATH="/etc/pgbackrest/conf.d/${STANZA_NAME}.conf"
cp -p ${PGBACKREST_TEMPLATE_CONF_PATH} ${PGBACKREST_CONF_PATH}
sed -i -r "s#\[asgard(.*)\]#\[${STANZA_NAME}\]#g" ${PGBACKREST_CONF_PATH}
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
sed -i -r "s#pg1-path=(.*)#pg1-path=${PGDATA}#g" ${PGBACKREST_CONF_PATH}
sed -i -r "s#repo1-host=(.*)#repo1-host=${STANDBY_IP}#g" ${PGBACKREST_CONF_PATH}
;;
${NODE_TYPE_STANDBY})
sed -i -r "s#pg1-host=(.*)#pg1-host=${PRIMARY_IP}#g" ${PGBACKREST_CONF_PATH}
sed -i -r "s#pg1-path=(.*)#pg1-path=${PGDATA}#g" ${PGBACKREST_CONF_PATH}
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
change_cron_conf() {
FUNC="更改cron配置"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
CRON_CONF_PATH="/var/spool/cron/crontabs/postgres"
case ${NODE_TYPE} in
${NODE_TYPE_STANDBY})
sed -i -r "s#stanza=(.*) #stanza=${STANZA_NAME} #g" ${CRON_CONF_PATH}
echo -e "${YELLOW}已配置cron任务如下: ${COLOR_END}" && su - postgres -c "crontab -l"
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
restart_primary_postgres_to_reload_conf() {
FUNC="更改postgresql.conf配置并生效"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
sed -i -r "s#(.*)archive_command(.*)=(.*)#archive_command='pgbackrest --stanza=${STANZA_NAME} archive-push %p'#g" $PG_CONF_PATH
su - postgres -c "psql -c 'select pg_reload_conf()'"
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
create_pgbackrest_cmd_shell() {
FUNC="创建pgbackrest命令脚本"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
case ${NODE_TYPE} in
${NODE_TYPE_STANDBY})
CMD_FILE="3create_stanza_${STANZA_NAME}.sh" && echo "su - postgres -c \"pgbackrest --stanza=${STANZA_NAME} --log-level-console=detail stanza-create\"" >${CMD_FILE} && chown postgres.postgres ${CMD_FILE} && chmod 777 ${CMD_FILE}
CMD_FILE="4check_${STANZA_NAME}.sh" && echo "su - postgres -c \"pgbackrest --stanza=${STANZA_NAME} --log-level-console=info check\"" >${CMD_FILE} && chown postgres.postgres ${CMD_FILE} && chmod 777 ${CMD_FILE}
CMD_FILE="5backup_${STANZA_NAME}.sh" && echo "su - postgres -c \"pgbackrest --stanza=${STANZA_NAME} --log-level-console=detail backup\"" >${CMD_FILE} && chown postgres.postgres ${CMD_FILE} && chmod 777 ${CMD_FILE}
# 在primary或standby测试创建stanza: pgbackrest --stanza=asgard.192.168.2.163.to.192.168.2.133 --log-level-console=info stanza-create.
# 在primary测试某wal的push: pgbackrest --stanza=asgard.192.168.2.163.to.192.168.2.133 archive-push pg_wal/00000006000001B8000000D1.
# 在primary或standby测试check: pgbackrest --stanza=asgard.192.168.2.163.to.192.168.2.133 check.
# 在standby测试backup: pgbackrest --stanza=asgard.192.168.2.163.to.192.168.2.133 --log-level-console=info --type=full --start-fast=y backup
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
RED="\033[31m"
GREEN="\033[32m"
YELLOW="\033[33m"
COLOR_END="\033[0m"
# 主函数: 输入配置
echo -e "${YELLOW}冷备安装开始${COLOR_END}"
if [ ${UID} != 0 ]; then
echo -e "${RED}请使用root安装!${COLOR_END}" && exit 500
fi
echo -en "${YELLOW}请填写[主机]IP: ${COLOR_END}" && read PRIMARY_IP
echo -en "${YELLOW}请填写[备机]IP: ${COLOR_END}" && read STANDBY_IP
if [ ${PRIMARY_IP} == ${STANDBY_IP} ]; then
echo -e "${RED}集群安装, 主备节点IP不能相同!${COLOR_END}" && exit 500
fi
echo -en "${YELLOW}当前在主机还是备机操作, 主机请填primary, 备机请填standby: ${COLOR_END}" && read NODE_TYPE
NODE_TYPE_PRIMARY='primary'
NODE_TYPE_STANDBY='standby'
echo -en "${YELLOW}请填写[主机]PGDATA路径${COLOR_END}, 如/home/ubuntu/data_gas/postgresql/11/main, 请在主机通过ps -ef | grep postgres查询: " && read PGDATA
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
echo -en "${YELLOW}请填写[主机]PG配置文件路径${COLOR_END}, 如/home/ubuntu/data_gas/postgresql/11/main/postgresql.conf, 请在主机通过ps -ef | grep postgres查询: " && read PG_CONF_PATH
echo -en "${GREEN}请确认: PRIMARY_IP: ${PRIMARY_IP}, STANDBY_IP: ${STANDBY_IP}, PGDATA: ${PGDATA}, PG_CONF_PATH: ${PG_CONF_PATH}, 当前正在操作的机器: ${NODE_TYPE}, 确认配置? [Y/n]${COLOR_END}" && read IS_CONF_OK
;;
${NODE_TYPE_STANDBY})
echo -en "${GREEN}请确认: PRIMARY_IP: ${PRIMARY_IP}, STANDBY_IP: ${STANDBY_IP}, PGDATA: ${PGDATA}, 当前正在操作的机器: ${NODE_TYPE}, 确认配置? [Y/n]${COLOR_END}" && read IS_CONF_OK
;;
*)
echo -e "${RED}无效的主备类型, 请填写primary或standby!${COLOR_END}" && exit 500
;;
esac
case $IS_CONF_OK in
[nN][oO] | [nN])
exit 0
;;
esac
STANDY_BACKUP_PATH=/backup/pgbackrest
STANZA_NAME="asgard.${PRIMARY_IP}.to.${STANDBY_IP}"
echo $STANZA_NAME
check_dependency_dir
create_ssh_passwordless
change_pgbackrest_conf
change_cron_conf
restart_primary_postgres_to_reload_conf
create_pgbackrest_cmd_shell
su - postgres -c "pgbackrest --stanza=asgard.192.168.2.113.to.192.168.2.112 --log-level-console=detail stanza-create"
su - postgres -c "pgbackrest --stanza=asgard.192.168.2.113.to.192.168.2.112 --log-level-console=info check"
su - postgres -c "pgbackrest --stanza=asgard.192.168.2.113.to.192.168.2.112 --log-level-console=detail backup"
/etc/pgbackrest/pgbackrest.conf中配置即可primary.asgard.template生成主机配置文件, 以standby.asgard.template生成备机配置文件# 目录结构
postgres@wolf-db-192:/etc/pgbackrest$ tree
.
|-- conf.d
| |-- asgard.192.168.2.112.to.192.168.2.113.conf
| |-- asgard.192.168.2.113.to.192.168.2.112.conf
| `-- global.conf
|-- primary.asgard.template
`-- standby.asgard.template
# 主模板
postgres@wolf-db-192:/etc/pgbackrest$ cat primary.asgard.template
[asgard]
pg1-path=/platformData/postgresql/11/main
repo1-host=192.168.2.standby
repo1-host-user=postgres
# 从模板
postgres@wolf-db-192:/etc/pgbackrest$ cat standby.asgard.template
[asgard]
pg1-host=192.168.2.priamary
pg1-path=/platformData/postgresql/11/main
repo1-path=/backup/pgbackrest
repo1-host-user=postgres
repo1-retention-full-type=count
repo1-retention-full=2
# 全局配置
postgres@wolf-db-192:/etc/pgbackrest/conf.d$ cat global.conf
[global]
log-level-console=detail
log-level-file=detail
buffer-size=16MiB
process-max=10
start-fast=y
compress-type=bz2
compress-level=9
archive-mode-check=n
# 主机生成的配置文件
postgres@wolf-db-192:/etc/pgbackrest/conf.d$ cat asgard.192.168.2.112.to.192.168.2.113.conf
[asgard.192.168.2.112.to.192.168.2.113]
pg1-path=/platformData/postgresql/11/main
repo1-host=192.168.2.113
repo1-host-user=postgres
# 备机生成的配置文件
postgres@wolf-db-192:/etc/pgbackrest/conf.d$ cat asgard.192.168.2.113.to.192.168.2.112.conf
[asgard.192.168.2.113.to.192.168.2.112]
pg1-host=192.168.2.113
pg1-path=/platformData/postgresql/11/main
repo1-path=/backup/pgbackrest
repo1-host-user=postgres
repo1-retention-full-type=count
repo1-retention-full=2
(base) root@k8s-master-133:/etc/pgbackrest/conf.d# pgbackrest info --repo-path=/backup/pgbackrest
stanza: asgard.192.168.2.163.to.192.168.2.133
status: error (no valid backups, backup/expire running)
cipher: none
db (current)
wal archive min/max (11): 00000006000001B8000000D4/00000006000001B8000000D6
(base) root@k8s-master-133:/etc/pgbackrest/conf.d# pgbackrest info --repo-path=/backup/pgbackrest
stanza: asgard.192.168.2.163.to.192.168.2.133
status: ok
cipher: none
db (current)
wal archive min/max (11): 00000006000001B8000000CE/00000006000001B8000000D1
full backup: 20220228-155432F
timestamp start/stop: 2022-02-28 15:54:32 / 2022-02-28 16:08:58
wal start/stop: 00000006000001B8000000D1 / 00000006000001B8000000D1
database size: 44.7GB, database backup size: 44.7GB
repo1: backup set size: 7.6GB, backup size: 7.6GB
# 2.112作为备机, 备份2.113后的效果
root@wolf-db-192.168.2.112:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: asgard.192.168.2.113.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000002/000000010000000000000008
full backup: 20220301-163915F
timestamp start/stop: 2022-03-01 16:39:15 / 2022-03-01 16:39:21
wal start/stop: 000000010000000000000003 / 000000010000000000000004
database size: 48.7MB, database backup size: 48.7MB
repo1: backup set size: 3.6MB, backup size: 3.6MB
incr backup: 20220301-163915F_20220301-164012I
timestamp start/stop: 2022-03-01 16:40:12 / 2022-03-01 16:40:16
wal start/stop: 000000010000000000000006 / 000000010000000000000006
database size: 48.7MB, database backup size: 67.7KB
repo1: backup set size: 3.6MB, backup size: 3.0KB
backup reference list: 20220301-163915F
# 2.113作为备机, 备份2.112后的效果
root@wolf-db-192.168.2.113:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: asgard.192.168.2.112.to.192.168.2.113
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000002/000000010000000000000006
full backup: 20220301-163656F
timestamp start/stop: 2022-03-01 16:36:56 / 2022-03-01 16:37:23
wal start/stop: 000000010000000000000003 / 000000010000000000000004
database size: 174.4MB, database backup size: 174.4MB
repo1: backup set size: 6.4MB, backup size: 6.4MB
incr backup: 20220301-163656F_20220301-163927I
timestamp start/stop: 2022-03-01 16:39:27 / 2022-03-01 16:39:53
wal start/stop: 000000010000000000000006 / 000000010000000000000006
database size: 174.4MB, database backup size: 116.6MB
repo1: backup set size: 6.4MB, backup size: 2.7MB
backup reference list: 20220301-163656F
pgbackrest@k8s-master-133:/data/data_gas/pgbackrest_fake_backups/backup/postgres/20220221-150458F/pg_data/base/16384$ ll
total 217732
drwxr-x--- 2 pgbackrest pgbackrest 4096 Feb 21 15:05 ./
drwxr-x--- 7 pgbackrest pgbackrest 4096 Feb 21 15:05 ../
-rw-r----- 1 pgbackrest pgbackrest 145347061 Feb 21 15:05 512017.1.gz
-rw-r----- 1 pgbackrest pgbackrest 77594624 Feb 21 15:05 512017.gz
tree
└── backup
└── postgres
├── 20220221-150458F
│ ├── backup.manifest.copy
│ └── pg_data
│ ├── base
│ │ ├── 1
│ │ ├── 13052
│ │ ├── 13053
│ │ ├── 16384
│ │ │ ├── 512009.1.gz
│ │ │ ├── 512009.gz
│ │ │ ├── 512017.1.gz
│ │ │ └── 512017.gz
│ │ └── pgsql_job_cache
│ ├── global
│ ├── pg_commit_ts
│ ├── pg_dynshmem
│ ├── pg_foreign_file
│ │ └── cached
│ ├── pg_log
│ ├── pg_logical
│ │ ├── mappings
│ │ └── snapshots
│ ├── pg_multixact
│ │ ├── members
│ │ └── offsets
│ ├── pg_notify
│ ├── pg_replslot
│ ├── pg_serial
│ ├── pg_snapshots
│ ├── pg_stat
│ ├── pg_stat_tmp
│ ├── pg_subtrans
│ ├── pg_tblspc
│ ├── pg_twophase
│ ├── pg_wal
│ │ └── archive_status
│ ├── pg_xact
│ └── pipeline
│ └── zmq
├── backup.info
└── backup.info.copy
psql -Upostgres -dpostgres
create table test(id int);
deepface_data=# insert into yctest123 values (1);
INSERT 0 1
deepface_data=# insert into yctest123 values (2);
INSERT 0 1
deepface_data=# insert into yctest123 values (3);
INSERT 0 1
deepface_data=# insert into yctest123 values (4);
INSERT 0 1
deepface_data=# insert into yctest123 values (5);
INSERT 0 1
pgbackrest --stanza=postgres --log-level-console=debug backup
在Pg1上运行pgbackrest还原,需要先停止PostgreSQL,然后执行还原操作。
pgbackrest --stanza=postgres --log-level-console=debug --delta restore
--db-include参数指定库名称备份: 不同机器, 配置hosts和ssh, 写脚本, 每周日执行全备(full), 每周一到六执行差异备份(diff), 设置最多保留x个全备和y个差备
恢复: 用PITR, 指定target-time, 其内部会用最近一次全备和最近一次差备来做.
citus集群: citus是4个机器, 需要配置4个备机, pgbackrest的目录, 这样比较好管理
– compress-type=bz2 压缩比最高
– compress-level=9
– buffer-size=10MiB 会有更大的内存用于压缩
– log-level-console=trace 最详细的日志
– process-max=10 默认为1, 可以调大1-999
– repo-host-user=postgres, 因为默认是pgbackrest, 所以会有很多奇怪的现象, 可以设置为此+主备都用postgres试试, 因为if postgreSQL runs on the repository host the postgres user can be placed in the pgbackrest group so it has read permissions on the repository without being able to damage the contents accidentally.
– archive-timeout=60默认是60s, 是archive-push的超时时间, 可以设置为0.1-86400
1create_ssh_key.sh(创建私钥)2install.sh(创建ssh免密, 配置pgbackrest和postgres), 例如三台机器互为备份,希望A是B的primary, B是C的primary, C是A的primary,具体执行顺序如下:
3create_stanza.sh, 4check.sh, 5backup.sh. 此时由操作者手动执行来创建冷备仓库, 校验, 和开始全备, 之后会有linux cron调度每晚做差备+每周做全备info.sh查看备份情况stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000005/00000001000000000000000C
full backup: 20220302-162118F
timestamp start/stop: 2022-03-02 16:21:18 / 2022-03-02 16:21:28
wal start/stop: 00000001000000000000000C / 00000001000000000000000C
database size: 93.4MB, database backup size: 93.4MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# ./info.sh
stanza: myname.192.168.2.113.to.192.168.2.110
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000009/0000000100000003000000DB
full backup: 20220302-181033F
timestamp start/stop: 2022-03-02 18:10:33 / 2022-03-02 18:18:26
wal start/stop: 0000000100000003000000DB / 0000000100000003000000DB
database size: 9.9GB, database backup size: 9.9GB
repo1: backup set size: 2.4GB, backup size: 2.4GB
select * from faces limit 1;
mydb=# select ts from faces order by ts desc limit 3;
ts
---------------
1656863999095
1656863998274
1656863998274
(3 rows)
mydb=# select ts from faces_117594 order by ts desc limit 3;
ts
---------------
1656863998274
1656863764624
1656863760464
(3 rows)
info.sh查看备份inforoot@db-192.168.2.112:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000005/00000001000000000000000E
full backup: 20220302-162118F
timestamp start/stop: 2022-03-02 16:21:18 / 2022-03-02 16:21:28
wal start/stop: 00000001000000000000000C / 00000001000000000000000C
database size: 93.4MB, database backup size: 93.4MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# ./5backup_myname.192.168.2.110.to.192.168.2.112.sh
022-03-02 18:44:56.047 P00 INFO: execute non-exclusive pg_stop_backup() and wait for all WAL segments to archive
2022-03-02 18:44:56.248 P00 INFO: backup stop archive = 00000001000000000000000E, lsn = 0/E0000F8
2022-03-02 18:44:56.251 P00 DETAIL: wrote 'backup_label' file returned from pg_stop_backup()
2022-03-02 18:44:56.252 P00 INFO: check archive for segment(s) 00000001000000000000000E:00000001000000000000000E
2022-03-02 18:44:56.809 P00 INFO: new backup label = 20220302-162118F_20220302-184451I
2022-03-02 18:44:56.912 P00 INFO: incr backup size = 2.1MB, file total = 4802
2022-03-02 18:44:56.913 P00 INFO: backup command end: completed successfully (6055ms)
2022-03-02 18:44:56.913 P00 INFO: expire command begin 2.37: --buffer-size=16MiB --exec-id=2594-8a8aa00b --log-level-console=detail --log-level-file=detail --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.110.to.192.168.2.112
2022-03-02 18:44:56.929 P00 INFO: expire command end: completed successfully (16ms)
stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000005/00000001000000000000000E
full backup: 20220302-162118F
timestamp start/stop: 2022-03-02 16:21:18 / 2022-03-02 16:21:28
wal start/stop: 00000001000000000000000C / 00000001000000000000000C
database size: 93.4MB, database backup size: 93.4MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
incr backup: 20220302-162118F_20220302-184451I
timestamp start/stop: 2022-03-02 18:44:51 / 2022-03-02 18:44:56
wal start/stop: 00000001000000000000000E / 00000001000000000000000E
database size: 93.4MB, database backup size: 2.1MB
repo1: backup set size: 7.5MB, backup size: 44.5KB
backup reference list: 20220302-162118F
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# cat info.sh
#!/bin/bash
pgbackrest info --repo-path=/backup/pgbackrest
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# pgbackrest info --repo-path=/backup/pgbackrest --set=20220302-162118F_20220302-184451I --stanza=myname.192.168.2.110.to.192.168.2.112
stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000005/00000001000000000000000E
incr backup: 20220302-162118F_20220302-184451I
timestamp start/stop: 2022-03-02 18:44:51 / 2022-03-02 18:44:56
wal start/stop: 00000001000000000000000E / 00000001000000000000000E
database size: 93.4MB, database backup size: 2.1MB
repo1: backup set size: 7.5MB, backup size: 44.5KB
backup reference list: 20220302-162118F
database list: mydb1 (20999), case_db (23255), mydb (17329), postgres (13053)
info.sh查看备份inforoot@db-192.168.2.113:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: myname.192.168.2.112.to.192.168.2.113
status: ok
cipher: none
db (current)
wal archive min/max (11): 0000000100000001000000F3/0000000100000003000000DA
full backup: 20220302-171627F
timestamp start/stop: 2022-03-02 17:23:18 / 2022-03-02 17:26:28
wal start/stop: 0000000100000003000000DA / 0000000100000003000000DA
database size: 10GB, database backup size: 10GB
repo1: backup set size: 2.4GB, backup size: 2.4GB
root@db-192.168.2.113:/home/ubuntu/cold-standby/latest# ./5backup_myname.192.168.2.112.to.192.168.2.113.sh
2022-03-02 18:54:08.991 P00 INFO: backup command begin 2.37: --buffer-size=16MiB --compress-level=9 --compress-type=bz2 --exec-id=13210-4371e112 --log-level-console=detail --log-level-file=detail --pg1-host=192.168.2.112 --pg1-path=/platformData/postgresql/11/main --process-max=10 --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.112.to.192.168.2.113 --start-fast
2022-03-02 18:54:10.177 P00 INFO: last backup label = 20220302-171627F, version = 2.37
2022-03-02 18:54:10.177 P00 INFO: execute non-exclusive pg_start_backup(): backup begins after the requested immediate checkpoint completes
2022-03-02 18:54:10.780 P00 INFO: backup start archive = 0000000100000003000000DC, lsn = 3/DC000028
2022-03-02 18:54:10.781 P00 INFO: check archive for prior segment 0000000100000003000000DB
2022-03-02 18:54:41.357 P00 INFO: execute non-exclusive pg_stop_backup() and wait for all WAL segments to archive
2022-03-02 18:54:41.558 P00 INFO: backup stop archive = 0000000100000003000000DC, lsn = 3/DC000190
2022-03-02 18:54:41.561 P00 DETAIL: wrote 'backup_label' file returned from pg_stop_backup()
2022-03-02 18:54:41.561 P00 INFO: check archive for segment(s) 0000000100000003000000DC:0000000100000003000000DC
2022-03-02 18:54:42.052 P00 INFO: new backup label = 20220302-171627F_20220302-185409I
2022-03-02 18:54:42.561 P00 INFO: incr backup size = 182.8MB, file total = 22919
2022-03-02 18:54:42.563 P00 INFO: backup command end: completed successfully (33574ms)
2022-03-02 18:54:42.563 P00 INFO: expire command begin 2.37: --buffer-size=16MiB --exec-id=13210-4371e112 --log-level-console=detail --log-level-file=detail --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.112.to.192.168.2.113
2022-03-02 18:54:42.569 P00 INFO: expire command end: completed successfully (6ms)
root@db-192.168.2.113:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: myname.192.168.2.112.to.192.168.2.113
status: ok
cipher: none
db (current)
wal archive min/max (11): 0000000100000001000000F3/0000000100000003000000DC
full backup: 20220302-171627F
timestamp start/stop: 2022-03-02 17:23:18 / 2022-03-02 17:26:28
wal start/stop: 0000000100000003000000DA / 0000000100000003000000DA
database size: 10GB, database backup size: 10GB
repo1: backup set size: 2.4GB, backup size: 2.4GB
incr backup: 20220302-171627F_20220302-185409I
timestamp start/stop: 2022-03-02 18:54:09 / 2022-03-02 18:54:41
wal start/stop: 0000000100000003000000DC / 0000000100000003000000DC
database size: 10GB, database backup size: 182.8MB
repo1: backup set size: 2.4GB, backup size: 20.8MB
backup reference list: 20220302-171627F
root@db-192.168.2.113:/home/ubuntu/cold-standby/latest# cat info.sh
#!/bin/bash
pgbackrest info --repo-path=/backup/pgbackrest
root@db-192.168.2.113:/home/ubuntu/cold-standby/latest# pgbackrest info --repo-path=/backup/pgbackrest --set=20220302-171627F_20220302-185409I --stanza=myname.192.168.2.112.to.192.168.2.113
stanza: myname.192.168.2.112.to.192.168.2.113
status: ok
cipher: none
db (current)
wal archive min/max (11): 0000000100000001000000F3/0000000100000003000000DC
incr backup: 20220302-171627F_20220302-185409I
timestamp start/stop: 2022-03-02 18:54:09 / 2022-03-02 18:54:41
wal start/stop: 0000000100000003000000DC / 0000000100000003000000DC
database size: 10GB, database backup size: 182.8MB
repo1: backup set size: 2.4GB, backup size: 20.8MB
backup reference list: 20220302-171627F
database list: mydb (16384), postgres (13053)
mydb=# select * from pg_dist_node;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
--------+---------+---------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------
1 | 1 | 192.168.2.113 | 5432 | default | f | t | primary | default | f | t
2 | 2 | 192.168.2.112 | 5432 | default | f | t | primary | default | f | t
(2 rows)
global/pg_control文件使pg起不来冷备主机, 停pgserver并删除global/pg_controlroot@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# /etc/init.d/postgresql stop
[ ok ] Stopping postgresql (via systemctl): postgresql.service.
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# psql
psql: could not connect to server: No such file or directory
Is the server running locally and accepting
connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.5432"?
postgres@db-192:/platformData/postgresql/11/main$ rm global/pg_control
# 此时pg起不来
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# /etc/init.d/postgresql start
[ ok ] Starting postgresql (via systemctl): postgresql.service.
# psql连不上
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# psql
psql: could not connect to server: No such file or directory
Is the server running locally and accepting
connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.5432"?
## 查看日志发现没有启动情况
postgres@db-192:/platformData/postgresql/11/main$ tailf pg_log/postgresql-00.log
2022-03-02 19:29:47 CST [11177-15] LOG: database system is shut down
# 查看ps确实没有postgres进程
postgres@db-192:/platformData/postgresql/11/main$ ps -ef | grep postgres
冷备主机, 恢复冷备pgbackrest --stanza=myname.192.168.2.110.to.192.168.2.112 --delta --log-level-console=debug restore
2022-03-02 19:39:39.921 P00 DETAIL: sync path '/platformData/postgresql/11/main/pg_wal/archive_status'
2022-03-02 19:39:39.921 P00 DETAIL: sync path '/platformData/postgresql/11/main/pg_xact'
2022-03-02 19:39:39.922 P00 DETAIL: sync path '/platformData/postgresql/11/main/global'
2022-03-02 19:39:39.925 P00 INFO: restore command end: completed successfully (3793ms)
# 看到被删除的pg_control已经恢复了
postgres@db-192:/platformData/postgresql/11/main$ ll global/pg_control
-rw------- 1 postgres postgres 8.0K Mar 2 18:52 global/pg_control
postgres@db-192:/platformData/postgresql/11/main$ cat global/pg_control
|”Ÿí¡KbL›\
VLb`(*?±Ã1ý2±CVLb*?ˆÐ‡Ö2A @ ǫò¶OíBÈU„Iï‚wø‰™ÁÖèؤÄö¹X ¥ŒƒVПñ
冷备主机, 启动pg, 已恢复正常root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# /etc/init.d/postgresql start
[ ok ] Starting postgresql (via systemctl): postgresql.service.
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# psql
psql (11.4 (Ubuntu 11.4-1.pgdg16.04+1))
Type "help" for help.
postgres=# \c mydb
You are now connected to database "mydb" as user "postgres".
mydb=# \d
mydb=# select ts from faces order by ts desc limit 3;
ts
---------------
1656863999095
1656863998274
1656863998274
(3 rows)
PGDATA目录文件使pg起不来PGDATA目录postgres@db-192:/platformData/postgresql/11$ rm -rf main
# 此时连不上pg
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# psql
psql: could not connect to server: No such file or directory
Is the server running locally and accepting
connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.5432"?
pgbackrest --stanza=myname.192.168.2.110.to.192.168.2.112 --delta --log-level-console=detail restore
2022-03-02 19:48:33.287 P06 DETAIL: restore file /platformData/postgresql/11/main/base/1/12895 (0B, 100%)
2022-03-02 19:48:33.287 P07 DETAIL: restore file /platformData/postgresql/11/main/base/1/12890 (0B, 100%)
2022-03-02 19:48:33.287 P00 INFO: write /platformData/postgresql/11/main/recovery.conf
2022-03-02 19:48:33.335 P00 DETAIL: sync path '/platformData/postgresql/11/main/pg_wal/archive_status'
2022-03-02 19:48:33.336 P00 DETAIL: sync path '/platformData/postgresql/11/main/pg_xact'
2022-03-02 19:48:33.337 P00 INFO: restore global/pg_control (performed last to ensure aborted restores cannot be started)
2022-03-02 19:48:33.337 P00 DETAIL: sync path '/platformData/postgresql/11/main/global'
2022-03-02 19:48:33.337 P00 INFO: restore size = 93.4MB, file total = 4802
2022-03-02 19:48:33.339 P00 INFO: restore command end: completed successfully (6271ms)
# 此时可以发现PGDATA已经恢复成功, pgserver还未启动
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# psql
psql: could not connect to server: No such file or directory
Is the server running locally and accepting
connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.5432"?
# 我们启动pgserver即可
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# /etc/init.d/postgresql start
[ ok ] Starting postgresql (via systemctl): postgresql.service.
# 启动后即可正常连接
root@db-192.168.2.110:/home/ubuntu/cold-standby/cold-standby-1.0.0# psql
psql (11.4 (Ubuntu 11.4-1.pgdg16.04+1))
Type "help" for help.
postgres=# \c mydb
You are now connected to database "mydb" as user "postgres".
mydb=# \d
mydb=# select ts from faces order by ts desc limit 3;
ts
---------------
1656863999095
1656863998274
1656863998274
(3 rows)
postgres@db-192:/platformData/postgresql/11$ rm -rf main/
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# psql
psql: could not connect to server: No such file or directory
Is the server running locally and accepting
connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.5432"?
mydb=# select ts from faces order by ts desc limit 3;
ERROR: could not open relation mapping file "global/pg_filenode.map": No such file or directory
CONTEXT: while executing command on 192.168.2.112:5432
parallel worker
mydb=# select ts from faces order by ts desc limit 3;
ERROR: connection to the remote node 192.168.2.112:5432 failed with the following error: FATAL: could not open relation mapping file "global/pg_filenode.map": No such file or directory
/var/log/pgbackrest能查看detail级别详细日志)(以下在slaver1即2.112执行), 恢复耗时10GB耗时3minpostgres@db-192:/platformData/postgresql/11$ pgbackrest --stanza=myname.192.168.2.112.to.192.168.2.113 --delta --log-level-console=info restore
WARN: --delta or --force specified but unable to find 'PG_VERSION' or 'backup.manifest' in '/platformData/postgresql/11/main' to confirm that this is a valid $PGDATA directory. --delta and --force have been disabled and if any files exist in the destination directories the restore will be aborted.
2022-03-02 20:00:45.566 P00 INFO: repo1: restore backup set 20220302-171627F_20220302-185409I, recovery will start at 2022-03-02 18:54:09
2022-03-02 20:03:39.543 P00 INFO: write /platformData/postgresql/11/main/recovery.conf
2022-03-02 20:03:39.580 P00 INFO: restore global/pg_control (performed last to ensure aborted restores cannot be started)
2022-03-02 20:03:39.581 P00 INFO: restore size = 10GB, file total = 22919
2022-03-02 20:03:39.585 P00 INFO: restore command end: completed successfully (174715ms)
postgres@db-192:/platformData/postgresql/11$ ll
total 4.0K
drwx------ 21 postgres postgres 4.0K Mar 2 20:03 main
postgres@db-192:/platformData/postgresql/11$ du -sh ./*
11G ./main
# 启动slaver1的pg
root@db-192.168.2.112:/var/log/pgbackrest# psql
psql: could not connect to server: No such file or directory
Is the server running locally and accepting
connections on Unix domain socket "/var/run/postgresql/.s.PGSQL.5432"?
root@db-192.168.2.112:/var/log/pgbackrest# /etc/init.d/postgresql start
[ ok ] Starting postgresql (via systemctl): postgresql.service.
# slaver1已正常启动
root@db-192.168.2.112:/var/log/pgbackrest# psql
psql (11.4 (Ubuntu 11.4-1.pgdg16.04+1))
Type "help" for help.
postgres=# \c mydb
You are now connected to database "mydb" as user "postgres".
mydb=# \d
mydb=# select ts from faces_117592 order by ts desc limit 3;
ts
---------------
1656863944718
1656863944004
1656863911192
(3 rows)
mydb=# select ts from faces order by ts desc limit 3;
ts
---------------
1656863999095
1656863998274
1656863998274
(3 rows)
差备和每周全备, expire过期策略我们配置留2个全备, 当某全备过期时, 依赖于其的差备会自动过期
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000010000000000000005/000000010000000000000011
full backup: 20220302-162118F
timestamp start/stop: 2022-03-02 16:21:18 / 2022-03-02 16:21:28
wal start/stop: 00000001000000000000000C / 00000001000000000000000C
database size: 93.4MB, database backup size: 93.4MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
incr backup: 20220302-162118F_20220302-184451I
timestamp start/stop: 2022-03-02 18:44:51 / 2022-03-02 18:44:56
wal start/stop: 00000001000000000000000E / 00000001000000000000000E
database size: 93.4MB, database backup size: 2.1MB
repo1: backup set size: 7.5MB, backup size: 44.5KB
backup reference list: 20220302-162118F
incr backup: 20220302-162118F_20220302-185206I
timestamp start/stop: 2022-03-02 18:52:06 / 2022-03-02 18:52:10
wal start/stop: 000000010000000000000010 / 000000010000000000000010
database size: 93.4MB, database backup size: 2.1MB
repo1: backup set size: 7.5MB, backup size: 44.7KB
backup reference list: 20220302-162118F
全备, 如下看到2个全备+2个差备# 执行全备
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# su - postgres -c "pgbackrest --stanza=myname.192.168.2.110.to.192.168.2.112 --log-level-console=info --type=full backup"
2022-03-02 20:16:48.277 P00 INFO: backup command begin 2.37: --buffer-size=16MiB --compress-level=9 --compress-type=bz2 --exec-id=5324-bfeace3a --log-level-console=info --log-level-file=detail --pg1-host=192.168.2.110 --pg1-path=/platformData/postgresql/11/main --process-max=10 --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.110.to.192.168.2.112 --start-fast --type=full
2022-03-02 20:16:49.227 P00 INFO: execute non-exclusive pg_start_backup(): backup begins after the requested immediate checkpoint completes
2022-03-02 20:16:49.831 P00 INFO: backup start archive = 000000030000000000000014, lsn = 0/14000028
2022-03-02 20:16:49.831 P00 INFO: check archive for prior segment 000000030000000000000013
2022-03-02 20:16:56.241 P00 INFO: execute non-exclusive pg_stop_backup() and wait for all WAL segments to archive
2022-03-02 20:16:56.443 P00 INFO: backup stop archive = 000000030000000000000014, lsn = 0/14000158
2022-03-02 20:16:56.446 P00 INFO: check archive for segment(s) 000000030000000000000014:000000030000000000000014
2022-03-02 20:16:56.996 P00 INFO: new backup label = 20220302-201649F
2022-03-02 20:16:57.088 P00 INFO: full backup size = 93.6MB, file total = 4803
2022-03-02 20:16:57.089 P00 INFO: backup command end: completed successfully (8813ms)
2022-03-02 20:16:57.089 P00 INFO: expire command begin 2.37: --buffer-size=16MiB --exec-id=5324-bfeace3a --log-level-console=info --log-level-file=detail --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.110.to.192.168.2.112
2022-03-02 20:16:57.111 P00 INFO: repo1: 11-1 remove archive, start = 000000010000000000000005, stop = 00000001000000000000000B
2022-03-02 20:16:57.111 P00 INFO: expire command end: completed successfully (22ms)
# 查看备份info
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 00000001000000000000000C/000000030000000000000014
full backup: 20220302-162118F
timestamp start/stop: 2022-03-02 16:21:18 / 2022-03-02 16:21:28
wal start/stop: 00000001000000000000000C / 00000001000000000000000C
database size: 93.4MB, database backup size: 93.4MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
incr backup: 20220302-162118F_20220302-184451I
timestamp start/stop: 2022-03-02 18:44:51 / 2022-03-02 18:44:56
wal start/stop: 00000001000000000000000E / 00000001000000000000000E
database size: 93.4MB, database backup size: 2.1MB
repo1: backup set size: 7.5MB, backup size: 44.5KB
backup reference list: 20220302-162118F
incr backup: 20220302-162118F_20220302-185206I
timestamp start/stop: 2022-03-02 18:52:06 / 2022-03-02 18:52:10
wal start/stop: 000000010000000000000010 / 000000010000000000000010
database size: 93.4MB, database backup size: 2.1MB
repo1: backup set size: 7.5MB, backup size: 44.7KB
backup reference list: 20220302-162118F
full backup: 20220302-201649F
timestamp start/stop: 2022-03-02 20:16:49 / 2022-03-02 20:16:56
wal start/stop: 000000030000000000000014 / 000000030000000000000014
database size: 93.6MB, database backup size: 93.6MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
全备的基础上, 再执行1次全备, 因为我们配置了只保留2次全备, 则最早的1次全备会过期, 依赖于最早那次全备的差备同样也会过期# 执行全备, 会触发删掉最早的1次全备和最早的2次差备
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# su - postgres -c "pgbackrest --stanza=myname.192.168.2.110.to.192.168.2.112 --log-level-console=info --type=full backup"
2022-03-02 20:19:29.219 P00 INFO: backup command begin 2.37: --buffer-size=16MiB --compress-level=9 --compress-type=bz2 --exec-id=5500-7a10af39 --log-level-console=info --log-level-file=detail --pg1-host=192.168.2.110 --pg1-path=/platformData/postgresql/11/main --process-max=10 --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.110.to.192.168.2.112 --start-fast --type=full
2022-03-02 20:19:30.181 P00 INFO: execute non-exclusive pg_start_backup(): backup begins after the requested immediate checkpoint completes
2022-03-02 20:19:30.784 P00 INFO: backup start archive = 000000030000000000000016, lsn = 0/16000028
2022-03-02 20:19:30.784 P00 INFO: check archive for prior segment 000000030000000000000015
2022-03-02 20:19:37.387 P00 INFO: execute non-exclusive pg_stop_backup() and wait for all WAL segments to archive
2022-03-02 20:19:37.588 P00 INFO: backup stop archive = 000000030000000000000016, lsn = 0/16000130
2022-03-02 20:19:37.591 P00 INFO: check archive for segment(s) 000000030000000000000016:000000030000000000000016
2022-03-02 20:19:38.139 P00 INFO: new backup label = 20220302-201930F
2022-03-02 20:19:38.232 P00 INFO: full backup size = 93.6MB, file total = 4803
2022-03-02 20:19:38.233 P00 INFO: backup command end: completed successfully (9016ms)
2022-03-02 20:19:38.233 P00 INFO: expire command begin 2.37: --buffer-size=16MiB --exec-id=5500-7a10af39 --log-level-console=info --log-level-file=detail --repo1-path=/backup/pgbackrest --repo1-retention-full=2 --repo1-retention-full-type=count --stanza=myname.192.168.2.110.to.192.168.2.112
2022-03-02 20:19:38.242 P00 INFO: repo1: expire full backup set 20220302-162118F, 20220302-162118F_20220302-184451I, 20220302-162118F_20220302-185206I
2022-03-02 20:19:38.247 P00 INFO: repo1: remove expired backup 20220302-162118F_20220302-185206I
2022-03-02 20:19:38.247 P00 INFO: repo1: remove expired backup 20220302-162118F_20220302-184451I
2022-03-02 20:19:38.247 P00 INFO: repo1: remove expired backup 20220302-162118F
2022-03-02 20:19:38.394 P00 INFO: repo1: 11-1 remove archive, start = 0000000100000000, stop = 000000030000000000000013
2022-03-02 20:19:38.395 P00 INFO: repo1: 11-1 remove history file 00000002.history
2022-03-02 20:19:38.395 P00 INFO: expire command end: completed successfully (162ms)
# 查看备份信息info, 可以看到, 只剩下最近的2次全备了
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# ./info.sh
stanza: myname.192.168.2.110.to.192.168.2.112
status: ok
cipher: none
db (current)
wal archive min/max (11): 000000030000000000000014/000000030000000000000016
full backup: 20220302-201649F
timestamp start/stop: 2022-03-02 20:16:49 / 2022-03-02 20:16:56
wal start/stop: 000000030000000000000014 / 000000030000000000000014
database size: 93.6MB, database backup size: 93.6MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
full backup: 20220302-201930F
timestamp start/stop: 2022-03-02 20:19:30 / 2022-03-02 20:19:37
wal start/stop: 000000030000000000000016 / 000000030000000000000016
database size: 93.6MB, database backup size: 93.6MB
repo1: backup set size: 7.5MB, backup size: 7.5MB
全备, 每天执行差备, 如下root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# ll /var/spool/cron/crontabs/postgres
-rw-r--r-- 1 postgres postgres 1344 Mar 2 16:18 /var/spool/cron/crontabs/postgres
root@db-192.168.2.112:/home/ubuntu/cold-standby/latest# cat /var/spool/cron/crontabs/postgres
30 02 * * 0 /usr/local/bin/pgbackrest --log-level-console=detail --type=full --stanza=myname.192.168.2.110.to.192.168.2.112 2>&1
30 02 * * 1-6 /usr/local/bin/pgbackrest --log-level-console=detail --type=diff --stanza=myname.192.168.2.110.to.192.168.2.112 2>&1
备份阶段10GB耗时3min, 全备的恢复阶段10GB耗时3min.#!/bin/bash
RED="\033[31m"
YELLOW="\033[33m"
COLOR_END="\033[0m"
echo -e "${YELLOW}[开始] ${COLOR_END}"
if [ $UID != 0 ]; then
echo -e "${RED}请使用root安装!${COLOR_END}" && exit 500
fi
# postgres
echo -e "${YELLOW}开始创建postgres用户 ${COLOR_END}"
POSTGRES_HOME_PATH="/var/lib/postgresql"
mkdir -p -m 750 ${POSTGRES_HOME_PATH}/.ssh
chown -R postgres:postgres ${POSTGRES_HOME_PATH}/.ssh
if [ ! -f "${POSTGRES_HOME_PATH}/.ssh/id_rsa" ] || [ ! -f "${POSTGRES_HOME_PATH}/.ssh/id_rsa.pub" ]; then
echo -e "${YELLOW}开始创建新秘钥${COLOR_END}" && su - postgres -c "ssh-keygen -f ${POSTGRES_HOME_PATH}/.ssh/id_rsa -t rsa -b 4096 -N ''"
else
echo -e "${YELLOW}秘钥已存在, 一切正常${COLOR_END}"
fi
(echo "postgres" && echo "postgres") | sudo passwd postgres
echo -e "${YELLOW}[结束] ${COLOR_END}"
#!/bin/bash
# [主机打包] tar -Pzcvf bin/primary.tgz /etc/pgbackrest/ /tmp/pgbackrest/ /usr/bin/pgbackrest /usr/bin/sshpass /var/log/pgbackrest/
# [备机打包] tar -Pzcvf bin/standby.tgz /etc/pgbackrest/ /tmp/pgbackrest/ /usr/bin/pgbackrest /usr/bin/sshpass /var/spool/cron/crontabs/postgres /var/log/pgbackrest/
check_dependency_dir() {
FUNC="检查依赖目录"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
tar zxvfP bin/"${NODE_TYPE}.tgz"
case ${NODE_TYPE} in
${NODE_TYPE_STANDBY})
[[ ! -d /backup ]] && echo -e "${RED}未挂载/backup目录, 请挂载或软链接到大容量磁盘(>=2TB), 防止磁盘被冷备写满!${COLOR_END}" && exit 500
mkdir -p ${STANDY_BACKUP_PATH}
chown -R postgres:postgres /backup
chmod 77
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
create_ssh_passwordless() {
FUNC="建立ssh免密登录"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
PRIMARY_POSTGRES_HOME_PATH="/var/lib/postgresql"
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
echo -e "${YELLOW}开始拷贝公钥${COLOR_END}" && sshpass -p postgres ssh-copy-id -o StrictHostKeyChecking=no -f -i ${PRIMARY_POSTGRES_HOME_PATH}/.ssh/id_rsa.pub postgres@${STANDBY_IP}
echo -e "${YELLOW}开始建立第一次ssh连接${COLOR_END}" && su - postgres -c "ssh -o StrictHostKeyChecking=no postgres@${STANDBY_IP} hostname"
;;
${NODE_TYPE_STANDBY})
sshpass -p postgres ssh-copy-id -o StrictHostKeyChecking=no -f -i ${PRIMARY_POSTGRES_HOME_PATH}/.ssh/id_rsa.pub postgres@${PRIMARY_IP}
su - postgres -c "ssh -o StrictHostKeyChecking=no postgres@${PRIMARY_IP} hostname"
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
# 指定[主机][备机]pgbackrest的STANZA_NAME
change_pgbackrest_conf() {
FUNC="更改pgbackrest.conf配置并生效"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
PGBACKREST_TEMPLATE_CONF_PATH="/etc/pgbackrest/${NODE_TYPE}.myname.template"
PGBACKREST_CONF_PATH="/etc/pgbackrest/conf.d/${STANZA_NAME}.conf"
cp -p ${PGBACKREST_TEMPLATE_CONF_PATH} ${PGBACKREST_CONF_PATH}
sed -i -r "s#\[myname(.*)\]#\[${STANZA_NAME}\]#g" ${PGBACKREST_CONF_PATH}
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
sed -i -r "s#pg1-path=(.*)#pg1-path=${PGDATA}#g" ${PGBACKREST_CONF_PATH}
sed -i -r "s#repo1-host=(.*)#repo1-host=${STANDBY_IP}#g" ${PGBACKREST_CONF_PATH}
;;
${NODE_TYPE_STANDBY})
sed -i -r "s#pg1-host=(.*)#pg1-host=${PRIMARY_IP}#g" ${PGBACKREST_CONF_PATH}
sed -i -r "s#pg1-path=(.*)#pg1-path=${PGDATA}#g" ${PGBACKREST_CONF_PATH}
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
change_cron_conf() {
FUNC="更改cron配置"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
CRON_CONF_PATH="/var/spool/cron/crontabs/postgres"
case ${NODE_TYPE} in
${NODE_TYPE_STANDBY})
sed -i -r "s#stanza=(.*) #stanza=${STANZA_NAME} #g" ${CRON_CONF_PATH}
echo -e "${YELLOW}已配置cron任务如下: ${COLOR_END}" && su - postgres -c "crontab -l"
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
restart_primary_postgres_to_reload_conf() {
FUNC="更改postgresql.conf配置并生效"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
sed -i -r "s#(.*)archive_command(.*)=(.*)#archive_command='pgbackrest --stanza=${STANZA_NAME} archive-push %p'#g" $PG_CONF_PATH
su - postgres -c "psql -c 'select pg_reload_conf()'"
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
create_pgbackrest_cmd_shell() {
FUNC="创建pgbackrest命令脚本"
echo -e "${YELLOW}[开始] ${FUNC} ${COLOR_END}"
case ${NODE_TYPE} in
${NODE_TYPE_STANDBY})
CMD_FILE="3create_stanza_${STANZA_NAME}.sh" && echo "su - postgres -c \"pgbackrest --stanza=${STANZA_NAME} --log-level-console=detail stanza-create\"" >${CMD_FILE} && chown postgres.postgres ${CMD_FILE} && chmod 777 ${CMD_FILE}
CMD_FILE="4check_${STANZA_NAME}.sh" && echo "su - postgres -c \"pgbackrest --stanza=${STANZA_NAME} --log-level-console=detail check\"" >${CMD_FILE} && chown postgres.postgres ${CMD_FILE} && chmod 777 ${CMD_FILE}
CMD_FILE="5backup_${STANZA_NAME}.sh" && echo "su - postgres -c \"pgbackrest --stanza=${STANZA_NAME} --log-level-console=detail backup\"" >${CMD_FILE} && chown postgres.postgres ${CMD_FILE} && chmod 777 ${CMD_FILE}
# 在primary或standby测试创建stanza: pgbackrest --stanza=myname.192.168.2.163.to.192.168.2.133 --log-level-console=info stanza-create.
# 在primary测试某wal的push: pgbackrest --stanza=myname.192.168.2.163.to.192.168.2.133 archive-push pg_wal/00000006000001B8000000D1.
# 在primary或standby测试check: pgbackrest --stanza=myname.192.168.2.163.to.192.168.2.133 check.
# 在standby测试backup: pgbackrest --stanza=myname.192.168.2.163.to.192.168.2.133 --log-level-console=info --type=full --start-fast=y backup
;;
esac
echo -e "${YELLOW}[结束] ${FUNC} ${COLOR_END}"
}
RED="\033[31m"
GREEN="\033[32m"
YELLOW="\033[33m"
COLOR_END="\033[0m"
# 主函数: 输入配置
echo -e "${YELLOW}冷备安装开始${COLOR_END}"
if [ ${UID} != 0 ]; then
echo -e "${RED}请使用root安装!${COLOR_END}" && exit 500
fi
echo -en "${YELLOW}请填写[主机]IP: ${COLOR_END}" && read PRIMARY_IP
echo -en "${YELLOW}请填写[备机]IP: ${COLOR_END}" && read STANDBY_IP
if [ ${PRIMARY_IP} == ${STANDBY_IP} ]; then
echo -e "${RED}集群安装, 主备节点IP不能相同!${COLOR_END}" && exit 500
fi
echo -en "${YELLOW}当前在主机还是备机操作, 主机请填primary, 备机请填standby: ${COLOR_END}" && read NODE_TYPE
NODE_TYPE_PRIMARY='primary'
NODE_TYPE_STANDBY='standby'
echo -en "${YELLOW}请填写[主机]PGDATA路径${COLOR_END}, 如/home/ubuntu/data_gas/postgresql/11/main, 请在主机通过ps -ef | grep postgres查询: " && read PGDATA
case ${NODE_TYPE} in
${NODE_TYPE_PRIMARY})
echo -en "${YELLOW}请填写[主机]PG配置文件路径${COLOR_END}, 如/home/ubuntu/data_gas/postgresql/11/main/postgresql.conf, 请在主机通过ps -ef | grep postgres查询: " && read PG_CONF_PATH
echo -en "${GREEN}请确认: PRIMARY_IP: ${PRIMARY_IP}, STANDBY_IP: ${STANDBY_IP}, PGDATA: ${PGDATA}, PG_CONF_PATH: ${PG_CONF_PATH}, 当前正在操作的机器: ${NODE_TYPE}, 确认配置? [Y/n]${COLOR_END}" && read IS_CONF_OK
;;
${NODE_TYPE_STANDBY})
echo -en "${GREEN}请确认: PRIMARY_IP: ${PRIMARY_IP}, STANDBY_IP: ${STANDBY_IP}, PGDATA: ${PGDATA}, 当前正在操作的机器: ${NODE_TYPE}, 确认配置? [Y/n]${COLOR_END}" && read IS_CONF_OK
;;
*)
echo -e "${RED}无效的主备类型, 请填写primary或standby!${COLOR_END}" && exit 500
;;
esac
case $IS_CONF_OK in
[nN][oO] | [nN])
exit 0
;;
esac
STANDY_BACKUP_PATH=/backup/pgbackrest
STANZA_NAME="myname.${PRIMARY_IP}.to.${STANDBY_IP}"
echo $STANZA_NAME
check_dependency_dir
create_ssh_passwordless
change_pgbackrest_conf
change_cron_conf
restart_primary_postgres_to_reload_conf
create_pgbackrest_cmd_shell