测试发现,pgsentinel插件在pg_active_session_history视图记录条数较多时,存在严重的内存占用问题,群里的其他朋友反馈还可能存在严重的内存泄漏问题。本文仅用于学习和测试,未用于生产环境。
设置 pgsentinel_ash.max_entries=10000000,启动DB报错需请求28G内存。对于负载稍高的数据库,例如每秒40个活跃会话,按照每秒收集一次的频率,1000万行也仅够保存不到3天的数据,该插件就需要占掉28G内存,实用性太低。
-bash-4.2$ pg_ctl start -D $PGDATA
waiting for server to start....2023-10-07 19:43:09.865 CST [2210] FATAL: could not map anonymous shared memory: Cannot allocate memory
2023-10-07 19:43:09.865 CST [2210] HINT: This error usually means that PostgreSQL's request for a shared memory segment exceeded available memory, swap space, or huge pages. To reduce the request size (currently 29601538048 bytes), reduce PostgreSQL's shared memory usage, perhaps by reducing shared_buffers or max_connections.
2023-10-07 19:43:09.865 CST [2210] LOG: database system is shut down
stopped waiting
pg_ctl: could not start server
Examine the log output
众所周知,pg是没有像oracle那样的ash视图的,因此要回溯历史问题不太方便。pgsentinel插件会将pg_stat_activity与pg_stat_statements视图内容定期快照,并存入pg_active_session_history和pg_stat_statements_history视图中(重启数据库其中数据会被清空)。
重启数据库其中数据会被清空,大部分字段与对应版本的pg_stat_activity视图字段含义相同
Column | Type | 备注 |
---|---|---|
ash_time | timestamp with time zone | 采样时间 |
datid | oid | |
datname | text | |
pid | integer | |
leader_pid | integer | 若有并行,其leader进程的pid |
usesysid | oid | user id |
usename | text | |
application_name | text | |
client_addr | text | |
client_hostname | text | |
client_port | integer | |
backend_start | timestamp with time zone | |
xact_start | timestamp with time zone | |
query_start | timestamp with time zone | |
state_change | timestamp with time zone | |
wait_event_type | text | |
wait_event | text | |
state | text | |
backend_xid | xid | |
backend_xmin | xid | |
top_level_query | text | 执行函数、存储过程时的外层SQL(开pg_stat_statements.track = all才会有区别) |
query | text | |
cmdtype | text | |
queryid | bigint | |
backend_type | text | |
blockers | integer | blockers数量 |
blockerpid | integer | |
blocker_state | text |
重启数据库其中数据会被清空,与对应版本的pg_stat_statements视图字段含义相同
Column | Type | 备注 |
---|---|---|
ash_time | timestamp with time zone | |
userid | oid | |
dbid | oid | |
queryid | bigint | |
calls | bigint | |
total_exec_time | double precision | |
rows | bigint | |
shared_blks_hit | bigint | |
shared_blks_read | bigint | |
shared_blks_dirtied | bigint | |
shared_blks_written | bigint | |
local_blks_hit | bigint | |
local_blks_read | bigint | |
local_blks_dirtied | bigint | |
local_blks_written | bigint | |
temp_blks_read | bigint | |
temp_blks_written | bigint | |
blk_read_time | double precision | |
blk_write_time | double precision | |
plans | bigint | |
total_plan_time | double precision | |
wal_records | bigint | |
wal_fpi | bigint | |
wal_bytes | numeric |
GitHub - pgsentinel/pgsentinel: postgresql extension providing Active session history
- # poatgres用户执行
- unzip pgsentinel-master.zip
- cd pgsentinel-master/src
- make
-
- # root用户执行(要配环境变量,参考下面)
- make install
具体安装过程
-bash-4.2$ unzip pgsentinel-master.zip
-bash-4.2$ cd pgsentinel-master/src
-bash-4.2$ make
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -g -O2 -fPIC -I. -I./ -I/data/postgres/base/14.0/include/server -I/data/postgres/base/14.0/include/internal -D_GNU_SOURCE -c -o pgsentinel.o pgsentinel.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -g -O2 -fPIC -I. -I./ -I/data/postgres/base/14.0/include/server -I/data/postgres/base/14.0/include/internal -D_GNU_SOURCE -c -o get_parsedinfo.o get_parsedinfo.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -g -O2 -fPIC -shared -o pgsentinel.so pgsentinel.o get_parsedinfo.o -L/data/postgres/base/14.0/lib -Wl,--as-needed -Wl,-rpath,'/data/postgres/base/14.0/lib',--enable-new-dtags -lm[root@linux01 ~]# vi .bash_profile
# .bash_profile# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi# User specific environment and startup programs
PATH=$PATH:$HOME/bin
export PATH
export PGHOME=/data/postgres/base/14.0
export PGDATA=/data/postgres/pg5432/data
export PATH=$PGHOME/bin:$PATH:$HOME/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PGHOME/lib
export LANG=en_US.UTF-8
~
[root@linux01 ~]# source .bash_profile
[root@linux01 ~]#
[root@linux01 ~]# cd .../pgsentinel-master/src
[root@linux01 src]# make install
/usr/bin/mkdir -p '/data/postgres/base/14.0/lib'
/usr/bin/mkdir -p '/data/postgres/base/14.0/share/extension'
/usr/bin/mkdir -p '/data/postgres/base/14.0/share/extension'
/usr/bin/install -c -m 755 pgsentinel.so '/data/postgres/base/14.0/lib/pgsentinel.so'
/usr/bin/install -c -m 644 .//pgsentinel.control '/data/postgres/base/14.0/share/extension/'
/usr/bin/install -c -m 644 .//pgsentinel--1.0.sql '/data/postgres/base/14.0/share/extension/'
创建插件
CREATE EXTENSION pgsentinel;
- vi postgresql.conf
- shared_preload_libraries = 'pg_stat_statements,auto_explain,pgsentinel'
若未配置,查询会报错
- postgres=# select * from pg_active_session_history ;
- ERROR: pg_active_session_history must be loaded via shared_preload_libraries
重启db生效
- pg_ctl stop -m fast
- pg_ctl start -D $PGDATA
-
- postgres=# select * from pg_active_session_history ;
- (0 rows)
可以直接在postgresql.conf中修改,也可以alter system设置
alter system set pgsentinel_pgssh.enable=on;
参数名 | 参数含义 | 默认值 | 建议值 | 备注 |
---|---|---|---|---|
pgsentinel_ash.sampling_period | 采样时间(秒) | 1 | 10,视业务负载及需求而定 | reload生效 |
pgsentinel_ash.max_entries | pg_active_session_history 最大记录条数(占用ring buffer大小,单位为字节) | 1000 | 视业务负载及需求而定。注意非常耗内存,设置1000万约占内存28G | 重启生效。设置过大可能内存不足,DB启动失败 |
pgsentinel.db_name | 数据存在哪个db中 | postgres | pgawr | 重启生效 |
pgsentinel_ash.track_idle_trans | 是否记录 idle in transaction 状态会话 | off | on | reload生效 |
pgsentinel_pgssh.max_entries | pg_stat_statements_history 最大记录条数(占用ring buffer大小,单位为字节) | 10000 | 视业务负载及需求而定 | 重启生效。设置过大可能内存不足,DB启动失败 |
pgsentinel_pgssh.enable | 是否启用 pg_stat_statements_history | off | on | 重启生效 |
这部分对应源码
从中也可以看到各参数含义、默认值、最小最大值,是否需重启生效等
- static void
- pgsentinel_load_params(void)
- {
-
- DefineCustomIntVariable("pgsentinel_ash.sampling_period",
- "Duration between each pull (in seconds).",
- NULL,
- &ash_sampling_period,
- 1,
- 1,
- INT_MAX,
- PGC_SIGHUP,
- 0,
- NULL,
- NULL,
- NULL);
-
- DefineCustomBoolVariable("pgsentinel_ash.track_idle_trans",
- "Track session in idle transaction state.",
- NULL,
- &ash_track_idle_trans,
- false,
- PGC_SIGHUP,
- 0,
- NULL,
- NULL,
- NULL);
-
- if (!process_shared_preload_libraries_in_progress)
- return;
-
- /* can't define PGC_POSTMASTER variable after startup */
- DefineCustomIntVariable("pgsentinel_ash.max_entries",
- "Maximum number of ash entries.",
- NULL,
- &ash_max_entries,
- 1000,
- 1000,
- INT_MAX,
- PGC_POSTMASTER,
- 0,
- NULL,
- NULL,
- NULL);
-
- EmitWarningsOnPlaceholders("pgsentinel_ash");
-
- DefineCustomIntVariable("pgsentinel_pgssh.max_entries",
- "Maximum number of pgssh entries.",
- NULL,
- &pgssh_max_entries,
- 10000,
- 10000,
- INT_MAX,
- PGC_POSTMASTER,
- 0,
- NULL,
- NULL,
- NULL);
-
- DefineCustomBoolVariable("pgsentinel_pgssh.enable",
- "Enable pg_stat_statements_history.",
- NULL,
- &pgssh_enable,
- false,
- PGC_POSTMASTER,
- 0,
- NULL,
- NULL,
- NULL);
-
- EmitWarningsOnPlaceholders("pgsentinel_pgssh");
-
- DefineCustomStringVariable("pgsentinel.db_name",
- gettext_noop("Database on which the worker connect."),
- NULL,
- &pgsentinelDbName,
- "postgres",
- PGC_POSTMASTER,
- GUC_SUPERUSER_ONLY,
- NULL, NULL, NULL);
- }
查询语句保留长度
- # 为每个活动会话的pg_stat_activity.query字段所保留的内存量(字节,默认1024)
- track_activity_query_size = 2048
跟踪层级
pgsentinel依赖于pg_stat_statements插件的数据,如果想要更详细,可以调整相应参数(但必须注意对系统的负载)
- # 记录函数和存储过程中的子语句
- pg_stat_statements.track = all
插件最核心的就是pg_active_session_history,pg_stat_statements_history两个视图,所以源码中最重要的,也就是这两个视图的创建。
源码中的 pgsentinel--1.0.sql,可以看到这两个视图内容来自两个函数,并进行授权
- CREATE VIEW pg_active_session_history AS
- SELECT * FROM pg_active_session_history();
-
- GRANT SELECT ON pg_active_session_history TO PUBLIC;
-
- CREATE VIEW pg_stat_statements_history AS
- SELECT * FROM pg_stat_statements_history();
-
- GRANT SELECT ON pg_stat_statements_history TO PUBLIC;
而这两个函数实际是用c语言编写的
-
- CREATE FUNCTION pg_active_session_history(
- OUT ash_time timestamptz,
- OUT datid Oid,
- OUT datname text,
- OUT pid integer,
- OUT leader_pid integer,
- OUT usesysid Oid,
- OUT usename text,
- OUT application_name text,
- OUT client_addr text,
- OUT client_hostname text,
- OUT client_port integer,
- OUT backend_start timestamptz,
- OUT xact_start timestamptz,
- OUT query_start timestamptz,
- OUT state_change timestamptz,
- OUT wait_event_type text,
- OUT wait_event text,
- OUT state text,
- OUT backend_xid xid,
- OUT backend_xmin xid,
- OUT top_level_query text,
- OUT query text,
- OUT cmdtype text,
- OUT queryid bigint,
- OUT backend_type text,
- OUT blockers integer,
- OUT blockerpid integer,
- OUT blocker_state text
- )
- RETURNS SETOF record
- AS 'MODULE_PATHNAME', 'pg_active_session_history'
- LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
-
- -- Register a view on the function for ease of use.
- CREATE VIEW pg_active_session_history AS
- SELECT * FROM pg_active_session_history();
-
- GRANT SELECT ON pg_active_session_history TO PUBLIC;
-
- CREATE FUNCTION pg_stat_statements_history(
- OUT ash_time timestamptz,
- OUT userid Oid,
- OUT dbid Oid,
- OUT queryid bigint,
- OUT calls bigint,
- OUT total_exec_time double precision,
- OUT rows bigint,
- OUT shared_blks_hit bigint,
- OUT shared_blks_read bigint,
- OUT shared_blks_dirtied bigint,
- OUT shared_blks_written bigint,
- OUT local_blks_hit bigint,
- OUT local_blks_read bigint,
- OUT local_blks_dirtied bigint,
- OUT local_blks_written bigint,
- OUT temp_blks_read bigint,
- OUT temp_blks_written bigint,
- OUT blk_read_time double precision,
- OUT blk_write_time double precision,
- OUT plans bigint,
- OUT total_plan_time double precision,
- OUT wal_records bigint,
- OUT wal_fpi bigint,
- OUT wal_bytes numeric
- )
- RETURNS SETOF record
- AS 'MODULE_PATHNAME', 'pg_stat_statements_history'
- LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
既然如此,我们看看源码中究竟是怎么实现的这些函数
它有两个分支,另外根据不同pg版本有不同语句(这里只挑了一个版本):
- select act.datid, act.datname, act.pid, act.usesysid, act.usename, \
- act.application_name, text(act.client_addr), act.client_hostname, \
- act.client_port, act.backend_start, act.xact_start, act.query_start, \
- act.state_change, case when act.wait_event_type is null then 'CPU' \
- else act.wait_event_type end as wait_event_type,case when act.wait_event is null \
- then 'CPU' else act.wait_event end as wait_event, act.state, act.backend_xid, \
- act.backend_xmin, act.query, act.backend_type,(pg_blocking_pids(act.pid))[1], \
- cardinality(pg_blocking_pids(act.pid)),blk.state,gpi.*, act.leader_pid \
- from pg_stat_activity act left join pg_stat_activity blk \
- on (pg_blocking_pids(act.pid))[1] = blk.pid,get_parsedinfo(act.pid) gpi \
- where act.state ='active' and act.pid != pg_backend_pid()";
- select act.datid, act.datname, act.pid, act.usesysid, act.usename, \
- act.application_name, text(act.client_addr), act.client_hostname, \
- act.client_port, act.backend_start, act.xact_start, act.query_start, \
- act.state_change, case when act.wait_event_type is null then 'CPU' \
- else act.wait_event_type end as wait_event_type,case when act.wait_event is null \
- then 'CPU' else act.wait_event end as wait_event, act.state, act.backend_xid, \
- act.backend_xmin, act.query, act.backend_type,(pg_blocking_pids(act.pid))[1], \
- cardinality(pg_blocking_pids(act.pid)),blk.state,gpi.*, act.leader_pid \
- from pg_stat_activity act left join pg_stat_activity blk \
- on (pg_blocking_pids(act.pid))[1] = blk.pid,get_parsedinfo(act.pid) gpi \
- where act.state in ('active', 'idle in transaction') and act.pid != pg_backend_pid()";
也有版本区分,这里只取其中一版
- select userid, dbid, queryid, calls, total_exec_time, rows, shared_blks_hit, \
- shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, \
- local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, \
- temp_blks_written, blk_read_time, blk_write_time, \
- plans, total_plan_time, wal_records, wal_fpi, wal_bytes \
- from pg_stat_statements \
- where queryid in (select queryid from pg_active_session_history \
- where ash_time in (select ash_time from pg_active_session_history \
- order by ash_time desc limit 1))";
每一行记录叫做一个entry
- /* ash entry */
- typedef struct ashEntry
- {
- int pid;
- #if PG_VERSION_NUM >= 130000
- int leader_pid;
- #endif
- int client_port;
- uint64 queryid;
- TimestampTz ash_time;
- Oid datid;
- Oid usesysid;
- char *usename;
- char *datname;
- char *application_name;
- char *wait_event_type;
- char *wait_event;
- char *state;
- char *blocker_state;
- char *client_hostname;
- int blockers;
- int blockerpid;
- char *top_level_query;
- char *query;
- char *cmdtype;
- char *backend_type;
- char *client_addr;
- TransactionId backend_xmin;
- TransactionId backend_xid;
- TimestampTz backend_start;
- TimestampTz xact_start;
- TimestampTz query_start;
- TimestampTz state_change;
- } ashEntry;
-
- /* pg_stat_statement_history entry */
- typedef struct pgsshEntry
- {
- TimestampTz ash_time;
- Oid userid;
- Oid dbid;
- uint64 queryid;
- int64 calls;
- double total_time;
- int64 rows;
- int64 shared_blks_hit;
- int64 shared_blks_read;
- int64 shared_blks_dirtied;
- int64 shared_blks_written;
- int64 local_blks_hit;
- int64 local_blks_read;
- int64 local_blks_dirtied;
- int64 local_blks_written;
- int64 temp_blks_read;
- int64 temp_blks_written;
- double blk_read_time;
- double blk_write_time;
- #if PG_VERSION_NUM >= 130000
- int64 plans;
- double total_plan_time;
- int64 wal_records;
- int64 wal_fpi;
- uint64 wal_bytes;
- #endif
- } pgsshEntry;
每个字段有一个buffer变量,记录共享内存用量,例如
- static char *AshEntryUsenameBuffer = NULL;
- static char *AshEntryDatnameBuffer = NULL;
- static char *AshEntryAppnameBuffer = NULL;
ash_entry_memsize和pgssh_entry_memsize估算entry所需内存,如果占用量过大,DB启动可能会失败。基本原理是:
- /* Estimate amount of shared memory needed for ash entry */
- static Size
- ash_entry_memsize(void)
- {
- Size size;
-
- /* AshEntryArray */
- size = mul_size(sizeof(ashEntry), ash_max_entries);
- /* AshEntryUsenameBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryDatnameBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryAppnameBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryClientaddrBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryWaitEventTypeBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryWaitEventBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryStateBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryClientHostnameBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryQueryBuffer */
- size = add_size(size, mul_size(pgstat_track_activity_query_size,
- ash_max_entries));
- /* AshEntryCmdTypeBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryTopLevelQueryBuffer */
- size = add_size(size, mul_size(pgstat_track_activity_query_size,
- ash_max_entries));
- /* AshEntryBackendTypeBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
- /* AshEntryBlockerStateBuffer */
- size = add_size(size, mul_size(NAMEDATALEN, ash_max_entries));
-
- return size;
- }
参考:
GitHub - pgsentinel/pgsentinel: postgresql extension providing Active session history