在对 PostgreSQL 源代码有了一定了解之后,本文尝试从全局视角剖析一条 SQL 命令的完整执行流程。下面以命令 [CREATE TABLE shy_test(id int primary key, name varchar(20))] 为例,结合源码一步一步深入分析,窥探其实现流程与设计思想。
1 CreateStmt
该结构用于保存对 CREATE TABLE 语句进行语法解析后生成的相关信息,如表信息(relation)、列定义列表(tableElts)、约束(constraints)、表访问方法(accessMethod,例如 heap)等。
/*
 * CreateStmt - parse node for a CREATE TABLE statement.
 *
 * Collects what the statement specified: the relation to create, its
 * column definitions, inheritance parents, partitioning clauses,
 * constraints, WITH options, ON COMMIT action, tablespace, table access
 * method and the IF NOT EXISTS flag.
 */
typedef struct CreateStmt
{
NodeTag type;
RangeVar *relation; /* relation to create */
List *tableElts; /* column definitions (list of ColumnDef) */
List *inhRelations; /* relations to inherit from (list of
* RangeVar) */
PartitionBoundSpec *partbound; /* FOR VALUES clause */
PartitionSpec *partspec; /* PARTITION BY clause */
TypeName *ofTypename; /* OF typename */
List *constraints; /* constraints (list of Constraint nodes) */
List *options; /* options from WITH clause */
OnCommitAction oncommit; /* what do we do at COMMIT? */
char *tablespacename; /* table space to use, or NULL */
char *accessMethod; /* table access method */
bool if_not_exists; /* just do nothing if it already exists?
* (IF NOT EXISTS was given; this is not a
* record of whether the table exists) */
} CreateStmt;
(gdb) p *(CreateStmt *)$rawstmt->stmt
$6 = {type = T_CreateStmt, relation = 0x55db4d59e140, tableElts = 0x55db4d59e478, inhRelations = 0x0, partbound = 0x0, partspec = 0x0,
ofTypename = 0x0, constraints = 0x0, options = 0x0, oncommit = ONCOMMIT_NOOP, tablespacename = 0x0, accessMethod = 0x0, if_not_exists = false}
2 RangeVar
该结构用于表示SQL语句中出现的表(关系)引用,例如FROM子句中的表,或本例中CREATE TABLE要创建的表;其中保存catalogname/schemaname/relname、别名(alias)、持久化类型(relpersistence)以及是否按继承关系展开(inh)等信息。
/*
 * RangeVar - a reference to a relation (or sequence) by name.
 *
 * The name may be qualified by catalog and/or schema; either qualifier is
 * NULL when not given. The node also carries the inheritance flag, the
 * persistence code, an optional alias and the token location of the
 * reference.
 */
typedef struct RangeVar
{
NodeTag type;
char *catalogname; /* the catalog (database) name, or NULL */
char *schemaname; /* the schema name, or NULL */
char *relname; /* the relation/sequence name */
bool inh; /* expand rel by inheritance? recursively act
* on children? */
char relpersistence; /* see RELPERSISTENCE_* in pg_class.h */
Alias *alias; /* table alias & optional column aliases */
int location; /* token location, or -1 if unknown */
} RangeVar;
(gdb) p *$createstmt->relation
$8 = {type = T_RangeVar, catalogname = 0x0, schemaname = 0x0, relname = 0x55db4d59e120 "shy_test", inh = true, relpersistence = 112 'p',
alias = 0x0, location = 13}
3 ColumnDef
该结构体保存建表语句中的列定义信息,如列名、列类型、是否非空(is_not_null)、压缩方法、默认值表达式以及列级约束等。
/*
* ColumnDef - column definition (used in various creates)
*
* If the column has a default value, we may have the value expression
* in either "raw" form (an untransformed parse tree) or "cooked" form
* (a post-parse-analysis, executable expression tree), depending on
* how this ColumnDef node was created (by parsing, or by inheritance
* from an existing relation). We should never have both in the same node!
*
* Similarly, we may have a COLLATE specification in either raw form
* (represented as a CollateClause with arg==NULL) or cooked form
* (the collation's OID).
*
* The constraints list may contain a CONSTR_DEFAULT item in a raw
* parsetree produced by gram.y, but transformCreateStmt will remove
* the item and set raw_default instead. CONSTR_DEFAULT items
* should not appear in any subsequent processing.
*/
typedef struct ColumnDef
{
NodeTag type;
char *colname; /* name of column */
TypeName *typeName; /* type of column */
char *compression; /* compression method for column */
int inhcount; /* number of times column is inherited */
bool is_local; /* column has local (non-inherited) def'n */
bool is_not_null; /* NOT NULL constraint specified? */
bool is_from_type; /* column definition came from table type */
char storage; /* attstorage setting, or 0 for default */
/* At most one of raw_default / cooked_default is set (see header). */
Node *raw_default; /* default value (untransformed parse tree) */
Node *cooked_default; /* default value (transformed expr tree) */
char identity; /* attidentity setting */
RangeVar *identitySequence; /* to store identity sequence name for
* ALTER TABLE ... ADD COLUMN */
char generated; /* attgenerated setting */
/* Collation is given in raw form (collClause) or cooked form (collOid). */
CollateClause *collClause; /* untransformed COLLATE spec, if any */
Oid collOid; /* collation OID (InvalidOid if not set) */
List *constraints; /* other constraints on column */
List *fdwoptions; /* per-column FDW options */
int location; /* parse location, or -1 if none/unknown */
} ColumnDef;
(gdb) p *$columdef
$31 = {type = T_ColumnDef, colname = 0x55db4d59e190 "id", typeName = 0x55db4d59e238, compression = 0x0, inhcount = 0, is_local = true, is_not_null = false, is_from_type = false, storage = 0 '\000', raw_default = 0x0, cooked_default = 0x0, identity = 0 '\000', identitySequence = 0x0, generated = 0 '\000', collClause = 0x0, collOid = 0, constraints = 0x55db4d59e398, fdwoptions = 0x0, location = 23}
(gdb) p *$columdef1
$32 = {type = T_ColumnDef, colname = 0x55db4d59e4c8 "name", typeName = 0x55db4d59e570, compression = 0x0, inhcount = 0, is_local = true, is_not_null = false, is_from_type = false, storage = 0 '\000', raw_default = 0x0, cooked_default = 0x0, identity = 0 '\000', identitySequence = 0x0, generated = 0 '\000', collClause = 0x0, collOid = 0, constraints = 0x0, fdwoptions = 0x0, location = 43}
4 Constraint
该结构体用于保存约束信息,如主键(PRIMARY KEY)、唯一(UNIQUE)、非空(NOT NULL)、检查(CHECK)、外键(FOREIGN KEY)和排除(EXCLUSION)约束等。
/*
 * Constraint - parse node describing one constraint.
 *
 * contype selects the constraint kind; the remaining fields are grouped
 * by the kinds that use them, as the section comments below indicate.
 */
typedef struct Constraint
{
NodeTag type;
ConstrType contype; /* constraint kind; see ConstrType */
/* Fields used for most/all constraint types: */
char *conname; /* Constraint name, or NULL if unnamed */
bool deferrable; /* DEFERRABLE? */
bool initdeferred; /* INITIALLY DEFERRED? */
int location; /* token location, or -1 if unknown */
/* Fields used for constraints with expressions (CHECK and DEFAULT): */
bool is_no_inherit; /* is constraint non-inheritable? */
Node *raw_expr; /* expr, as untransformed parse tree */
char *cooked_expr; /* expr, as nodeToString representation */
char generated_when; /* ALWAYS or BY DEFAULT */
/* Fields used for unique constraints (UNIQUE and PRIMARY KEY): */
List *keys; /* String nodes naming referenced key
* column(s) */
List *including; /* String nodes naming referenced nonkey
* column(s) */
/* Fields used for EXCLUSION constraints: */
List *exclusions; /* list of (IndexElem, operator name) pairs */
/* Fields used for index constraints (UNIQUE, PRIMARY KEY, EXCLUSION): */
List *options; /* options from WITH clause */
char *indexname; /* existing index to use; otherwise NULL */
char *indexspace; /* index tablespace; NULL for default */
bool reset_default_tblspc; /* reset default_tablespace prior to
* creating the index */
/* These could be, but currently are not, used for UNIQUE/PKEY: */
char *access_method; /* index access method; NULL for default */
Node *where_clause; /* partial index predicate */
/* Fields used for FOREIGN KEY constraints: */
RangeVar *pktable; /* Primary key table (the referenced table) */
List *fk_attrs; /* Attributes of foreign key */
List *pk_attrs; /* Corresponding attrs in PK table */
char fk_matchtype; /* FULL, PARTIAL, SIMPLE */
char fk_upd_action; /* ON UPDATE action */
char fk_del_action; /* ON DELETE action */
List *old_conpfeqop; /* pg_constraint.conpfeqop of my former self */
Oid old_pktable_oid; /* pg_constraint.confrelid of my former
* self */
/* Fields used for constraints that allow a NOT VALID specification */
bool skip_validation; /* skip validation of existing rows? */
bool initially_valid; /* mark the new constraint as valid? */
} Constraint;
-------------------------------------------------------------------------------------
/*
 * ConstrType - the kind of a Constraint node (stored in Constraint.contype).
 */
typedef enum ConstrType /* types of constraints */
{
CONSTR_NULL, /* not standard SQL, but a lot of people
* expect it */
CONSTR_NOTNULL,
CONSTR_DEFAULT,
CONSTR_IDENTITY,
CONSTR_GENERATED,
CONSTR_CHECK,
CONSTR_PRIMARY,
CONSTR_UNIQUE,
CONSTR_EXCLUSION,
CONSTR_FOREIGN,
/* NOTE(review): the note below presumably covers all four CONSTR_ATTR_* values */
CONSTR_ATTR_DEFERRABLE, /* attributes for previous constraint node */
CONSTR_ATTR_NOT_DEFERRABLE,
CONSTR_ATTR_DEFERRED,
CONSTR_ATTR_IMMEDIATE
} ConstrType;
(gdb) p *(Constraint *)$columdef->constraints->elements->ptr_value
$26 = {type = T_Constraint, contype = CONSTR_PRIMARY, conname = 0x0, deferrable = false, initdeferred = false, location = 30,
is_no_inherit = false, raw_expr = 0x0, cooked_expr = 0x0, generated_when = 0 '\000', keys = 0x0, including = 0x0, exclusions = 0x0,
options = 0x0, indexname = 0x0, indexspace = 0x0, reset_default_tblspc = false, access_method = 0x0, where_clause = 0x0, pktable = 0x0,
fk_attrs = 0x0, pk_attrs = 0x0, fk_matchtype = 0 '\000', fk_upd_action = 0 '\000', fk_del_action = 0 '\000', old_conpfeqop = 0x0,
old_pktable_oid = 0, skip_validation = false, initially_valid = false}
5 Query
SQL语句经过词法、语法解析后生成解析树,随后经过语义分析(parse analysis)与重写生成查询树链表,链表中的每个元素都是一个Query结构体。
/*
* Query -
* Parse analysis turns all statements into a Query tree
* for further processing by the rewriter and planner.
*
* Utility statements (i.e. non-optimizable statements) have the
* utilityStmt field set, and the rest of the Query is mostly dummy.
*
* Planning converts a Query tree into a Plan tree headed by a PlannedStmt
* node --- the Query structure is not used by the executor.
*/
typedef struct Query
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete|utility */
QuerySource querySource; /* where did I come from? */
uint64 queryId; /* query identifier (can be set by plugins) */
bool canSetTag; /* do I set the command result tag? */
Node *utilityStmt; /* non-null if commandType == CMD_UTILITY */
int resultRelation; /* rtable index of target relation for
* INSERT/UPDATE/DELETE; 0 for SELECT */
bool hasAggs; /* has aggregates in tlist or havingQual */
bool hasWindowFuncs; /* has window functions in tlist */
bool hasTargetSRFs; /* has set-returning functions in tlist */
bool hasSubLinks; /* has subquery SubLink */
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
bool hasRowSecurity; /* rewriter has applied some RLS policy */
bool isReturn; /* is a RETURN statement */
List *cteList; /* WITH list (of CommonTableExpr's) */
List *rtable; /* list of range table entries */
FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */
List *targetList; /* target list (of TargetEntry) */
OverridingKind override; /* OVERRIDING clause */
OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
bool groupDistinct; /* is the group by clause distinct? */
List *groupingSets; /* a list of GroupingSet's if present */
Node *havingQual; /* qualifications applied to groups */
List *windowClause; /* a list of WindowClause's */
List *distinctClause; /* a list of SortGroupClause's */
List *sortClause; /* a list of SortGroupClause's */
Node *limitOffset; /* # of result tuples to skip (int8 expr) */
Node *limitCount; /* # of result tuples to return (int8 expr) */
LimitOption limitOption; /* limit type */
List *rowMarks; /* a list of RowMarkClause's */
Node *setOperations; /* set-operation tree if this is top level of
* a UNION/INTERSECT/EXCEPT query */
List *constraintDeps; /* a list of pg_constraint OIDs that the query
* depends on to be semantically valid */
List *withCheckOptions; /* a list of WithCheckOption's (added
* during rewrite) */
/*
* The following two fields identify the portion of the source text string
* containing this query. They are typically only populated in top-level
* Queries, not in sub-queries. When not set, they might both be zero, or
* both be -1 meaning "unknown".
*/
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} Query;
解析分析生成Query结构体
(gdb) p *result
$36 = {type = T_Query, commandType = CMD_UTILITY, querySource = QSRC_ORIGINAL, queryId = 0, canSetTag = false, utilityStmt = 0x55db4d59e6d0, resultRelation = 0, hasAggs = false, hasWindowFuncs = false, hasTargetSRFs = false, hasSubLinks = false, hasDistinctOn = false, hasRecursive = false, hasModifyingCTE = false, hasForUpdate = false, hasRowSecurity = false, isReturn = false, cteList = 0x0, rtable = 0x0, jointree = 0x0, targetList = 0x0, override = OVERRIDING_NOT_SET, onConflict = 0x0, returningList = 0x0, groupClause = 0x0, groupDistinct = false, groupingSets = 0x0, havingQual = 0x0, windowClause = 0x0, distinctClause = 0x0, sortClause = 0x0, limitOffset = 0x0, limitCount = 0x0, limitOption = LIMIT_OPTION_COUNT, rowMarks = 0x0, setOperations = 0x0, constraintDeps = 0x0, withCheckOptions = 0x0,
stmt_location = 0, stmt_len = 0}
对Query进行重写后生成查询树链表信息
(gdb) p *(Query *)querytree_list->elements->ptr_value
$43 = {type = T_Query, commandType = CMD_UTILITY, querySource = QSRC_ORIGINAL, queryId = 0, canSetTag = true, utilityStmt = 0x55db4d59e6d0,
resultRelation = 0, hasAggs = false, hasWindowFuncs = false, hasTargetSRFs = false, hasSubLinks = false, hasDistinctOn = false,
hasRecursive = false, hasModifyingCTE = false, hasForUpdate = false, hasRowSecurity = false, isReturn = false, cteList = 0x0, rtable = 0x0,
jointree = 0x0, targetList = 0x0, override = OVERRIDING_NOT_SET, onConflict = 0x0, returningList = 0x0, groupClause = 0x0,
groupDistinct = false, groupingSets = 0x0, havingQual = 0x0, windowClause = 0x0, distinctClause = 0x0, sortClause = 0x0, limitOffset = 0x0,
limitCount = 0x0, limitOption = LIMIT_OPTION_COUNT, rowMarks = 0x0, setOperations = 0x0, constraintDeps = 0x0, withCheckOptions = 0x0,
stmt_location = 0, stmt_len = 60}
6 PlannedStmt
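计划器会对上述查询树进一步处理,生成以PlannedStmt为头节点的计划树。对于CREATE TABLE这类utility语句,并不会真正生成执行计划,而只是把语句包装进PlannedStmt:commandType为CMD_UTILITY,语句本身保存在utilityStmt字段中,planTree为空(见下面的gdb输出)。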
/* ----------------
* PlannedStmt node
*
* The output of the planner is a Plan tree headed by a PlannedStmt node.
* PlannedStmt holds the "one time" information needed by the executor.
*
* For simplicity in APIs, we also wrap utility statements in PlannedStmt
* nodes; in such cases, commandType == CMD_UTILITY, the statement itself
* is in the utilityStmt field, and the rest of the struct is mostly dummy.
* (We do use canSetTag, stmt_location, stmt_len, and possibly queryId.)
* ----------------
*/
typedef struct PlannedStmt
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete|utility */
uint64 queryId; /* query identifier (copied from Query) */
bool hasReturning; /* is it insert|update|delete RETURNING? */
bool hasModifyingCTE; /* has insert|update|delete in WITH? */
bool canSetTag; /* do I set the command result tag? */
bool transientPlan; /* redo plan when TransactionXmin changes? */
bool dependsOnRole; /* is plan specific to current role? */
bool parallelModeNeeded; /* parallel mode required to execute? */
int jitFlags; /* which forms of JIT should be performed */
struct Plan *planTree; /* tree of Plan nodes */
List *rtable; /* list of RangeTblEntry nodes */
/* rtable indexes of target relations for INSERT/UPDATE/DELETE */
List *resultRelations; /* integer list of RT indexes, or NIL */
List *appendRelations; /* list of AppendRelInfo nodes */
List *subplans; /* Plan trees for SubPlan expressions; note
* that some could be NULL */
Bitmapset *rewindPlanIDs; /* indices of subplans that require REWIND */
List *rowMarks; /* a list of PlanRowMark's */
List *relationOids; /* OIDs of relations the plan depends on */
List *invalItems; /* other dependencies, as PlanInvalItems */
List *paramExecTypes; /* type OIDs for PARAM_EXEC Params */
Node *utilityStmt; /* non-null if this is utility stmt */
/* statement location in source string (copied from Query) */
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} PlannedStmt;
(gdb) p *(PlannedStmt *)plantree_list->elements->ptr_value
$48 = {type = T_PlannedStmt, commandType = CMD_UTILITY, queryId = 0, hasReturning = false, hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false, parallelModeNeeded = false, jitFlags = 0, planTree = 0x0, rtable = 0x0, resultRelations = 0x0, appendRelations = 0x0, subplans = 0x0, rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x0, invalItems = 0x0, paramExecTypes = 0x0, utilityStmt = 0x55db4d59e6d0, stmt_location = 0, stmt_len = 60}
本文主要整理总结了该流程中涉及的关键数据结构,后续几节将从执行器模块出发,继续讲解该SQL语句的执行流程。