我们知道,src/backend/commands/analyze.c文件中do_analyze_rel函数的采样工作由如下代码完成:
/* Acquire the sample rows */
rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
if (inh) numrows = acquire_inherited_sample_rows(onerel, elevel, rows, targrows, &totalrows, &totaldeadrows);
else numrows = (*acquirefunc) (onerel, elevel, rows, targrows, &totalrows, &totaldeadrows);
acquire_inherited_sample_rows函数从继承表构成的继承树中获取采样行。该函数的行为与acquire_sample_rows相同,区别在于它会从指定表及其所有继承子表中收集行。如果没有继承子表,或者所有子表都是不支持分析的外部表,则失败并返回零。
acquire_inherited_sample_rows函数首先调用find_all_inheritors函数获取指定表及其所有继承子表的Oid,然后判定指定表有没有子表(返回的列表长度小于2即表示没有子表);如果没有,则将pg_class系统表中对应记录的relhassubclass列置为false,报告跳过本次分析并返回0。
/*
 * acquire_inherited_sample_rows -- collect sample rows from an inheritance tree.
 *
 * onerel        relation whose inheritance set (itself plus descendants) is sampled
 * elevel        message level handed to ereport() and to the per-table sampling functions
 * rows          output array that receives the sampled tuples
 * targrows      total number of rows to aim for across all members
 * totalrows     out: sum of the row totals reported by the per-table sampling functions
 * totaldeadrows out: sum of the dead-row totals reported by those functions
 *
 * Returns the number of tuples stored in rows[], or 0 when the tree is skipped
 * (no child tables, or no member that can be sampled). On those early returns
 * *totalrows and *totaldeadrows are not written.
 */
static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows){
Relation *rels; ListCell *lc;
double *relblocks,totalblocks; int numrows, nrels, i;
bool has_child; AcquireSampleRowsFunc *acquirefuncs;
List *tableOIDs = find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL); /* Find all members of inheritance set. We only need AccessShareLock on the children. */
/* Check that there's at least one descendant, else fail. This could happen despite analyze_rel's relhassubclass check, if table once had a child but no longer does. In that case, we can clear the relhassubclass field so as not to make the same mistake again later. (This is safe because we hold ShareUpdateExclusiveLock.) */
if (list_length(tableOIDs) < 2){ /* fewer than two members: no descendant was found */
SetRelationHasSubclass(RelationGetRelid(onerel), false); /* clear relhassubclass for onerel */
ereport(elevel,(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",get_namespace_name(RelationGetNamespace(onerel)),RelationGetRelationName(onerel))));
return 0;
}
由于我们获取了指定表和所有继承子级表的Oid,并要为每个表获取Relation,因此需要为每个表的Relation创建存储空间,这里使用Relation数组rels表示;为每个表指定采样函数,这里使用AcquireSampleRowsFunc数组acquirefuncs表示;为每个表获取数据块的数量,这里使用double数组relblocks表示。
/* Identify acquirefuncs to use, and count blocks in all the relations. The result could overflow BlockNumber, so we use double arithmetic. */
/* Each array gets one slot per member of tableOIDs; only the first nrels slots end up filled, because some members are skipped. */
rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
acquirefuncs = (AcquireSampleRowsFunc *)palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
/* totalblocks accumulates the block counts of the kept members; has_child records whether any member was kept. */
totalblocks = 0; nrels = 0; has_child = false;
下面是真正的处理流程,首先是遍历tableOIDs中的每个元素。对于其他后端的临时表,直接释放锁并跳过;对于普通表或物化视图,使用acquire_sample_rows作为采样函数、RelationGetNumberOfBlocks(childrel)获取数据页数;对于外部表,则调用fdw插件提供的AnalyzeForeignTable函数,获取插件提供的采样函数和数据页数,如果插件不支持分析则释放锁并跳过;其他情况则跳过该表并释放锁(如果该表就是传入的父表onerel,则不释放锁)。比如分区表本身不再进行采样流程,而普通继承表的父表仍需要进行采样流程。
/* Classify every member of the inheritance set: pick its sampling function and block count, or skip it. */
foreach(lc, tableOIDs){
Oid childOID = lfirst_oid(lc); /* OID of this inheritance-set member */
Relation childrel = table_open(childOID, NoLock); /* We already got the needed lock */
if (RELATION_IS_OTHER_TEMP(childrel)){ /* Ignore if temp table of another backend */
table_close(childrel, AccessShareLock); continue; /* ... but release the lock on it */
}
AcquireSampleRowsFunc acquirefunc = NULL;
BlockNumber relpages = 0;
if (childrel->rd_rel->relkind == RELKIND_RELATION || childrel->rd_rel->relkind == RELKIND_MATVIEW) { /* Check table type (MATVIEW can't happen, but might as well allow) */
acquirefunc = acquire_sample_rows; /* Regular table, so use the regular row acquisition function */
relpages = RelationGetNumberOfBlocks(childrel);
}else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE){ /* For a foreign table, call the FDW's hook function to see whether it supports analysis. */
FdwRoutine *fdwroutine = GetFdwRoutineForRelation(childrel, false);
bool ok = false;
/* The hook is optional; when present it fills in acquirefunc and relpages and reports whether analysis is supported. */
if (fdwroutine->AnalyzeForeignTable != NULL) ok = fdwroutine->AnalyzeForeignTable(childrel, &acquirefunc, &relpages);
if (!ok){/* ignore, but release the lock on it */
table_close(childrel, AccessShareLock); continue;
}
}else {/* ignore, but release the lock on it. don't try to unlock the passed-in relation */
if (childrel != onerel) table_close(childrel, AccessShareLock);
else table_close(childrel, NoLock);
continue;
}
has_child = true; /* OK, we'll process this child */
/* Remember the open relation, its sampling function and its size for the sampling pass. */
rels[nrels] = childrel;
acquirefuncs[nrels] = acquirefunc;
relblocks[nrels] = (double) relpages;
totalblocks += (double) relpages;
nrels++;
}
if (!has_child){ /* If we don't have at least one child table to consider, fail. If the relation is a partitioned table, it's not counted as a child table. */
ereport(elevel,(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",get_namespace_name(RelationGetNamespace(onerel)),RelationGetRelationName(onerel)))); return 0;
}
接下来从每个表中按其数据块数占总块数的比例采样行。遍历上面流程筛选出来的表,取出表的Relation、采样函数和数据块数,按比例确定该表的采样行数rint(targrows * childblocks / totalblocks),并调用采样函数进行采样;如果子表与父表的元组描述符(行类型)不同,则将采样到的子表元组转换为父表的行类型。
/* Now sample rows from each relation, proportionally to its fraction of the total block count. (This might be less than desirable if the child rels have radically different free-space percentages, but it's not clear that it's worth working harder.) */
/* Reset the running sample count and the caller-visible totals before accumulating. */
numrows = 0;*totalrows = 0;*totaldeadrows = 0;
for (i = 0; i < nrels; i++){
Relation childrel = rels[i];
AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
double childblocks = relblocks[i];
if (childblocks > 0){ /* members with zero blocks are not sampled, only closed below */
int childtargrows = (int) rint(targrows * childblocks / totalblocks); /* this member's share of the target, rounded to an integer */
childtargrows = Min(childtargrows, targrows - numrows); /* Make sure we don't overrun due to roundoff error */
if (childtargrows > 0){
int childrows;double trows,tdrows;
childrows = (*acquirefunc) (childrel, elevel, rows + numrows, childtargrows, &trows, &tdrows); /* Fetch a random sample of the child's rows */
/* We may need to convert from child's rowtype to parent's */
if (childrows > 0 && !equalTupleDescs(RelationGetDescr(childrel), RelationGetDescr(onerel))) {
TupleConversionMap *map = convert_tuples_by_name(RelationGetDescr(childrel), RelationGetDescr(onerel));
/* A NULL map leaves the sampled tuples untouched (presumably no conversion is needed -- confirm against convert_tuples_by_name). */
if (map != NULL){
for (int j = 0; j < childrows; j++) {
/* Swap each tuple just sampled for its converted copy and free the original. */
HeapTuple newtup = execute_attr_map_tuple(rows[numrows + j], map);
heap_freetuple(rows[numrows + j]);
rows[numrows + j] = newtup;
}
free_conversion_map(map);
}
}
numrows += childrows; *totalrows += trows; *totaldeadrows += tdrows; /* And add to counts */
}
}
table_close(childrel, NoLock); /* Note: we cannot release the child-table locks, since we may have pointers to their TOAST tables in the sampled rows. */
}
/* Number of tuples now stored in rows[]. */
return numrows;
}