Greenplum数据库外部表执行节点相关函数位于src/backend/executor/nodeExternalscan.c文件中,用于支持外部表扫描。由以下例程构成:
INTERFACE ROUTINES
ExecInitExternalScan creates and initializes an externalscan node.
ExecExternalScan sequentially scans a relation.
ExecExternalNext retrieves the next tuple in sequential order.
ExecEndExternalScan releases any storage allocated.
ExecExternalReScan rescans the relation

ExecInitExternalScan函数创建并初始化一个externalscan节点,形参ExternalScan代表External Scan node【scan.scanrelid是此节点的外部表的index。filenames是N个字符串节点指针(或NULL)的列表,其中N是数组中的segment数。位置I的指针为NULL或指向包含segment I的文件名的字符串节点】 。其初始化流程如下所示。
ExternalScan *node和EState *estate关联到ExternalScanState对应成员中externalstate->ss.ss_currentRelation和externalstate->ess_ScanDescexternalstate->delayEagerFreeExternalScanState *ExecInitExternalScan(ExternalScan *node, EState *estate, int eflags){
ExternalScanState *externalstate = makeNode(ExternalScanState); /* create state structure */
externalstate->ss.ps.plan = (Plan *) node;
externalstate->ss.ps.state = estate;
ExecAssignExprContext(estate, &externalstate->ss.ps); /* Miscellaneous initialization: create expression context for node */
/* initialize child expressions */
externalstate->ss.ps.targetlist = (List *)ExecInitExpr((Expr *) node->scan.plan.targetlist,(PlanState *) externalstate);
externalstate->ss.ps.qual = (List *)ExecInitExpr((Expr *) node->scan.plan.qual,(PlanState *) externalstate);
/* Check if targetlist or qual contains a var node referencing the ctid column */
externalstate->cdb_want_ctid = contain_ctid_var_reference(&node->scan);
ItemPointerSetInvalid(&externalstate->cdb_fake_ctid);
/* tuple table initialization */
ExecInitResultTupleSlot(estate, &externalstate->ss.ps);
ExecInitScanTupleSlot(estate, &externalstate->ss);
/* get the relation object id from the relid'th entry in the range table and open that relation. */
Relation currentRelation = ExecOpenScanExternalRelation(estate, node->scan.scanrelid);
FileScanDesc currentScanDesc = external_beginscan(currentRelation, node->scancounter, node->uriList, node->fmtOptString, node->fmtType, node->isMasterOnly, node->rejLimit, node->rejLimitInRows, node->logErrors, node->encoding);
externalstate->ss.ss_currentRelation = currentRelation;
externalstate->ess_ScanDesc = currentScanDesc;
ExecAssignScanType(&externalstate->ss, RelationGetDescr(currentRelation));
/* Initialize result tuple type and projection info. */
ExecAssignResultTypeFromTL(&externalstate->ss.ps);
ExecAssignScanProjectionInfo(&externalstate->ss);
/* If eflag contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, then this node is not eager free safe. */
externalstate->delayEagerFree = ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);
return externalstate;
}
ExecExternalScan函数顺序扫描外部表,返回符合要求的元组。其调用ExecScan函数,向其传递访问方法。
/*
 * ExecExternalScan(node)
 *		Scans the external relation sequentially and returns the next
 *		qualifying tuple.
 *
 * The work is delegated to ExecScan(), which is given ExternalNext as the
 * access method and ExternalRecheck as the recheck method.
 */
TupleTableSlot *
ExecExternalScan(ExternalScanState *node)
{
	ExecScanAccessMtd fetchNext = (ExecScanAccessMtd) ExternalNext;
	ExecScanRecheckMtd recheck = (ExecScanRecheckMtd) ExternalRecheck;

	return ExecScan(&node->ss, fetchNext, recheck);
}

ExternalNext函数首先创建ExternalSelectDesc【该结构体包含了quals和projectionInfo】,进入扫描tuple的循环(获取到合适的元组就返回-火山模型),利用external_getnext函数获取元组,如果tuple不为null,需要进行ExternalConstraintCheck和ctid fake流程;如果tuple为null,说明没有扫描到元组,直接返回。
/*
 * ExternalNext
 *		Fetch the next tuple of the external scan into the node's scan tuple
 *		slot and return that slot.
 *
 * A tuple that fails ExternalConstraintCheck() (consulted only when the scan
 * descriptor has fs_hasConstraints set) is dropped and the next one is
 * fetched. When external_getnext() yields no tuple the slot is returned
 * cleared, and the scan is eagerly freed unless delayEagerFree is set.
 */
static TupleTableSlot *
ExternalNext(ExternalScanState *node)
{
	/* Pull what we need out of the scan state and its EState. */
	TupleTableSlot *scanSlot = node->ss.ss_ScanTupleSlot;
	FileScanDesc fileScan = node->ess_ScanDesc;
	ScanDirection dir = node->ss.ps.state->es_direction;
	ExternalSelectDesc selectDesc;
	HeapTuple	row;

	selectDesc = external_getnext_init(&(node->ss.ps));

	/* Hand the plan's quals to the access method when pushdown is enabled. */
	if (gp_external_enable_filter_pushdown)
	{
		selectDesc->filter_quals = node->ss.ps.plan->qual;
	}

	for (;;)
	{
		/* Get the next tuple from the file access methods. */
		row = external_getnext(fileScan, dir, selectDesc);

		if (!row)
		{
			/* No tuple was produced: return an empty slot. */
			ExecClearTuple(scanSlot);
			if (!node->delayEagerFree)
			{
				ExecEagerFreeExternalScan(node);
			}
			break;
		}

		/*
		 * Save the tuple in our scan tuple slot; no buffer is associated
		 * with it (InvalidBuffer).
		 *
		 * NOTE(review): the previous comment here spoke of passing 'false'
		 * and of heap_getnext() buffer refcounts, which does not match this
		 * call (InvalidBuffer, true). Presumably the final 'true' lets the
		 * slot free the tuple -- confirm against ExecStoreHeapTuple().
		 */
		ExecStoreHeapTuple(row, scanSlot, InvalidBuffer, true);

		/* Drop rows that violate the constraints and try the next one. */
		if (node->ess_ScanDesc->fs_hasConstraints &&
			!ExternalConstraintCheck(scanSlot, node))
		{
			ExecClearTuple(scanSlot);
			continue;
		}

		/* CDB: Label each row with a synthetic ctid if needed for subquery dedup. */
		if (node->cdb_want_ctid && !TupIsNull(scanSlot))
		{
			slot_set_ctid_from_fake(scanSlot, &node->cdb_fake_ctid);
		}
		break;
	}

	pfree(selectDesc);

	return scanSlot;
}