Postgresql源码（116）提升子查询案例分析

0 总结

对于SQL：select * from student, (select * from score where sno > 2) s where student.sno = s.sno;

pullup在pull_up_subqueries函数内递归完成，分几步：

将内层rte score追加到上层rtable中：rte1是student、rte2带subquery是子查询、rte3是score。
调整所有var的varno（从1指向3）、varlevelsup（本例不涉及）；还有其他调整本例不涉及。
将上层代表子查询的rte2的subquery清空，但rte2不删除。
将上层jointree中指向子查询的RangeTblRef，替换为子查询自己的FromExpr（带着条件sno > 2）。

在这里插入图片描述

1 待分析场景

-- Rebuild the four demo tables used by the pull-up example.
-- IF EXISTS keeps the script runnable on a fresh database, where a bare
-- DROP TABLE would raise an error because the table does not exist yet.
drop table if exists student;
create table student(sno int, sname varchar(10), ssex int);
insert into student values(1, 'stu1', 0);
insert into student values(2, 'stu2', 1);
insert into student values(3, 'stu3', 1);
insert into student values(4, 'stu4', 0);

drop table if exists course;
create table course(cno int, cname varchar(10), tno int);
insert into course values(10, 'meth', 1);
insert into course values(11, 'english', 2);

drop table if exists teacher;
create table teacher(tno int, tname varchar(10), tsex int);
insert into teacher values(1, 'te1', 1);
insert into teacher values(2, 'te2', 0);

-- score is the table referenced by the subquery in the analysed statement.
drop table if exists score;
create table score (sno int, cno int, degree int);
insert into score values (1, 10, 100);
insert into score values (1, 11, 89);
insert into score values (2, 10, 99);
insert into score values (2, 11, 90);
insert into score values (3, 10, 87);
insert into score values (3, 11, 20);
insert into score values (4, 10, 60);
insert into score values (4, 11, 70);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

带子查询的语句：select * from student, (select * from score where sno > 2) s where student.sno = s.sno;

set enable_hashjoin to off;
set enable_mergejoin to off;

explain select * from student, (select * from score where sno > 2) s where student.sno = s.sno;
                             QUERY PLAN
---------------------------------------------------------------------
 Nested Loop  (cost=0.00..11278.20 rows=3740 width=58)
   Join Filter: (student.sno = score.sno)
   ->  Seq Scan on student  (cost=0.00..21.00 rows=1100 width=46)
   ->  Materialize  (cost=0.00..38.90 rows=680 width=12)
         ->  Seq Scan on score  (cost=0.00..35.50 rows=680 width=12)
               Filter: (sno > 2)


||||||||||
||等价写法||
vvvvvvvvvvv


explain select * from student, score where score.sno > 2 and student.sno = score.sno;
                             QUERY PLAN
---------------------------------------------------------------------
 Nested Loop  (cost=0.00..11278.20 rows=3740 width=58)
   Join Filter: (student.sno = score.sno)
   ->  Seq Scan on student  (cost=0.00..21.00 rows=1100 width=46)
   ->  Materialize  (cost=0.00..38.90 rows=680 width=12)
         ->  Seq Scan on score  (cost=0.00..35.50 rows=680 width=12)
               Filter: (sno > 2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

pull_up_subqueries做的事情就是帮我们把子查询上拉了，下面分析上拉是如何做的。

select * from student, (select * from score where sno > 2) s where student.sno = s.sno;

PlannerInfo完整结构
在这里插入图片描述

注意pull_up_subqueries只对jointree做处理。

/*
 * pull_up_subqueries
 *		Top-level entry point: hand the query's jointree to
 *		pull_up_subqueries_recurse and store the returned node back into
 *		root->parse->jointree.
 *
 * Only root->parse->jointree is passed to the recursion.  The last two
 * arguments are NULL because, at the top level, there is no containing
 * join and no containing appendrel.  The jointree is asserted to be a
 * FromExpr both before and after the call.
 */
void
pull_up_subqueries(PlannerInfo *root)
{
	/* Top level of jointree must always be a FromExpr */
	Assert(IsA(root->parse->jointree, FromExpr));
	/* Recursion starts with no containing join nor appendrel */
	root->parse->jointree = (FromExpr *)
		pull_up_subqueries_recurse(root, (Node *) root->parse->jointree,
								   NULL, NULL);
	/* We should still have a FromExpr */
	Assert(IsA(root->parse->jointree, FromExpr));
}
1
2
3
4
5
6
7
8
9
10
11
12

2 pull_up_subqueries流程分析

2.1 处理FromExpr下面挂的第一个RANGETBLREF（student表）

FromExpr的第一张表是student表，指向一个rtekind = RTE_RELATION普通表类型，无需做任何处理。
在这里插入图片描述

2.2 处理FromExpr下面挂的第二个RANGETBLREF（子查询）

FromExpr的第二个rte是子查询(select * from score where sno > 2) s，可以看到引用的rte结构的subquery指向了内层query：
在这里插入图片描述
开始进入pull_up_simple_subquery内部处理，进入路径：pull_up_subqueries → pull_up_subqueries_recurse → pull_up_simple_subquery。

在分析pull_up_simple_subquery前有两个准入条件：

rte->rtekind == RTE_SUBQUERY
is_simple_subquery：不全部列举了，其中重要的是子查询不能带有一些特殊的语法：

is_simple_subquery
	...
	if (subquery->hasAggs ||
		subquery->hasWindowFuncs ||
		subquery->hasTargetSRFs ||
		subquery->groupClause ||
		subquery->groupingSets ||
		subquery->havingQual ||
		subquery->sortClause ||
		subquery->distinctClause ||
		subquery->limitOffset ||
		subquery->limitCount ||
		subquery->hasForUpdate ||
		subquery->cteList)
		return false;
	...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

2.3 进入pull_up_simple_subquery开始处理子查询上拉

第一步：拿到rte指向的子查询的Query树，构造PlannerInfo开始处理。

static Node *
pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
						JoinExpr *lowest_outer_join,
						AppendRelInfo *containing_appendrel)
{
	Query	   *subquery;
	Query	   *parse = root->parse;
	PlannerInfo *subroot;

	subquery = copyObject(rte->subquery);

	subroot = makeNode(PlannerInfo);
	subroot->parse = subquery;
	...
	...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

第二步：递归的处理sublink、subquery等。

	replace_empty_jointree(subquery);

	if (subquery->hasSubLinks)
		pull_up_sublinks(subroot);

	preprocess_function_rtes(subroot);

	pull_up_subqueries(subroot);
1
2
3
4
5
6
7
8

第三步：开始pull up

到这里subroot就是rte2的subquery子查询的结构还没有任何调整：
在这里插入图片描述

	/*
	 * Adjust level-0 varnos in subquery so that we can append its rangetable
	 * to upper query's.  We have to fix the subquery's append_rel_list as
	 * well.
	 */
	rtoffset = list_length(parse->rtable);  // 2
	OffsetVarNodes((Node *) subquery, rtoffset, 0);
	OffsetVarNodes((Node *) subroot->append_rel_list, rtoffset, 0);
	
1
2
3
4
5
6
7
8
9

parse是上层查询的Query，上层的rtable中有两个rte。因为要把子查询拉平，所以需要调整子查询中varno的指向；由于子查询的rte是append到父查询rtable末尾的，所以直接加上父查询rte的个数即可（这里是2）。调整后应该指向父查询rtable的3号位置（此时父查询只有两个rte，3号位置还是空的）。

OffsetVarNodes((Node *) subquery, 2, 0);

调整var→varno：1→3。
调整rangetblref→rtindex：1→3。

	/*
	 * Upper-level vars in subquery are now one level closer to their parent
	 * than before.
	 */
	IncrementVarSublevelsUp((Node *) subquery, -1, 1);
	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, -1, 1);
1
2
3
4
5
6

这一步调整的目的：varlevelsup=1表示引用上一层查询的列（相当于层级距离），子查询拉平后距离少了1，所以varlevelsup需要减1。
在当前SQL（select * from student, (select * from score where sno > 2) s where student.sno = s.sno;）中，这一步调整的是var→varlevelsup字段。该字段表示当前查询引用了上面第几层查询的变量；但子查询(select * from score where sno > 2)没有引用上层的任何列，其中var→varlevelsup都是0，所以这一步调整不会产生任何影响。

	/*
	 * Now append the adjusted rtable entries and their perminfos to upper
	 * query. (We hold off until after fixing the upper rtable entries; no
	 * point in running that code on the subquery ones too.)
	 */
	CombineRangeTables(&parse->rtable, &parse->rteperminfos,
					   subquery->rtable, subquery->rteperminfos);
1
2
3
4
5
6
7

开始把子查询的RTE拷贝到上层，现在子查询里面的varno=3指向就对了。
在这里插入图片描述

	/*
	 * We no longer need the RTE's copy of the subquery's query tree.  Getting
	 * rid of it saves nothing in particular so far as this level of query is
	 * concerned; but if this query level is in turn pulled up into a parent,
	 * we'd waste cycles copying the now-unused query tree.
	 */
	rte->subquery = NULL;
1
2
3
4
5
6
7

清空子查询RTE携带的Query（rte->subquery = NULL），rte本身保留。注意此时上层还缺少子查询里的过滤条件（sno > 2）。
在这里插入图片描述

pull_up_simple_subquery
	return (Node *) subquery->jointree;
1
2

返回一个jointree带着条件。
在这里插入图片描述
返回去后，在这里把fromlist指向的第二个rte（子查询）换成上面计算好的jointree。

然后就拉平了。

pull_up_subqueries_recurse
	...
	else if (IsA(jtnode, FromExpr))
	{
		FromExpr   *f = (FromExpr *) jtnode;
		ListCell   *l;

		Assert(containing_appendrel == NULL);
		/* Recursively transform all the child nodes */
		foreach(l, f->fromlist)
		{
			lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
												   lowest_outer_join,
												   NULL);
		}
	}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

最终效果对比

pullup前 vs pullup后
在这里插入图片描述

相关阅读:
Apache log4j漏洞总结
 ArcGIS10.1软件安装教程
 【特刊征稿】SI特刊“可计算情感”征稿（综合性期刊IF=2.8）
SpringBoot之yaml
第一百五十七回 SliverList组件
 【SpringCloud学习笔记】Elasticsearch
net mvc中使用vue自定义组件遇到的坑
 android 11.0 获取当前界面的APP ，在APP的界面禁止灭屏
 嵌入式培训机构四个月实训课程笔记（完整版）-Linux ARM驱动编程第六天-ARM Linux编程之高级驱动基础（物联技术666）
期货开户手续费的组成和收费模式
原文地址：https://blog.csdn.net/jackgo73/article/details/134534427