diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index d65daee42ba0..3f9f0d6f0bd8 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -1190,6 +1190,10 @@ ExecReScanHashJoin(HashJoinState *node) * if it's a single-batch join, and there is no parameter change for the * inner subnode, then we can just re-use the existing hash table without * rebuilding it. + * + * GPDB: hybrid hash join was modified to spill out scanned batches + * (including 0th batch) to disk to provide rescannability if it was + * requested. See the SpillCurrentBatch comment for details. */ if (node->hj_HashTable != NULL) { diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 79233aab1560..cc4f0a2b2ccb 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -59,6 +59,8 @@ static CdbVisitOpt pathnode_walk_list(List *pathlist, static CdbVisitOpt pathnode_walk_kids(Path *path, CdbVisitOpt (*walker)(Path *path, void *context), void *context); +static Path* ensure_outer_rescannable(PlannerInfo *root, Path *outer_path); +static Path* ensure_inner_rescannable(PlannerInfo *root, Path *outer_path, Path *inner_path); /* * pathnode_walk_node @@ -224,6 +226,53 @@ pathnode_walk_kids(Path *path, } /* pathnode_walk_kids */ +static Path* +ensure_outer_rescannable(PlannerInfo *root, Path *outer_path) +{ + if (!outer_path->rescannable) + { + MaterialPath *matouter = create_material_path(root, outer_path->parent, outer_path); + + matouter->cdb_shield_child_from_rescans = true; + + outer_path = (Path *) matouter; + } + + return outer_path; +} + +static Path* +ensure_inner_rescannable(PlannerInfo *root, Path *outer_path, Path *inner_path) +{ + if (!inner_path->rescannable) + { + /* + * NLs potentially rescan the inner; if our inner path + * isn't rescannable we have to add a materialize node + */ + MaterialPath *matinner = create_material_path(root, 
inner_path->parent, inner_path); + + matinner->cdb_shield_child_from_rescans = true; + + /* + * If both the inner and the outer carry a motion hazard, we + * must set cdb_strict to avoid a deadlock: it makes the + * materialize fully fetch the underlying input, which is + * what removes the deadlock hazard. + */ + if (inner_path->motionHazard && outer_path->motionHazard) + { + matinner->cdb_strict = true; + matinner->path.motionHazard = false; + } + + inner_path = (Path *) matinner; + } + + return inner_path; +} + + /***************************************************************************** * MISC. PATH UTILITIES *****************************************************************************/ @@ -3218,47 +3267,18 @@ create_nestloop_path(PlannerInfo *root, /* * If this join path is parameterized by a parameter above this path, then - * this path needs to be rescannable. A NestLoop is rescannable, when both + * this path needs to be rescannable. Joins are rescannable, when both * outer and inner paths rescannable, so make them both rescannable. */ - if (!outer_path->rescannable && !bms_is_empty(required_outer)) - { - MaterialPath *matouter = create_material_path(root, outer_path->parent, outer_path); - - matouter->cdb_shield_child_from_rescans = true; - - outer_path = (Path *) matouter; - } + if (!bms_is_empty(required_outer)) + outer_path = ensure_outer_rescannable(root, outer_path); /* * If outer has at most one row, NJ will make at most one pass over inner. * Else materialize inner rel after motion so NJ can loop over results.
*/ - if (!inner_path->rescannable && - (!outer_path->parent->onerow || !bms_is_empty(required_outer))) - { - /* - * NLs potentially rescan the inner; if our inner path - * isn't rescannable we have to add a materialize node - */ - MaterialPath *matinner = create_material_path(root, inner_path->parent, inner_path); - - matinner->cdb_shield_child_from_rescans = true; - - /* - * If we have motion on the outer, to avoid a deadlock; we - * need to set cdb_strict. In order for materialize to - * fully fetch the underlying (required to avoid our - * deadlock hazard) we must set cdb_strict! - */ - if (inner_path->motionHazard && outer_path->motionHazard) - { - matinner->cdb_strict = true; - matinner->path.motionHazard = false; - } - - inner_path = (Path *) matinner; - } + if (!outer_path->parent->onerow || !bms_is_empty(required_outer)) + inner_path = ensure_inner_rescannable(root, outer_path, inner_path); /* * If the inner path is parameterized by the outer, we must drop any @@ -3456,6 +3476,17 @@ create_mergejoin_path(PlannerInfo *root, inner_path->pathkeys) innersortkeys = NIL; + /* + * If this join path is parameterized by a parameter above this path, then + * this path needs to be rescannable. Joins are rescannable when both + * outer and inner paths are rescannable, so make them both rescannable. + */ + if (!bms_is_empty(required_outer)) + { + outer_path = ensure_outer_rescannable(root, outer_path); + inner_path = ensure_inner_rescannable(root, outer_path, inner_path); + } + pathnode->jpath.path.pathtype = T_MergeJoin; pathnode->jpath.path.parent = joinrel; pathnode->jpath.path.param_info = @@ -3573,6 +3604,17 @@ create_hashjoin_path(PlannerInfo *root, return NULL; } + /* + * If this join path is parameterized by a parameter above this path, then + * this path needs to be rescannable. Joins are rescannable when both + * outer and inner paths are rescannable, so make them both rescannable.
+ * + * Hash Join inner side was customized in the GPDB to be always rescannable + * (see the SpillCurrentBatch comment for details) + */ + if (!bms_is_empty(required_outer)) + outer_path = ensure_outer_rescannable(root, outer_path); + pathnode = makeNode(HashPath); pathnode->jpath.path.pathtype = T_HashJoin; diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index 8348b38c1107..496625bbcc35 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -12595,8 +12595,9 @@ where out.b in (select coalesce(tcorr2_d.c, 99) -> Materialize (cost=1.07..2.16 rows=2 width=8) -> Hash Left Join (cost=1.07..2.14 rows=4 width=8) Hash Cond: (tcorr1.a = tcorr2.a) - -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) - -> Seq Scan on tcorr1 (cost=0.00..1.01 rows=1 width=4) + -> Materialize (cost=0.00..1.03 rows=1 width=4) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) + -> Seq Scan on tcorr1 (cost=0.00..1.01 rows=1 width=4) -> Hash (cost=1.06..1.06 rows=1 width=12) -> HashAggregate (cost=1.04..1.05 rows=1 width=12) Group Key: tcorr2.a @@ -12606,7 +12607,7 @@ where out.b in (select coalesce(tcorr2_d.c, 99) -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..1.03 rows=1 width=8) -> Seq Scan on tcorr2 (cost=0.00..1.01 rows=1 width=8) Optimizer: Postgres query optimizer -(18 rows) +(19 rows) -- expect 1 row select * @@ -12711,8 +12712,9 @@ where out.b in (select coalesce(tcorr2_d.c, 99) -> Materialize (cost=1.07..2.16 rows=2 width=8) -> Hash Left Join (cost=1.07..2.14 rows=4 width=8) Hash Cond: (tcorr1.a = tcorr2.a) - -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) - -> Seq Scan on tcorr1 (cost=0.00..1.01 rows=1 width=4) + -> Materialize (cost=0.00..1.03 rows=1 width=4) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4) + -> Seq Scan on tcorr1 (cost=0.00..1.01 rows=1 width=4) -> Hash (cost=1.06..1.06 
rows=1 width=12) -> HashAggregate (cost=1.04..1.05 rows=1 width=12) Group Key: tcorr2.a @@ -12722,7 +12724,7 @@ where out.b in (select coalesce(tcorr2_d.c, 99) -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..1.03 rows=1 width=8) -> Seq Scan on tcorr2 (cost=0.00..1.01 rows=1 width=8) Optimizer: Postgres query optimizer -(18 rows) +(19 rows) -- expect 1 row select * diff --git a/src/test/regress/expected/join_gp.out b/src/test/regress/expected/join_gp.out index 48871a385b0d..978173add6fb 100644 --- a/src/test/regress/expected/join_gp.out +++ b/src/test/regress/expected/join_gp.out @@ -5,6 +5,10 @@ -- start_matchignore -- m/ERROR: workfile compresssion is not supported by this build/ -- end_matchignore +-- start_matchsubs +-- m/ERROR: could not devise a query plan for the given query \(pathnode.c:\d+\)/ +-- s/ERROR: could not devise a query plan for the given query \(pathnode.c:\d+\)/ERROR: could not devise a query plan for the given query (pathnode.c:XX)/ +-- end_matchsubs -- -- test numeric hash join -- diff --git a/src/test/regress/expected/join_gp_optimizer.out b/src/test/regress/expected/join_gp_optimizer.out index e3f205c9dea2..f4487a31275a 100644 --- a/src/test/regress/expected/join_gp_optimizer.out +++ b/src/test/regress/expected/join_gp_optimizer.out @@ -5,6 +5,10 @@ -- start_matchignore -- m/ERROR: workfile compresssion is not supported by this build/ -- end_matchignore +-- start_matchsubs +-- m/ERROR: could not devise a query plan for the given query \(pathnode.c:\d+\)/ +-- s/ERROR: could not devise a query plan for the given query \(pathnode.c:\d+\)/ERROR: could not devise a query plan for the given query (pathnode.c:XX)/ +-- end_matchsubs -- -- test numeric hash join -- diff --git a/src/test/regress/sql/join_gp.sql b/src/test/regress/sql/join_gp.sql index bcb7cef1fd48..e625b17c802a 100644 --- a/src/test/regress/sql/join_gp.sql +++ b/src/test/regress/sql/join_gp.sql @@ -6,6 +6,10 @@ -- start_matchignore -- m/ERROR: workfile compresssion is not 
supported by this build/ -- end_matchignore +-- start_matchsubs +-- m/ERROR: could not devise a query plan for the given query \(pathnode.c:\d+\)/ +-- s/ERROR: could not devise a query plan for the given query \(pathnode.c:\d+\)/ERROR: could not devise a query plan for the given query (pathnode.c:XX)/ +-- end_matchsubs -- -- test numeric hash join