Suppress unnecessary explicit sorting for EPQ mergejoin path
authorRichard Guo <rguo@postgresql.org>
Thu, 8 May 2025 09:20:18 +0000 (18:20 +0900)
committerRichard Guo <rguo@postgresql.org>
Thu, 8 May 2025 09:20:18 +0000 (18:20 +0900)
When building a ForeignPath for a joinrel, if there's a possibility
that EvalPlanQual will be executed, we must identify a suitable path
for EPQ checks.  If the outer or inner path of the chosen path is a
ForeignPath representing a pushed-down join, we replace it with its
fdw_outerpath to ensure that the EPQ check path consists entirely of
local joins.

If the chosen path is a MergePath, and its outer or inner path is a
ForeignPath that is not already well enough ordered, the MergePath
will have non-NIL outersortkeys or innersortkeys indicating the
desired ordering to be created by an explicit Sort node.  If we then
replace the outer or inner path with its corresponding fdw_outerpath,
and that path is already sufficiently ordered, we end up in an
inconsistent state: the MergePath has non-NIL outersortkeys or
innersortkeys, and its input path is already properly ordered.  This
inconsistency can result in an Assert failure or the addition of a
redundant Sort node.

To fix, check if the new outer or inner path of a MergePath is already
properly sorted, and set its outersortkeys or innersortkeys to NIL if
so.

Bug: #18902
Reported-by: Nikita Kalinin <n.kalinin@postgrespro.ru>
Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: Tender Wang <tndrwang@gmail.com>
Discussion: https://postgr.es/m/18902-71c1bed2b9f7c46f@postgresql.org

contrib/postgres_fdw/expected/postgres_fdw.out
contrib/postgres_fdw/sql/postgres_fdw.sql
src/backend/foreign/foreign.c

index 24ff5f70cceabbc4aadad12b3dab7ca78baed9c3..2185b42bb4f79f976dc6d8ed4936dae3ce468c70 100644 (file)
@@ -2565,6 +2565,70 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
  96 |  6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6  | 6          | foo | 96 |  6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6  | 6          | foo |  6 |  7 | AAA006 |  6 |  7 | AAA006 |  6 |  6 | 0006
 (10 rows)
 
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM ft1, ft4, ft5, local_tbl WHERE ft1.c1 = ft4.c1 AND ft1.c1 = ft5.c1
+    AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft5.c1 < 100 FOR UPDATE;
+                                                                                                                                                                                                                                                                                                            QUERY PLAN                                                                                                                                                                                                                                                                                                            
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ LockRows
+   Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3, local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.*, ft4.*, ft5.*, local_tbl.ctid
+   ->  Merge Join
+         Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3, local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.*, ft4.*, ft5.*, local_tbl.ctid
+         Merge Cond: (local_tbl.c1 = ft1.c2)
+         ->  Index Scan using local_tbl_pkey on public.local_tbl
+               Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
+         ->  Sort
+               Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
+               Sort Key: ft1.c2
+               ->  Foreign Scan
+                     Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
+                     Relations: ((public.ft1) INNER JOIN (public.ft4)) INNER JOIN (public.ft5)
+                     Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2.c1, r2.c2, r2.c3, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r3.c1, r3.c2, r3.c3, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 3" r2 ON (((r1."C 1" = r2.c1)) AND ((r1."C 1" < 100)))) INNER JOIN "S 1"."T 4" r3 ON (((r1."C 1" = r3.c1)) AND ((r3.c1 < 100)))) FOR UPDATE OF r1 FOR UPDATE OF r2 FOR UPDATE OF r3
+                     ->  Merge Join
+                           Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
+                           Merge Cond: (ft1.c1 = ft5.c1)
+                           ->  Merge Join
+                                 Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*
+                                 Merge Cond: (ft1.c1 = ft4.c1)
+                                 ->  Sort
+                                       Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
+                                       Sort Key: ft1.c1
+                                       ->  Foreign Scan on public.ft1
+                                             Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
+                                 ->  Sort
+                                       Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
+                                       Sort Key: ft4.c1
+                                       ->  Foreign Scan on public.ft4
+                                             Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
+                                             Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3" FOR UPDATE
+                           ->  Sort
+                                 Output: ft5.c1, ft5.c2, ft5.c3, ft5.*
+                                 Sort Key: ft5.c1
+                                 ->  Foreign Scan on public.ft5
+                                       Output: ft5.c1, ft5.c2, ft5.c3, ft5.*
+                                       Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" WHERE ((c1 < 100)) FOR UPDATE
+(38 rows)
+
+SELECT * FROM ft1, ft4, ft5, local_tbl WHERE ft1.c1 = ft4.c1 AND ft1.c1 = ft5.c1
+    AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft5.c1 < 100 FOR UPDATE;
+ c1 | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  | c1 | c2 |   c3   | c1 | c2 |   c3   | c1 | c2 |  c3  
+----+----+-------+------------------------------+--------------------------+----+------------+-----+----+----+--------+----+----+--------+----+----+------
+ 12 |  2 | 00012 | Tue Jan 13 00:00:00 1970 PST | Tue Jan 13 00:00:00 1970 | 2  | 2          | foo | 12 | 13 | AAA012 | 12 | 13 | AAA012 |  2 |  2 | 0002
+ 42 |  2 | 00042 | Thu Feb 12 00:00:00 1970 PST | Thu Feb 12 00:00:00 1970 | 2  | 2          | foo | 42 | 43 | AAA042 | 42 | 43 | AAA042 |  2 |  2 | 0002
+ 72 |  2 | 00072 | Sat Mar 14 00:00:00 1970 PST | Sat Mar 14 00:00:00 1970 | 2  | 2          | foo | 72 | 73 | AAA072 | 72 | 73 |        |  2 |  2 | 0002
+ 24 |  4 | 00024 | Sun Jan 25 00:00:00 1970 PST | Sun Jan 25 00:00:00 1970 | 4  | 4          | foo | 24 | 25 | AAA024 | 24 | 25 | AAA024 |  4 |  4 | 0004
+ 54 |  4 | 00054 | Tue Feb 24 00:00:00 1970 PST | Tue Feb 24 00:00:00 1970 | 4  | 4          | foo | 54 | 55 | AAA054 | 54 | 55 |        |  4 |  4 | 0004
+ 84 |  4 | 00084 | Thu Mar 26 00:00:00 1970 PST | Thu Mar 26 00:00:00 1970 | 4  | 4          | foo | 84 | 85 | AAA084 | 84 | 85 | AAA084 |  4 |  4 | 0004
+ 96 |  6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6  | 6          | foo | 96 | 97 | AAA096 | 96 | 97 | AAA096 |  6 |  6 | 0006
+ 36 |  6 | 00036 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6  | 6          | foo | 36 | 37 | AAA036 | 36 | 37 |        |  6 |  6 | 0006
+ 66 |  6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6  | 6          | foo | 66 | 67 | AAA066 | 66 | 67 | AAA066 |  6 |  6 | 0006
+  6 |  6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6  | 6          | foo |  6 |  7 | AAA006 |  6 |  7 | AAA006 |  6 |  6 | 0006
+ 48 |  8 | 00048 | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8  | 8          | foo | 48 | 49 | AAA048 | 48 | 49 | AAA048 |  8 |  8 | 0008
+ 18 |  8 | 00018 | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8  | 8          | foo | 18 | 19 | AAA018 | 18 | 19 |        |  8 |  8 | 0008
+ 78 |  8 | 00078 | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8  | 8          | foo | 78 | 79 | AAA078 | 78 | 79 | AAA078 |  8 |  8 | 0008
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 -- test that add_paths_with_pathkeys_for_rel() arranges for the epq_path to
index 1f27260bafe1709aba24c64c7b08203818457fb9..e534b40de3c76e133eefbbc0b898d885310a5c22 100644 (file)
@@ -715,6 +715,11 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
     AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE;
 SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = ft4.c1
     AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE;
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM ft1, ft4, ft5, local_tbl WHERE ft1.c1 = ft4.c1 AND ft1.c1 = ft5.c1
+    AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft5.c1 < 100 FOR UPDATE;
+SELECT * FROM ft1, ft4, ft5, local_tbl WHERE ft1.c1 = ft4.c1 AND ft1.c1 = ft5.c1
+    AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft5.c1 < 100 FOR UPDATE;
 RESET enable_nestloop;
 RESET enable_hashjoin;
 
index f0835fc3070563695e1baefe71d3308effd4e860..9c57f7a259c3c42a0f53d4145f3adbe4bf622f01 100644 (file)
@@ -22,6 +22,7 @@
 #include "foreign/foreign.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "optimizer/paths.h"
 #include "tcop/tcopprot.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
@@ -808,7 +809,23 @@ GetExistingLocalJoinPath(RelOptInfo *joinrel)
 
            foreign_path = (ForeignPath *) joinpath->outerjoinpath;
            if (IS_JOIN_REL(foreign_path->path.parent))
+           {
                joinpath->outerjoinpath = foreign_path->fdw_outerpath;
+
+               if (joinpath->path.pathtype == T_MergeJoin)
+               {
+                   MergePath  *merge_path = (MergePath *) joinpath;
+
+                   /*
+                    * If the new outer path is already well enough ordered
+                    * for the mergejoin, we can skip doing an explicit sort.
+                    */
+                   if (merge_path->outersortkeys &&
+                       pathkeys_contained_in(merge_path->outersortkeys,
+                                             joinpath->outerjoinpath->pathkeys))
+                       merge_path->outersortkeys = NIL;
+               }
+           }
        }
 
        if (IsA(joinpath->innerjoinpath, ForeignPath))
@@ -817,7 +834,23 @@ GetExistingLocalJoinPath(RelOptInfo *joinrel)
 
            foreign_path = (ForeignPath *) joinpath->innerjoinpath;
            if (IS_JOIN_REL(foreign_path->path.parent))
+           {
                joinpath->innerjoinpath = foreign_path->fdw_outerpath;
+
+               if (joinpath->path.pathtype == T_MergeJoin)
+               {
+                   MergePath  *merge_path = (MergePath *) joinpath;
+
+                   /*
+                    * If the new inner path is already well enough ordered
+                    * for the mergejoin, we can skip doing an explicit sort.
+                    */
+                   if (merge_path->innersortkeys &&
+                       pathkeys_contained_in(merge_path->innersortkeys,
+                                             joinpath->innerjoinpath->pathkeys))
+                       merge_path->innersortkeys = NIL;
+               }
+           }
        }
 
        return (Path *) joinpath;