From c914d19fac97a19564cfc30c1fe336942f248e40 Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Mon, 11 Dec 2023 19:57:44 +0100 Subject: [PATCH] Remove the timescaledb_fdw foreign data wrapper This is the fdw implementation that was used for communication between multinode instances. --- cmake/ScriptFiles.cmake | 6 +- sql/pre_install/fdw_functions.sql | 14 - sql/pre_install/timescaledb_fdw.sql | 7 - sql/updates/latest-dev.sql | 4 + sql/updates/reverse-dev.sql | 6 + src/process_utility.c | 111 - src/ts_catalog/continuous_agg.c | 6 - test/runner.sh | 2 +- tsl/src/CMakeLists.txt | 1 - tsl/src/data_node.c | 1 - tsl/src/debug.c | 98 +- tsl/src/debug.h | 2 - tsl/src/fdw/CMakeLists.txt | 17 - tsl/src/fdw/README.md | 43 - tsl/src/fdw/data_node_chunk_assignment.c | 300 -- tsl/src/fdw/data_node_chunk_assignment.h | 69 - tsl/src/fdw/data_node_scan_exec.c | 175 -- tsl/src/fdw/data_node_scan_exec.h | 14 - tsl/src/fdw/data_node_scan_plan.c | 1880 ------------ tsl/src/fdw/data_node_scan_plan.h | 29 - tsl/src/fdw/deparse.c | 3437 ---------------------- tsl/src/fdw/deparse.h | 56 - tsl/src/fdw/estimate.c | 537 ---- tsl/src/fdw/estimate.h | 14 - tsl/src/fdw/fdw.c | 434 --- tsl/src/fdw/fdw.h | 17 - tsl/src/fdw/fdw_utils.c | 127 - tsl/src/fdw/fdw_utils.h | 22 - tsl/src/fdw/modify_exec.c | 712 ----- tsl/src/fdw/modify_exec.h | 33 - tsl/src/fdw/modify_plan.c | 233 -- tsl/src/fdw/modify_plan.h | 12 - tsl/src/fdw/option.c | 359 --- tsl/src/fdw/option.h | 13 - tsl/src/fdw/relinfo.c | 543 ---- tsl/src/fdw/relinfo.h | 167 -- tsl/src/fdw/scan_exec.c | 582 ---- tsl/src/fdw/scan_exec.h | 69 - tsl/src/fdw/scan_plan.c | 1074 ------- tsl/src/fdw/scan_plan.h | 53 - tsl/src/fdw/shippable.c | 229 -- tsl/src/fdw/shippable.h | 13 - tsl/src/hypertable.c | 1 - tsl/src/init.c | 5 - tsl/src/nodes/async_append.c | 13 +- tsl/src/planner.c | 36 - tsl/test/shared/expected/extension.out | 2 - 47 files changed, 17 insertions(+), 11561 deletions(-) delete mode 100644 sql/pre_install/fdw_functions.sql delete mode 100644 sql/pre_install/timescaledb_fdw.sql delete mode 100644 tsl/src/fdw/CMakeLists.txt delete mode 100644 tsl/src/fdw/README.md delete mode 100644 tsl/src/fdw/data_node_chunk_assignment.c delete mode 100644 tsl/src/fdw/data_node_chunk_assignment.h delete mode 100644 tsl/src/fdw/data_node_scan_exec.c delete mode 100644 tsl/src/fdw/data_node_scan_exec.h delete mode 100644 tsl/src/fdw/data_node_scan_plan.c delete mode 100644 tsl/src/fdw/data_node_scan_plan.h delete mode 100644 tsl/src/fdw/deparse.c delete mode 100644 tsl/src/fdw/deparse.h delete mode 100644 tsl/src/fdw/estimate.c delete mode 100644 tsl/src/fdw/estimate.h delete mode 100644 tsl/src/fdw/fdw.c delete mode 100644 tsl/src/fdw/fdw.h delete mode 100644 tsl/src/fdw/fdw_utils.c delete mode 100644 tsl/src/fdw/fdw_utils.h delete mode 100644 tsl/src/fdw/modify_exec.c delete mode 100644 tsl/src/fdw/modify_exec.h delete mode 100644 tsl/src/fdw/modify_plan.c delete mode 100644 tsl/src/fdw/modify_plan.h delete mode 100644 tsl/src/fdw/option.c delete mode 100644 tsl/src/fdw/option.h delete mode 100644 tsl/src/fdw/relinfo.c delete mode 100644 tsl/src/fdw/relinfo.h delete mode 100644 tsl/src/fdw/scan_exec.c delete mode 100644 tsl/src/fdw/scan_exec.h delete mode 100644 tsl/src/fdw/scan_plan.c delete mode 100644 tsl/src/fdw/scan_plan.h delete mode 100644 tsl/src/fdw/shippable.c delete mode 100644 tsl/src/fdw/shippable.h diff --git a/cmake/ScriptFiles.cmake b/cmake/ScriptFiles.cmake index 365f320ec77..4ee495776a2 100644 --- a/cmake/ScriptFiles.cmake +++ b/cmake/ScriptFiles.cmake @@ 
-16,14 +16,12 @@ set(PRE_INSTALL_SOURCE_FILES pre_install/types.post.sql # Must be before tables.sql pre_install/tables.sql pre_install/cache.sql - pre_install/insert_data.sql - pre_install/fdw_functions.sql - pre_install/timescaledb_fdw.sql) + pre_install/insert_data.sql) # Source files that define functions and need to be rerun in update set(PRE_INSTALL_FUNCTION_FILES pre_install/types.functions.sql - pre_install/fdw_functions.sql) + ) # The rest of the source files defining mostly functions set(SOURCE_FILES diff --git a/sql/pre_install/fdw_functions.sql b/sql/pre_install/fdw_functions.sql deleted file mode 100644 index 77353c82e58..00000000000 --- a/sql/pre_install/fdw_functions.sql +++ /dev/null @@ -1,14 +0,0 @@ --- This file and its contents are licensed under the Apache License 2.0. --- Please see the included NOTICE for copyright information and --- LICENSE-APACHE for a copy of the license. - -CREATE OR REPLACE FUNCTION @extschema@.timescaledb_fdw_handler() -RETURNS fdw_handler -AS '@MODULE_PATHNAME@', 'ts_timescaledb_fdw_handler' -LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION @extschema@.timescaledb_fdw_validator(text[], oid) -RETURNS void -AS '@MODULE_PATHNAME@', 'ts_timescaledb_fdw_validator' -LANGUAGE C STRICT; - diff --git a/sql/pre_install/timescaledb_fdw.sql b/sql/pre_install/timescaledb_fdw.sql deleted file mode 100644 index 932c04a57e8..00000000000 --- a/sql/pre_install/timescaledb_fdw.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file and its contents are licensed under the Apache License 2.0. --- Please see the included NOTICE for copyright information and --- LICENSE-APACHE for a copy of the license. - -CREATE FOREIGN DATA WRAPPER timescaledb_fdw - HANDLER @extschema@.timescaledb_fdw_handler - VALIDATOR @extschema@.timescaledb_fdw_validator; diff --git a/sql/updates/latest-dev.sql b/sql/updates/latest-dev.sql index 4ca258239b5..b33c946f0bb 100644 --- a/sql/updates/latest-dev.sql +++ b/sql/updates/latest-dev.sql @@ -74,3 +74,7 @@ ALTER EXTENSION timescaledb DROP TABLE _timescaledb_catalog.hypertable_compressi DROP VIEW IF EXISTS timescaledb_information.compression_settings; DROP TABLE _timescaledb_catalog.hypertable_compression; +DROP FOREIGN DATA WRAPPER IF EXISTS timescaledb_fdw; +DROP FUNCTION IF EXISTS @extschema@.timescaledb_fdw_handler(); +DROP FUNCTION IF EXISTS @extschema@.timescaledb_fdw_validator(text[], oid); + diff --git a/sql/updates/reverse-dev.sql b/sql/updates/reverse-dev.sql index 07e4a62431c..61ecf20d414 100644 --- a/sql/updates/reverse-dev.sql +++ b/sql/updates/reverse-dev.sql @@ -191,3 +191,9 @@ GRANT SELECT ON _timescaledb_catalog.hypertable_compression TO PUBLIC; DROP VIEW timescaledb_information.compression_settings; ALTER EXTENSION timescaledb DROP TABLE _timescaledb_catalog.compression_settings; DROP TABLE _timescaledb_catalog.compression_settings; + +CREATE FUNCTION @extschema@.timescaledb_fdw_handler() RETURNS fdw_handler AS '@MODULE_PATHNAME@', 'ts_timescaledb_fdw_handler' LANGUAGE C STRICT; +CREATE FUNCTION @extschema@.timescaledb_fdw_validator(text[], oid) RETURNS void AS '@MODULE_PATHNAME@', 'ts_timescaledb_fdw_validator' LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER timescaledb_fdw HANDLER @extschema@.timescaledb_fdw_handler VALIDATOR @extschema@.timescaledb_fdw_validator; + diff --git a/src/process_utility.c b/src/process_utility.c index 0cdff538cba..ceab172544c 100644 --- a/src/process_utility.c +++ b/src/process_utility.c @@ -5,7 +5,6 @@ */ #include #include -#include #include #include #include @@ -407,55 +406,6 @@ 
add_chunk_oid(Hypertable *ht, Oid chunk_relid, void *vargs) } } -static bool -block_on_foreign_server(const char *const server_name) -{ - const ForeignServer *server; - - Assert(server_name != NULL); - server = GetForeignServerByName(server_name, true); - if (NULL != server) - { - Oid ts_fdwid = get_foreign_data_wrapper_oid(EXTENSION_FDW_NAME, false); - if (server->fdwid == ts_fdwid) - return true; - } - return false; -} - -static DDLResult -process_create_foreign_server_start(ProcessUtilityArgs *args) -{ - CreateForeignServerStmt *stmt = (CreateForeignServerStmt *) args->parsetree; - - if (strcmp(EXTENSION_FDW_NAME, stmt->fdwname) == 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("operation not supported for a TimescaleDB data node"), - errhint("Use add_data_node() to add data nodes to a " - "distributed database."))); - - return DDL_CONTINUE; -} - -static void -process_drop_foreign_server_start(DropStmt *stmt) -{ - ListCell *lc; - - foreach (lc, stmt->objects) - { - const char *servername = strVal(lfirst(lc)); - - if (block_on_foreign_server(servername)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("operation not supported on a TimescaleDB data node"), - errhint("Use delete_data_node() to remove data nodes from a " - "distributed database."))); - } -} - static void process_drop_trigger_start(ProcessUtilityArgs *args, DropStmt *stmt) { @@ -492,55 +442,6 @@ process_drop_trigger_start(ProcessUtilityArgs *args, DropStmt *stmt) ts_cache_release(hcache); } -static DDLResult -process_create_foreign_table_start(ProcessUtilityArgs *args) -{ - CreateForeignTableStmt *stmt = (CreateForeignTableStmt *) args->parsetree; - - if (block_on_foreign_server(stmt->servername)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("operation not supported"), - errdetail( - "It is not possible to create stand-alone TimescaleDB foreign tables."))); - - return DDL_CONTINUE; -} - -static DDLResult -process_alter_foreign_server(ProcessUtilityArgs *args) -{ - AlterForeignServerStmt *stmt = (AlterForeignServerStmt *) args->parsetree; - ForeignServer *server = GetForeignServerByName(stmt->servername, true); - Oid fdwid = get_foreign_data_wrapper_oid(EXTENSION_FDW_NAME, false); - ListCell *lc; - - if (server != NULL && server->fdwid == fdwid) - { - if (stmt->has_version) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("version not supported"), - errdetail( - "It is not possible to set a version on the data node configuration."))); - - /* Options are validated by the FDW, but we need to block available option - * since that must be handled via alter_data_node(). 
*/ - foreach (lc, stmt->options) - { - DefElem *elem = lfirst(lc); - - if (strcmp(elem->defname, "available") == 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot set \"available\" using ALTER SERVER"), - errhint("Use alter_data_node() to set \"available\"."))); - } - } - - return DDL_CONTINUE; -} - static void process_altertableschema(ProcessUtilityArgs *args) { @@ -1795,9 +1696,6 @@ process_drop_start(ProcessUtilityArgs *args) case OBJECT_VIEW: process_drop_view_start(args, stmt); break; - case OBJECT_FOREIGN_SERVER: - process_drop_foreign_server_start(stmt); - break; case OBJECT_TRIGGER: process_drop_trigger_start(args, stmt); break; @@ -4217,15 +4115,6 @@ process_ddl_command_start(ProcessUtilityArgs *args) switch (nodeTag(args->parsetree)) { - case T_CreateForeignTableStmt: - handler = process_create_foreign_table_start; - break; - case T_AlterForeignServerStmt: - handler = process_alter_foreign_server; - break; - case T_CreateForeignServerStmt: - handler = process_create_foreign_server_start; - break; case T_AlterObjectSchemaStmt: handler = process_alterobjectschema; break; diff --git a/src/ts_catalog/continuous_agg.c b/src/ts_catalog/continuous_agg.c index 8d7a2768c00..11e0499110a 100644 --- a/src/ts_catalog/continuous_agg.c +++ b/src/ts_catalog/continuous_agg.c @@ -1151,15 +1151,9 @@ drop_continuous_agg(FormData_continuous_agg *cadata, bool drop_user_view) if (!raw_hypertable_has_other_caggs) { hypertable_invalidation_log_delete(form.raw_hypertable_id); - if (ts_cm_functions->remote_invalidation_log_delete) - ts_cm_functions->remote_invalidation_log_delete(form.raw_hypertable_id, - HypertableIsRawTable); } ts_materialization_invalidation_log_delete_inner(form.mat_hypertable_id); - if (ts_cm_functions->remote_invalidation_log_delete) - ts_cm_functions->remote_invalidation_log_delete(form.mat_hypertable_id, - HypertableIsMaterialization); if (!raw_hypertable_has_other_caggs) { diff --git a/test/runner.sh b/test/runner.sh index 5bec73a19d4..bfa91391670 100755 --- a/test/runner.sh +++ b/test/runner.sh @@ -98,7 +98,7 @@ cd ${EXE_DIR}/sql # create database and install timescaledb ${PSQL} "$@" -U $TEST_ROLE_SUPERUSER -d postgres -v ECHO=none -c "CREATE DATABASE \"${TEST_DBNAME}\";" -${PSQL} "$@" -U $TEST_ROLE_SUPERUSER -d ${TEST_DBNAME} -v ECHO=none -c "SET client_min_messages=error; CREATE EXTENSION timescaledb; GRANT USAGE ON FOREIGN DATA WRAPPER timescaledb_fdw TO ${TEST_ROLE_1};" +${PSQL} "$@" -U $TEST_ROLE_SUPERUSER -d ${TEST_DBNAME} -v ECHO=none -c "SET client_min_messages=error; CREATE EXTENSION timescaledb;" ${PSQL} "$@" -U $TEST_ROLE_SUPERUSER -d ${TEST_DBNAME} -v ECHO=none -v MODULE_PATHNAME="'timescaledb-${EXT_VERSION}'" -v TSL_MODULE_PATHNAME="'timescaledb-tsl-${EXT_VERSION}'" < ${TEST_SUPPORT_FILE} >/dev/null 2>&1 export TEST_DBNAME diff --git a/tsl/src/CMakeLists.txt b/tsl/src/CMakeLists.txt index c326ef1739f..19706852914 100644 --- a/tsl/src/CMakeLists.txt +++ b/tsl/src/CMakeLists.txt @@ -61,6 +61,5 @@ install(TARGETS ${TSL_LIBRARY_NAME} DESTINATION ${PG_PKGLIBDIR}) add_subdirectory(bgw_policy) add_subdirectory(compression) add_subdirectory(continuous_aggs) -add_subdirectory(fdw) add_subdirectory(nodes) add_subdirectory(remote) diff --git a/tsl/src/data_node.c b/tsl/src/data_node.c index 971b4a5f151..4367dd8c163 100644 --- a/tsl/src/data_node.c +++ b/tsl/src/data_node.c @@ -41,7 +41,6 @@ #include "extension.h" #include "cache.h" #include "chunk.h" -#include "fdw/fdw.h" #include "remote/async.h" #include "remote/connection.h" #include 
"remote/connection_cache.h" diff --git a/tsl/src/debug.c b/tsl/src/debug.c index 5bcd18e74d4..1bc53d6d58d 100644 --- a/tsl/src/debug.c +++ b/tsl/src/debug.c @@ -48,8 +48,6 @@ #include #include -#include "fdw/relinfo.h" -#include "fdw/fdw_utils.h" #include "debug.h" #include "utils.h" @@ -81,12 +79,6 @@ static const char *upperrel_stage_name[] = { }; /* clang-format on */ -static const char *fdw_rel_type_names[] = { - [TS_FDW_RELINFO_HYPERTABLE_DATA_NODE] = "DATA_NODE", - [TS_FDW_RELINFO_HYPERTABLE] = "HYPERTABLE", - [TS_FDW_RELINFO_FOREIGN_TABLE] = "FOREIGN_TABLE", -}; - static void append_var_expr(StringInfo buf, const Node *expr, const List *rtable) { @@ -288,11 +280,6 @@ ts_append_pathkeys(StringInfo buf, const List *pathkeys, const List *rtable) static const char * get_relation_name(PlannerInfo *root, RelOptInfo *rel) { - TsFdwRelInfo *fdw_info = fdw_relinfo_get(rel); - - if (NULL != fdw_info) - return fdw_info->relation_name->data; - if (rel->reloptkind == RELOPT_BASEREL) { RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); @@ -303,22 +290,6 @@ get_relation_name(PlannerInfo *root, RelOptInfo *rel) return "-"; } -/* - * Return a string name for the FDW type of a relation. - * - * For relations that are not an FDW relation we simply return "-". - */ -static const char * -get_fdw_relation_typename(RelOptInfo *rel) -{ - TsFdwRelInfo *fdw_info = fdw_relinfo_get(rel); - - if (NULL != fdw_info) - return fdw_rel_type_names[fdw_info->type]; - - return "-"; -} - static void tsl_debug_append_path(StringInfo buf, PlannerInfo *root, Path *path, int indent) { @@ -414,10 +385,7 @@ tsl_debug_append_path(StringInfo buf, PlannerInfo *root, Path *path, int indent) if (path->parent) { - appendStringInfo(buf, - " [rel type: %s, kind: %s", - get_fdw_relation_typename(path->parent), - reloptkind_name[path->parent->reloptkind]); + appendStringInfo(buf, " [rel kind: %s", reloptkind_name[path->parent->reloptkind]); appendStringInfoString(buf, ", parent's base rels: "); append_relids(buf, root, path->parent->relids); appendStringInfoChar(buf, ']'); @@ -480,69 +448,11 @@ tsl_debug_append_pathlist(StringInfo buf, PlannerInfo *root, List *pathlist, int ListCell *cell; foreach (cell, pathlist) { - Path *path = isconsidered ? ((ConsideredPath *) lfirst(cell))->path : lfirst(cell); + Path *path = lfirst(cell); tsl_debug_append_path(buf, root, path, indent); } } -/* - * Check whether a path is the origin of a considered path. - * - * It is not possible to do a simple memcmp() of paths here because a path - * could be a (semi-)shallow copy. Therefore we use the origin of the - * ConsideredPath object. - */ -static bool -path_is_origin(const Path *p1, const ConsideredPath *p2) -{ - return p2->origin == (uintptr_t) p1; -} - -/* - * Print paths that were pruned during planning. - * - * The pruned paths are those that have been considered but are not in the - * rel's pathlist. 
- */ -static void -tsl_debug_append_pruned_pathlist(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, int indent) -{ - TsFdwRelInfo *fdw_info = fdw_relinfo_get(rel); - ListCell *lc1; - - if (NULL == fdw_info || fdw_info->considered_paths == NIL) - return; - - foreach (lc1, rel->pathlist) - { - Path *p1 = (Path *) lfirst(lc1); - ListCell *lc2; - - foreach (lc2, fdw_info->considered_paths) - { - ConsideredPath *p2 = (ConsideredPath *) lfirst(lc2); - - if (path_is_origin(p1, p2)) - { - fdw_info->considered_paths = list_delete_cell(fdw_info->considered_paths, lc2); - fdw_utils_free_path(p2); - break; - } - } - } - - if (fdw_info->considered_paths == NIL) - return; - - appendStringInfoString(buf, "Pruned paths:\n"); - tsl_debug_append_pathlist(buf, root, fdw_info->considered_paths, indent, true); - - foreach (lc1, fdw_info->considered_paths) - fdw_utils_free_path(lfirst(lc1)); - - fdw_info->considered_paths = NIL; -} - void tsl_debug_log_rel_with_paths(PlannerInfo *root, RelOptInfo *rel, UpperRelationKind *upper_stage) { @@ -552,9 +462,8 @@ tsl_debug_log_rel_with_paths(PlannerInfo *root, RelOptInfo *rel, UpperRelationKi appendStringInfo(buf, "Upper rel stage %s:\n", upperrel_stage_name[*upper_stage]); appendStringInfo(buf, - "RELOPTINFO [rel name: %s, type: %s, kind: %s, base rel names: ", + "RELOPTINFO [rel name: %s, kind: %s, base rel names: ", get_relation_name(root, rel), - get_fdw_relation_typename(rel), reloptkind_name[rel->reloptkind]); append_relids(buf, root, rel->relids); appendStringInfoChar(buf, ']'); @@ -562,7 +471,6 @@ tsl_debug_log_rel_with_paths(PlannerInfo *root, RelOptInfo *rel, UpperRelationKi appendStringInfoString(buf, "Path list:\n"); tsl_debug_append_pathlist(buf, root, rel->pathlist, 1, false); - tsl_debug_append_pruned_pathlist(buf, root, rel, 1); if (rel->cheapest_parameterized_paths) { diff --git a/tsl/src/debug.h b/tsl/src/debug.h index 451a160aa53..bbb68318cb9 100644 --- a/tsl/src/debug.h +++ b/tsl/src/debug.h @@ -9,8 +9,6 @@ #include #include #include -#include "fdw/fdw_utils.h" -#include "fdw/relinfo.h" #ifdef TS_DEBUG extern void tsl_debug_log_rel_with_paths(PlannerInfo *root, RelOptInfo *rel, diff --git a/tsl/src/fdw/CMakeLists.txt b/tsl/src/fdw/CMakeLists.txt deleted file mode 100644 index 2e02af11084..00000000000 --- a/tsl/src/fdw/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/relinfo.c - ${CMAKE_CURRENT_SOURCE_DIR}/estimate.c - ${CMAKE_CURRENT_SOURCE_DIR}/fdw.c - ${CMAKE_CURRENT_SOURCE_DIR}/fdw_utils.c - ${CMAKE_CURRENT_SOURCE_DIR}/modify_plan.c - ${CMAKE_CURRENT_SOURCE_DIR}/modify_exec.c - ${CMAKE_CURRENT_SOURCE_DIR}/scan_plan.c - ${CMAKE_CURRENT_SOURCE_DIR}/scan_exec.c - ${CMAKE_CURRENT_SOURCE_DIR}/deparse.c - ${CMAKE_CURRENT_SOURCE_DIR}/shippable.c - ${CMAKE_CURRENT_SOURCE_DIR}/option.c - ${CMAKE_CURRENT_SOURCE_DIR}/data_node_chunk_assignment.c - ${CMAKE_CURRENT_SOURCE_DIR}/data_node_scan_plan.c - ${CMAKE_CURRENT_SOURCE_DIR}/data_node_scan_exec.c) - -target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES}) diff --git a/tsl/src/fdw/README.md b/tsl/src/fdw/README.md deleted file mode 100644 index d5eca96324d..00000000000 --- a/tsl/src/fdw/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Query planning and execution for distributed hypertables - -The code in this directory deals with the planning and execution of -queries and inserts on distributed hypertables. The code is based on -PostgreSQL's `postgres_fdw`-- the foreign data wrapper implementation -for querying tables on remote PostgreSQL servers. 
While we rely on the -same basic foreign data wrapper (FDW) API for interfacing with the -main PostgreSQL planner and executor, we don't consider us strictly -bound to this interface. Therefore, the `timescaledb_fdw` -implementation is not to be considered a regular stand-alone foreign -data wrapper in that you can't manually create foreign tables of that -type. Instead, the use of the FDW interface is out of necessity and is -a transparent part of distributed hypertables. - -The code is roughly split along planning and execution lines, and -various utilities: - -* `fdw.c`: Implements the foreign data wrapper interface (FDW). This - is just a thin layer that calls into other code. -* `modify_(plan|exec).c`: Planning and execution of inserts, updates, - deletes. Note, however, that inserts are mainly handled by - `data_node_dispatch.c`, which optimizes for batched inserts on - distributed hypertables. -* `scan_(plan|exec).c`: General planning and execution of remote - relation scans. -* `relinfo.c`: Information about a remote relation, which is used for - planning distributed queries/inserts. This can be considered an - extension of a standard `RelOptInfo` object. -* `estimate.c`: Code for estimating the cost of scanning distributed - hypertables and chunks. -* `option.c`: Parsing and validation of options on servers, tables, - extension levels that are related to distributed queries and - inserts. -* `deparse.c`: Code to generate remote SQL queries from query - plans. The generated SQL statements are sent to remote data node - servers. -* `shippable.c`: Determines whether expressions in queries are - shippable to the remote end. Certain functions are not safe to - execute on a remote data node or might not exist there. -* `data_node_scan_(plan|exec).c`: Code to turn per-chunk plans into - per-server plans for more efficient execution. -* `data_node_chunk_assignment.c`: Methods to assign/schedule chunks on - data node servers. diff --git a/tsl/src/fdw/data_node_chunk_assignment.c b/tsl/src/fdw/data_node_chunk_assignment.c deleted file mode 100644 index ac553337156..00000000000 --- a/tsl/src/fdw/data_node_chunk_assignment.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include -#include -#include -#include -#include - -#include "data_node_chunk_assignment.h" -#include "dimension.h" -#include "dimension_slice.h" -#include "dimension_vector.h" -#include "hypercube.h" -#include "chunk.h" -#include "ts_catalog/chunk_data_node.h" -#include "relinfo.h" -#include "planner.h" - -/* - * Find an existing data node chunk assignment or initialize a new one. - */ -static DataNodeChunkAssignment * -get_or_create_sca(DataNodeChunkAssignments *scas, Oid serverid, RelOptInfo *rel) -{ - DataNodeChunkAssignment *sca; - bool found; - - Assert(rel == NULL || rel->serverid == serverid); - - sca = hash_search(scas->assignments, &serverid, HASH_ENTER, &found); - - if (!found) - { - /* New entry */ - memset(sca, 0, sizeof(*sca)); - sca->node_server_oid = serverid; - } - - return sca; -} - -static const DimensionSlice * -get_slice_for_dimension(Chunk *chunk, int32 dimension_id) -{ - return ts_hypercube_get_slice_by_dimension_id(chunk->cube, dimension_id); -} - -/* - * Assign the given chunk relation to a data node. - * - * The chunk is assigned according to the strategy set in the - * DataNodeChunkAssignments state. 
- */ -DataNodeChunkAssignment * -data_node_chunk_assignment_assign_chunk(DataNodeChunkAssignments *scas, RelOptInfo *chunkrel) -{ - DataNodeChunkAssignment *sca = get_or_create_sca(scas, chunkrel->serverid, NULL); - TimescaleDBPrivate *chunk_private = ts_get_private_reloptinfo(chunkrel); - MemoryContext old; - - /* Should never assign the same chunk twice */ - Assert(!bms_is_member(chunkrel->relid, sca->chunk_relids)); - - /* If this is the first chunk we assign to this data node, increment the - * number of data nodes with one or more chunks on them */ - if (list_length(sca->chunks) == 0) - scas->num_nodes_with_chunks++; - - scas->total_num_chunks++; - - /* - * Use the cached ChunkDataNode data to find the relid of the chunk on the - * data node. - */ - Oid remote_chunk_relid = InvalidOid; - ListCell *lc; - foreach (lc, chunk_private->cached_chunk_struct->data_nodes) - { - ChunkDataNode *cdn = (ChunkDataNode *) lfirst(lc); - if (cdn->foreign_server_oid == chunkrel->serverid) - { - remote_chunk_relid = cdn->fd.node_chunk_id; - break; - } - } - Assert(OidIsValid(remote_chunk_relid)); - - /* - * Fill the data node chunk assignment struct. - */ - old = MemoryContextSwitchTo(scas->mctx); - sca->chunk_relids = bms_add_member(sca->chunk_relids, chunkrel->relid); - sca->chunks = lappend(sca->chunks, chunk_private->cached_chunk_struct); - sca->remote_chunk_ids = lappend_int(sca->remote_chunk_ids, remote_chunk_relid); - sca->pages += chunkrel->pages; - sca->rows += chunkrel->rows; - sca->tuples += chunkrel->tuples; - MemoryContextSwitchTo(old); - - return sca; -} - -/* - * Initialize a new chunk assignment state with a specific assignment strategy. - */ -void -data_node_chunk_assignments_init(DataNodeChunkAssignments *scas, - DataNodeChunkAssignmentStrategy strategy, PlannerInfo *root, - unsigned int nrels_hint) -{ - HASHCTL hctl = { - .keysize = sizeof(Oid), - .entrysize = sizeof(DataNodeChunkAssignment), - .hcxt = CurrentMemoryContext, - }; - - scas->strategy = strategy; - scas->root = root; - scas->mctx = hctl.hcxt; - scas->total_num_chunks = 0; - scas->num_nodes_with_chunks = 0; - scas->assignments = hash_create("data node chunk assignments", - nrels_hint, - &hctl, - HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); -} - -/* - * Assign chunks to data nodes. - * - * Each chunk in the chunkrels array is a assigned a data node using the strategy - * set in the DataNodeChunkAssignments state. - */ -DataNodeChunkAssignments * -data_node_chunk_assignment_assign_chunks(DataNodeChunkAssignments *scas, RelOptInfo **chunkrels, - unsigned int nrels) -{ - unsigned int i; - - Assert(scas->assignments != NULL && scas->root != NULL); - - for (i = 0; i < nrels; i++) - { - RelOptInfo *chunkrel = chunkrels[i]; - - Assert(IS_SIMPLE_REL(chunkrel) && chunkrel->fdw_private != NULL); - data_node_chunk_assignment_assign_chunk(scas, chunkrel); - } - - return scas; -} - -/* - * Get the data node assignment for the given relation (chunk). - */ -DataNodeChunkAssignment * -data_node_chunk_assignment_get_or_create(DataNodeChunkAssignments *scas, RelOptInfo *rel) -{ - return get_or_create_sca(scas, rel->serverid, rel); -} - -/* - * Check if a dimension slice overlaps with other slices. - * - * This is a naive implementation that runs in linear time. A more efficient - * approach would be to use, e.g., an interval tree. 
- */ -static bool -dimension_slice_overlaps_with_others(const DimensionSlice *slice, const List *other_slices) -{ - ListCell *lc; - - foreach (lc, other_slices) - { - const DimensionSlice *other_slice = lfirst(lc); - - if (ts_dimension_slices_collide(slice, other_slice)) - return true; - } - - return false; -} - -/* - * DataNodeSlice: a hash table entry to track the data node a chunk slice is placed - * on. - */ -typedef struct DataNodeSlice -{ - int32 sliceid; - Oid node_serverid; -} DataNodeSlice; - -/* - * Check whether chunks are assigned in an overlapping way. - * - * Assignments are overlapping if any data node has a chunk that overlaps (in the - * given paritioning dimension) with a chunk on another data node. There are two - * cases when this can happen: - * - * 1. The same slice exists on multiple data nodes (we optimize for detecting - * this). - * - * 2. Two different slices overlap while existing on different data nodes (this - * case is more costly to detect). - */ -bool -data_node_chunk_assignments_are_overlapping(DataNodeChunkAssignments *scas, - int32 partitioning_dimension_id) -{ - HASH_SEQ_STATUS status; - HASHCTL hashctl = { - .keysize = sizeof(int32), - .entrysize = sizeof(DataNodeSlice), - .hcxt = CurrentMemoryContext, - }; - HTAB *all_data_node_slice_htab; - DataNodeChunkAssignment *sca; - List *all_data_node_slices = NIL; - - /* No overlapping can occur if there are chunks on only one data node (this - * covers also the case of a single chunk) */ - if (scas->num_nodes_with_chunks <= 1) - return false; - - /* If there are multiple data nodes with chunks and they are not placed along - * a closed "space" dimension, we assume overlapping */ - if (partitioning_dimension_id <= 0) - return true; - - /* Use a hash table to track slice data node mappings by slice ID. The same - * slice can exist on multiple data nodes, causing an overlap across data nodes - * in the slice dimension. This hash table is used to quickly detect such - * "same-slice overlaps" and avoids having to do a more expensive range - * overlap check. - */ - all_data_node_slice_htab = hash_create("all_data_node_slices", - scas->total_num_chunks, - &hashctl, - HASH_BLOBS | HASH_CONTEXT | HASH_ELEM); - - hash_seq_init(&status, scas->assignments); - - while ((sca = hash_seq_search(&status))) - { - List *data_node_slices = NIL; - ListCell *lc; - - /* Check each slice on the data node against the slices on other - * data nodes */ - foreach (lc, sca->chunks) - { - Chunk *chunk = (Chunk *) lfirst(lc); - const DimensionSlice *slice; - DataNodeSlice *ss; - bool found; - - slice = get_slice_for_dimension(chunk, partitioning_dimension_id); - - Assert(NULL != slice); - - /* Get or create a new entry in the global slice set */ - ss = hash_search(all_data_node_slice_htab, &slice->fd.id, HASH_ENTER, &found); - - if (!found) - { - ss->sliceid = slice->fd.id; - ss->node_serverid = sca->node_server_oid; - data_node_slices = lappend(data_node_slices, ts_dimension_slice_copy(slice)); - } - - /* First detect "same-slice overlap", and then do a more expensive - * range overlap check */ - if (ss->node_serverid != sca->node_server_oid || - /* Check if the slice overlaps with the accumulated slices of - * other data nodes. This can be made more efficient by using an - * interval tree. 
*/ - dimension_slice_overlaps_with_others(slice, all_data_node_slices)) - { - /* The same slice exists on (at least) two data nodes, or it - * overlaps with a different slice on another data node */ - hash_seq_term(&status); - hash_destroy(all_data_node_slice_htab); - return true; - } - } - - /* Add the data node's slice set to the set of all data nodes checked so - * far */ - all_data_node_slices = list_concat(all_data_node_slices, data_node_slices); - } - - hash_destroy(all_data_node_slice_htab); - - return false; -} diff --git a/tsl/src/fdw/data_node_chunk_assignment.h b/tsl/src/fdw/data_node_chunk_assignment.h deleted file mode 100644 index 20392529b53..00000000000 --- a/tsl/src/fdw/data_node_chunk_assignment.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include -#include -#include - -/* - * data node-chunk assignments map chunks to the data nodes that will be responsible - * for handling those chunks. For replicated chunks several such strategies - * are possible. For example, the system can aim to use as many data nodes as - * possible to increase parallelism or as few as possible to decrease coordination - * overhead. - */ - -typedef struct DataNodeChunkAssignment -{ - Oid node_server_oid; - BlockNumber pages; - double rows; - double tuples; - Cost startup_cost; - Cost total_cost; - Relids chunk_relids; - List *chunks; - List *remote_chunk_ids; -} DataNodeChunkAssignment; - -/* - * Only "attached data node" strategy is supported at this time. This strategy - * picks the data node that is associated with a chunk's foreign table - */ -typedef enum DataNodeChunkAssignmentStrategy -{ - SCA_STRATEGY_ATTACHED_DATA_NODE, -} DataNodeChunkAssignmentStrategy; - -typedef struct DataNodeChunkAssignments -{ - DataNodeChunkAssignmentStrategy strategy; - PlannerInfo *root; - HTAB *assignments; - unsigned long total_num_chunks; - unsigned long num_nodes_with_chunks; - MemoryContext mctx; -} DataNodeChunkAssignments; - -extern DataNodeChunkAssignment * -data_node_chunk_assignment_assign_chunk(DataNodeChunkAssignments *scas, RelOptInfo *chunkrel); - -extern DataNodeChunkAssignments * -data_node_chunk_assignment_assign_chunks(DataNodeChunkAssignments *scas, RelOptInfo **chunkrels, - unsigned int nrels); - -extern DataNodeChunkAssignment * -data_node_chunk_assignment_get_or_create(DataNodeChunkAssignments *scas, RelOptInfo *rel); - -extern void data_node_chunk_assignments_init(DataNodeChunkAssignments *scas, - DataNodeChunkAssignmentStrategy strategy, - PlannerInfo *root, unsigned int nrels_hint); - -extern bool data_node_chunk_assignments_are_overlapping(DataNodeChunkAssignments *scas, - int32 partitioning_dimension_id); diff --git a/tsl/src/fdw/data_node_scan_exec.c b/tsl/src/fdw/data_node_scan_exec.c deleted file mode 100644 index 3a596030d80..00000000000 --- a/tsl/src/fdw/data_node_scan_exec.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#include -#include -#include -#include -#include -#include -#include - -#include "scan_plan.h" -#include "scan_exec.h" -#include "data_node_scan_plan.h" -#include "data_node_scan_exec.h" -#include "nodes/async_append.h" -#include "remote/data_fetcher.h" -#include "guc.h" - -/* - * The execution stage of a DataNodeScan. - * - * This implements the execution stage CustomScan interface for a DataNodeScan - * plan. This is heavily based on the ForeignScan implementation, but allow - * scans of remote relations that doesn't have a corresponding local foreign - * table, which is the case for a data node relation. - */ - -typedef struct DataNodeScanState -{ - AsyncScanState async_state; - TsFdwScanState fsstate; - ExprState *recheck_quals; - bool systemcol; -} DataNodeScanState; - -static void -data_node_scan_begin(CustomScanState *node, EState *estate, int eflags) -{ - DataNodeScanState *sss = (DataNodeScanState *) node; - CustomScan *cscan = (CustomScan *) node->ss.ps.plan; - List *fdw_exprs = linitial(cscan->custom_exprs); - List *recheck_quals = lsecond(cscan->custom_exprs); - List *fdw_private = list_nth(cscan->custom_private, DataNodeScanFdwPrivate); - sss->fsstate.planned_fetcher_type = - intVal(list_nth(cscan->custom_private, DataNodeScanFetcherType)); - Assert(sss->fsstate.planned_fetcher_type != AutoFetcherType); - - if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) && !ts_guc_enable_remote_explain) - return; - - fdw_scan_init(&node->ss, &sss->fsstate, cscan->custom_relids, fdw_private, fdw_exprs, eflags); - - sss->recheck_quals = ExecInitQual(recheck_quals, (PlanState *) node); -} - -static TupleTableSlot * -data_node_scan_next(CustomScanState *node) -{ - DataNodeScanState *sss = (DataNodeScanState *) node; - ExprContext *econtext = node->ss.ps.ps_ExprContext; - MemoryContext oldcontext; - TupleTableSlot *slot; - - /* Call the Iterate function in short-lived context */ - oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); - slot = fdw_scan_iterate(&node->ss, &sss->fsstate); - MemoryContextSwitchTo(oldcontext); - - return slot; -} - -/* - * Access method routine to recheck a tuple in EvalPlanQual - */ -static bool -data_node_scan_recheck(CustomScanState *node, TupleTableSlot *slot) -{ - DataNodeScanState *sss = (DataNodeScanState *) node; - ExprContext *econtext; - - /* - * extract necessary information from the custom scan node - */ - econtext = node->ss.ps.ps_ExprContext; - - /* Does the tuple meet the remote qual condition? 
*/ - econtext->ecxt_scantuple = slot; - - ResetExprContext(econtext); - - return ExecQual(sss->recheck_quals, econtext); -} - -static TupleTableSlot * -data_node_scan_exec(CustomScanState *node) -{ - return ExecScan(&node->ss, - (ExecScanAccessMtd) data_node_scan_next, - (ExecScanRecheckMtd) data_node_scan_recheck); -} - -static void -data_node_scan_rescan(CustomScanState *node) -{ - fdw_scan_rescan(&node->ss, &((DataNodeScanState *) node)->fsstate); -} - -static void -data_node_scan_end(CustomScanState *node) -{ - fdw_scan_end(&((DataNodeScanState *) node)->fsstate); -} - -static void -data_node_scan_explain(CustomScanState *node, List *ancestors, ExplainState *es) -{ - CustomScan *scan = (CustomScan *) node->ss.ps.plan; - List *fdw_private = list_nth(scan->custom_private, DataNodeScanFdwPrivate); - - fdw_scan_explain(&node->ss, fdw_private, es, &((DataNodeScanState *) node)->fsstate); -} - -static CustomExecMethods data_node_scan_state_methods = { - .CustomName = "DataNodeScanState", - .BeginCustomScan = data_node_scan_begin, - .EndCustomScan = data_node_scan_end, - .ExecCustomScan = data_node_scan_exec, - .ReScanCustomScan = data_node_scan_rescan, - .ExplainCustomScan = data_node_scan_explain, -}; - -static void -create_fetcher(AsyncScanState *ass) -{ - DataNodeScanState *dnss = (DataNodeScanState *) ass; - create_data_fetcher(&dnss->async_state.css.ss, &dnss->fsstate); -} - -static void -send_fetch_request(AsyncScanState *ass) -{ - DataNodeScanState *dnss = (DataNodeScanState *) ass; - DataFetcher *fetcher = dnss->fsstate.fetcher; - - fetcher->funcs->send_fetch_request(fetcher); -} - -static void -fetch_data(AsyncScanState *ass) -{ - DataNodeScanState *dnss = (DataNodeScanState *) ass; - DataFetcher *fetcher = dnss->fsstate.fetcher; - - fetcher->funcs->fetch_data(fetcher); -} - -Node * -data_node_scan_state_create(CustomScan *cscan) -{ - DataNodeScanState *dnss = - (DataNodeScanState *) newNode(sizeof(DataNodeScanState), T_CustomScanState); - - dnss->async_state.css.methods = &data_node_scan_state_methods; - dnss->systemcol = linitial_int(list_nth(cscan->custom_private, DataNodeScanSystemcol)); - dnss->async_state.init = create_fetcher; - dnss->async_state.send_fetch_request = send_fetch_request; - dnss->async_state.fetch_data = fetch_data; - dnss->fsstate.planned_fetcher_type = - intVal(list_nth(cscan->custom_private, DataNodeScanFetcherType)); - Assert(dnss->fsstate.planned_fetcher_type != AutoFetcherType); - return (Node *) dnss; -} diff --git a/tsl/src/fdw/data_node_scan_exec.h b/tsl/src/fdw/data_node_scan_exec.h deleted file mode 100644 index 0d6dc799c6d..00000000000 --- a/tsl/src/fdw/data_node_scan_exec.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include - -#include "fdw/scan_exec.h" -#include "remote/async.h" - -extern Node *data_node_scan_state_create(CustomScan *cscan); diff --git a/tsl/src/fdw/data_node_scan_plan.c b/tsl/src/fdw/data_node_scan_plan.c deleted file mode 100644 index 409aedeb819..00000000000 --- a/tsl/src/fdw/data_node_scan_plan.c +++ /dev/null @@ -1,1880 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "data_node_scan_plan.h" - -#include "data_node_chunk_assignment.h" -#include "data_node_scan_exec.h" -#include "deparse.h" -#include "fdw_utils.h" -#include "relinfo.h" -#include "scan_plan.h" -#include "estimate.h" -#include "planner/planner.h" -#include "chunk.h" -#include "debug_assert.h" - -/* - * DataNodeScan is a custom scan implementation for scanning hypertables on - * remote data nodes instead of scanning individual remote chunks. - * - * A DataNodeScan plan is created by taking a regular per-chunk scan plan and - * then assigning each chunk to a data node, and treating each data node as a - * "partition" of the distributed hypertable. For each resulting data node, we - * create a data node rel which is essentially a base rel representing a remote - * hypertable partition. Since we treat a data node rel as a base rel, although - * it has no corresponding data node table, we point each data node rel to the root - * hypertable. This is conceptually the right thing to do, since each data node - * rel is a partition of the same distributed hypertable. - * - * For each data node rel, we plan a DataNodeScan instead of a ForeignScan since a - * data node rel does not correspond to a real foreign table. A ForeignScan of a - * data node rel would fail when trying to lookup the ForeignServer via the - * data node rel's RTE relid. The only other option to get around the - * ForeignTable lookup is to make a data node rel an upper rel instead of a base - * rel (see nodeForeignscan.c). However, that leads to other issues in - * setrefs.c that messes up our target lists for some queries. - */ - -static Path *data_node_scan_path_create(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, - double rows, Cost startup_cost, Cost total_cost, - List *pathkeys, Relids required_outer, Path *fdw_outerpath, - List *private); - -static Path *data_node_join_path_create(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, - double rows, Cost startup_cost, Cost total_cost, - List *pathkeys, Relids required_outer, Path *fdw_outerpath, - List *private); - -static Path *data_node_scan_upper_path_create(PlannerInfo *root, RelOptInfo *rel, - PathTarget *target, double rows, Cost startup_cost, - Cost total_cost, List *pathkeys, Path *fdw_outerpath, - List *private); - -static bool fdw_pushdown_foreign_join(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, - RelOptInfo *outerrel, RelOptInfo *innerrel, - JoinPathExtraData *extra); - -static AppendRelInfo * -create_append_rel_info(PlannerInfo *root, Index childrelid, Index parentrelid) -{ - RangeTblEntry *parent_rte = planner_rt_fetch(parentrelid, root); - Relation relation = table_open(parent_rte->relid, NoLock); - AppendRelInfo *appinfo; - - appinfo = makeNode(AppendRelInfo); - appinfo->parent_relid = parentrelid; - appinfo->child_relid = childrelid; - appinfo->parent_reltype = relation->rd_rel->reltype; - appinfo->child_reltype = relation->rd_rel->reltype; - ts_make_inh_translation_list(relation, relation, childrelid, &appinfo->translated_vars); - appinfo->parent_reloid = parent_rte->relid; - table_close(relation, NoLock); - - return appinfo; -} - -/* - * Build a new RelOptInfo representing a data node. 
- * - * Note that the relid index should point to the corresponding range table - * entry (RTE) we created for the data node rel when expanding the - * hypertable. Each such RTE's relid (OID) refers to the hypertable's root - * table. This has the upside that the planner can use the hypertable's - * indexes to plan remote queries more efficiently. In contrast, chunks are - * foreign tables and they cannot have indexes. - */ -static RelOptInfo * -build_data_node_rel(PlannerInfo *root, Index relid, Oid serverid, RelOptInfo *parent) -{ - RelOptInfo *rel = build_simple_rel(root, relid, parent); - - /* - * Use relevant exprs and restrictinfos from the parent rel. These will be - * adjusted to match the data node rel's relid later. - */ - rel->reltarget->exprs = copyObject(parent->reltarget->exprs); - rel->baserestrictinfo = parent->baserestrictinfo; - rel->baserestrictcost = parent->baserestrictcost; - rel->baserestrict_min_security = parent->baserestrict_min_security; - rel->lateral_vars = parent->lateral_vars; - rel->lateral_referencers = parent->lateral_referencers; - rel->lateral_relids = parent->lateral_relids; - rel->serverid = serverid; - - /* - * We need to use the FDW interface to get called by the planner for - * partial aggs. For some reason, the standard upper_paths_hook is never - * called for upper rels of type UPPERREL_PARTIAL_GROUP_AGG, which is odd - * (see end of PostgreSQL planner.c:create_partial_grouping_paths). Until - * this gets fixed in the PostgreSQL planner, we're forced to set - * fdwroutine here although we will scan this rel with a DataNodeScan and - * not a ForeignScan. - */ - rel->fdwroutine = GetFdwRoutineByServerId(serverid); - - return rel; -} - -/* - * Adjust the attributes of data node rel quals. - * - * Code adapted from allpaths.c: set_append_rel_size. - * - * For each data node child rel, copy the quals/restrictions from the parent - * (hypertable) rel and adjust the attributes (e.g., Vars) to point to the - * child rel instead of the parent. - * - * Normally, this happens as part of estimating the rel size of an append - * relation in standard planning, where constraint exclusion and partition - * pruning also happens for each child. Here, however, we don't prune any - * data node rels since they are created based on assignment of already pruned - * chunk child rels at an earlier stage. Data node rels that aren't assigned any - * chunks will never be created in the first place. 
- */ -static void -adjust_data_node_rel_attrs(PlannerInfo *root, RelOptInfo *data_node_rel, RelOptInfo *hyper_rel, - AppendRelInfo *appinfo) -{ - List *nodequals = NIL; - ListCell *lc; - - foreach (lc, hyper_rel->baserestrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - Node *nodequal; - ListCell *lc2; - - nodequal = adjust_appendrel_attrs(root, (Node *) rinfo->clause, 1, &appinfo); - - nodequal = eval_const_expressions(root, nodequal); - - /* might have gotten an AND clause, if so flatten it */ - foreach (lc2, make_ands_implicit((Expr *) nodequal)) - { - Node *onecq = (Node *) lfirst(lc2); - bool pseudoconstant; - - /* check for pseudoconstant (no Vars or volatile functions) */ - pseudoconstant = !contain_vars_of_level(onecq, 0) && !contain_volatile_functions(onecq); - if (pseudoconstant) - { - /* tell createplan.c to check for gating quals */ - root->hasPseudoConstantQuals = true; - } - /* reconstitute RestrictInfo with appropriate properties */ - bool hasClone = false; - bool isClone = false; -#if PG16_LT - /* both has_clone and is_clone are unused in versions less than PG16 */ - (void) hasClone; - (void) isClone; -#else - hasClone = rinfo->has_clone; - isClone = rinfo->is_clone; -#endif - nodequals = lappend(nodequals, - make_restrictinfo_compat(root, - (Expr *) onecq, - rinfo->is_pushed_down, - hasClone, - isClone, - rinfo->outerjoin_delayed, - pseudoconstant, - rinfo->security_level, - NULL, - NULL, - NULL, - NULL)); - } - } - - data_node_rel->baserestrictinfo = nodequals; - data_node_rel->joininfo = - castNode(List, adjust_appendrel_attrs(root, (Node *) hyper_rel->joininfo, 1, &appinfo)); - - data_node_rel->reltarget->exprs = - castNode(List, - adjust_appendrel_attrs(root, (Node *) hyper_rel->reltarget->exprs, 1, &appinfo)); - - /* Add equivalence class for rel to push down joins and sorts */ - if (hyper_rel->has_eclass_joins || has_useful_pathkeys(root, hyper_rel)) - add_child_rel_equivalences(root, appinfo, hyper_rel, data_node_rel); - - data_node_rel->has_eclass_joins = hyper_rel->has_eclass_joins; -} - -/* - * Build RelOptInfos for each data node. - * - * Each data node rel will point to the root hypertable table, which is - * conceptually correct since we query the identical (partial) hypertables on - * the data nodes. - */ -static RelOptInfo ** -build_data_node_part_rels(PlannerInfo *root, RelOptInfo *hyper_rel, int *nparts) -{ - TimescaleDBPrivate *priv = hyper_rel->fdw_private; - /* Update the partitioning to reflect the new per-data node plan */ - RelOptInfo **part_rels = palloc(sizeof(RelOptInfo *) * list_length(priv->serverids)); - ListCell *lc; - int n = 0; - int i; - - Assert(list_length(priv->serverids) == bms_num_members(priv->server_relids)); - i = -1; - - foreach (lc, priv->serverids) - { - Oid data_node_id = lfirst_oid(lc); - RelOptInfo *data_node_rel; - AppendRelInfo *appinfo; - - i = bms_next_member(priv->server_relids, i); - - Assert(i > 0); - - /* - * The planner expects an AppendRelInfo for any part_rels. Needs to be - * added prior to creating the rel because build_simple_rel will - * invoke our planner hooks that classify relations using this - * information. 
- */ - appinfo = create_append_rel_info(root, i, hyper_rel->relid); - root->append_rel_array[i] = appinfo; - data_node_rel = build_data_node_rel(root, i, data_node_id, hyper_rel); - part_rels[n++] = data_node_rel; - adjust_data_node_rel_attrs(root, data_node_rel, hyper_rel, appinfo); - } - - if (nparts != NULL) - *nparts = n; - - return part_rels; -} - -/* Callback argument for ts_ec_member_matches_foreign */ -typedef struct -{ - Expr *current; /* current expr, or NULL if not yet found */ - List *already_used; /* expressions already dealt with */ -} ts_ec_member_foreign_arg; - -/* - * Detect whether we want to process an EquivalenceClass member. - * - * This is a callback for use by generate_implied_equalities_for_column. - */ -static bool -ts_ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, EquivalenceClass *ec, - EquivalenceMember *em, void *arg) -{ - ts_ec_member_foreign_arg *state = (ts_ec_member_foreign_arg *) arg; - Expr *expr = em->em_expr; - - /* - * If we've identified what we're processing in the current scan, we only - * want to match that expression. - */ - if (state->current != NULL) - return equal(expr, state->current); - - /* - * Otherwise, ignore anything we've already processed. - */ - if (list_member(state->already_used, expr)) - return false; - - /* This is the new target to process. */ - state->current = expr; - return true; -} - -/* - * Build parameterizations that are useful for performing joins with the given - * hypertable relation. We will use them to generate the parameterized data node - * scan paths. The code is mostly copied from postgres_fdw, - * postgresGetForeignPaths(). - */ -static List * -build_parameterizations(PlannerInfo *root, RelOptInfo *hyper_rel) -{ - /* - * Thumb through all join clauses for the rel to identify which outer - * relations could supply one or more safe-to-send-to-remote join clauses. - * We'll build a parameterized path for each such outer relation. - * - * Note that in case we have multiple local tables, this outer relation - * here may be the result of joining the local tables together. For an - * example, see the multiple join in the dist_param test. - * - * It's convenient to represent each candidate outer relation by the - * ParamPathInfo node for it. We can then use the ppi_clauses list in the - * ParamPathInfo node directly as a list of the interesting join clauses for - * that rel. This takes care of the possibility that there are multiple - * safe join clauses for such a rel, and also ensures that we account for - * unsafe join clauses that we'll still have to enforce locally (since the - * parameterized-path machinery insists that we handle all movable clauses). - */ - List *ppi_list = NIL; - ListCell *lc; - foreach (lc, hyper_rel->joininfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - Relids required_outer; - ParamPathInfo *param_info; - - /* Check if clause can be moved to this rel */ - if (!join_clause_is_movable_to(rinfo, hyper_rel)) - { - continue; - } - - /* See if it is safe to send to remote */ - if (!ts_is_foreign_expr(root, hyper_rel, rinfo->clause)) - { - continue; - } - - /* Calculate required outer rels for the resulting path */ - required_outer = bms_union(rinfo->clause_relids, hyper_rel->lateral_relids); - /* We do not want the data node rel itself listed in required_outer */ - required_outer = bms_del_member(required_outer, hyper_rel->relid); - - /* - * required_outer probably can't be empty here, but if it were, we - * couldn't make a parameterized path. 
- */ - if (bms_is_empty(required_outer)) - { - continue; - } - - /* Get the ParamPathInfo */ - param_info = get_baserel_parampathinfo(root, hyper_rel, required_outer); - Assert(param_info != NULL); - - /* - * Add it to list unless we already have it. Testing pointer equality - * is OK since get_baserel_parampathinfo won't make duplicates. - */ - ppi_list = list_append_unique_ptr(ppi_list, param_info); - } - - /* - * The above scan examined only "generic" join clauses, not those that - * were absorbed into EquivalenceClauses. See if we can make anything out - * of EquivalenceClauses. - */ - if (hyper_rel->has_eclass_joins) - { - /* - * We repeatedly scan the eclass list looking for column references - * (or expressions) belonging to the data node rel. Each time we find - * one, we generate a list of equivalence joinclauses for it, and then - * see if any are safe to send to the remote. Repeat till there are - * no more candidate EC members. - */ - ts_ec_member_foreign_arg arg; - - arg.already_used = NIL; - for (;;) - { - List *clauses; - - /* Make clauses, skipping any that join to lateral_referencers */ - arg.current = NULL; - clauses = generate_implied_equalities_for_column(root, - hyper_rel, - ts_ec_member_matches_foreign, - (void *) &arg, - hyper_rel->lateral_referencers); - - /* Done if there are no more expressions in the data node rel */ - if (arg.current == NULL) - { - Assert(clauses == NIL); - break; - } - - /* Scan the extracted join clauses */ - foreach (lc, clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - Relids required_outer; - ParamPathInfo *param_info; - - /* Check if clause can be moved to this rel */ - if (!join_clause_is_movable_to(rinfo, hyper_rel)) - { - continue; - } - - /* See if it is safe to send to remote */ - if (!ts_is_foreign_expr(root, hyper_rel, rinfo->clause)) - { - continue; - } - - /* Calculate required outer rels for the resulting path */ - required_outer = bms_union(rinfo->clause_relids, hyper_rel->lateral_relids); - required_outer = bms_del_member(required_outer, hyper_rel->relid); - if (bms_is_empty(required_outer)) - { - continue; - } - - /* Get the ParamPathInfo */ - param_info = get_baserel_parampathinfo(root, hyper_rel, required_outer); - Assert(param_info != NULL); - - /* Add it to list unless we already have it */ - ppi_list = list_append_unique_ptr(ppi_list, param_info); - } - - /* Try again, now ignoring the expression we found this time */ - arg.already_used = lappend(arg.already_used, arg.current); - } - } - - return ppi_list; -} - -static void -add_data_node_scan_paths(PlannerInfo *root, RelOptInfo *data_node_rel, RelOptInfo *hyper_rel, - List *ppi_list) -{ - TsFdwRelInfo *hyper_fpinfo = fdw_relinfo_get(hyper_rel); - TsFdwRelInfo *fpinfo = fdw_relinfo_get(data_node_rel); - Path *path; - - if (data_node_rel->reloptkind == RELOPT_JOINREL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("foreign joins are not supported"))); - - path = data_node_scan_path_create(root, - data_node_rel, - NULL, /* default pathtarget */ - fpinfo->rows, - fpinfo->startup_cost, - fpinfo->total_cost, - NIL, /* no pathkeys */ - NULL, - NULL /* no extra plan */, - NIL); - - fdw_utils_add_path(data_node_rel, path); - - /* Add paths with pathkeys */ - fdw_add_paths_with_pathkeys_for_rel(root, data_node_rel, NULL, data_node_scan_path_create); - - /* - * Now build a path for each useful outer relation, if the parameterized - * data node scans are not disabled. 
- */ - if (!ts_guc_enable_parameterized_data_node_scan) - { - return; - } - - ListCell *ppi_cell; - foreach (ppi_cell, ppi_list) - { - ParamPathInfo *param_info = (ParamPathInfo *) lfirst(ppi_cell); - - /* - * Check if we have an index path locally that matches the - * parameterization. If so, we're going to have the same index path on - * the data node, and it's going to be significantly cheaper that a seq - * scan. We don't know precise values, but we have to discount it later - * so that the remote index paths are preferred. - */ - bool index_matches_parameterization = false; - ListCell *lc; - foreach (lc, hyper_fpinfo->indexed_parameterizations) - { - Bitmapset *item = lfirst(lc); - if (bms_equal(item, param_info->ppi_req_outer)) - { - index_matches_parameterization = true; - break; - } - } - - /* - * As a baseline, cost the data node scan as a seq scan. - */ - Cost startup_cost = 0; - Cost run_cost = 0; - double rows = data_node_rel->tuples > 1 ? data_node_rel->tuples : 123456; - - /* Run remote non-join clauses. */ - const double remote_sel_sane = - (fpinfo->remote_conds_sel > 0 && fpinfo->remote_conds_sel <= 1) ? - fpinfo->remote_conds_sel : - 0.1; - - startup_cost += data_node_rel->reltarget->cost.startup; - startup_cost += fpinfo->remote_conds_cost.startup; - run_cost += fpinfo->remote_conds_cost.per_tuple * rows; - run_cost += cpu_tuple_cost * rows; - run_cost += seq_page_cost * data_node_rel->pages; - rows *= remote_sel_sane; - - /* - * For this parameterization, we're going to have an index scan on the - * remote. We don't have a way to calculate the precise cost for it, so - * at least discount it by a constant factor compared to the seq scan. - */ - if (index_matches_parameterization) - { - run_cost *= 0.1; - } - - /* Run remote join clauses. */ - QualCost remote_join_cost; - cost_qual_eval(&remote_join_cost, param_info->ppi_clauses, root); - - /* - * We don't have up to date per-column statistics for the root - * distributed hypertable currently, so the join estimates are going to - * be way off. The worst is when they are too low and we end up - * transferring much more rows from the data node that we expected. Just - * hardcode it at 0.1 per clause for now. - * In the future, we could make use of per-chunk per-column statistics - * that we do have, by injecting them into the Postgres cost functions - * through the get_relation_stats_hook. For a data node scan, we would - * combine statistics for all participating chunks on the given data - * node. - */ - const double remote_join_sel = pow(0.1, list_length(param_info->ppi_clauses)); - - startup_cost += remote_join_cost.startup; - run_cost += remote_join_cost.per_tuple * rows; - rows *= remote_join_sel; - - /* Transfer the resulting tuples over the network. */ - startup_cost += fpinfo->fdw_startup_cost; - run_cost += fpinfo->fdw_tuple_cost * rows; - - /* Run local filters. */ - const double local_sel_sane = - (fpinfo->local_conds_sel > 0 && fpinfo->local_conds_sel <= 1) ? - fpinfo->local_conds_sel : - 0.5; - - startup_cost += fpinfo->local_conds_cost.startup; - run_cost += fpinfo->local_conds_cost.per_tuple * rows; - run_cost += cpu_tuple_cost * rows; - rows *= local_sel_sane; - - /* Compute the output targetlist. */ - run_cost += data_node_rel->reltarget->cost.per_tuple * rows; - - rows = clamp_row_est(rows); - - /* - * ppi_rows currently won't get looked at by anything, but still we - * may as well ensure that it matches our idea of the rowcount. 
- */ - param_info->ppi_rows = rows; - - /* Make the path */ - path = data_node_scan_path_create(root, - data_node_rel, - NULL, /* default pathtarget */ - rows, - startup_cost, - startup_cost + run_cost, - NIL, /* no pathkeys */ - param_info->ppi_req_outer, - NULL, - NIL); /* no fdw_private list */ - - add_path(data_node_rel, (Path *) path); - } -} - -/* - * Force GROUP BY aggregates to be pushed down. - * - * Push downs are forced by making the GROUP BY expression in the query become - * the partitioning keys, even if this is not compatible with - * partitioning. This makes the planner believe partitioning and GROUP BYs - * line up perfectly. Forcing a push down is useful because the PostgreSQL - * planner is not smart enough to realize it can always push things down if - * there's, e.g., only one partition (or data node) involved in the query. - */ -static void -force_group_by_push_down(PlannerInfo *root, RelOptInfo *hyper_rel) -{ - PartitionScheme partscheme = hyper_rel->part_scheme; - List *groupexprs; - List **nullable_partexprs; - int16 new_partnatts; - Oid *partopfamily; - Oid *partopcintype; - Oid *partcollation; - ListCell *lc; - int i = 0; - - Assert(partscheme != NULL); - - groupexprs = get_sortgrouplist_exprs(root->parse->groupClause, root->parse->targetList); - new_partnatts = list_length(groupexprs); - - /* - * Only reallocate the partitioning attributes arrays if it is smaller than - * the new size. palloc0 is needed to zero out the extra space. - */ - if (partscheme->partnatts < new_partnatts) - { - partopfamily = palloc0(new_partnatts * sizeof(Oid)); - partopcintype = palloc0(new_partnatts * sizeof(Oid)); - partcollation = palloc0(new_partnatts * sizeof(Oid)); - nullable_partexprs = palloc0(new_partnatts * sizeof(List *)); - - memcpy(partopfamily, partscheme->partopfamily, partscheme->partnatts * sizeof(Oid)); - memcpy(partopcintype, partscheme->partopcintype, partscheme->partnatts * sizeof(Oid)); - memcpy(partcollation, partscheme->partcollation, partscheme->partnatts * sizeof(Oid)); - memcpy(nullable_partexprs, - hyper_rel->nullable_partexprs, - partscheme->partnatts * sizeof(List *)); - - partscheme->partopfamily = partopfamily; - partscheme->partopcintype = partopcintype; - partscheme->partcollation = partcollation; - hyper_rel->nullable_partexprs = nullable_partexprs; - - hyper_rel->partexprs = (List **) palloc0(sizeof(List *) * new_partnatts); - } - - partscheme->partnatts = new_partnatts; - - foreach (lc, groupexprs) - { - List *expr = lfirst(lc); - - hyper_rel->partexprs[i++] = list_make1(expr); - } - - Assert(i == partscheme->partnatts); -} - -/* - * Check if it is safe to push down GROUP BYs to remote nodes. A push down is - * safe if the chunks that are part of the query are disjointedly partitioned - * on data nodes along the first closed "space" dimension, or all dimensions are - * covered in the GROUP BY expresssion. - * - * If we knew that the GROUP BY covers all partitioning keys, we would not - * need to check overlaps. Such a check is done in - * planner.c:group_by_has_partkey(), but this function is not public. We - * could copy it here to avoid some unnecessary work. - * - * There are other "base" cases when we can always safely push down--even if - * the GROUP BY does NOT cover the partitioning keys--for instance, when only - * one data node is involved in the query. We try to account for such cases too - * and "trick" the PG planner to do the "right" thing. 
- * - * We also want to add any bucketing expression (on, e.g., time) as a "meta" - * partitioning key (in rel->partexprs). This will make the partitionwise - * planner accept the GROUP BY clause for push down even though the expression - * on time is a "derived" partitioning key. - */ -static void -push_down_group_bys(PlannerInfo *root, RelOptInfo *hyper_rel, Hyperspace *hs, - DataNodeChunkAssignments *scas) -{ - const Dimension *dim; - bool overlaps; - - Assert(hs->num_dimensions >= 1); - Assert(hyper_rel->part_scheme->partnatts == hs->num_dimensions); - - /* - * Check for special case when there is only one data node with chunks. This - * can always be safely pushed down irrespective of partitioning - */ - if (scas->num_nodes_with_chunks == 1) - { - force_group_by_push_down(root, hyper_rel); - return; - } - - /* - * Get first closed dimension that we use for assigning chunks to - * data nodes. If there is no closed dimension, we are done. - */ - dim = hyperspace_get_closed_dimension(hs, 0); - - if (NULL == dim) - return; - - overlaps = data_node_chunk_assignments_are_overlapping(scas, dim->fd.id); - - if (!overlaps) - { - /* - * If data node chunk assignments are non-overlapping along the - * "space" dimension, we can treat this as a one-dimensional - * partitioned table since any aggregate GROUP BY that includes the - * data node assignment dimension is safe to execute independently on - * each data node. - */ - Assert(NULL != dim); - hyper_rel->partexprs[0] = ts_dimension_get_partexprs(dim, hyper_rel->relid); - hyper_rel->part_scheme->partnatts = 1; - } -} - -/* - * Check if the query performs a join between a hypertable (outer) and a reference - * table (inner) and the join type is a LEFT JOIN, an INNER JOIN, or an implicit - * join. - */ -static bool -is_safe_to_pushdown_reftable_join(PlannerInfo *root, List *join_reference_tables, - RangeTblEntry *innertableref, JoinType jointype) -{ - Assert(root != NULL); - Assert(innertableref != NULL); - - /* - * We support pushing down of INNER and LEFT joins only. - * - * Constructing queries representing partitioned FULL, SEMI, and ANTI - * joins is hard, hence not considered right now. - */ - if (jointype != JOIN_INNER && jointype != JOIN_LEFT) - return false; - - /* Check that at least one reference table is defined. */ - if (join_reference_tables == NIL) - return false; - - /* Only queries with two tables are supported. */ - if (bms_num_members(root->all_baserels) != 2) - return false; - - /* Right table has to be a distributed hypertable */ - if (!list_member_oid(join_reference_tables, innertableref->relid)) - return false; - - /* Join can be pushed down */ - return true; -} - -/* - * Assess whether the join between inner and outer relations can be pushed down - * to the foreign server. As a side effect, save information we obtain in this - * function to TsFdwRelInfo passed in. - * - * The code is based on PostgreSQL's foreign_join_ok function (version 15.1). - */ -static bool -fdw_pushdown_foreign_join(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, - RelOptInfo *outerrel, RelOptInfo *innerrel, JoinPathExtraData *extra) -{ - TsFdwRelInfo *fpinfo; - TsFdwRelInfo *fpinfo_o; - TsFdwRelInfo *fpinfo_i; - ListCell *lc; - List *joinclauses; - - /* - * If either of the joining relations is marked as unsafe to pushdown, the - * join can not be pushed down. 
- */ - fpinfo = fdw_relinfo_get(joinrel); - fpinfo_o = fdw_relinfo_get(outerrel); - fpinfo_i = fdw_relinfo_get(innerrel); - - Assert(fpinfo_o != NULL); - Assert(fpinfo_o->pushdown_safe); - Assert(fpinfo_i != NULL); - Assert(fpinfo_i->pushdown_safe); - - /* - * If joining relations have local conditions, those conditions are - * required to be applied before joining the relations. Hence the join can - * not be pushed down (shouldn't happen in the current implementation). - */ - Assert(fpinfo_o->local_conds == NULL); - Assert(fpinfo_i->local_conds == NULL); - - fpinfo->server = fpinfo_o->server; - - /* - * Separate restrict list into join quals and pushed-down (other) quals. - * - * Join quals belonging to an outer join must all be shippable, else we - * cannot execute the join remotely. Add such quals to 'joinclauses'. - * - * Add other quals to fpinfo->remote_conds if they are shippable, else to - * fpinfo->local_conds. In an inner join it's okay to execute conditions - * either locally or remotely; the same is true for pushed-down conditions - * at an outer join. - * - * Note we might return failure after having already scribbled on - * fpinfo->remote_conds and fpinfo->local_conds. That's okay because we - * won't consult those lists again if we deem the join unshippable. - */ - joinclauses = NIL; - foreach (lc, extra->restrictlist) - { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - bool is_remote_clause = ts_is_foreign_expr(root, joinrel, rinfo->clause); - - if (IS_OUTER_JOIN(jointype) && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) - { - if (!is_remote_clause) - return false; - joinclauses = lappend(joinclauses, rinfo); - } - else - { - if (is_remote_clause) - fpinfo->remote_conds = lappend(fpinfo->remote_conds, rinfo); - else - fpinfo->local_conds = lappend(fpinfo->local_conds, rinfo); - } - } - - if (fpinfo->local_conds != NIL) - return false; - - /* Save the join clauses, for later use. */ - fpinfo->joinclauses = joinclauses; - - /* - * deparseExplicitTargetList() isn't smart enough to handle anything other - * than a Var. In particular, if there's some PlaceHolderVar that would - * need to be evaluated within this join tree (because there's an upper - * reference to a quantity that may go to NULL as a result of an outer - * join), then we can't try to push the join down because we'll fail when - * we get to deparseExplicitTargetList(). However, a PlaceHolderVar that - * needs to be evaluated *at the top* of this join tree is OK, because we - * can do that locally after fetching the results from the remote side. - * - * Note: At the moment, the placeholder code is not used in our current join - * pushdown implementation. - */ -#ifdef ENABLE_DEAD_CODE - foreach (lc, root->placeholder_list) - { - PlaceHolderInfo *phinfo = lfirst(lc); - Relids relids; - - /* PlaceHolderInfo refers to parent relids, not child relids. */ - relids = IS_OTHER_REL(joinrel) ? joinrel->top_parent_relids : joinrel->relids; - - if (bms_is_subset(phinfo->ph_eval_at, relids) && - bms_nonempty_difference(relids, phinfo->ph_eval_at)) - return false; - } -#endif - - fpinfo->outerrel = outerrel; - fpinfo->innerrel = innerrel; - fpinfo->jointype = jointype; - - /* - * By default, both the input relations are not required to be deparsed as - * subqueries, but there might be some relations covered by the input - * relations that are required to be deparsed as subqueries, so save the - * relids of those relations for later use by the deparser. 
- */ - fpinfo->make_outerrel_subquery = false; - fpinfo->make_innerrel_subquery = false; - Assert(bms_is_subset(fpinfo_o->lower_subquery_rels, outerrel->relids)); - Assert(bms_is_subset(fpinfo_i->lower_subquery_rels, innerrel->relids)); - fpinfo->lower_subquery_rels = - bms_union(fpinfo_o->lower_subquery_rels, fpinfo_i->lower_subquery_rels); - - /* - * Pull the other remote conditions from the joining relations into join - * clauses or other remote clauses (remote_conds) of this relation - * wherever possible. This avoids building subqueries at every join step. - * - * For an inner join, clauses from both the relations are added to the - * other remote clauses. For LEFT and RIGHT OUTER join, the clauses from - * the outer side are added to remote_conds since those can be evaluated - * after the join is evaluated. The clauses from inner side are added to - * the joinclauses, since they need to be evaluated while constructing the - * join. - * - * For a FULL OUTER JOIN, the other clauses from either relation can not - * be added to the joinclauses or remote_conds, since each relation acts - * as an outer relation for the other. - * - * The joining sides can not have local conditions, thus no need to test - * shippability of the clauses being pulled up. - */ - switch (jointype) - { - case JOIN_INNER: -#if PG14_GE - fpinfo->remote_conds = list_concat(fpinfo->remote_conds, fpinfo_i->remote_conds); - fpinfo->remote_conds = list_concat(fpinfo->remote_conds, fpinfo_o->remote_conds); -#else - fpinfo->remote_conds = - list_concat(fpinfo->remote_conds, list_copy(fpinfo_i->remote_conds)); - fpinfo->remote_conds = - list_concat(fpinfo->remote_conds, list_copy(fpinfo_o->remote_conds)); -#endif - break; - - case JOIN_LEFT: -#if PG14_GE - fpinfo->joinclauses = list_concat(fpinfo->joinclauses, fpinfo_i->remote_conds); - fpinfo->remote_conds = list_concat(fpinfo->remote_conds, fpinfo_o->remote_conds); -#else - fpinfo->joinclauses = - list_concat(fpinfo->joinclauses, list_copy(fpinfo_i->remote_conds)); - fpinfo->remote_conds = - list_concat(fpinfo->remote_conds, list_copy(fpinfo_o->remote_conds)); -#endif - break; - -/* Right and full joins are not supported at the moment */ -#ifdef ENABLE_DEAD_CODE - case JOIN_RIGHT: -#if PG14_GE - fpinfo->joinclauses = list_concat(fpinfo->joinclauses, fpinfo_o->remote_conds); - fpinfo->remote_conds = list_concat(fpinfo->remote_conds, fpinfo_i->remote_conds); -#else - fpinfo->joinclauses = - list_concat(fpinfo->joinclauses, list_copy(fpinfo_o->remote_conds)); - fpinfo->remote_conds = - list_concat(fpinfo->remote_conds, list_copy(fpinfo_i->remote_conds)); -#endif - break; - - case JOIN_FULL: - - /* - * In this case, if any of the input relations has conditions, we - * need to deparse that relation as a subquery so that the - * conditions can be evaluated before the join. Remember it in - * the fpinfo of this relation so that the deparser can take - * appropriate action. Also, save the relids of base relations - * covered by that relation for later use by the deparser. 
- */ - if (fpinfo_o->remote_conds) - { - fpinfo->make_outerrel_subquery = true; - fpinfo->lower_subquery_rels = - bms_add_members(fpinfo->lower_subquery_rels, outerrel->relids); - } - if (fpinfo_i->remote_conds) - { - fpinfo->make_innerrel_subquery = true; - fpinfo->lower_subquery_rels = - bms_add_members(fpinfo->lower_subquery_rels, innerrel->relids); - } - break; -#endif - - default: - /* Should not happen, we have just checked this above */ - elog(ERROR, "unsupported join type %d", jointype); - } - - /* - * For an inner join, all restrictions can be treated alike. Treating the - * pushed down conditions as join conditions allows a top level full outer - * join to be deparsed without requiring subqueries. - */ - if (jointype == JOIN_INNER) - { - Assert(!fpinfo->joinclauses); - fpinfo->joinclauses = fpinfo->remote_conds; - fpinfo->remote_conds = NIL; - } - /* Mark that this join can be pushed down safely */ - fpinfo->pushdown_safe = true; - - /* - * Set the string describing this join relation to be used in EXPLAIN - * output of corresponding ForeignScan. Note that the decoration we add - * to the base relation names mustn't include any digits, or it'll confuse - * postgresExplainForeignScan. - */ - fpinfo->relation_name = makeStringInfo(); - appendStringInfo(fpinfo->relation_name, - "(%s) %s JOIN (%s)", - fpinfo_o->relation_name->data, - get_jointype_name(fpinfo->jointype), - fpinfo_i->relation_name->data); - - /* - * Set the relation index. This is defined as the position of this - * joinrel in the join_rel_list list plus the length of the rtable list. - * Note that since this joinrel is at the end of the join_rel_list list - * when we are called, we can get the position by list_length. - */ - fpinfo->relation_index = list_length(root->parse->rtable) + list_length(root->join_rel_list); - - return true; -} - -/* - * Check if the given hypertable is a distributed hypertable. - */ -static bool -is_distributed_hypertable(Oid hypertable_reloid) -{ - Cache *hcache; - - Hypertable *ht = - ts_hypertable_cache_get_cache_and_entry(hypertable_reloid, CACHE_FLAG_MISSING_OK, &hcache); - - /* perform check before cache is released */ - bool ht_is_distributed = (ht != NULL && hypertable_is_distributed(ht)); - ts_cache_release(hcache); - - return ht_is_distributed; -} - -/* - * Create a new join partition RelOptInfo data structure for a partition. The data - * structure is based on the parameter joinrel. The paramater is taken as template - * and adjusted for the partition provided by the parameter data_node_rel. 
- */ -static RelOptInfo * -create_data_node_joinrel(PlannerInfo *root, RelOptInfo *innerrel, RelOptInfo *joinrel, - RelOptInfo *data_node_rel, AppendRelInfo *appinfo) -{ - RelOptInfo *join_partition = palloc(sizeof(RelOptInfo)); - memcpy(join_partition, joinrel, sizeof(RelOptInfo)); - - /* Create a new relinfo for the join partition */ - join_partition->fdw_private = NULL; - TsFdwRelInfo *join_part_fpinfo = fdw_relinfo_create(root, - join_partition, - data_node_rel->serverid, - InvalidOid, - TS_FDW_RELINFO_REFERENCE_JOIN_PARTITION); - - Assert(join_part_fpinfo != NULL); - - TsFdwRelInfo *data_node_rel_fpinfo = fdw_relinfo_get(data_node_rel); - Assert(data_node_rel_fpinfo != NULL); - - /* Copy chunk assignment from hypertable */ - join_part_fpinfo->sca = data_node_rel_fpinfo->sca; - - /* Set parameters of the join partition */ - join_partition->relid = data_node_rel->relid; - join_partition->relids = bms_copy(data_node_rel->relids); - join_partition->relids = bms_add_members(join_partition->relids, innerrel->relids); - join_partition->pathlist = NIL; - join_partition->partial_pathlist = NIL; - - /* Set the reltarget expressions of the partition based on the reltarget expressions - * of the join and adjust them for the partition */ - join_partition->reltarget = create_empty_pathtarget(); - join_partition->reltarget->sortgrouprefs = joinrel->reltarget->sortgrouprefs; - join_partition->reltarget->cost = joinrel->reltarget->cost; - join_partition->reltarget->width = joinrel->reltarget->width; -#if PG14_GE - join_partition->reltarget->has_volatile_expr = joinrel->reltarget->has_volatile_expr; -#endif - join_partition->reltarget->exprs = - castNode(List, - adjust_appendrel_attrs(root, (Node *) joinrel->reltarget->exprs, 1, &appinfo)); - - /* - * Copy the list of parameterizations for which we know indexpats exist, for - * use by the ref table join cost estimation code. - */ - TsFdwRelInfo *joinrel_fpinfo = fdw_relinfo_get(joinrel); - join_part_fpinfo->indexed_parameterizations = joinrel_fpinfo->indexed_parameterizations; - - return join_partition; -} - -/* - * Create a JoinPathExtraData data structure for a partition. The new struct is based on the - * original JoinPathExtraData of the join and the AppendRelInfo of the partition. - */ -static JoinPathExtraData * -create_data_node_joinrel_extra(PlannerInfo *root, JoinPathExtraData *extra, AppendRelInfo *appinfo) -{ - JoinPathExtraData *partition_extra = palloc(sizeof(JoinPathExtraData)); - partition_extra->inner_unique = extra->inner_unique; - partition_extra->sjinfo = extra->sjinfo; - partition_extra->semifactors = extra->semifactors; - partition_extra->param_source_rels = extra->param_source_rels; - partition_extra->mergeclause_list = - castNode(List, adjust_appendrel_attrs(root, (Node *) extra->mergeclause_list, 1, &appinfo)); - partition_extra->restrictlist = - castNode(List, adjust_appendrel_attrs(root, (Node *) extra->restrictlist, 1, &appinfo)); - - return partition_extra; -} - -/* - * Generate the paths for a pushed down join. Each data node will be considered as a partition - * of the join. The join can be pushed down if: - * - * (1) The setting "ts_guc_enable_per_data_node_queries" is enabled - * (2) The outer relation is a distributed hypertable - * (3) The inner relation is marked as a reference table - * (4) The join is a left join or an inner join - * - * The join will be performed between the multiple DataNodeRels (see function - * build_data_node_part_rels) and the original innerrel of the join (the reftable). 
- * - * The code is based on PostgreSQL's postgresGetForeignJoinPaths function - * (version 15.1). - */ -void -data_node_generate_pushdown_join_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, JoinType jointype, - JoinPathExtraData *extra) -{ - Path *joinpath; - double rows = 0; - int width = 0; - Cost startup_cost = 0; - Cost total_cost = 0; - Path *epq_path = NULL; - RelOptInfo **hyper_table_rels; - RelOptInfo **join_partition_rels; - int nhyper_table_rels; - List *join_part_rels_list = NIL; -#if PG15_GE - Bitmapset *data_node_live_rels = NULL; -#endif - - /* - * Skip check if the join result has been considered already. - */ - if (joinrel->fdw_private) - return; - - /* Distributed hypertables are not supported by MERGE at the moment. Ensure that - * we perform our planning only on SELECTs. - */ - if (root->parse->commandType != CMD_SELECT) - return; - -#ifdef ENABLE_DEAD_CODE - /* - * This code does not work for joins with lateral references, since those - * must have parameterized paths, which we don't generate yet. - */ - if (!bms_is_empty(joinrel->lateral_relids)) - return; -#endif - - /* Get the hypertable from the outer relation. */ - RangeTblEntry *rte_outer = planner_rt_fetch(outerrel->relid, root); - - /* Test that the fetched outer relation is an actual RTE and a - * distributed hypertable. */ - if (rte_outer == NULL || !is_distributed_hypertable(rte_outer->relid)) - return; - -#ifdef USE_ASSERT_CHECKING - /* The outerrel has to be distributed. This condition should be always hold - * because otherwise we should not start the planning for distributed tables - * (see timescaledb_set_join_pathlist_hook). - */ - TimescaleDBPrivate *outerrel_private = outerrel->fdw_private; - Assert(outerrel_private != NULL); - Assert(outerrel_private->fdw_relation_info != NULL); -#endif - - /* We know at this point that outerrel is a distributed hypertable. - * So, outerrel has to be partitioned. */ - Assert(outerrel->nparts > 0); - - /* Test if inner table has a range table. */ - RangeTblEntry *rte_inner = planner_rt_fetch(innerrel->relid, root); - if (rte_inner == NULL) - return; - - /* Get current partitioning of the outerrel. */ - hyper_table_rels = outerrel->part_rels; - nhyper_table_rels = outerrel->nparts; - - Assert(nhyper_table_rels > 0); - Assert(hyper_table_rels != NULL); - - /* - * Create an PgFdwRelationInfo entry that is used to indicate - * that the join relation is already considered, so that we won't waste - * time in judging safety of join pushdown and adding the same paths again - * if found safe. Once we know that this join can be pushed down, we fill - * the entry. - */ - TsFdwRelInfo *joinrel_fpinfo = - fdw_relinfo_create(root, joinrel, InvalidOid, InvalidOid, TS_FDW_RELINFO_JOIN); - Assert(joinrel_fpinfo->type == TS_FDW_RELINFO_JOIN); - - /* attrs_used is only for base relations. */ - joinrel_fpinfo->attrs_used = NULL; - joinrel_fpinfo->pushdown_safe = false; - - /* - * Copy the list of parameterizations for which we know indexpats exist, for - * use by the ref table join cost estimation code. - */ - TsFdwRelInfo *hypertable_fpinfo = fdw_relinfo_get(outerrel); - joinrel_fpinfo->indexed_parameterizations = hypertable_fpinfo->indexed_parameterizations; - - /* - * We need the FDW information to get retrieve the information about the - * configured reference join tables. So, create the data structure for - * the first server. The reference tables are the same for all servers. 
- */ - Oid server_oid = hyper_table_rels[0]->serverid; - joinrel_fpinfo->server = GetForeignServer(server_oid); - apply_fdw_and_server_options(joinrel_fpinfo); - - if (!is_safe_to_pushdown_reftable_join(root, - joinrel_fpinfo->join_reference_tables, - rte_inner, - jointype)) - { - /* - * Reset fdw_private to allow further planner calls with different arguments - * (e.g., swapped inner and outer relation) to replan the pushdown. - */ - pfree(joinrel->fdw_private); - joinrel->fdw_private = NULL; - return; - } - - /* - * Join pushdown only works if the data node rels are created in - * data_node_scan_add_node_paths during scan planning. - */ - if (!ts_guc_enable_per_data_node_queries) - { - ereport(DEBUG1, - (errmsg("join on reference table is not considered to be pushed down because " - "'enable_per_data_node_queries' GUC is disabled"))); - - return; - } - - /* The inner table can be a distributed hypertable or a plain table. Plain tables don't have - * a TsFdwRelInfo at this point. So, it needs to be created. - */ - if (innerrel->fdw_private == NULL) - fdw_relinfo_create(root, innerrel, InvalidOid, InvalidOid, TS_FDW_RELINFO_REFERENCE_TABLE); - - /* Allow pushdown of the inner rel (the reference table) */ - TsFdwRelInfo *fpinfo_i = fdw_relinfo_get(innerrel); - fpinfo_i->pushdown_safe = true; - - ereport(DEBUG1, (errmsg("try to push down a join on a reference table"))); - - join_partition_rels = palloc(sizeof(RelOptInfo *) * nhyper_table_rels); - - /* Create join paths and cost estimations per data node / join relation. */ - for (int i = 0; i < nhyper_table_rels; i++) - { - RelOptInfo *data_node_rel = hyper_table_rels[i]; - Assert(data_node_rel); - - /* Adjust join target expression list */ - AppendRelInfo *appinfo = root->append_rel_array[data_node_rel->relid]; - Assert(appinfo != NULL); - - RelOptInfo *join_partition = - create_data_node_joinrel(root, innerrel, joinrel, data_node_rel, appinfo); - join_partition_rels[i] = join_partition; - TsFdwRelInfo *partition_fpinfo = fdw_relinfo_get(join_partition); - - /* Create a new join path extra for this join partition */ - JoinPathExtraData *partition_extra = create_data_node_joinrel_extra(root, extra, appinfo); - - /* Pushdown the join expressions */ - bool join_pushdown_ok = fdw_pushdown_foreign_join(root, - join_partition, - jointype, - data_node_rel, - innerrel, - partition_extra); - - /* Join cannot be pushed down */ - if (!join_pushdown_ok) - { - ereport(DEBUG1, - (errmsg( - "join pushdown on reference table is not supported for the used query"))); - return; - } - - /* - * Compute the selectivity and cost of the local_conds, so we don't have - * to do it over again for each path. The best we can do for these - * conditions is to estimate selectivity on the basis of local statistics. - * The local conditions are applied after the join has been computed on - * the remote side like quals in WHERE clause, so pass jointype as - * JOIN_INNER. - */ - partition_fpinfo->local_conds_sel = - clauselist_selectivity(root, partition_fpinfo->local_conds, 0, JOIN_INNER, NULL); - cost_qual_eval(&partition_fpinfo->local_conds_cost, partition_fpinfo->local_conds, root); - - /* - * If we are going to estimate costs locally, estimate the join clause - * selectivity here while we have special join info. 
- */ - partition_fpinfo->joinclause_sel = clauselist_selectivity(root, - partition_fpinfo->joinclauses, - 0, - partition_fpinfo->jointype, - extra->sjinfo); - - /* Estimate costs for bare join relation */ - fdw_estimate_path_cost_size(root, - join_partition, - NIL, - &rows, - &width, - &startup_cost, - &total_cost); - - /* Now update this information in the joinrel */ - join_partition->rows = rows; - join_partition->reltarget->width = width; - partition_fpinfo->rows = rows; - partition_fpinfo->width = width; - partition_fpinfo->startup_cost = startup_cost; - partition_fpinfo->total_cost = total_cost; - - /* - * Create a new join path and add it to the joinrel which represents a - * join between foreign tables. - */ - joinpath = data_node_join_path_create(root, - join_partition, - NULL, /* default pathtarget */ - rows, - startup_cost, - total_cost, - NIL, /* no pathkeys */ - join_partition->lateral_relids, - epq_path, - NIL); /* no fdw_private */ - - Assert(joinpath != NULL); - - if (!bms_is_empty(partition_fpinfo->sca->chunk_relids)) - { - /* Add generated path into joinrel by add_path(). */ - fdw_utils_add_path(join_partition, (Path *) joinpath); - join_part_rels_list = lappend(join_part_rels_list, join_partition); - -#if PG15_GE - data_node_live_rels = bms_add_member(data_node_live_rels, i); -#endif - - /* Consider pathkeys for the join relation */ - fdw_add_paths_with_pathkeys_for_rel(root, - join_partition, - epq_path, - data_node_join_path_create); - } - else - ts_set_dummy_rel_pathlist(join_partition); - - set_cheapest(join_partition); - } - - Assert(list_length(join_part_rels_list) > 0); - - /* Must keep partitioning info consistent with the join partition paths we have created */ - joinrel->part_rels = join_partition_rels; - joinrel->nparts = nhyper_table_rels; -#if PG15_GE - joinrel->live_parts = data_node_live_rels; -#endif - - add_paths_to_append_rel(root, joinrel, join_part_rels_list); - - /* XXX Consider parameterized paths for the join relation */ -} - -/* - * Turn chunk append paths into data node append paths. - * - * By default, a hypertable produces append plans where each child is a chunk - * to be scanned. This function computes alternative append plans where each - * child corresponds to a data node. - * - * In the future, additional assignment algorithms can create their own - * append paths and have the cost optimizer pick the best one. 
- */ -void -data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel) -{ - RelOptInfo **chunk_rels = hyper_rel->part_rels; - int nchunk_rels = hyper_rel->nparts; - RangeTblEntry *hyper_rte = planner_rt_fetch(hyper_rel->relid, root); - Cache *hcache = ts_hypertable_cache_pin(); - Hypertable *ht = ts_hypertable_cache_get_entry(hcache, hyper_rte->relid, CACHE_FLAG_NONE); - List *data_node_rels_list = NIL; - RelOptInfo **data_node_rels; -#if PG15_GE - Bitmapset *data_node_live_rels = NULL; -#endif - int ndata_node_rels; - DataNodeChunkAssignments scas; - int i; - - Assert(NULL != ht); - - if (nchunk_rels <= 0) - { - ts_cache_release(hcache); - return; - } - - /* Create the RelOptInfo for each data node */ - data_node_rels = build_data_node_part_rels(root, hyper_rel, &ndata_node_rels); - - Assert(ndata_node_rels > 0); - - data_node_chunk_assignments_init(&scas, SCA_STRATEGY_ATTACHED_DATA_NODE, root, ndata_node_rels); - - /* Assign chunks to data nodes */ - data_node_chunk_assignment_assign_chunks(&scas, chunk_rels, nchunk_rels); - - /* Try to push down GROUP BY expressions and bucketing, if possible */ - push_down_group_bys(root, hyper_rel, ht->space, &scas); - - /* - * Index path for this relation are not useful by themselves, but we are - * going to use them to guess whether the remote scan can use an index for a - * given parameterization. This is needed to estimate the cost for - * parameterized data node scans. We will reset the pathlist below so these - * path are not going to be used. - */ - create_index_paths(root, hyper_rel); - - /* - * Not sure what parameterizations there could be except the ones used for - * join. Still, it's hard to verify from the code because - * get_baserel_parampathinfo() is called all over the place w/o checking if - * a join would be valid for the given required_outer. So for generating - * the parameterized data node scan paths we'll use the explicit list of - * ppis valid for joins that we just built, and not the entire - * hyper_rel->ppilist. - */ - List *ppi_list = build_parameterizations(root, hyper_rel); - - /* - * Check if we have an index path locally that matches the - * parameterization. If so, we're going to have the same index path on - * the data node, and it's going to be significantly cheaper that a seq - * scan. We don't know precise values, but we have to discount it later - * so that the remote index paths are preferred. - * - * Cache this information for use in reference join pushdown costs. - * It has to have the same idea about which paths are becoming index - * scans. - */ - TsFdwRelInfo *hyper_fpinfo = fdw_relinfo_get(hyper_rel); - ListCell *ppi_cell; - foreach (ppi_cell, ppi_list) - { - ParamPathInfo *param_info = (ParamPathInfo *) lfirst(ppi_cell); - ListCell *path_cell; - foreach (path_cell, hyper_rel->pathlist) - { - Path *path = (Path *) lfirst(path_cell); - if (path->param_info == param_info) - { - /* - * We shouldn't have parameterized seq scans. Can be an - * IndexPath (includes index-only scans) or a BitmapHeapPath. - */ - Assert(path->type == T_BitmapHeapPath || path->type == T_IndexPath); - - hyper_fpinfo->indexed_parameterizations = - lappend(hyper_fpinfo->indexed_parameterizations, param_info->ppi_req_outer); - break; - } - } - } - - /* - * Create estimates and paths for each data node rel based on data node chunk - * assignments. 
- */ - for (i = 0; i < ndata_node_rels; i++) - { - RelOptInfo *data_node_rel = data_node_rels[i]; - DataNodeChunkAssignment *sca = - data_node_chunk_assignment_get_or_create(&scas, data_node_rel); - TsFdwRelInfo *fpinfo; - - /* - * Basic stats for data node rels come from the assigned chunks since - * data node rels don't correspond to real tables in the system. - */ - data_node_rel->pages = sca->pages; - data_node_rel->tuples = sca->tuples; - data_node_rel->rows = sca->rows; - /* The width should be the same as any chunk */ - data_node_rel->reltarget->width = hyper_rel->part_rels[0]->reltarget->width; - - fpinfo = fdw_relinfo_create(root, - data_node_rel, - data_node_rel->serverid, - hyper_rte->relid, - TS_FDW_RELINFO_HYPERTABLE_DATA_NODE); - - fpinfo->sca = sca; - - if (!bms_is_empty(sca->chunk_relids)) - { - add_data_node_scan_paths(root, data_node_rel, hyper_rel, ppi_list); - data_node_rels_list = lappend(data_node_rels_list, data_node_rel); -#if PG15_GE - data_node_live_rels = bms_add_member(data_node_live_rels, i); -#endif - } - else - ts_set_dummy_rel_pathlist(data_node_rel); - - set_cheapest(data_node_rel); - -#ifdef TS_DEBUG - if (ts_debug_optimizer_flags.show_rel) - tsl_debug_log_rel_with_paths(root, data_node_rel, (UpperRelationKind *) NULL); -#endif - } - - Assert(list_length(data_node_rels_list) > 0); - - /* Reset the pathlist since data node scans are preferred */ - hyper_rel->pathlist = NIL; - - /* Must keep partitioning info consistent with the append paths we create */ - hyper_rel->part_rels = data_node_rels; - hyper_rel->nparts = ndata_node_rels; -#if PG15_GE - hyper_rel->live_parts = data_node_live_rels; -#endif - - add_paths_to_append_rel(root, hyper_rel, data_node_rels_list); - ts_cache_release(hcache); -} - -/* - * Creates CustomScanPath for the data node and adds to output_rel. No custom_path is added, - * i.e., it is encapsulated by the CustomScanPath, so it doesn't inflate continuation of the - * planning and will be planned locally on the data node. 
- */ -void -data_node_scan_create_upper_paths(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra) -{ - TimescaleDBPrivate *rel_private = input_rel->fdw_private; - TsFdwRelInfo *fpinfo; - - if (rel_private == NULL || rel_private->fdw_relation_info == NULL) - /* Not a rel we're interested in */ - return; - - fpinfo = fdw_relinfo_get(input_rel); - - /* Verify that this is a data node rel */ - if (NULL == fpinfo || fpinfo->type != TS_FDW_RELINFO_HYPERTABLE_DATA_NODE) - return; - - fdw_create_upper_paths(fpinfo, - root, - stage, - input_rel, - output_rel, - extra, - data_node_scan_upper_path_create); -} - -static CustomScanMethods data_node_scan_plan_methods = { - .CustomName = "DataNodeScan", - .CreateCustomScanState = data_node_scan_state_create, -}; - -typedef struct DataNodeScanPath -{ - CustomPath cpath; -} DataNodeScanPath; - -static Plan * -data_node_scan_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *best_path, List *tlist, - List *clauses, List *custom_plans) -{ - CustomScan *cscan = makeNode(CustomScan); - ScanInfo scaninfo; - - memset(&scaninfo, 0, sizeof(ScanInfo)); - - fdw_scan_info_init(&scaninfo, root, rel, &best_path->path, clauses, NULL); - - cscan->methods = &data_node_scan_plan_methods; - cscan->custom_plans = custom_plans; - cscan->scan.plan.targetlist = tlist; - cscan->scan.scanrelid = scaninfo.scan_relid; - cscan->custom_scan_tlist = scaninfo.fdw_scan_tlist; - cscan->scan.plan.qual = scaninfo.local_exprs; - cscan->custom_exprs = list_make2(scaninfo.params_list, scaninfo.fdw_recheck_quals); - - /* - * If this is a join, and to make it valid to push down we had to assume - * that the current user is the same as some user explicitly named in the - * query, mark the finished plan as depending on the current user. - */ - if (rel->useridiscurrent) - root->glob->dependsOnRole = true; - - /* - * If rel is a base relation, detect whether any system columns are - * requested from the rel. (If rel is a join relation, rel->relid will be - * 0, but there can be no Var with relid 0 in the rel's targetlist or the - * restriction clauses, so we skip this in that case. Note that any such - * columns in base relations that were joined are assumed to be contained - * in fdw_scan_tlist.) This is a bit of a kluge and might go away - * someday, so we intentionally leave it out of the API presented to FDWs. - */ - - scaninfo.systemcol = false; - - if (scaninfo.scan_relid > 0) - { - Bitmapset *attrs_used = NULL; - ListCell *lc; - int i; - - /* - * First, examine all the attributes needed for joins or final output. - * Note: we must look at rel's targetlist, not the attr_needed data, - * because attr_needed isn't computed for inheritance child rels. - */ - pull_varattnos((Node *) rel->reltarget->exprs, scaninfo.scan_relid, &attrs_used); - - /* Add all the attributes used by restriction clauses. */ - foreach (lc, rel->baserestrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - - pull_varattnos((Node *) rinfo->clause, scaninfo.scan_relid, &attrs_used); - } - - /* Now, are any system columns requested from rel? */ - for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++) - { - if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, attrs_used)) - { - scaninfo.systemcol = true; - break; - } - } - - bms_free(attrs_used); - } - - /* Raise an error when system column is requsted, eg. 
tableoid */ - if (scaninfo.systemcol) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("system columns are not accessible on distributed hypertables with current " - "settings"), - errhint("Set timescaledb.enable_per_data_node_queries=false to query system " - "columns."))); - - /* Should have determined the fetcher type by now. */ - DataFetcherType fetcher_type = ts_data_node_fetcher_scan_type; - Assert(fetcher_type != AutoFetcherType); - - /* Check if we should use prepared statement data fetcher. */ - if (fetcher_type == CopyFetcherType && list_length(scaninfo.params_list) > 0 && - ts_guc_remote_data_fetcher == AutoFetcherType) - { - /* - * The path is parameterized by either Nested Loop params or InitPlan - * params. We can distinguish the join by presence of Path.param_info. - * - * For joins, it is optimal to use Prepared Statement fetcher, because - * this plan is likely to be ran multiple times, and this avoids - * re-planning the query on each inner loop. - * - * For InitPlans, COPY fetcher would be more optimal. Now it's not - * technically possible to use it, because the COPY statements cannot be - * parameterized. We need support for this case in deparsing, to encode - * the parameter values into the query itself. For now, also use the - * Prepared Statement fetcher for this case, because it does not prevent - * parallelism, unlike Cursor. - */ - fetcher_type = PreparedStatementFetcherType; - } - - cscan->custom_private = list_make3(scaninfo.fdw_private, - list_make1_int(scaninfo.systemcol), - makeInteger(fetcher_type)); - - return &cscan->scan.plan; -} - -static CustomPathMethods data_node_scan_path_methods = { - .CustomName = DATA_NODE_SCAN_PATH_NAME, - .PlanCustomPath = data_node_scan_plan_create, -}; - -static Path * -data_node_scan_path_create(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, double rows, - Cost startup_cost, Cost total_cost, List *pathkeys, - Relids required_outer, Path *fdw_outerpath, List *private) -{ - DataNodeScanPath *scanpath = palloc0(sizeof(DataNodeScanPath)); - - if (rel->lateral_relids && !bms_is_subset(rel->lateral_relids, required_outer)) - required_outer = bms_union(required_outer, rel->lateral_relids); - - if (!bms_is_empty(required_outer) && !IS_SIMPLE_REL(rel)) - elog(ERROR, "parameterized foreign joins are not supported yet"); - - scanpath->cpath.path.type = T_CustomPath; - scanpath->cpath.path.pathtype = T_CustomScan; - scanpath->cpath.custom_paths = fdw_outerpath == NULL ? NIL : list_make1(fdw_outerpath); - scanpath->cpath.methods = &data_node_scan_path_methods; - scanpath->cpath.path.parent = rel; - scanpath->cpath.path.pathtarget = target ? target : rel->reltarget; - scanpath->cpath.path.param_info = get_baserel_parampathinfo(root, rel, required_outer); - scanpath->cpath.path.parallel_aware = false; - scanpath->cpath.path.parallel_safe = rel->consider_parallel; - scanpath->cpath.path.parallel_workers = 0; - scanpath->cpath.path.rows = rows; - scanpath->cpath.path.startup_cost = startup_cost; - scanpath->cpath.path.total_cost = total_cost; - scanpath->cpath.path.pathkeys = pathkeys; - - return &scanpath->cpath.path; -} - -/* - * data_node_join_path_create - * Creates a path corresponding to a scan of a foreign join, - * returning the pathnode. - * - * There is a usually-sane default for the pathtarget (rel->reltarget), - * so we let a NULL for "target" select that. - * - * The code is based on PostgreSQL's create_foreign_join_path function - * (version 15.1). 
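A condensed sketch of the fetcher selection above: COPY cannot carry query parameters, so a parameterized scan falls back to the prepared-statement fetcher when the GUC is left on "auto". The enum member names follow the ones used in this file, but the stand-in enum ordering and the helper function are purely illustrative:

typedef enum
{
	AutoFetcherType,              /* stand-in mirror of the fetcher enum */
	CopyFetcherType,
	CursorFetcherType,
	PreparedStatementFetcherType
} DataFetcherType;

/* Hypothetical helper, not part of the original code. */
static DataFetcherType
choose_fetcher_type(DataFetcherType planned, int nparams, DataFetcherType guc_setting)
{
	if (planned == CopyFetcherType && nparams > 0 && guc_setting == AutoFetcherType)
		return PreparedStatementFetcherType;

	return planned;
}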
- */ -static Path * -data_node_join_path_create(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, double rows, - Cost startup_cost, Cost total_cost, List *pathkeys, - Relids required_outer, Path *fdw_outerpath, List *private) -{ - DataNodeScanPath *scanpath = palloc0(sizeof(DataNodeScanPath)); - -#ifdef ENABLE_DEAD_CODE - if (rel->lateral_relids && !bms_is_subset(rel->lateral_relids, required_outer)) - required_outer = bms_union(required_outer, rel->lateral_relids); - - /* - * We should use get_joinrel_parampathinfo to handle parameterized paths, - * but the API of this function doesn't support it, and existing - * extensions aren't yet trying to build such paths anyway. For the - * moment just throw an error if someone tries it; eventually we should - * revisit this. - */ - if (!bms_is_empty(required_outer) || !bms_is_empty(rel->lateral_relids)) - elog(ERROR, "parameterized foreign joins are not supported yet"); -#endif - - scanpath->cpath.path.type = T_CustomPath; - scanpath->cpath.path.pathtype = T_CustomScan; - scanpath->cpath.custom_paths = fdw_outerpath == NULL ? NIL : list_make1(fdw_outerpath); - scanpath->cpath.methods = &data_node_scan_path_methods; - scanpath->cpath.path.parent = rel; - scanpath->cpath.path.pathtarget = target ? target : rel->reltarget; - scanpath->cpath.path.param_info = NULL; /* XXX see above */ - scanpath->cpath.path.parallel_aware = false; - scanpath->cpath.path.parallel_safe = rel->consider_parallel; - scanpath->cpath.path.parallel_workers = 0; - scanpath->cpath.path.rows = rows; - scanpath->cpath.path.startup_cost = startup_cost; - scanpath->cpath.path.total_cost = total_cost; - scanpath->cpath.path.pathkeys = pathkeys; - - return &scanpath->cpath.path; -} - -static Path * -data_node_scan_upper_path_create(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, - double rows, Cost startup_cost, Cost total_cost, List *pathkeys, - Path *fdw_outerpath, List *private) -{ - DataNodeScanPath *scanpath = palloc0(sizeof(DataNodeScanPath)); - - /* - * Upper relations should never have any lateral references, since joining - * is complete. - */ - Assert(bms_is_empty(rel->lateral_relids)); - - scanpath->cpath.path.type = T_CustomPath; - scanpath->cpath.path.pathtype = T_CustomScan; - scanpath->cpath.custom_paths = fdw_outerpath == NULL ? NIL : list_make1(fdw_outerpath); - scanpath->cpath.methods = &data_node_scan_path_methods; - scanpath->cpath.path.parent = rel; - scanpath->cpath.path.pathtarget = target ? target : rel->reltarget; - scanpath->cpath.path.param_info = NULL; - scanpath->cpath.path.parallel_aware = false; - scanpath->cpath.path.parallel_safe = rel->consider_parallel; - scanpath->cpath.path.parallel_workers = 0; - scanpath->cpath.path.rows = rows; - scanpath->cpath.path.startup_cost = startup_cost; - scanpath->cpath.path.total_cost = total_cost; - scanpath->cpath.path.pathkeys = pathkeys; - - return &scanpath->cpath.path; -} diff --git a/tsl/src/fdw/data_node_scan_plan.h b/tsl/src/fdw/data_node_scan_plan.h deleted file mode 100644 index f842ee4f405..00000000000 --- a/tsl/src/fdw/data_node_scan_plan.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#pragma once - -#include -#include -#include - -#define DATA_NODE_SCAN_PATH_NAME "DataNodeScanPath" - -extern void data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel); -extern void data_node_scan_create_upper_paths(PlannerInfo *root, UpperRelationKind stage, - RelOptInfo *input_rel, RelOptInfo *output_rel, - void *extra); - -extern void data_node_generate_pushdown_join_paths(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - JoinType jointype, JoinPathExtraData *extra); - -/* Indexes of fields in ForeignScan->custom_private */ -typedef enum -{ - DataNodeScanFdwPrivate, - DataNodeScanSystemcol, - DataNodeScanFetcherType, -} DataNodeScanPrivateIndex; diff --git a/tsl/src/fdw/deparse.c b/tsl/src/fdw/deparse.c deleted file mode 100644 index 4c0623093b8..00000000000 --- a/tsl/src/fdw/deparse.c +++ /dev/null @@ -1,3437 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ - -/* - * This file contains source code that was copied and/or modified from - * the PostgreSQL database, which is licensed under the open-source - * PostgreSQL License. Please see the NOTICE at the top level - * directory for a copy of the PostgreSQL License. - */ - -/*------------------------------------------------------------------------- - * - * deparse.c - * Query deparser for postgres_fdw - * - * This file includes functions that examine query WHERE clauses to see - * whether they're safe to send to the data node for execution, as - * well as functions to construct the query text to be sent. The latter - * functionality is annoyingly duplicative of ruleutils.c, but there are - * enough special considerations that it seems best to keep this separate. - * One saving grace is that we only need deparse logic for node types that - * we consider safe to send. - * - * We assume that the remote session's search_path is exactly "pg_catalog", - * and thus we need schema-qualify all and only names outside pg_catalog. - * - * We consider collations and COLLATE expressions safe to send since we assume - * that all nodes of a distributed hypertable has the same configuration - * w.r.t. collations. - * - * Portions Copyright (c) 2012-2017, PostgreSQL Global Development Group - * - * IDENTIFICATION - * contrib/postgres_fdw/deparse.c - * - *------------------------------------------------------------------------- - */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "relinfo.h" -#include "deparse.h" -#include "shippable.h" -#include "utils.h" -#include "scan_plan.h" -#include "extension_constants.h" -#include "partialize_finalize.h" -#include "nodes/gapfill/gapfill.h" -#include "planner/planner.h" - -/* - * Global context for foreign_expr_walker's search of an expression tree. 
- */ -typedef struct foreign_glob_cxt -{ - PlannerInfo *root; /* global planner state */ - RelOptInfo *foreignrel; /* the foreign relation we are planning for */ - Relids relids; /* relids of base relations in the underlying - * scan */ -} foreign_glob_cxt; - -/* - * Context for deparseExpr - */ -typedef struct deparse_expr_cxt -{ - PlannerInfo *root; /* global planner state */ - RelOptInfo *foreignrel; /* the foreign relation we are planning for */ - RelOptInfo *scanrel; /* the underlying scan relation. Same as - * foreignrel, when that represents a join or - * a base relation. */ - StringInfo buf; /* output buffer to append to */ - List **params_list; /* exprs that will become remote Params */ - DataNodeChunkAssignment *sca; -} deparse_expr_cxt; - -#define REL_ALIAS_PREFIX "r" -/* Handy macro to add relation name qualification */ -#define ADD_REL_QUALIFIER(buf, varno) appendStringInfo((buf), "%s%d.", REL_ALIAS_PREFIX, (varno)) -#define SUBQUERY_REL_ALIAS_PREFIX "s" -#define SUBQUERY_COL_ALIAS_PREFIX "c" - -/* Oids of mutable functions determined to safe to pushdown to data nodes */ -static Oid PushdownSafeFunctionOIDs[] = { - F_DATE_CMP_TIMESTAMPTZ, - F_DATE_IN, - F_DATE_OUT, - F_INTERVAL_IN, - F_INTERVAL_PL, - F_NOW, /* Special case, this will be evaluated prior to pushdown */ - F_TIMESTAMPTZ_CMP_DATE, - F_TIMESTAMPTZ_CMP_TIMESTAMP, - F_TIMESTAMPTZ_DATE, - F_TIMESTAMPTZ_EQ_DATE, - F_TIMESTAMPTZ_EQ_TIMESTAMP, - F_TIMESTAMPTZ_GE_DATE, - F_TIMESTAMPTZ_GE_TIMESTAMP, - F_TIMESTAMPTZ_GT_DATE, - F_TIMESTAMPTZ_GT_TIMESTAMP, - F_TIMESTAMPTZ_IN, - F_TIMESTAMPTZ_LE_DATE, - F_TIMESTAMPTZ_LE_TIMESTAMP, - F_TIMESTAMPTZ_LT_DATE, - F_TIMESTAMPTZ_LT_TIMESTAMP, - F_TIMESTAMPTZ_MI_INTERVAL, - F_TIMESTAMPTZ_NE_DATE, - F_TIMESTAMPTZ_NE_TIMESTAMP, - F_TIMESTAMPTZ_OUT, - F_TIMESTAMPTZ_PL_INTERVAL, - F_TIMESTAMPTZ_TIMESTAMP, - F_TIMESTAMP_CMP_TIMESTAMPTZ, - F_TIMESTAMP_EQ_TIMESTAMPTZ, - F_TIMESTAMP_GE_TIMESTAMPTZ, - F_TIMESTAMP_GT_TIMESTAMPTZ, - F_TIMESTAMP_LE_TIMESTAMPTZ, - F_TIMESTAMP_LT_TIMESTAMPTZ, - F_TIMESTAMP_MI_INTERVAL, - F_TIMESTAMP_NE_TIMESTAMPTZ, - F_TIMESTAMP_PL_INTERVAL, - F_TIMESTAMP_TIMESTAMPTZ, - F_TIMETZ_IN, - F_TIME_IN, - F_TIME_TIMETZ, -#if PG14_LT - F_INTERVAL_PART, - F_MAKE_TIMESTAMPTZ, - F_MAKE_TIMESTAMPTZ_AT_TIMEZONE, - F_TIMESTAMPTZ_PART, - F_TIMESTAMPTZ_TIME, - F_TIMESTAMPTZ_TIMETZ, - F_TIMESTAMPTZ_TRUNC, - F_TIMESTAMPTZ_TRUNC_ZONE, - F_TO_TIMESTAMP, -#elif PG14_GE - F_DATE_PART_TEXT_INTERVAL, - F_MAKE_TIMESTAMPTZ_INT4_INT4_INT4_INT4_INT4_FLOAT8, - F_MAKE_TIMESTAMPTZ_INT4_INT4_INT4_INT4_INT4_FLOAT8_TEXT, - F_DATE_PART_TEXT_TIMESTAMPTZ, - F_TIME_TIMESTAMPTZ, - F_TIMETZ_TIMESTAMPTZ, - F_DATE_TRUNC_TEXT_TIMESTAMPTZ, - F_DATE_TRUNC_TEXT_TIMESTAMPTZ_TEXT, - F_TO_TIMESTAMP_TEXT_TEXT, -#endif -}; -static const int NumPushdownSafeOIDs = - sizeof(PushdownSafeFunctionOIDs) / sizeof(PushdownSafeFunctionOIDs[0]); - -/* - * Functions to determine whether an expression can be evaluated safely on - * data node. - */ -static bool foreign_expr_walker(Node *node, foreign_glob_cxt *glob_cxt); -static char *deparse_type_name(Oid type_oid, int32 typemod); - -/* - * Functions to construct string representation of a node tree. 
- */ -static void deparseTargetList(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - bool is_returning, Bitmapset *attrs_used, bool qualify_col, - List **retrieved_attrs); -static void deparseExplicitTargetList(List *tlist, bool is_returning, List **retrieved_attrs, - deparse_expr_cxt *context); -static void deparseSubqueryTargetList(deparse_expr_cxt *context); -static void deparseReturningList(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - bool trig_after_row, List *returningList, List **retrieved_attrs); -static void deparseColumnRef(StringInfo buf, int varno, int varattno, RangeTblEntry *rte, - bool qualify_col); -static void deparseRelation(StringInfo buf, Relation rel); -static void deparseExpr(Expr *node, deparse_expr_cxt *context); -static void deparseVar(Var *node, deparse_expr_cxt *context); -static void deparseConst(Const *node, deparse_expr_cxt *context, int showtype); -static void deparseParam(Param *node, deparse_expr_cxt *context); -static void deparseSubscriptingRef(SubscriptingRef *node, deparse_expr_cxt *context); -static void deparseFuncExpr(FuncExpr *node, deparse_expr_cxt *context); -static void deparseOpExpr(OpExpr *node, deparse_expr_cxt *context); -static void deparseOperatorName(StringInfo buf, Form_pg_operator opform); -static void deparseDistinctExpr(DistinctExpr *node, deparse_expr_cxt *context); -static void deparseScalarArrayOpExpr(ScalarArrayOpExpr *node, deparse_expr_cxt *context); -static void deparseRelabelType(RelabelType *node, deparse_expr_cxt *context); -static void deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context); -static void deparseNullTest(NullTest *node, deparse_expr_cxt *context); -static void deparseArrayExpr(ArrayExpr *node, deparse_expr_cxt *context); -static void printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod, - deparse_expr_cxt *context); -static void printRemotePlaceholder(Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); -static void deparseSelectSql(List *tlist, bool is_subquery, List **retrieved_attrs, - deparse_expr_cxt *context, List *pathkeys); -static void deparseLockingClause(deparse_expr_cxt *context); -static void appendOrderByClause(List *pathkeys, deparse_expr_cxt *context); -static void appendLimit(deparse_expr_cxt *context, List *pathkeys); - -static void append_chunk_exclusion_condition(deparse_expr_cxt *context, bool use_alias); -static void appendConditions(List *exprs, deparse_expr_cxt *context, bool is_first); -static void deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, - bool use_alias, Index ignore_rel, List **ignore_conds, - List **params_list, DataNodeChunkAssignment *sca); -static void deparseFromExpr(List *quals, deparse_expr_cxt *context); -static void deparseRangeTblRef(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, - bool make_subquery, Index ignore_rel, List **ignore_conds, - List **params_list, DataNodeChunkAssignment *sca); -static void deparseAggref(Aggref *node, deparse_expr_cxt *context); -static void appendGroupByClause(List *tlist, deparse_expr_cxt *context); -static void appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context); -static void appendFunctionName(Oid funcid, deparse_expr_cxt *context); -static Node *deparseSortGroupClause(Index ref, List *tlist, bool force_colno, - deparse_expr_cxt *context); -static bool column_qualification_needed(deparse_expr_cxt *context); - -/* - * Helper functions - */ -static bool is_subquery_var(Var *node, RelOptInfo 
*foreignrel, int *relno, int *colno); -static void get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, int *relno, - int *colno); - -/* - * Examine each qual clause in input_conds, and classify them into two groups, - * which are returned as two lists: - * - remote_conds contains expressions that can be evaluated remotely - * - local_conds contains expressions that can't be evaluated remotely - */ -void -classify_conditions(PlannerInfo *root, RelOptInfo *baserel, List *input_conds, List **remote_conds, - List **local_conds) -{ - ListCell *lc; - - *remote_conds = NIL; - *local_conds = NIL; - - foreach (lc, input_conds) - { - RestrictInfo *ri = lfirst_node(RestrictInfo, lc); - - if (ts_is_foreign_expr(root, baserel, ri->clause)) - *remote_conds = lappend(*remote_conds, ri); - else - *local_conds = lappend(*local_conds, ri); - } -} - -static int -oid_comparator(const void *a, const void *b) -{ - if (*(Oid *) a == *(Oid *) b) - return 0; - else if (*(Oid *) a < *(Oid *) b) - return -1; - else - return 1; -} - -static bool -function_is_whitelisted(Oid func_id) -{ - static bool PushdownOIDsSorted = false; - - if (!PushdownOIDsSorted) - { - qsort(PushdownSafeFunctionOIDs, NumPushdownSafeOIDs, sizeof(Oid), oid_comparator); - PushdownOIDsSorted = true; - } - - return bsearch(&func_id, - PushdownSafeFunctionOIDs, - NumPushdownSafeOIDs, - sizeof(Oid), - oid_comparator) != NULL; -} - -/* - * Check for mutable functions in an expression. - * - * This code is based on the corresponding PostgreSQL function, but with extra - * handling to whitelist some bucketing functions that we know are safe to - * push down despite mutability. - */ -static bool -contain_mutable_functions_checker(Oid func_id, void *context) -{ - FuncInfo *finfo = ts_func_cache_get_bucketing_func(func_id); - - /* We treat all bucketing functions as shippable, even date_trunc(text, - * timestamptz). We do this special case for bucketing functions until we - * can figure out a more consistent way to deal with functions taking, - * e.g., timestamptz parameters since we ensure that all connections to - * other nodes have the access node's timezone setting. */ - if (NULL != finfo) - return false; - - if (func_volatile(func_id) == PROVOLATILE_IMMUTABLE) - return false; - - /* Certain functions are mutable but are known to safe to push down to the data node. */ - if (function_is_whitelisted(func_id)) - return false; - -#ifndef NDEBUG - /* Special debug functions that we want to ship to data nodes. */ - const char debug_func_prefix[] = "ts_debug_shippable_"; - if (strncmp(get_func_name(func_id), debug_func_prefix, strlen(debug_func_prefix)) == 0) - { - return false; - } -#endif - - return true; -} - -/* - * Expression walker based on the corresponding PostgreSQL function. We're - * using a custom checker function, so need a modifed version of this walker. - */ -static bool -contain_mutable_functions_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - /* Check for mutable functions in node itself */ - if (check_functions_in_node(node, contain_mutable_functions_checker, context)) - return true; - - if (IsA(node, SQLValueFunction)) - { - /* all variants of SQLValueFunction are stable */ - return true; - } - - if (IsA(node, NextValueExpr)) - { - /* NextValueExpr is volatile */ - return true; - } - - /* - * It should be safe to treat MinMaxExpr as immutable, because it will - * depend on a non-cross-type btree comparison function, and those should - * always be immutable. 
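The whitelist lookup above sorts the OID array lazily on first use and then binary-searches it on every call. A self-contained sketch of that sort-once/bsearch idiom, with made-up IDs standing in for function OIDs:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned int whitelist[] = { 1003, 7, 256, 42 };
#define NWHITELIST (sizeof(whitelist) / sizeof(whitelist[0]))

static int
id_comparator(const void *a, const void *b)
{
	unsigned int x = *(const unsigned int *) a;
	unsigned int y = *(const unsigned int *) b;

	return (x > y) - (x < y);
}

/* Sort the table on the first call only, then look ids up with bsearch. */
static bool
id_is_whitelisted(unsigned int id)
{
	static bool sorted = false;

	if (!sorted)
	{
		qsort(whitelist, NWHITELIST, sizeof(whitelist[0]), id_comparator);
		sorted = true;
	}

	return bsearch(&id, whitelist, NWHITELIST, sizeof(whitelist[0]), id_comparator) != NULL;
}

int
main(void)
{
	printf("7 whitelisted: %d\n", (int) id_is_whitelisted(7)); /* prints 1 */
	printf("8 whitelisted: %d\n", (int) id_is_whitelisted(8)); /* prints 0 */
	return 0;
}

The deleted code applies the same pattern to PushdownSafeFunctionOIDs, sorting the array the first time function_is_whitelisted() is called.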
Treating XmlExpr as immutable is more dubious, - * and treating CoerceToDomain as immutable is outright dangerous. But we - * have done so historically, and changing this would probably cause more - * problems than it would fix. In practice, if you have a non-immutable - * domain constraint you are in for pain anyhow. - */ - - /* Recurse to check arguments */ - if (IsA(node, Query)) - { - /* Recurse into subselects */ - return query_tree_walker((Query *) node, contain_mutable_functions_walker, context, 0); - } - return expression_tree_walker(node, contain_mutable_functions_walker, context); -} - -static bool -foreign_expr_contains_mutable_functions(Node *clause) -{ - return contain_mutable_functions_walker(clause, NULL); -} - -/* - * Returns true if given expr is safe to evaluate on the data node. - */ -bool -ts_is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, Expr *expr) -{ - foreign_glob_cxt glob_cxt; - TsFdwRelInfo *fpinfo = fdw_relinfo_get(baserel); - - /* - * Check that the expression consists of nodes that are safe to execute - * remotely. - */ - glob_cxt.root = root; - glob_cxt.foreignrel = baserel; - - /* - * For an upper relation, use relids from its underneath scan relation, - * because the upperrel's own relids currently aren't set to anything - * meaningful by the core code. For other relation, use their own relids. - */ - if (IS_UPPER_REL(baserel)) - glob_cxt.relids = fpinfo->outerrel->relids; - else - glob_cxt.relids = baserel->relids; - - if (!foreign_expr_walker((Node *) expr, &glob_cxt)) - return false; - - /* - * It is not supported to execute time_bucket_gapfill on data node. - */ - if (gapfill_in_expression(expr)) - return false; - - /* - * An expression which includes any mutable functions can't be sent over - * because its result is not stable. For example, sending now() remote - * side could cause confusion from clock offsets. Future versions might - * be able to make this choice with more granularity. (We check this last - * because it requires a lot of expensive catalog lookups.) - */ - if (foreign_expr_contains_mutable_functions((Node *) expr)) - return false; - - /* OK to evaluate on the data node */ - return true; -} - -/* - * Check if expression is safe to execute remotely, and return true if so. - * - * We must check that the expression contains only node types we can deparse, - * that all types/functions/operators are safe to send (they are "shippable"), - * and that we aren't sending Var references to system columns. - * - * We do not care about collations because we assume that data nodes have - * identical configuration as the access node. - * - * Note function mutability is not currently considered here. - */ -static bool -foreign_expr_walker(Node *node, foreign_glob_cxt *glob_cxt) -{ - bool check_type = true; - TsFdwRelInfo *fpinfo; - - /* Need do nothing for empty subexpressions */ - if (node == NULL) - return true; - - fpinfo = fdw_relinfo_get(glob_cxt->foreignrel); - - switch (nodeTag(node)) - { - case T_Var: - { - Var *var = castNode(Var, node); - - if (bms_is_member(var->varno, glob_cxt->relids) && var->varlevelsup == 0) - { - /* Var belongs to foreign table */ - - /* - * System columns other than ctid and oid should not be - * sent to the remote, since we don't make any effort to - * ensure that local and remote values match (tableoid, in - * particular, almost certainly doesn't match). 
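For context, the whitelist test used above comes down to sorting a static array of function OIDs once and binary-searching it on every call. A minimal standalone sketch of that pattern; the oid_t type, the example OID values, and the function names here are stand-ins, not the extension's actual PushdownSafeFunctionOIDs table:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef unsigned int oid_t; /* stand-in for PostgreSQL's Oid */

    /* Hypothetical whitelist of mutable-but-safe functions (example values). */
    static oid_t safe_oids[] = { 2021, 1299, 710 };
    static const size_t num_safe_oids = sizeof(safe_oids) / sizeof(safe_oids[0]);

    static int
    oid_cmp(const void *a, const void *b)
    {
        oid_t lhs = *(const oid_t *) a;
        oid_t rhs = *(const oid_t *) b;
        return (lhs > rhs) - (lhs < rhs);
    }

    static bool
    oid_is_whitelisted(oid_t func_oid)
    {
        static bool sorted = false;

        if (!sorted)
        {
            /* Sort lazily on first use, then reuse the sorted array. */
            qsort(safe_oids, num_safe_oids, sizeof(oid_t), oid_cmp);
            sorted = true;
        }
        return bsearch(&func_oid, safe_oids, num_safe_oids, sizeof(oid_t), oid_cmp) != NULL;
    }

    int
    main(void)
    {
        printf("710 whitelisted: %d\n", oid_is_whitelisted(710)); /* 1 */
        printf("42 whitelisted: %d\n", oid_is_whitelisted(42));   /* 0 */
        return 0;
    }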
- */ - if (var->varattno < 0 && var->varattno != SelfItemPointerAttributeNumber) - return false; - } - } - break; - case T_Const: - /* Consts are OK to execute remotely */ - break; - case T_Param: - /* Params are also OK to execute remotely */ - break; - case T_SubscriptingRef: - { - SubscriptingRef *ar = castNode(SubscriptingRef, node); - - /* Assignment should not be in restrictions. */ - if (ar->refassgnexpr != NULL) - return false; - - /* - * Recurse to remaining subexpressions. Since the array - * subscripts must yield (noncollatable) integers, they won't - * affect the inner_cxt state. - */ - if (!foreign_expr_walker((Node *) ar->refupperindexpr, glob_cxt)) - return false; - if (!foreign_expr_walker((Node *) ar->reflowerindexpr, glob_cxt)) - return false; - if (!foreign_expr_walker((Node *) ar->refexpr, glob_cxt)) - return false; - } - break; - case T_FuncExpr: - { - FuncExpr *fe = castNode(FuncExpr, node); - - /* - * If function used by the expression is not shippable, it - * can't be sent to remote because it might have incompatible - * semantics on remote side. - */ - if (!is_shippable(fe->funcid, ProcedureRelationId, fpinfo)) - return false; - - /* - * Recurse to input subexpressions. - */ - if (!foreign_expr_walker((Node *) fe->args, glob_cxt)) - return false; - } - break; - case T_OpExpr: - case T_DistinctExpr: /* struct-equivalent to OpExpr */ - { - OpExpr *oe = (OpExpr *) node; - - /* - * Similarly, only shippable operators can be sent to remote. - * (If the operator is shippable, we assume its underlying - * function is too.) - */ - if (!is_shippable(oe->opno, OperatorRelationId, fpinfo)) - return false; - - /* - * Recurse to input subexpressions. - */ - if (!foreign_expr_walker((Node *) oe->args, glob_cxt)) - return false; - } - break; - case T_ScalarArrayOpExpr: - { - ScalarArrayOpExpr *oe = castNode(ScalarArrayOpExpr, node); - - /* - * Again, only shippable operators can be sent to remote. - */ - if (!is_shippable(oe->opno, OperatorRelationId, fpinfo)) - return false; - - /* - * Recurse to input subexpressions. - */ - if (!foreign_expr_walker((Node *) oe->args, glob_cxt)) - return false; - } - break; - case T_RelabelType: - { - RelabelType *r = castNode(RelabelType, node); - - /* - * Recurse to input subexpression. - */ - if (!foreign_expr_walker((Node *) r->arg, glob_cxt)) - return false; - } - break; - case T_BoolExpr: - { - BoolExpr *b = castNode(BoolExpr, node); - - /* - * Recurse to input subexpressions. - */ - if (!foreign_expr_walker((Node *) b->args, glob_cxt)) - return false; - } - break; - case T_NullTest: - { - NullTest *nt = castNode(NullTest, node); - - /* - * Recurse to input subexpressions. - */ - if (!foreign_expr_walker((Node *) nt->arg, glob_cxt)) - return false; - } - break; - case T_ArrayExpr: - { - ArrayExpr *a = castNode(ArrayExpr, node); - - /* - * Recurse to input subexpressions. - */ - if (!foreign_expr_walker((Node *) a->elements, glob_cxt)) - return false; - } - break; - case T_List: - { - List *l = castNode(List, node); - ListCell *lc; - - /* - * Recurse to component subexpressions. - */ - foreach (lc, l) - { - if (!foreign_expr_walker((Node *) lfirst(lc), glob_cxt)) - return false; - } - /* Don't apply exprType() to the list. */ - check_type = false; - } - break; - case T_Aggref: - { - Aggref *agg = castNode(Aggref, node); - ListCell *lc; - - /* Not safe to pushdown when not in grouping context */ - if (!IS_UPPER_REL(glob_cxt->foreignrel)) - return false; - - /* As usual, it must be shippable. 
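As a rough standalone analogue of the walker above: recurse over an expression tree and return false as soon as a node kind outside the supported set is seen. The node kinds and tree layout below are invented for the sketch; the real walker additionally checks operator, function, and type shippability through the catalogs:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef enum { N_VAR, N_CONST, N_OPEXPR, N_UNKNOWN } node_kind;

    typedef struct expr_node
    {
        node_kind kind;
        struct expr_node *args[2]; /* unused slots are NULL */
    } expr_node;

    /* True only if every node in the tree is a supported kind. */
    static bool
    expr_is_shippable(const expr_node *node)
    {
        if (node == NULL)
            return true; /* empty subexpression: nothing to reject */

        switch (node->kind)
        {
            case N_VAR:
            case N_CONST:
                return true;
            case N_OPEXPR:
                return expr_is_shippable(node->args[0]) &&
                       expr_is_shippable(node->args[1]);
            default:
                return false; /* anything unrecognized is assumed unsafe */
        }
    }

    int
    main(void)
    {
        expr_node var = { N_VAR, { NULL, NULL } };
        expr_node unk = { N_UNKNOWN, { NULL, NULL } };
        expr_node op = { N_OPEXPR, { &var, &unk } };

        printf("shippable: %d\n", expr_is_shippable(&op)); /* prints 0 */
        return 0;
    }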
*/ - if (!is_shippable(agg->aggfnoid, ProcedureRelationId, fpinfo)) - return false; - - /* - * Recurse to input args. aggdirectargs, aggorder and - * aggdistinct are all present in args, so no need to check - * their shippability explicitly. - */ - foreach (lc, agg->args) - { - Node *n = (Node *) lfirst(lc); - - /* If TargetEntry, extract the expression from it */ - if (IsA(n, TargetEntry)) - { - TargetEntry *tle = castNode(TargetEntry, n); - - n = (Node *) tle->expr; - } - - if (!foreign_expr_walker(n, glob_cxt)) - return false; - } - - /* - * For aggorder elements, check whether the sort operator, if - * specified, is shippable or not. - */ - if (agg->aggorder) - { - ListCell *lc; - - foreach (lc, agg->aggorder) - { - SortGroupClause *srt = lfirst_node(SortGroupClause, lc); - Oid sortcoltype; - TypeCacheEntry *typentry; - TargetEntry *tle; - - tle = get_sortgroupref_tle(srt->tleSortGroupRef, agg->args); - sortcoltype = exprType((Node *) tle->expr); - typentry = lookup_type_cache(sortcoltype, TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); - /* Check shippability of non-default sort operator. */ - if (srt->sortop != typentry->lt_opr && srt->sortop != typentry->gt_opr && - !is_shippable(srt->sortop, OperatorRelationId, fpinfo)) - return false; - } - } - - /* Check aggregate filter */ - if (!foreign_expr_walker((Node *) agg->aggfilter, glob_cxt)) - return false; - } - break; - default: - - /* - * If it's anything else, assume it's unsafe. This list can be - * expanded later, but don't forget to add deparse support below. - */ - return false; - } - - /* - * If result type of given expression is not shippable, it can't be sent - * to remote because it might have incompatible semantics on remote side. - */ - if (check_type && !is_shippable(exprType(node), TypeRelationId, fpinfo)) - return false; - - /* It looks OK */ - return true; -} - -/* - * Convert type OID + typmod info into a type name we can ship to the data - * node. Someplace else had better have verified that this type name is - * expected to be known on the remote end. - * - * This is almost just format_type_with_typemod(), except that if left to its - * own devices, that function will make schema-qualification decisions based - * on the local search_path, which is wrong. We must schema-qualify all - * type names that are not in pg_catalog. We assume here that built-in types - * are all in pg_catalog and need not be qualified; otherwise, qualify. - */ -static char * -deparse_type_name(Oid type_oid, int32 typemod) -{ - bits16 flags = FORMAT_TYPE_TYPEMOD_GIVEN; - - if (!is_builtin(type_oid)) - flags |= FORMAT_TYPE_FORCE_QUALIFY; - - return format_type_extended(type_oid, typemod, flags); -} - -/* - * Build the targetlist for given relation to be deparsed as SELECT clause. - * - * The output targetlist contains the columns that need to be fetched from the - * data node for the given relation. If foreignrel is an upper relation, - * then the output targetlist can also contain expressions to be evaluated on - * data node. - */ -List * -build_tlist_to_deparse(RelOptInfo *foreignrel) -{ - List *tlist = NIL; - TsFdwRelInfo *fpinfo = fdw_relinfo_get(foreignrel); - ListCell *lc; - - /* - * For an upper relation, we have already built the target list while - * checking shippability, so just return that. - */ - if (IS_UPPER_REL(foreignrel)) - return fpinfo->grouped_tlist; - /* - * We require columns specified in foreignrel->reltarget->exprs and those - * required for evaluating the local conditions. 
- */ - - tlist = add_to_flat_tlist(tlist, - pull_var_clause((Node *) foreignrel->reltarget->exprs, - PVC_RECURSE_PLACEHOLDERS)); - foreach (lc, fpinfo->local_conds) - { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - - tlist = - add_to_flat_tlist(tlist, - pull_var_clause((Node *) rinfo->clause, PVC_RECURSE_PLACEHOLDERS)); - } - - return tlist; -} - -/* - * Deparse SELECT statement for given relation into buf. - * - * tlist contains the list of desired columns to be fetched from data node. - * For a base relation fpinfo->attrs_used is used to construct SELECT clause, - * hence the tlist is ignored for a base relation. - * - * remote_where is the list of conditions to be deparsed into the WHERE clause, - * and remote_having into the HAVING clause (this is useful for upper relations). - * - * If params_list is not NULL, it receives a list of Params and other-relation - * Vars used in the clauses; these values must be transmitted to the data - * node as parameter values. - * - * If params_list is NULL, we're generating the query for EXPLAIN purposes, - * so Params and other-relation Vars should be replaced by dummy values. - * - * pathkeys is the list of pathkeys to order the result by. - * - * is_subquery is the flag to indicate whether to deparse the specified - * relation as a subquery. - * - * List of columns selected is returned in retrieved_attrs. - */ -void -deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, List *tlist, - List *remote_where, List *remote_having, List *pathkeys, bool is_subquery, - List **retrieved_attrs, List **params_list, DataNodeChunkAssignment *sca) -{ - deparse_expr_cxt context; - TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel); - - /* - * We handle relations for foreign tables, joins between those and upper - * relations. - */ - Assert(IS_JOIN_REL(rel) || IS_SIMPLE_REL(rel) || IS_UPPER_REL(rel)); - - /* Fill portions of context common to upper, join and base relation */ - context.buf = buf; - context.root = root; - context.foreignrel = rel; - context.scanrel = IS_UPPER_REL(rel) ? fpinfo->outerrel : rel; - context.params_list = params_list; - context.sca = sca; - - /* Construct SELECT clause */ - deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context, pathkeys); - - /* Construct FROM and WHERE clauses */ - deparseFromExpr(remote_where, &context); - - if (IS_UPPER_REL(rel)) - { - /* Append GROUP BY clause */ - appendGroupByClause(tlist, &context); - - /* Append HAVING clause */ - if (remote_having) - { - appendStringInfoString(buf, " HAVING "); - appendConditions(remote_having, &context, true); - } - } - - /* Add ORDER BY clause if we found any useful pathkeys */ - if (pathkeys) - appendOrderByClause(pathkeys, &context); - - /* Add LIMIT if it is set and can be pushed */ - if (context.root->limit_tuples > 0.0) - appendLimit(&context, pathkeys); - - /* Add any necessary FOR UPDATE/SHARE. */ - deparseLockingClause(&context); -} - -/* - * Construct "SELECT DISTINCT target_list" or "SELECT DISTINCT ON (col1, col..) - * target_list" statement to push down the DISTINCT clause to the remote side. - * - * We only allow references to basic "Vars" or constants in the DISTINCT exprs - * - * So, "SELECT DISTINCT col1" is fine but "SELECT DISTINCT 2*col1" is not. - * - * "SELECT DISTINCT col1, 'const1', NULL, col2" which is a mix of column - * references and constants is also supported. Everything else is not supported. 
- * - * It should be noted that "SELECT DISTINCT col1, col2" will return the same - * set of values as "SELECT DISTINCT col2, col1". So nothing additional needs - * to be done here as the upper projection will take care of any ordering - * between the attributes. - * - * We also explicitly deparse the distinctClause entries only for the - * "DISTINCT ON (col..)" case. For regular DISTINCT the targetlist - * deparsing which happens later is good enough - */ -static void -deparseDistinctClause(StringInfo buf, deparse_expr_cxt *context, List *pathkeys) -{ - PlannerInfo *root = context->root; - Query *query = root->parse; - ListCell *l, *dc_l; - bool first = true, varno_assigned = false; - Index varno = 0; /* mostly to quell compiler warning, handled via varno_assigned */ - RangeTblEntry *dc_rte; - RangeTblEntry *rte; - - if (query->distinctClause == NIL) - return; - - foreach (l, query->distinctClause) - { - SortGroupClause *sgc = lfirst_node(SortGroupClause, l); - TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); - - /* - * We only send basic attributes to the remote side. So we can - * pushdown DISTINCT only if the tle is a simple one - * referring to the "Var" directly. Also all varno entries - * need to point to the same relid. - * - * Also handle "DISTINCT col1, CONST1, NULL" types cases - */ - if (IsA(tle->expr, Var)) - { - Var *var = castNode(Var, tle->expr); - - if (first) - { - varno = var->varno; - first = false; - varno_assigned = true; - } - - if (varno != (Index) var->varno) - return; - } - /* We only allow constants apart from vars, but we ignore them */ - else if (!IsA(tle->expr, Const)) - return; - } - - if (query->hasDistinctOn) - { - /* - * Pushing down DISTINCT ON is more complex than plain DISTINCT. - * The DISTINCT ON columns must be a prefix of the ORDER BY columns. - * Without this, the DISTINCT ON would return an unpredictable row - * each time. There is a diagnostic for the case where the ORDER BY - * clause doesn't match the DISTINCT ON clause, so in this case we - * would get an error on the data node. There is no diagnostic for - * the case where the ORDER BY is absent, so in this case we would - * get a wrong result. - * The remote ORDER BY clause is created from the pathkeys of the - * corresponding relation. If the DISTINCT ON columns are not a prefix - * of these pathkeys, we cannot push it down. - */ - ListCell *distinct_cell, *pathkey_cell; - forboth (distinct_cell, query->distinctClause, pathkey_cell, pathkeys) - { - SortGroupClause *sgc = lfirst_node(SortGroupClause, distinct_cell); - TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); - - PathKey *pk = lfirst_node(PathKey, pathkey_cell); - EquivalenceClass *ec = pk->pk_eclass; - - /* - * The find_ec_member_matching_expr() has many checks that don't seem - * to be relevant here. Enumerate the pathkey EquivalenceMembers by - * hand and find the one that matches the DISTINCT ON expression. - */ - ListCell *ec_member_cell; - foreach (ec_member_cell, ec->ec_members) - { - EquivalenceMember *ec_member = lfirst_node(EquivalenceMember, ec_member_cell); - if (equal(ec_member->em_expr, tle->expr)) - break; - } - - if (ec_member_cell == NULL) - { - /* - * Went through all the equivalence class members and didn't - * find a match. - */ - return; - } - } - - if (pathkey_cell == NULL && distinct_cell != NULL) - { - /* Ran out of pathkeys before we matched all the DISTINCT ON columns. 
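The prefix requirement above can be pictured with a small standalone check: DISTINCT ON is only considered for pushdown when its column list is a leading prefix of the ORDER BY list. Column identifiers are reduced to ints purely for the sketch:

    #include <stdbool.h>
    #include <stdio.h>

    /* True if the first ndistinct entries of orderby match distinct_on exactly. */
    static bool
    distinct_on_is_prefix(const int *distinct_on, int ndistinct,
                          const int *orderby, int norder)
    {
        if (ndistinct > norder)
            return false; /* ran out of ORDER BY columns */
        for (int i = 0; i < ndistinct; i++)
        {
            if (distinct_on[i] != orderby[i])
                return false;
        }
        return true;
    }

    int
    main(void)
    {
        int d[] = { 1, 2 };
        int o1[] = { 1, 2, 3 };
        int o2[] = { 2, 1, 3 };

        printf("%d %d\n",
               distinct_on_is_prefix(d, 2, o1, 3),  /* 1: pushdown possible */
               distinct_on_is_prefix(d, 2, o2, 3)); /* 0: keep it local */
        return 0;
    }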
*/ - return; - } - } - - /* If there are no varno entries in the distinctClause, we are done */ - if (!varno_assigned) - return; - - /* - * If all distinctClause entries point to our rte->relid then it's - * safe to push down to the datanode - * - * The only other case we allow is if the dc_rte->relid has the - * rte->relid as a child - */ - dc_rte = planner_rt_fetch(varno, root); - rte = planner_rt_fetch(context->foreignrel->relid, root); - - if (dc_rte->relid != rte->relid && ts_inheritance_parent_relid(rte->relid) != dc_rte->relid) - return; - - /* - * Ok to pushdown! - * - * The distinctClause entries will be referring to the - * varno pulled above, so adjust the scanrel temporarily - * for the deparsing of the distint clauses - * - * Note that we deparse the targetlist below only for the - * "DISTINCT ON" case. For DISTINCT, the regular targetlist - * deparsing later works - */ - if (query->hasDistinctOn) - { - char *sep = ""; - RelOptInfo *scanrel = context->scanrel; - - Assert(varno > 0 && varno < (Index) root->simple_rel_array_size); - context->scanrel = root->simple_rel_array[varno]; - - appendStringInfoString(buf, "DISTINCT ON ("); - - foreach (dc_l, query->distinctClause) - { - SortGroupClause *srt = lfirst_node(SortGroupClause, dc_l); - - appendStringInfoString(buf, sep); - deparseSortGroupClause(srt->tleSortGroupRef, query->targetList, false, context); - sep = ", "; - } - - appendStringInfoString(buf, ") "); - - /* reset scanrel to the earlier value now */ - context->scanrel = scanrel; - } - else - appendStringInfoString(buf, "DISTINCT "); -} - -/* - * Construct a simple SELECT statement that retrieves desired columns - * of the specified foreign table, and append it to "buf". The output - * contains just "SELECT ... ". - * - * We also create an integer List of the columns being retrieved, which is - * returned to *retrieved_attrs, unless we deparse the specified relation - * as a subquery. - * - * tlist is the list of desired columns. is_subquery is the flag to - * indicate whether to deparse the specified relation as a subquery. - * Read prologue of deparseSelectStmtForRel() for details. - */ -static void -deparseSelectSql(List *tlist, bool is_subquery, List **retrieved_attrs, deparse_expr_cxt *context, - List *pathkeys) -{ - StringInfo buf = context->buf; - RelOptInfo *foreignrel = context->foreignrel; - PlannerInfo *root = context->root; - TsFdwRelInfo *fpinfo = fdw_relinfo_get(foreignrel); - - /* - * Construct SELECT list - */ - appendStringInfoString(buf, "SELECT "); - - if (is_subquery) - { - /* - * For a relation that is deparsed as a subquery, emit expressions - * specified in the relation's reltarget. Note that since this is for - * the subquery, no need to care about *retrieved_attrs. - */ - deparseSubqueryTargetList(context); - } - else if (tlist != NIL || IS_JOIN_REL(foreignrel)) - { - /* - * For a join, hypertable-data node or upper relation the input tlist gives the list of - * columns required to be fetched from the data node. - */ - deparseExplicitTargetList(tlist, false, retrieved_attrs, context); - } - else - { - /* - * For a base relation fpinfo->attrs_used gives the list of columns - * required to be fetched from the data node. - */ - RangeTblEntry *rte = planner_rt_fetch(foreignrel->relid, root); - - /* - * Core code already has some lock on each rel being planned, so we - * can use NoLock here. 
- */ - Relation rel = table_open(rte->relid, NoLock); - - if (root->parse->distinctClause != NIL) - deparseDistinctClause(buf, context, pathkeys); - - deparseTargetList(buf, - rte, - foreignrel->relid, - rel, - false, - fpinfo->attrs_used, - false, - retrieved_attrs); - table_close(rel, NoLock); - } -} - -/* - * Construct a FROM clause and, if needed, a WHERE clause, and append those to - * "buf". - * - * quals is the list of clauses to be included in the WHERE clause. - * (These may or may not include RestrictInfo decoration.) - */ -static void -deparseFromExpr(List *quals, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - RelOptInfo *scanrel = context->scanrel; - /* Use alias if scan is on multiple rels, unless a per-data node scan */ - bool use_alias = column_qualification_needed(context); - - /* For upper relations, scanrel must be either a joinrel or a baserel */ - Assert(!IS_UPPER_REL(context->foreignrel) || IS_JOIN_REL(scanrel) || IS_SIMPLE_REL(scanrel)); - - /* Construct FROM clause */ - appendStringInfoString(buf, " FROM "); - deparseFromExprForRel(buf, - context->root, - scanrel, - use_alias, - (Index) 0, - NULL, - context->params_list, - context->sca); - - /* Construct WHERE clause */ - if (quals != NIL || context->sca != NULL) - appendStringInfoString(buf, " WHERE "); - - if (context->sca != NULL) - append_chunk_exclusion_condition(context, use_alias); - - if (quals != NIL) - appendConditions(quals, context, (context->sca == NULL)); -} - -/* - * Emit a target list that retrieves the columns specified in attrs_used. - * This is used for both SELECT and RETURNING targetlists; the is_returning - * parameter is true only for a RETURNING targetlist. - * - * The tlist text is appended to buf, and we also create an integer List - * of the columns being retrieved, which is returned to *retrieved_attrs. - * - * If qualify_col is true, add relation alias before the column name. - */ -static void -deparseTargetList(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - bool is_returning, Bitmapset *attrs_used, bool qualify_col, - List **retrieved_attrs) -{ - TupleDesc tupdesc = RelationGetDescr(rel); - bool have_wholerow; - bool first; - int i; - - *retrieved_attrs = NIL; - - /* If there's a whole-row reference, we'll need all the columns. */ - have_wholerow = bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used); - - first = true; - for (i = 1; i <= tupdesc->natts; i++) - { - Form_pg_attribute attr = TupleDescAttr(tupdesc, i - 1); - - /* Ignore dropped attributes. */ - if (attr->attisdropped) - continue; - - if (have_wholerow || bms_is_member(i - FirstLowInvalidHeapAttributeNumber, attrs_used)) - { - if (!first) - appendStringInfoString(buf, ", "); - else if (is_returning) - appendStringInfoString(buf, " RETURNING "); - first = false; - - deparseColumnRef(buf, rtindex, i, rte, qualify_col); - - *retrieved_attrs = lappend_int(*retrieved_attrs, i); - } - } - - /* - * Add ctid and oid if needed. We currently don't support retrieving any - * other system columns. 
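The target-list emission above follows a simple pattern: walk the tuple descriptor, skip dropped attributes, print the columns whose bits are set (or all of them for a whole-row reference), and fall back to a literal NULL when nothing survives. A standalone sketch using a plain bitmask in place of a Bitmapset, with made-up column names:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct column { const char *name; bool dropped; };

    /* Print a comma-separated SELECT list for the columns whose bit is set. */
    static void
    emit_target_list(char *buf, size_t buflen,
                     const struct column *cols, int ncols, unsigned attrs_used)
    {
        bool first = true;

        buf[0] = '\0';
        for (int i = 0; i < ncols; i++)
        {
            if (cols[i].dropped || !(attrs_used & (1u << i)))
                continue;
            if (!first)
                strncat(buf, ", ", buflen - strlen(buf) - 1);
            strncat(buf, cols[i].name, buflen - strlen(buf) - 1);
            first = false;
        }
        if (first) /* no undropped, referenced columns: avoid bad syntax */
            strncat(buf, "NULL", buflen - strlen(buf) - 1);
    }

    int
    main(void)
    {
        struct column cols[] = { { "time", false }, { "old", true }, { "value", false } };
        char buf[128];

        emit_target_list(buf, sizeof(buf), cols, 3, 0x5); /* bits for "time", "value" */
        printf("SELECT %s FROM metrics\n", buf);
        return 0;
    }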
- */ - if (bms_is_member(SelfItemPointerAttributeNumber - FirstLowInvalidHeapAttributeNumber, - attrs_used)) - { - if (!first) - appendStringInfoString(buf, ", "); - else if (is_returning) - appendStringInfoString(buf, " RETURNING "); - first = false; - - if (qualify_col) - ADD_REL_QUALIFIER(buf, rtindex); - appendStringInfoString(buf, "ctid"); - - *retrieved_attrs = lappend_int(*retrieved_attrs, SelfItemPointerAttributeNumber); - } - /* Don't generate bad syntax if no undropped columns */ - if (first && !is_returning) - appendStringInfoString(buf, "NULL"); -} - -/* - * Deparse the appropriate locking clause (FOR UPDATE or FOR SHARE) for a - * given relation (context->scanrel). - */ -static void -deparseLockingClause(deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - PlannerInfo *root = context->root; - RelOptInfo *rel = context->scanrel; - TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel); - int relid = -1; - - while ((relid = bms_next_member(rel->relids, relid)) >= 0) - { - /* - * Ignore relation if it appears in a lower subquery. Locking clause - * for such a relation is included in the subquery if necessary. - */ - if (bms_is_member(relid, fpinfo->lower_subquery_rels)) - continue; - - /* - * Add FOR UPDATE/SHARE if appropriate. We apply locking during the - * initial row fetch, rather than later on as is done for local - * tables. The extra roundtrips involved in trying to duplicate the - * local semantics exactly don't seem worthwhile (see also comments - * for RowMarkType). - * - * Note: because we actually run the query as a cursor, this assumes - * that DECLARE CURSOR ... FOR UPDATE is supported, which it isn't - * before 8.3. - */ - if (relid == root->parse->resultRelation && - (root->parse->commandType == CMD_UPDATE || root->parse->commandType == CMD_DELETE)) - { - /* Relation is UPDATE/DELETE target, so use FOR UPDATE */ - appendStringInfoString(buf, " FOR UPDATE"); - - /* Add the relation alias if we are here for a join relation */ - if (IS_JOIN_REL(rel)) - appendStringInfo(buf, " OF %s%d", REL_ALIAS_PREFIX, relid); - } - else - { - PlanRowMark *rc = get_plan_rowmark(root->rowMarks, relid); - - if (rc) - { - /* - * Relation is specified as a FOR UPDATE/SHARE target, so - * handle that. (But we could also see LCS_NONE, meaning this - * isn't a target relation after all.) - * - * For now, just ignore any [NO] KEY specification, since (a) - * it's not clear what that means for a remote table that we - * don't have complete information about, and (b) it wouldn't - * work anyway on older data nodes. Likewise, we don't - * worry about NOWAIT. - */ - switch (rc->strength) - { - case LCS_NONE: - /* No locking needed */ - break; - case LCS_FORKEYSHARE: - case LCS_FORSHARE: - appendStringInfoString(buf, " FOR SHARE"); - break; - case LCS_FORNOKEYUPDATE: - case LCS_FORUPDATE: - appendStringInfoString(buf, " FOR UPDATE"); - break; - } - - /* Add the relation alias if we are here for a join relation */ - if (bms_membership(rel->relids) == BMS_MULTIPLE && rc->strength != LCS_NONE) - appendStringInfo(buf, " OF %s%d", REL_ALIAS_PREFIX, relid); - } - } - } -} - -static void -append_chunk_exclusion_condition(deparse_expr_cxt *context, bool use_alias) -{ - StringInfo buf = context->buf; - DataNodeChunkAssignment *sca = context->sca; - RelOptInfo *scanrel = context->scanrel; - ListCell *lc; - bool first = true; - - appendStringInfoString(buf, FUNCTIONS_SCHEMA_NAME "." 
CHUNK_EXCL_FUNC_NAME "("); - - if (use_alias) - appendStringInfo(buf, "%s%d, ", REL_ALIAS_PREFIX, scanrel->relid); - else - { - /* use a qualfied relation name */ - RangeTblEntry *rte = planner_rt_fetch(scanrel->relid, context->root); - Relation rel = table_open(rte->relid, NoLock); - deparseRelation(buf, rel); - table_close(rel, NoLock); - /* We explicitly append expand operator `.*` to prevent - * confusing parser when using qualified name (otherwise parser believes that schema name is - * relation name) */ - appendStringInfoString(buf, ".*, "); - } - - appendStringInfo(buf, "ARRAY["); - foreach (lc, sca->remote_chunk_ids) - { - int remote_chunk_id = lfirst_int(lc); - - if (!first) - appendStringInfo(buf, ", "); - appendStringInfo(buf, "%d", remote_chunk_id); - - first = false; - } - appendStringInfo(buf, "])"); /* end array and function call */ -} - -/* - * Deparse conditions from the provided list and append them to buf. - * - * The conditions in the list are assumed to be ANDed. This function is used to - * deparse WHERE clauses, JOIN .. ON clauses and HAVING clauses. - * - * Depending on the caller, the list elements might be either RestrictInfos - * or bare clauses. - */ -static void -appendConditions(List *exprs, deparse_expr_cxt *context, bool is_first) -{ - int nestlevel; - ListCell *lc; - StringInfo buf = context->buf; - - /* Make sure any constants in the exprs are printed portably */ - nestlevel = set_transmission_modes(); - - foreach (lc, exprs) - { - Expr *expr = (Expr *) lfirst(lc); - - /* Extract clause from RestrictInfo, if required */ - if (IsA(expr, RestrictInfo)) - expr = ((RestrictInfo *) expr)->clause; - - /* Connect expressions with "AND" and parenthesize each condition. */ - if (!is_first) - appendStringInfoString(buf, " AND "); - - appendStringInfoChar(buf, '('); - deparseExpr(expr, context); - appendStringInfoChar(buf, ')'); - - is_first = false; - } - - reset_transmission_modes(nestlevel); -} - -/* - * Deparse given targetlist and append it to context->buf. - * - * tlist is list of TargetEntry's which in turn contain Var nodes. - * - * retrieved_attrs is the list of continuously increasing integers starting - * from 1. It has same number of entries as tlist. - * - * This is used for both SELECT and RETURNING targetlists; the is_returning - * parameter is true only for a RETURNING targetlist. - */ -static void -deparseExplicitTargetList(List *tlist, bool is_returning, List **retrieved_attrs, - deparse_expr_cxt *context) -{ - ListCell *lc; - StringInfo buf = context->buf; - int i = 0; - - *retrieved_attrs = NIL; - - foreach (lc, tlist) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (i > 0) - appendStringInfoString(buf, ", "); - else if (is_returning) - appendStringInfoString(buf, " RETURNING "); - - deparseExpr((Expr *) tle->expr, context); - - *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1); - i++; - } - - if (i == 0 && !is_returning) - appendStringInfoString(buf, "NULL"); -} - -/* - * Emit expressions specified in the given relation's reltarget. - * - * This is used for deparsing the given relation as a subquery. - */ -static void -deparseSubqueryTargetList(deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - RelOptInfo *foreignrel = context->foreignrel; - bool first; - ListCell *lc; - - /* Should only be called in these cases. 
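The chunk-exclusion condition assembled above ends up as a call to a chunks_in-style function taking the scanned relation and an array of remote chunk ids. A standalone sketch of how such a fragment might be built; the schema, function, and alias names are placeholders, not the constants used by the extension:

    #include <stdio.h>
    #include <string.h>

    /* Build e.g.: _hypothetical_schema.chunks_in(r1, ARRAY[10, 11, 12]).
     * Assumes buf is large enough for the result. */
    static void
    append_chunk_exclusion(char *buf, size_t buflen,
                           const char *rel_alias, const int *chunk_ids, int nchunks)
    {
        size_t off = strlen(buf);

        off += snprintf(buf + off, buflen - off,
                        "_hypothetical_schema.chunks_in(%s, ARRAY[", rel_alias);
        for (int i = 0; i < nchunks; i++)
            off += snprintf(buf + off, buflen - off, "%s%d",
                            i > 0 ? ", " : "", chunk_ids[i]);
        snprintf(buf + off, buflen - off, "])");
    }

    int
    main(void)
    {
        char where[256] = "WHERE ";
        int chunks[] = { 10, 11, 12 };

        append_chunk_exclusion(where, sizeof(where), "r1", chunks, 3);
        printf("%s\n", where);
        return 0;
    }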
*/ - Assert(IS_SIMPLE_REL(foreignrel) || IS_JOIN_REL(foreignrel)); - - first = true; - foreach (lc, foreignrel->reltarget->exprs) - { - Node *node = (Node *) lfirst(lc); - - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - deparseExpr((Expr *) node, context); - } - - /* Don't generate bad syntax if no expressions */ - if (first) - appendStringInfoString(buf, "NULL"); -} -/* Output join name for given join type */ -const char * -get_jointype_name(JoinType jointype) -{ - switch (jointype) - { - case JOIN_INNER: - return "INNER"; - - case JOIN_LEFT: - return "LEFT"; - - case JOIN_RIGHT: - return "RIGHT"; - - case JOIN_FULL: - return "FULL"; - - default: - /* Shouldn't come here, but protect from buggy code. */ - elog(ERROR, "unsupported join type %d", jointype); - } - - /* Keep compiler happy */ - return NULL; -} - -/* - * Construct FROM clause for given relation - * - * The function constructs ... JOIN ... ON ... for join relation. For a base - * relation it just returns schema-qualified tablename, with the appropriate - * alias if so requested. - * - * 'ignore_rel' is either zero or the RT index of a target relation. In the - * latter case the function constructs FROM clause of UPDATE or USING clause - * of DELETE; it deparses the join relation as if the relation never contained - * the target relation, and creates a List of conditions to be deparsed into - * the top-level WHERE clause, which is returned to *ignore_conds. - */ -static void -deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, bool use_alias, - Index ignore_rel, List **ignore_conds, List **params_list, - DataNodeChunkAssignment *sca) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(foreignrel); - - if (IS_JOIN_REL(foreignrel)) - { - StringInfoData join_sql_o; - StringInfoData join_sql_i; - RelOptInfo *outerrel = fpinfo->outerrel; - RelOptInfo *innerrel = fpinfo->innerrel; - bool outerrel_is_target = false; - bool innerrel_is_target = false; - - if (ignore_rel > 0 && bms_is_member(ignore_rel, foreignrel->relids)) - { - /* - * If this is an inner join, add joinclauses to *ignore_conds and - * set it to empty so that those can be deparsed into the WHERE - * clause. Note that since the target relation can never be - * within the nullable side of an outer join, those could safely - * be pulled up into the WHERE clause (see foreign_join_ok()). - * Note also that since the target relation is only inner-joined - * to any other relation in the query, all conditions in the join - * tree mentioning the target relation could be deparsed into the - * WHERE clause by doing this recursively. - */ - if (fpinfo->jointype == JOIN_INNER) - { - *ignore_conds = list_concat(*ignore_conds, fpinfo->joinclauses); - fpinfo->joinclauses = NIL; - } - - /* - * Check if either of the input relations is the target relation. - */ - if (outerrel->relid == ignore_rel) - outerrel_is_target = true; - else if (innerrel->relid == ignore_rel) - innerrel_is_target = true; - } - - /* Deparse outer relation if not the target relation. */ - if (!outerrel_is_target) - { - initStringInfo(&join_sql_o); - deparseRangeTblRef(&join_sql_o, - root, - outerrel, - fpinfo->make_outerrel_subquery, - ignore_rel, - ignore_conds, - params_list, - sca); - - /* - * If inner relation is the target relation, skip deparsing it. 
- * Note that since the join of the target relation with any other - * relation in the query is an inner join and can never be within - * the nullable side of an outer join, the join could be - * interchanged with higher-level joins (cf. identity 1 on outer - * join reordering shown in src/backend/optimizer/README), which - * means it's safe to skip the target-relation deparsing here. - */ - if (innerrel_is_target) - { - Assert(fpinfo->jointype == JOIN_INNER); - Assert(fpinfo->joinclauses == NIL); - appendBinaryStringInfo(buf, join_sql_o.data, join_sql_o.len); - return; - } - } - - /* Deparse inner relation if not the target relation. */ - if (!innerrel_is_target) - { - initStringInfo(&join_sql_i); - deparseRangeTblRef(&join_sql_i, - root, - innerrel, - fpinfo->make_innerrel_subquery, - ignore_rel, - ignore_conds, - params_list, - sca); - - /* - * If outer relation is the target relation, skip deparsing it. - * See the above note about safety. - */ - if (outerrel_is_target) - { - Assert(fpinfo->jointype == JOIN_INNER); - Assert(fpinfo->joinclauses == NIL); - appendBinaryStringInfo(buf, join_sql_i.data, join_sql_i.len); - return; - } - } - - /* Neither of the relations is the target relation. */ - Assert(!outerrel_is_target && !innerrel_is_target); - - /* - * For a join relation FROM clause entry is deparsed as - * - * ((outer relation) (inner relation) ON (joinclauses)) - */ - appendStringInfo(buf, - "(%s %s JOIN %s ON ", - join_sql_o.data, - get_jointype_name(fpinfo->jointype), - join_sql_i.data); - - /* Append join clause; (TRUE) if no join clause */ - if (fpinfo->joinclauses) - { - deparse_expr_cxt context; - - context.buf = buf; - context.foreignrel = foreignrel; - context.scanrel = foreignrel; - context.root = root; - context.params_list = params_list; - - appendStringInfoChar(buf, '('); - appendConditions(fpinfo->joinclauses, &context, true); - appendStringInfoChar(buf, ')'); - } - else - appendStringInfoString(buf, "(TRUE)"); - - /* End the FROM clause entry. */ - appendStringInfoChar(buf, ')'); - } - else - { - RangeTblEntry *rte = planner_rt_fetch(foreignrel->relid, root); - - /* - * Core code already has some lock on each rel being planned, so we - * can use NoLock here. - */ - Relation rel = table_open(rte->relid, NoLock); - - deparseRelation(buf, rel); - - /* - * Add a unique alias to avoid any conflict in relation names due to - * pulled up subqueries in the query being built for a pushed down - * join. - */ - if (use_alias) - appendStringInfo(buf, " %s%d", REL_ALIAS_PREFIX, foreignrel->relid); - - table_close(rel, NoLock); - } -} - -/* - * Append FROM clause entry for the given relation into buf. - */ -static void -deparseRangeTblRef(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, bool make_subquery, - Index ignore_rel, List **ignore_conds, List **params_list, - DataNodeChunkAssignment *sca) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(foreignrel); - - /* Should only be called in these cases. */ - Assert(IS_SIMPLE_REL(foreignrel) || IS_JOIN_REL(foreignrel)); - - Assert(fpinfo->local_conds == NIL); - - /* If make_subquery is true, deparse the relation as a subquery. */ - if (make_subquery) - { - List *retrieved_attrs; - int ncols; - - /* - * The given relation shouldn't contain the target relation, because - * this should only happen for input relations for a full join, and - * such relations can never contain an UPDATE/DELETE target. 
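The join case above always produces a fully parenthesized FROM entry of the form ((outer) <jointype> JOIN (inner) ON (clauses)), recursing into each side. A toy standalone rendering of just that shape; the relation and condition strings are invented, and the real code deparses each side through deparseRangeTblRef, possibly as a subquery:

    #include <stdio.h>

    /* Render "(outer JOINTYPE JOIN inner ON (cond))"; falls back to (TRUE)
     * when there is no join clause. */
    static void
    render_join(char *buf, size_t buflen,
                const char *outer, const char *jointype,
                const char *inner, const char *cond)
    {
        snprintf(buf, buflen, "(%s %s JOIN %s ON (%s))",
                 outer, jointype, inner, cond && cond[0] ? cond : "TRUE");
    }

    int
    main(void)
    {
        char from[256];

        render_join(from, sizeof(from),
                    "public.metrics r1", "INNER", "public.devices r2",
                    "r1.device_id = r2.id");
        printf("SELECT ... FROM %s\n", from);
        return 0;
    }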
- */ - Assert(ignore_rel == 0 || !bms_is_member(ignore_rel, foreignrel->relids)); - - /* Deparse the subquery representing the relation. */ - appendStringInfoChar(buf, '('); - deparseSelectStmtForRel(buf, - root, - foreignrel, - NIL, - fpinfo->remote_conds, - NIL /* remote_having */, - NIL /* pathkeys */, - true /* is_subquery */, - &retrieved_attrs, - params_list, - sca); - appendStringInfoChar(buf, ')'); - - /* Append the relation alias. */ - appendStringInfo(buf, " %s%d", SUBQUERY_REL_ALIAS_PREFIX, fpinfo->relation_index); - - /* - * Append the column aliases if needed. Note that the subquery emits - * expressions specified in the relation's reltarget (see - * deparseSubqueryTargetList). - */ - ncols = list_length(foreignrel->reltarget->exprs); - if (ncols > 0) - { - int i; - - appendStringInfoChar(buf, '('); - for (i = 1; i <= ncols; i++) - { - if (i > 1) - appendStringInfoString(buf, ", "); - - appendStringInfo(buf, "%s%d", SUBQUERY_COL_ALIAS_PREFIX, i); - } - appendStringInfoChar(buf, ')'); - } - } - else - deparseFromExprForRel(buf, - root, - foreignrel, - true, - ignore_rel, - ignore_conds, - params_list, - sca); -} - -/* - * deparse remote INSERT statement - * - * The statement text is appended to buf, and we also create an integer List - * of the columns being retrieved by RETURNING (if any), which is returned - * to *retrieved_attrs. - */ -void -deparse_insert_stmt(DeparsedInsertStmt *stmt, RangeTblEntry *rte, Index rtindex, Relation rel, - List *target_attrs, bool do_nothing, List *returning_list) -{ - bool first; - ListCell *lc; - StringInfoData buf; - - memset(stmt, 0, sizeof(DeparsedInsertStmt)); - initStringInfo(&buf); - - appendStringInfoString(&buf, "INSERT INTO "); - deparseRelation(&buf, rel); - - stmt->target = buf.data; - stmt->num_target_attrs = list_length(target_attrs); - - initStringInfo(&buf); - - if (target_attrs != NIL) - { - appendStringInfoChar(&buf, '('); - - first = true; - foreach (lc, target_attrs) - { - int attnum = lfirst_int(lc); - - if (!first) - appendStringInfoString(&buf, ", "); - first = false; - - deparseColumnRef(&buf, rtindex, attnum, rte, false); - } - - appendStringInfoString(&buf, ") VALUES "); - - stmt->target_attrs = buf.data; - - initStringInfo(&buf); - } - - stmt->do_nothing = do_nothing; - - deparseReturningList(&buf, - rte, - rtindex, - rel, - rel->trigdesc && rel->trigdesc->trig_insert_after_row, - returning_list, - &stmt->retrieved_attrs); - - if (stmt->retrieved_attrs == NIL) - stmt->returning = NULL; - else - stmt->returning = buf.data; -} - -static int -append_values_params(DeparsedInsertStmt *stmt, StringInfo buf, int pindex) -{ - bool first = true; - - appendStringInfoChar(buf, '('); - - for (unsigned int i = 0; i < stmt->num_target_attrs; i++) - { - if (!first) - appendStringInfoString(buf, ", "); - else - first = false; - - appendStringInfo(buf, "$%d", pindex); - pindex++; - } - - appendStringInfoChar(buf, ')'); - - return pindex; -} - -static const char * -deparsed_insert_stmt_get_sql_internal(DeparsedInsertStmt *stmt, StringInfo buf, int64 num_rows, - bool abbrev) -{ - appendStringInfoString(buf, stmt->target); - - if (stmt->num_target_attrs > 0) - { - appendStringInfoString(buf, stmt->target_attrs); - - if (abbrev) - { - append_values_params(stmt, buf, 1); - - if (num_rows > 1) - { - appendStringInfo(buf, ", ..., "); - append_values_params(stmt, - buf, - (stmt->num_target_attrs * num_rows) - stmt->num_target_attrs + - 1); - } - } - else - { - int pindex = 1; - int64 i; - - for (i = 0; i < num_rows; i++) - { - pindex = 
append_values_params(stmt, buf, pindex); - - if (i < (num_rows - 1)) - appendStringInfoString(buf, ", "); - } - } - } - else - appendStringInfoString(buf, " DEFAULT VALUES"); - - if (stmt->do_nothing) - appendStringInfoString(buf, " ON CONFLICT DO NOTHING"); - - if (NULL != stmt->returning) - appendStringInfoString(buf, stmt->returning); - - return buf->data; -} - -const char * -deparsed_insert_stmt_get_sql(DeparsedInsertStmt *stmt, int64 num_rows) -{ - StringInfoData buf; - - initStringInfo(&buf); - - return deparsed_insert_stmt_get_sql_internal(stmt, &buf, num_rows, false); -} - -const char * -deparsed_insert_stmt_get_sql_explain(DeparsedInsertStmt *stmt, int64 num_rows) -{ - StringInfoData buf; - - initStringInfo(&buf); - - return deparsed_insert_stmt_get_sql_internal(stmt, &buf, num_rows, true); -} - -enum DeparsedInsertStmtIndex -{ - DeparsedInsertStmtTarget, - DeparsedInsertStmtNumTargetAttrs, - DeparsedInsertStmtTargetAttrs, - DeparsedInsertStmtDoNothing, - DeparsedInsertStmtRetrievedAttrs, - DeparsedInsertStmtReturning, -}; - -List * -deparsed_insert_stmt_to_list(DeparsedInsertStmt *stmt) -{ - List *stmt_list = - list_make5(makeString(pstrdup(stmt->target)), - makeInteger(stmt->num_target_attrs), - makeString(stmt->target_attrs != NULL ? pstrdup(stmt->target_attrs) : ""), - makeInteger(stmt->do_nothing ? 1 : 0), - stmt->retrieved_attrs); - - if (NULL != stmt->returning) - stmt_list = lappend(stmt_list, makeString(pstrdup(stmt->returning))); - - return stmt_list; -} - -void -deparsed_insert_stmt_from_list(DeparsedInsertStmt *stmt, List *list_stmt) -{ - stmt->target = strVal(list_nth(list_stmt, DeparsedInsertStmtTarget)); - stmt->num_target_attrs = intVal(list_nth(list_stmt, DeparsedInsertStmtNumTargetAttrs)); - stmt->target_attrs = (stmt->num_target_attrs > 0) ? - strVal(list_nth(list_stmt, DeparsedInsertStmtTargetAttrs)) : - NULL; - stmt->do_nothing = intVal(list_nth(list_stmt, DeparsedInsertStmtDoNothing)); - stmt->retrieved_attrs = list_nth(list_stmt, DeparsedInsertStmtRetrievedAttrs); - - if (list_length(list_stmt) > DeparsedInsertStmtReturning) - { - Assert(stmt->retrieved_attrs != NIL); - stmt->returning = strVal(list_nth(list_stmt, DeparsedInsertStmtReturning)); - } - else - stmt->returning = NULL; -} - -void -deparseInsertSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, List *targetAttrs, - int64 num_rows, bool doNothing, List *returningList, List **retrieved_attrs) -{ - DeparsedInsertStmt stmt; - - deparse_insert_stmt(&stmt, rte, rtindex, rel, targetAttrs, doNothing, returningList); - - deparsed_insert_stmt_get_sql_internal(&stmt, buf, num_rows, false); - - if (NULL != retrieved_attrs) - *retrieved_attrs = stmt.retrieved_attrs; -} - -/* - * deparse remote UPDATE statement - * - * The statement text is appended to buf, and we also create an integer List - * of the columns being retrieved by RETURNING (if any), which is returned - * to *retrieved_attrs. 
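The placeholder generation above is easiest to see in isolation: each row of a multi-row VALUES list gets its own run of $n parameters, numbered consecutively across rows. A standalone sketch with a made-up table and column count:

    #include <stdio.h>
    #include <string.h>

    /* Append "($1, $2), ($3, $4), ..." for num_rows rows of num_cols params;
     * returns the next unused parameter index. */
    static int
    append_values_placeholders(char *buf, size_t buflen, int num_cols, int num_rows)
    {
        size_t off = strlen(buf);
        int pindex = 1;

        for (int row = 0; row < num_rows; row++)
        {
            off += snprintf(buf + off, buflen - off, "%s(", row > 0 ? ", " : "");
            for (int col = 0; col < num_cols; col++)
                off += snprintf(buf + off, buflen - off, "%s$%d",
                                col > 0 ? ", " : "", pindex++);
            off += snprintf(buf + off, buflen - off, ")");
        }
        return pindex;
    }

    int
    main(void)
    {
        char sql[256] = "INSERT INTO metrics (time, value) VALUES ";

        append_values_placeholders(sql, sizeof(sql), 2, 3);
        printf("%s\n", sql); /* ... VALUES ($1, $2), ($3, $4), ($5, $6) */
        return 0;
    }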
- */ -void -deparseUpdateSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, List *targetAttrs, - List *returningList, List **retrieved_attrs) -{ - AttrNumber pindex; - bool first; - ListCell *lc; - - appendStringInfoString(buf, "UPDATE "); - deparseRelation(buf, rel); - appendStringInfoString(buf, " SET "); - - pindex = 2; /* ctid is always the first param */ - first = true; - foreach (lc, targetAttrs) - { - int attnum = lfirst_int(lc); - - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - deparseColumnRef(buf, rtindex, attnum, rte, false); - appendStringInfo(buf, " = $%d", pindex); - pindex++; - } - appendStringInfoString(buf, " WHERE ctid = $1"); - - deparseReturningList(buf, - rte, - rtindex, - rel, - rel->trigdesc && rel->trigdesc->trig_update_after_row, - returningList, - retrieved_attrs); -} - -/* - * deparse remote DELETE statement - * - * The statement text is appended to buf, and we also create an integer List - * of the columns being retrieved by RETURNING (if any), which is returned - * to *retrieved_attrs. - */ -void -deparseDeleteSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - List *returningList, List **retrieved_attrs) -{ - appendStringInfoString(buf, "DELETE FROM "); - deparseRelation(buf, rel); - appendStringInfoString(buf, " WHERE ctid = $1"); - - deparseReturningList(buf, - rte, - rtindex, - rel, - rel->trigdesc && rel->trigdesc->trig_delete_after_row, - returningList, - retrieved_attrs); -} - -/* - * Add a RETURNING clause, if needed, to an INSERT/UPDATE/DELETE. - */ -static void -deparseReturningList(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - bool trig_after_row, List *returningList, List **retrieved_attrs) -{ - Bitmapset *attrs_used = NULL; - - /* We currently do not handle triggers (trig_after_row == TRUE) given that - * we know these triggers should exist also on data nodes and - * can/should be executed there. The only reason to handle triggers on the - * frontend is to (1) more efficiently handle BEFORE triggers (executing - * them on the frontend before sending tuples), or (2) have triggers that - * do not exist on data nodes. - * - * Note that, for a hypertable, trig_after_row is always true because of - * the insert blocker trigger. - */ - - if (returningList != NIL) - { - /* - * We need the attrs, non-system and system, mentioned in the local - * query's RETURNING list. - */ - pull_varattnos((Node *) returningList, rtindex, &attrs_used); - } - - if (attrs_used != NULL) - deparseTargetList(buf, rte, rtindex, rel, true, attrs_used, false, retrieved_attrs); - else - *retrieved_attrs = NIL; -} - -/* - * Construct SELECT statement to acquire size in blocks of given relation. - * - * Note: we use local definition of block size, not remote definition. - * This is perhaps debatable. - * - * Note: pg_relation_size() exists in 8.1 and later. - */ -void -deparseAnalyzeSizeSql(StringInfo buf, Relation rel) -{ - StringInfoData relname; - - /* We'll need the remote relation name as a literal. */ - initStringInfo(&relname); - deparseRelation(&relname, rel); - - appendStringInfoString(buf, "SELECT pg_catalog.pg_relation_size("); - deparseStringLiteral(buf, relname.data); - appendStringInfo(buf, "::pg_catalog.regclass) / %d", BLCKSZ); -} - -/* - * Construct SELECT statement to acquire sample rows of given relation. - * - * SELECT command is appended to buf, and list of columns retrieved - * is returned to *retrieved_attrs. 
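The remote UPDATE built above reserves $1 for the row's ctid and numbers the SET parameters from $2 onward; the matching DELETE is simply "DELETE FROM ... WHERE ctid = $1". A standalone sketch of the UPDATE shape, with hypothetical relation and column names:

    #include <stdio.h>
    #include <string.h>

    /* Build "UPDATE rel SET a = $2, b = $3 WHERE ctid = $1". */
    static void
    build_remote_update(char *buf, size_t buflen,
                        const char *relname, const char **cols, int ncols)
    {
        size_t off = snprintf(buf, buflen, "UPDATE %s SET ", relname);

        for (int i = 0; i < ncols; i++)
            off += snprintf(buf + off, buflen - off, "%s%s = $%d",
                            i > 0 ? ", " : "", cols[i], i + 2); /* ctid takes $1 */
        snprintf(buf + off, buflen - off, " WHERE ctid = $1");
    }

    int
    main(void)
    {
        const char *cols[] = { "value", "status" };
        char sql[256];

        build_remote_update(sql, sizeof(sql), "public.metrics", cols, 2);
        printf("%s\n", sql);
        return 0;
    }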
- */ -void -deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) -{ - Oid relid = RelationGetRelid(rel); - TupleDesc tupdesc = RelationGetDescr(rel); - int i; - char *colname; - List *options; - ListCell *lc; - bool first = true; - - *retrieved_attrs = NIL; - - appendStringInfoString(buf, "SELECT "); - for (i = 0; i < tupdesc->natts; i++) - { - /* Ignore dropped columns. */ - if (TupleDescAttr(tupdesc, i)->attisdropped) - continue; - - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - /* Use attribute name or column_name option. */ - colname = NameStr(TupleDescAttr(tupdesc, i)->attname); - options = GetForeignColumnOptions(relid, i + 1); - - foreach (lc, options) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "column_name") == 0) - { - colname = defGetString(def); - break; - } - } - - appendStringInfoString(buf, quote_identifier(colname)); - - *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1); - } - - /* Don't generate bad syntax for zero-column relation. */ - if (first) - appendStringInfoString(buf, "NULL"); - - /* - * Construct FROM clause - */ - appendStringInfoString(buf, " FROM "); - deparseRelation(buf, rel); -} - -/* - * Construct name to use for given column, and emit it into buf. - * If it has a column_name FDW option, use that instead of attribute name. - * - * If qualify_col is true, qualify column name with the alias of relation. - */ -static void -deparseColumnRef(StringInfo buf, int varno, int varattno, RangeTblEntry *rte, bool qualify_col) -{ - /* We support fetching the remote side's CTID and OID. */ - if (varattno == SelfItemPointerAttributeNumber) - { - if (qualify_col) - ADD_REL_QUALIFIER(buf, varno); - appendStringInfoString(buf, "ctid"); - } - else if (varattno < 0) - { - /* - * All other system attributes are fetched as 0, except for table OID, - * which is fetched as the local table OID. However, we must be - * careful; the table could be beneath an outer join, in which case it - * must go to NULL whenever the rest of the row does. - */ - Oid fetchval = 0; - - if (varattno == TableOidAttributeNumber) - fetchval = rte->relid; - - if (qualify_col) - { - appendStringInfoString(buf, "CASE WHEN ("); - ADD_REL_QUALIFIER(buf, varno); - appendStringInfo(buf, "*)::text IS NOT NULL THEN %u END", fetchval); - } - else - appendStringInfo(buf, "%u", fetchval); - } - else if (varattno == 0) - { - /* Whole row reference */ - Relation rel; - Bitmapset *attrs_used; - - /* Required only to be passed down to deparseTargetList(). */ - List *retrieved_attrs; - - /* - * The lock on the relation will be held by upper callers, so it's - * fine to open it with no lock here. - */ - rel = table_open(rte->relid, NoLock); - - /* - * The local name of the foreign table can not be recognized by the - * data node and the table it references on data node might - * have different column ordering or different columns than those - * declared locally. Hence we have to deparse whole-row reference as - * ROW(columns referenced locally). Construct this by deparsing a - * "whole row" attribute. - */ - attrs_used = bms_add_member(NULL, 0 - FirstLowInvalidHeapAttributeNumber); - - /* - * In case the whole-row reference is under an outer join then it has - * to go NULL whenever the rest of the row goes NULL. Deparsing a join - * query would always involve multiple relations, thus qualify_col - * would be true. 
- */ - if (qualify_col) - { - appendStringInfoString(buf, "CASE WHEN ("); - ADD_REL_QUALIFIER(buf, varno); - appendStringInfoString(buf, "*)::text IS NOT NULL THEN "); - } - - appendStringInfoString(buf, "ROW("); - deparseTargetList(buf, rte, varno, rel, false, attrs_used, qualify_col, &retrieved_attrs); - appendStringInfoChar(buf, ')'); - - /* Complete the CASE WHEN statement started above. */ - if (qualify_col) - appendStringInfoString(buf, " END"); - - table_close(rel, NoLock); - bms_free(attrs_used); - } - else - { - char *colname = NULL; - List *options; - ListCell *lc; - - /* varno must not be any of OUTER_VAR, INNER_VAR and INDEX_VAR. */ - Assert(!IS_SPECIAL_VARNO(varno)); - - /* - * If it's a column of a foreign table, and it has the column_name FDW - * option, use that value. - */ - options = GetForeignColumnOptions(rte->relid, varattno); - foreach (lc, options) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "column_name") == 0) - { - colname = defGetString(def); - break; - } - } - - /* - * If it's a column of a regular table or it doesn't have column_name - * FDW option, use attribute name. - */ - if (colname == NULL) - colname = get_attname(rte->relid, varattno, false); - - if (qualify_col) - ADD_REL_QUALIFIER(buf, varno); - - appendStringInfoString(buf, quote_identifier(colname)); - } -} - -/* - * Append name of table being queried. - * - * Note, we enforce that table names are the same across nodes. - */ -static void -deparseRelation(StringInfo buf, Relation rel) -{ - const char *nspname; - const char *relname; - - Assert(rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || - rel->rd_rel->relkind == RELKIND_RELATION); - - nspname = get_namespace_name(RelationGetNamespace(rel)); - relname = RelationGetRelationName(rel); - - appendStringInfo(buf, "%s.%s", quote_identifier(nspname), quote_identifier(relname)); -} - -/* - * Append a SQL string literal representing "val" to buf. - */ -void -deparseStringLiteral(StringInfo buf, const char *val) -{ - const char *valptr; - - /* - * Rather than making assumptions about the data node's value of - * standard_conforming_strings, always use E'foo' syntax if there are any - * backslashes. This will fail on data nodes before 8.1, but those - * are long out of support. - */ - if (strchr(val, '\\') != NULL) - appendStringInfoChar(buf, ESCAPE_STRING_SYNTAX); - appendStringInfoChar(buf, '\''); - for (valptr = val; *valptr; valptr++) - { - char ch = *valptr; - - if (SQL_STR_DOUBLE(ch, true)) - appendStringInfoChar(buf, ch); - appendStringInfoChar(buf, ch); - } - appendStringInfoChar(buf, '\''); -} - -/* - * Deparse given expression into context->buf. - * - * This function must support all the same node types that foreign_expr_walker - * accepts. - * - * Note: unlike ruleutils.c, we just use a simple hard-wired parenthesization - * scheme: anything more complex than a Var, Const, function call or cast - * should be self-parenthesized. 
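The literal quoting above can be reproduced in isolation: single quotes and backslashes are doubled, and the E'' prefix is only added when the value contains a backslash. A minimal standalone version:

    #include <stdio.h>
    #include <string.h>

    /* Append val to buf as a SQL string literal, doubling ' and \ and
     * switching to E'...' syntax when a backslash is present. */
    static void
    append_string_literal(char *buf, size_t buflen, const char *val)
    {
        size_t off = strlen(buf);

        if (strchr(val, '\\') != NULL && off < buflen - 1)
            buf[off++] = 'E';
        if (off < buflen - 1)
            buf[off++] = '\'';
        for (const char *p = val; *p && off + 2 < buflen; p++)
        {
            if (*p == '\'' || *p == '\\')
                buf[off++] = *p; /* double the character */
            buf[off++] = *p;
        }
        if (off < buflen - 1)
            buf[off++] = '\'';
        buf[off] = '\0';
    }

    int
    main(void)
    {
        char buf[128] = "";

        append_string_literal(buf, sizeof(buf), "it's a path\\tmp");
        printf("%s\n", buf); /* E'it''s a path\\tmp' */
        return 0;
    }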
- */ -static void -deparseExpr(Expr *node, deparse_expr_cxt *context) -{ - if (node == NULL) - return; - - switch (nodeTag(node)) - { - case T_Var: - deparseVar(castNode(Var, node), context); - break; - case T_Const: - deparseConst(castNode(Const, node), context, 0); - break; - case T_Param: - deparseParam(castNode(Param, node), context); - break; - case T_SubscriptingRef: - deparseSubscriptingRef(castNode(SubscriptingRef, node), context); - break; - case T_FuncExpr: - deparseFuncExpr(castNode(FuncExpr, node), context); - break; - case T_OpExpr: - deparseOpExpr(castNode(OpExpr, node), context); - break; - case T_DistinctExpr: - deparseDistinctExpr(castNode(DistinctExpr, node), context); - break; - case T_ScalarArrayOpExpr: - deparseScalarArrayOpExpr(castNode(ScalarArrayOpExpr, node), context); - break; - case T_RelabelType: - deparseRelabelType(castNode(RelabelType, node), context); - break; - case T_BoolExpr: - deparseBoolExpr(castNode(BoolExpr, node), context); - break; - case T_NullTest: - deparseNullTest(castNode(NullTest, node), context); - break; - case T_ArrayExpr: - deparseArrayExpr(castNode(ArrayExpr, node), context); - break; - case T_Aggref: - deparseAggref(castNode(Aggref, node), context); - break; - default: - elog(ERROR, "unsupported expression type for deparse: %d", (int) nodeTag(node)); - break; - } -} - -/* - * Is it requeired to qualify the column name (e.g., multiple - * tables are part of the query). - */ -static bool -column_qualification_needed(deparse_expr_cxt *context) -{ - Relids relids = context->scanrel->relids; - - if (bms_membership(relids) == BMS_MULTIPLE) - { - if (IS_JOIN_REL(context->scanrel)) - return true; - else if (context->sca == NULL) - return true; - } - - return false; -} - -/* - * Deparse given Var node into context->buf. - * - * If the Var belongs to the foreign relation, just print its remote name. - * Otherwise, it's effectively a Param (and will in fact be a Param at - * run time). Handle it the same way we handle plain Params --- see - * deparseParam for comments. - */ -static void -deparseVar(Var *node, deparse_expr_cxt *context) -{ - Relids relids = context->scanrel->relids; - int relno; - int colno; - - /* Qualify columns when multiple relations are involved, unless it is a - * per-data node scan or a join. */ - bool qualify_col = column_qualification_needed(context); - - /* - * If the Var belongs to the foreign relation that is deparsed as a - * subquery, use the relation and column alias to the Var provided by the - * subquery, instead of the remote name. 
- */ - if (is_subquery_var(node, context->scanrel, &relno, &colno)) - { - appendStringInfo(context->buf, - "%s%d.%s%d", - SUBQUERY_REL_ALIAS_PREFIX, - relno, - SUBQUERY_COL_ALIAS_PREFIX, - colno); - return; - } - - if (bms_is_member(node->varno, relids) && node->varlevelsup == 0) - deparseColumnRef(context->buf, - node->varno, - node->varattno, - planner_rt_fetch(node->varno, context->root), - qualify_col); - else - { - /* Treat like a Param */ - if (context->params_list) - { - int pindex = 0; - ListCell *lc; - - /* find its index in params_list */ - foreach (lc, *context->params_list) - { - pindex++; - if (equal(node, (Node *) lfirst(lc))) - break; - } - if (lc == NULL) - { - /* not in list, so add it */ - pindex++; - *context->params_list = lappend(*context->params_list, node); - } - - printRemoteParam(pindex, node->vartype, node->vartypmod, context); - } - else - { - printRemotePlaceholder(node->vartype, node->vartypmod, context); - } - } -} - -/* - * Deparse given constant value into context->buf. - * - * This function has to be kept in sync with ruleutils.c's get_const_expr. - * As for that function, showtype can be -1 to never show "::typename" decoration, - * or +1 to always show it, or 0 to show it only if the constant wouldn't be assumed - * to be the right type by default. - */ -static void -deparseConst(Const *node, deparse_expr_cxt *context, int showtype) -{ - StringInfo buf = context->buf; - Oid typoutput; - bool typIsVarlena; - char *extval; - bool isfloat = false; - bool needlabel; - - if (node->constisnull) - { - appendStringInfoString(buf, "NULL"); - if (showtype >= 0) - appendStringInfo(buf, "::%s", deparse_type_name(node->consttype, node->consttypmod)); - return; - } - - getTypeOutputInfo(node->consttype, &typoutput, &typIsVarlena); - extval = OidOutputFunctionCall(typoutput, node->constvalue); - - switch (node->consttype) - { - case INT2OID: - case INT4OID: - case INT8OID: - case OIDOID: - case FLOAT4OID: - case FLOAT8OID: - case NUMERICOID: - { - /* - * No need to quote unless it's a special value such as 'NaN'. - * See comments in get_const_expr(). - */ - if (strspn(extval, "0123456789+-eE.") == strlen(extval)) - { - if (extval[0] == '+' || extval[0] == '-') - appendStringInfo(buf, "(%s)", extval); - else - appendStringInfoString(buf, extval); - if (strcspn(extval, "eE.") != strlen(extval)) - isfloat = true; /* it looks like a float */ - } - else - appendStringInfo(buf, "'%s'", extval); - } - break; - case BITOID: - case VARBITOID: - appendStringInfo(buf, "B'%s'", extval); - break; - case BOOLOID: - if (strcmp(extval, "t") == 0) - appendStringInfoString(buf, "true"); - else - appendStringInfoString(buf, "false"); - break; - default: - deparseStringLiteral(buf, extval); - break; - } - - pfree(extval); - - if (showtype < 0) - return; - - /* - * For showtype == 0, append ::typename unless the constant will be - * implicitly typed as the right type when it is read in. - * - * XXX this code has to be kept in sync with the behavior of the parser, - * especially make_const. - */ - switch (node->consttype) - { - case BOOLOID: - case INT4OID: - case UNKNOWNOID: - needlabel = false; - break; - case NUMERICOID: - needlabel = !isfloat || (node->consttypmod >= 0); - break; - default: - needlabel = true; - break; - } - if (needlabel || showtype > 0) - appendStringInfo(buf, "::%s", deparse_type_name(node->consttype, node->consttypmod)); -} - -/* - * Deparse given Param node. 
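The numeric-constant handling above has two small wrinkles worth isolating: values with a leading sign are wrapped in parentheses, and anything that does not look like a plain number (for example NaN) is emitted as a quoted string. A standalone sketch of just that branch; the "::typename" label decision is omitted here:

    #include <stdio.h>
    #include <string.h>

    /* Format the textual output of a numeric constant the way the remote
     * query expects it: parenthesize signed values, quote special ones. */
    static void
    append_numeric_const(char *buf, size_t buflen, const char *extval)
    {
        size_t off = strlen(buf);

        if (strspn(extval, "0123456789+-eE.") == strlen(extval))
        {
            if (extval[0] == '+' || extval[0] == '-')
                snprintf(buf + off, buflen - off, "(%s)", extval);
            else
                snprintf(buf + off, buflen - off, "%s", extval);
        }
        else
            snprintf(buf + off, buflen - off, "'%s'", extval); /* e.g. 'NaN' */
    }

    int
    main(void)
    {
        const char *vals[] = { "42", "-1.5", "NaN" };

        for (int i = 0; i < 3; i++)
        {
            char buf[64] = "";

            append_numeric_const(buf, sizeof(buf), vals[i]);
            printf("%s\n", buf); /* 42, (-1.5), 'NaN' */
        }
        return 0;
    }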
- * - * If we're generating the query "for real", add the Param to - * context->params_list if it's not already present, and then use its index - * in that list as the remote parameter number. During EXPLAIN, there's - * no need to identify a parameter number. - */ -static void -deparseParam(Param *node, deparse_expr_cxt *context) -{ - if (context->params_list) - { - int pindex = 0; - ListCell *lc; - - /* find its index in params_list */ - foreach (lc, *context->params_list) - { - pindex++; - if (equal(node, (Node *) lfirst(lc))) - break; - } - if (lc == NULL) - { - /* not in list, so add it */ - pindex++; - *context->params_list = lappend(*context->params_list, node); - } - - printRemoteParam(pindex, node->paramtype, node->paramtypmod, context); - } - else - { - printRemotePlaceholder(node->paramtype, node->paramtypmod, context); - } -} - -/* - * Deparse a subscripting expression. - */ -static void -deparseSubscriptingRef(SubscriptingRef *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - ListCell *lowlist_item; - ListCell *uplist_item; - - /* Always parenthesize the expression. */ - appendStringInfoChar(buf, '('); - - /* - * Deparse referenced container expression first. If that expression includes - * a cast, we have to parenthesize to prevent the array subscript from - * being taken as typename decoration. We can avoid that in the typical - * case of subscripting a Var, but otherwise do it. - */ - if (IsA(node->refexpr, Var)) - deparseExpr(node->refexpr, context); - else - { - appendStringInfoChar(buf, '('); - deparseExpr(node->refexpr, context); - appendStringInfoChar(buf, ')'); - } - - /* Deparse subscript expressions. */ - lowlist_item = list_head(node->reflowerindexpr); /* could be NULL */ - foreach (uplist_item, node->refupperindexpr) - { - appendStringInfoChar(buf, '['); - if (lowlist_item) - { - deparseExpr(lfirst(lowlist_item), context); - appendStringInfoChar(buf, ':'); - lowlist_item = lnext(node->reflowerindexpr, lowlist_item); - } - deparseExpr(lfirst(uplist_item), context); - appendStringInfoChar(buf, ']'); - } - - appendStringInfoChar(buf, ')'); -} - -/* - * Deparse a function call. - */ -static void -deparseFuncExpr(FuncExpr *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - bool use_variadic; - bool first; - ListCell *arg; - - /* - * If the function call came from an implicit coercion, then just show the - * first argument. - */ - if (node->funcformat == COERCE_IMPLICIT_CAST) - { - deparseExpr((Expr *) linitial(node->args), context); - return; - } - - /* - * If the function call came from a cast, then show the first argument - * plus an explicit cast operation. - */ - if (node->funcformat == COERCE_EXPLICIT_CAST) - { - Oid rettype = node->funcresulttype; - int32 coercedTypmod; - - /* Get the typmod if this is a length-coercion function */ - (void) exprIsLengthCoercion((Node *) node, &coercedTypmod); - - deparseExpr((Expr *) linitial(node->args), context); - appendStringInfo(buf, "::%s", deparse_type_name(rettype, coercedTypmod)); - return; - } - - /* Check if need to print VARIADIC (cf. ruleutils.c) */ - use_variadic = node->funcvariadic; - - /* - * Normal function: display as proname(args). - */ - appendFunctionName(node->funcid, context); - appendStringInfoChar(buf, '('); - - /* ... 
and all the arguments */ - first = true; - foreach (arg, node->args) - { - if (!first) - appendStringInfoString(buf, ", "); - if (use_variadic && lnext(node->args, arg) == NULL) - appendStringInfoString(buf, "VARIADIC "); - deparseExpr((Expr *) lfirst(arg), context); - first = false; - } - appendStringInfoChar(buf, ')'); -} - -/* - * Deparse given operator expression. To avoid problems around - * priority of operations, we always parenthesize the arguments. - */ -static void -deparseOpExpr(OpExpr *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - HeapTuple tuple; - Form_pg_operator form; - char oprkind; - ListCell *arg; - - /* Retrieve information about the operator from system catalog. */ - tuple = SearchSysCache1(OPEROID, ObjectIdGetDatum(node->opno)); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for operator %u", node->opno); - form = (Form_pg_operator) GETSTRUCT(tuple); - oprkind = form->oprkind; - - /* Sanity check. */ - Assert((oprkind == 'r' && list_length(node->args) == 1) || - (oprkind == 'l' && list_length(node->args) == 1) || - (oprkind == 'b' && list_length(node->args) == 2)); - - /* Always parenthesize the expression. */ - appendStringInfoChar(buf, '('); - - /* Deparse left operand. */ - if (oprkind == 'r' || oprkind == 'b') - { - arg = list_head(node->args); - deparseExpr(lfirst(arg), context); - appendStringInfoChar(buf, ' '); - } - - /* Deparse operator name. */ - deparseOperatorName(buf, form); - - /* Deparse right operand. */ - if (oprkind == 'l' || oprkind == 'b') - { - arg = list_tail(node->args); - appendStringInfoChar(buf, ' '); - deparseExpr(lfirst(arg), context); - } - - appendStringInfoChar(buf, ')'); - - ReleaseSysCache(tuple); -} - -/* - * Print the name of an operator. - */ -static void -deparseOperatorName(StringInfo buf, Form_pg_operator opform) -{ - char *opname; - - /* opname is not a SQL identifier, so we should not quote it. */ - opname = NameStr(opform->oprname); - - /* Print schema name only if it's not pg_catalog */ - if (opform->oprnamespace != PG_CATALOG_NAMESPACE) - { - const char *opnspname; - - opnspname = get_namespace_name(opform->oprnamespace); - /* Print fully qualified operator name. */ - appendStringInfo(buf, "OPERATOR(%s.%s)", quote_identifier(opnspname), opname); - } - else - { - /* Just print operator name. */ - appendStringInfoString(buf, opname); - } -} - -/* - * Deparse IS DISTINCT FROM. - */ -static void -deparseDistinctExpr(DistinctExpr *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - - Assert(list_length(node->args) == 2); - - appendStringInfoChar(buf, '('); - deparseExpr(linitial(node->args), context); - appendStringInfoString(buf, " IS DISTINCT FROM "); - deparseExpr(lsecond(node->args), context); - appendStringInfoChar(buf, ')'); -} - -/* - * Deparse given ScalarArrayOpExpr expression. To avoid problems - * around priority of operations, we always parenthesize the arguments. - */ -static void -deparseScalarArrayOpExpr(ScalarArrayOpExpr *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - HeapTuple tuple; - Form_pg_operator form; - Expr *arg1; - Expr *arg2; - - /* Retrieve information about the operator from system catalog. */ - tuple = SearchSysCache1(OPEROID, ObjectIdGetDatum(node->opno)); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for operator %u", node->opno); - form = (Form_pg_operator) GETSTRUCT(tuple); - - /* Sanity check. */ - Assert(list_length(node->args) == 2); - - /* Always parenthesize the expression. 
*/ - appendStringInfoChar(buf, '('); - - /* Deparse left operand. */ - arg1 = linitial(node->args); - deparseExpr(arg1, context); - appendStringInfoChar(buf, ' '); - - /* Deparse operator name plus decoration. */ - deparseOperatorName(buf, form); - appendStringInfo(buf, " %s (", node->useOr ? "ANY" : "ALL"); - - /* Deparse right operand. */ - arg2 = lsecond(node->args); - deparseExpr(arg2, context); - - appendStringInfoChar(buf, ')'); - - /* Always parenthesize the expression. */ - appendStringInfoChar(buf, ')'); - - ReleaseSysCache(tuple); -} - -/* - * Deparse a RelabelType (binary-compatible cast) node. - */ -static void -deparseRelabelType(RelabelType *node, deparse_expr_cxt *context) -{ - deparseExpr(node->arg, context); - if (node->relabelformat != COERCE_IMPLICIT_CAST) - appendStringInfo(context->buf, - "::%s", - deparse_type_name(node->resulttype, node->resulttypmod)); -} - -/* - * Deparse a BoolExpr node. - */ -static void -deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - const char *op = NULL; /* keep compiler quiet */ - bool first; - ListCell *lc; - - switch (node->boolop) - { - case AND_EXPR: - op = "AND"; - break; - case OR_EXPR: - op = "OR"; - break; - case NOT_EXPR: - appendStringInfoString(buf, "(NOT "); - deparseExpr(linitial(node->args), context); - appendStringInfoChar(buf, ')'); - return; - } - - appendStringInfoChar(buf, '('); - first = true; - foreach (lc, node->args) - { - if (!first) - appendStringInfo(buf, " %s ", op); - deparseExpr((Expr *) lfirst(lc), context); - first = false; - } - appendStringInfoChar(buf, ')'); -} - -/* - * Deparse IS [NOT] NULL expression. - */ -static void -deparseNullTest(NullTest *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - - appendStringInfoChar(buf, '('); - deparseExpr(node->arg, context); - - /* - * For scalar inputs, we prefer to print as IS [NOT] NULL, which is - * shorter and traditional. If it's a rowtype input but we're applying a - * scalar test, must print IS [NOT] DISTINCT FROM NULL to be semantically - * correct. - */ - if (node->argisrow || !type_is_rowtype(exprType((Node *) node->arg))) - { - if (node->nulltesttype == IS_NULL) - appendStringInfoString(buf, " IS NULL)"); - else - appendStringInfoString(buf, " IS NOT NULL)"); - } - else - { - if (node->nulltesttype == IS_NULL) - appendStringInfoString(buf, " IS NOT DISTINCT FROM NULL)"); - else - appendStringInfoString(buf, " IS DISTINCT FROM NULL)"); - } -} - -/* - * Deparse ARRAY[...] construct. - */ -static void -deparseArrayExpr(ArrayExpr *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - bool first = true; - ListCell *lc; - - appendStringInfoString(buf, "ARRAY["); - foreach (lc, node->elements) - { - if (!first) - appendStringInfoString(buf, ", "); - deparseExpr(lfirst(lc), context); - first = false; - } - appendStringInfoChar(buf, ']'); - - /* If the array is empty, we need an explicit cast to the array type. */ - if (node->elements == NIL) - appendStringInfo(buf, "::%s", deparse_type_name(node->array_typeid, -1)); -} - -/* - * Deparse an Aggref node. - */ -static void -deparseAggref(Aggref *node, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - bool use_variadic; - bool partial_agg = node->aggsplit != AGGSPLIT_SIMPLE; - Assert(node->aggsplit == AGGSPLIT_SIMPLE || node->aggsplit == AGGSPLIT_INITIAL_SERIAL); - - /* Check if need to print VARIADIC (cf. 
ruleutils.c) */ - use_variadic = node->aggvariadic; - - /* Find aggregate name from aggfnoid which is a pg_proc entry */ - if (partial_agg) - appendStringInfoString(buf, FUNCTIONS_SCHEMA_NAME "." PARTIALIZE_FUNC_NAME "("); - - appendFunctionName(node->aggfnoid, context); - appendStringInfoChar(buf, '('); - - /* Add DISTINCT */ - appendStringInfoString(buf, (node->aggdistinct != NIL) ? "DISTINCT " : ""); - - if (AGGKIND_IS_ORDERED_SET(node->aggkind)) - { - /* Add WITHIN GROUP (ORDER BY ..) */ - ListCell *arg; - bool first = true; - - Assert(!node->aggvariadic); - Assert(node->aggorder != NIL); - - foreach (arg, node->aggdirectargs) - { - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - deparseExpr((Expr *) lfirst(arg), context); - } - - appendStringInfoString(buf, ") WITHIN GROUP (ORDER BY "); - appendAggOrderBy(node->aggorder, node->args, context); - } - else - { - /* aggstar can be set only in zero-argument aggregates */ - if (node->aggstar) - appendStringInfoChar(buf, '*'); - else - { - ListCell *arg; - bool first = true; - - /* Add all the arguments */ - foreach (arg, node->args) - { - TargetEntry *tle = (TargetEntry *) lfirst(arg); - Node *n = (Node *) tle->expr; - - if (tle->resjunk) - continue; - - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - /* Add VARIADIC */ - if (use_variadic && lnext(node->args, arg) == NULL) - appendStringInfoString(buf, "VARIADIC "); - - deparseExpr((Expr *) n, context); - } - } - - /* Add ORDER BY */ - if (node->aggorder != NIL) - { - appendStringInfoString(buf, " ORDER BY "); - appendAggOrderBy(node->aggorder, node->args, context); - } - } - - /* Add FILTER (WHERE ..) */ - if (node->aggfilter != NULL) - { - appendStringInfoString(buf, ") FILTER (WHERE "); - deparseExpr((Expr *) node->aggfilter, context); - } - - appendStringInfoString(buf, partial_agg ? "))" : ")"); -} - -/* - * Append ORDER BY within aggregate function. - */ -static void -appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - ListCell *lc; - bool first = true; - - foreach (lc, orderList) - { - SortGroupClause *srt = (SortGroupClause *) lfirst(lc); - Node *sortexpr; - Oid sortcoltype; - TypeCacheEntry *typentry; - - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - sortexpr = deparseSortGroupClause(srt->tleSortGroupRef, targetList, false, context); - sortcoltype = exprType(sortexpr); - /* See whether operator is default < or > for datatype */ - typentry = lookup_type_cache(sortcoltype, TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); - if (srt->sortop == typentry->lt_opr) - appendStringInfoString(buf, " ASC"); - else if (srt->sortop == typentry->gt_opr) - appendStringInfoString(buf, " DESC"); - else - { - HeapTuple opertup; - Form_pg_operator operform; - - appendStringInfoString(buf, " USING "); - - /* Append operator name. */ - opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(srt->sortop)); - if (!HeapTupleIsValid(opertup)) - elog(ERROR, "cache lookup failed for operator %u", srt->sortop); - operform = (Form_pg_operator) GETSTRUCT(opertup); - deparseOperatorName(buf, operform); - ReleaseSysCache(opertup); - } - - if (srt->nulls_first) - appendStringInfoString(buf, " NULLS FIRST"); - else - appendStringInfoString(buf, " NULLS LAST"); - } -} - -/* - * Print the representation of a parameter to be sent to the remote side. - * - * Note: we always label the Param's type explicitly rather than relying on - * transmitting a numeric type OID in PQexecParams(). 
This allows us to - * avoid assuming that types have the same OIDs on the remote side as they - * do locally --- they need only have the same names. - */ -static void -printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - char *ptypename = deparse_type_name(paramtype, paramtypmod); - - appendStringInfo(buf, "$%d::%s", paramindex, ptypename); -} - -/* - * Print the representation of a placeholder for a parameter that will be - * sent to the remote side at execution time. - * - * This is used when we're just trying to EXPLAIN the remote query. - * We don't have the actual value of the runtime parameter yet, and we don't - * want the remote planner to generate a plan that depends on such a value - * anyway. Thus, we can't do something simple like "$1::paramtype". - * Instead, we emit "((SELECT null::paramtype)::paramtype)". - * In all extant versions of Postgres, the planner will see that as an unknown - * constant value, which is what we want. This might need adjustment if we - * ever make the planner flatten scalar subqueries. Note: the reason for the - * apparently useless outer cast is to ensure that the representation as a - * whole will be parsed as an a_expr and not a select_with_parens; the latter - * would do the wrong thing in the context "x = ANY(...)". - */ -static void -printRemotePlaceholder(Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - char *ptypename = deparse_type_name(paramtype, paramtypmod); - - appendStringInfo(buf, "((SELECT null::%s)::%s)", ptypename, ptypename); -} - -/* - * Deparse GROUP BY clause. - */ -static void -appendGroupByClause(List *tlist, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - Query *query = context->root->parse; - ListCell *lc; - bool first = true; - - /* Nothing to be done, if there's no GROUP BY clause in the query. */ - if (!query->groupClause) - return; - - appendStringInfoString(buf, " GROUP BY "); - - /* - * Queries with grouping sets are not pushed down, so we don't expect - * grouping sets here. - */ - Assert(!query->groupingSets); - - foreach (lc, query->groupClause) - { - SortGroupClause *grp = (SortGroupClause *) lfirst(lc); - - if (!first) - appendStringInfoString(buf, ", "); - first = false; - - deparseSortGroupClause(grp->tleSortGroupRef, tlist, true, context); - } -} - -/* - * Deparse ORDER BY clause according to the given pathkeys for given base - * relation. From given pathkeys expressions belonging entirely to the given - * base relation are obtained and deparsed. 
- */
-static void
-appendOrderByClause(List *pathkeys, deparse_expr_cxt *context)
-{
-    ListCell *lcell;
-    int nestlevel;
-    char *delim = " ";
-    RelOptInfo *baserel = context->scanrel;
-    StringInfo buf = context->buf;
-
-    /* Make sure any constants in the exprs are printed portably */
-    nestlevel = set_transmission_modes();
-
-    appendStringInfoString(buf, " ORDER BY");
-    foreach (lcell, pathkeys)
-    {
-        PathKey *pathkey = lfirst(lcell);
-        Expr *em_expr;
-
-        em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
-        Assert(em_expr != NULL);
-
-        appendStringInfoString(buf, delim);
-        deparseExpr(em_expr, context);
-        if (pathkey->pk_strategy == BTLessStrategyNumber)
-            appendStringInfoString(buf, " ASC");
-        else
-            appendStringInfoString(buf, " DESC");
-
-        if (pathkey->pk_nulls_first)
-            appendStringInfoString(buf, " NULLS FIRST");
-        else
-            appendStringInfoString(buf, " NULLS LAST");
-
-        delim = ", ";
-    }
-    reset_transmission_modes(nestlevel);
-}
-
-static void
-appendLimit(deparse_expr_cxt *context, List *pathkeys)
-{
-    Query *query = context->root->parse;
-
-    /* The limit is always set to a value greater than zero, even for
-     * the LIMIT 0 case.
-     *
-     * We do not explicitly push the OFFSET clause, since PostgreSQL
-     * treats limit_tuples as the sum of the original
-     * LIMIT + OFFSET.
-     */
-    Assert(context->root->limit_tuples >= 1.0);
-
-    /* Do LIMIT deparsing only for supported clauses.
-     *
-     * The current implementation does not handle aggregates with LIMIT
-     * pushdown. That would need different deparsing logic because
-     * at this point PostgreSQL has already excluded LIMIT for most of
-     * the incompatible features during group planning:
-     * distinct, aggs, window functions, group by and having.
-     *
-     * See: grouping_planner() backend/optimizer/plan/planner.c
-     *
-     * Just make sure this is true.
-     */
-    Assert(!(query->groupClause || query->groupingSets || query->distinctClause || query->hasAggs ||
-             query->hasWindowFuncs || query->hasTargetSRFs || context->root->hasHavingQual));
-
-    /* Restrict LIMIT pushdown to queries over a single table (no JOIN) */
-    if (!(list_length(query->jointree->fromlist) == 1 &&
-          IsA(linitial(query->jointree->fromlist), RangeTblRef)))
-        return;
-
-    /* ORDER BY is used but not pushed down */
-    if (pathkeys == NULL && context->root->query_pathkeys)
-        return;
-
-    /* Round the float limit value up and print it as an integer */
-    appendStringInfo(context->buf, " LIMIT %d", (int) ceil(context->root->limit_tuples));
-}
-
-/*
- * appendFunctionName
- *     Deparses the function name from the given function oid.
- */
-static void
-appendFunctionName(Oid funcid, deparse_expr_cxt *context)
-{
-    StringInfo buf = context->buf;
-    HeapTuple proctup;
-    Form_pg_proc procform;
-    const char *proname;
-
-    proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
-    if (!HeapTupleIsValid(proctup))
-        elog(ERROR, "cache lookup failed for function %u", funcid);
-    procform = (Form_pg_proc) GETSTRUCT(proctup);
-
-    /* Print schema name only if it's not pg_catalog */
-    if (procform->pronamespace != PG_CATALOG_NAMESPACE)
-    {
-        const char *schemaname;
-
-        schemaname = get_namespace_name(procform->pronamespace);
-        appendStringInfo(buf, "%s.", quote_identifier(schemaname));
-    }
-
-    proname = NameStr(procform->proname);
-
-    /* Always print the function name */
-    appendStringInfoString(buf, quote_identifier(proname));
-
-    ReleaseSysCache(proctup);
-}
-
-/*
- * Appends a sort or group clause.
- *
- * Like get_rule_sortgroupclause(), returns the expression tree, so caller
- * need not find it again.
- */ -static Node * -deparseSortGroupClause(Index ref, List *tlist, bool force_colno, deparse_expr_cxt *context) -{ - StringInfo buf = context->buf; - TargetEntry *tle; - Expr *expr; - - tle = get_sortgroupref_tle(ref, tlist); - expr = tle->expr; - - if (force_colno) - { - /* Use column-number form when requested by caller. */ - Assert(!tle->resjunk); - appendStringInfo(buf, "%d", tle->resno); - } - else if (expr && IsA(expr, Const)) - { - /* - * Force a typecast here so that we don't emit something like "GROUP - * BY 2", which will be misconstrued as a column position rather than - * a constant. - */ - deparseConst((Const *) expr, context, 1); - } - else if (!expr || IsA(expr, Var)) - deparseExpr(expr, context); - else - { - /* Always parenthesize the expression. */ - appendStringInfoChar(buf, '('); - deparseExpr(expr, context); - appendStringInfoChar(buf, ')'); - } - - return (Node *) expr; -} - -/* - * Returns true if given Var is deparsed as a subquery output column, in - * which case, *relno and *colno are set to the IDs for the relation and - * column alias to the Var provided by the subquery. - */ -static bool -is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(foreignrel); - RelOptInfo *outerrel = fpinfo->outerrel; - RelOptInfo *innerrel = fpinfo->innerrel; - - /* Should only be called in these cases. */ - Assert(IS_SIMPLE_REL(foreignrel) || IS_JOIN_REL(foreignrel)); - - /* - * If the given relation isn't a join relation, it doesn't have any lower - * subqueries, so the Var isn't a subquery output column. - */ - if (!IS_JOIN_REL(foreignrel)) - return false; - - /* - * If the Var doesn't belong to any lower subqueries, it isn't a subquery - * output column. - */ - if (!bms_is_member(node->varno, fpinfo->lower_subquery_rels)) - return false; - - if (bms_is_member(node->varno, outerrel->relids)) - { - /* - * If outer relation is deparsed as a subquery, the Var is an output - * column of the subquery; get the IDs for the relation/column alias. - */ - if (fpinfo->make_outerrel_subquery) - { - get_relation_column_alias_ids(node, outerrel, relno, colno); - return true; - } - - /* Otherwise, recurse into the outer relation. */ - return is_subquery_var(node, outerrel, relno, colno); - } - else - { - Assert(bms_is_member(node->varno, innerrel->relids)); - - /* - * If inner relation is deparsed as a subquery, the Var is an output - * column of the subquery; get the IDs for the relation/column alias. - */ - if (fpinfo->make_innerrel_subquery) - { - get_relation_column_alias_ids(node, innerrel, relno, colno); - return true; - } - - /* Otherwise, recurse into the inner relation. */ - return is_subquery_var(node, innerrel, relno, colno); - } -} - -/* - * Get the IDs for the relation and column alias to given Var belonging to - * given relation, which are returned into *relno and *colno. 
- */ -static void -get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(foreignrel); - int i; - ListCell *lc; - - /* Get the relation alias ID */ - *relno = fpinfo->relation_index; - - /* Get the column alias ID */ - i = 1; - foreach (lc, foreignrel->reltarget->exprs) - { - if (equal(lfirst(lc), (Node *) node)) - { - *colno = i; - return; - } - i++; - } - - /* Shouldn't get here */ - elog(ERROR, "unexpected expression in subquery output"); -} diff --git a/tsl/src/fdw/deparse.h b/tsl/src/fdw/deparse.h deleted file mode 100644 index 9a64d5d78a4..00000000000 --- a/tsl/src/fdw/deparse.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include "data_node_chunk_assignment.h" - -typedef struct DeparsedInsertStmt -{ - const char *target; /* INSERT INTO (...) */ - unsigned int num_target_attrs; - const char *target_attrs; - bool do_nothing; - const char *returning; - List *retrieved_attrs; -} DeparsedInsertStmt; - -extern void deparse_insert_stmt(DeparsedInsertStmt *stmt, RangeTblEntry *rte, Index rtindex, - Relation rel, List *target_attrs, bool do_nothing, - List *returning_list); - -extern List *deparsed_insert_stmt_to_list(DeparsedInsertStmt *stmt); -extern void deparsed_insert_stmt_from_list(DeparsedInsertStmt *stmt, List *list_stmt); - -extern const char *deparsed_insert_stmt_get_sql(DeparsedInsertStmt *stmt, int64 num_rows); -extern const char *deparsed_insert_stmt_get_sql_explain(DeparsedInsertStmt *stmt, int64 num_rows); - -extern void deparseInsertSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - List *targetAttrs, int64 num_rows, bool doNothing, List *returningList, - List **retrieved_attrs); - -extern void deparseUpdateSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - List *targetAttrs, List *returningList, List **retrieved_attrs); - -extern void deparseDeleteSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, - List *returningList, List **retrieved_attrs); - -extern bool ts_is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, Expr *expr); - -extern void classify_conditions(PlannerInfo *root, RelOptInfo *baserel, List *input_conds, - List **remote_conds, List **local_conds); - -extern List *build_tlist_to_deparse(RelOptInfo *foreignrel); - -extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, List *tlist, - List *remote_where, List *remote_having, List *pathkeys, - bool is_subquery, List **retrieved_attrs, List **params_list, - DataNodeChunkAssignment *sca); - -extern const char *get_jointype_name(JoinType jointype); -extern void deparseStringLiteral(StringInfo buf, const char *val); -extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel); -extern void deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs); diff --git a/tsl/src/fdw/estimate.c b/tsl/src/fdw/estimate.c deleted file mode 100644 index dbbd99bdb2f..00000000000 --- a/tsl/src/fdw/estimate.c +++ /dev/null @@ -1,537 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include
-#include "relinfo.h"
-#include "estimate.h"
-#include "deparse.h"
-
-/* If no remote estimates, assume a sort costs 5% extra. */
-#define DEFAULT_FDW_SORT_MULTIPLIER 1.05
-
-typedef struct CostEstimate
-{
-    double rows;
-    double retrieved_rows;
-    int width;
-    Cost startup_cost;
-    Cost total_cost;
-    Cost cpu_per_tuple;
-    Cost run_cost;
-} CostEstimate;
-
-static bool
-find_first_aggref_walker(Node *node, Aggref **aggref)
-{
-    if (node == NULL)
-        return false;
-
-    if (IsA(node, Aggref))
-    {
-        *aggref = castNode(Aggref, node);
-        return true;
-    }
-
-    return expression_tree_walker(node, find_first_aggref_walker, aggref);
-}
-
-/*
- * Get the AggSplit mode of a relation.
- *
- * The AggSplit (partial or full aggregation) affects costing.
- * All aggregates computed for this relation must have the same
- * mode, so we only check the mode of the first match.
- */
-static AggSplit
-get_aggsplit(PlannerInfo *root, RelOptInfo *rel)
-{
-    Aggref *agg;
-    Assert(root->parse->hasAggs);
-
-    if (find_first_aggref_walker((Node *) rel->reltarget->exprs, &agg))
-        return agg->aggsplit;
-
-    /* If the aggregate is only referenced in the HAVING clause it will
-     * not be present in the targetlist, so we have to check the HAVING clause too. */
-    if (root->hasHavingQual && find_first_aggref_walker((Node *) root->parse->havingQual, &agg))
-        return agg->aggsplit;
-
-    /* Since PlannerInfo has hasAggs true (checked in caller) we should
-     * never get here and always find an Aggref. */
-    elog(ERROR, "no aggref found in targetlist or HAVING clause");
-    pg_unreachable();
-}
-
-static void
-get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
-{
-    TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel);
-    TsFdwRelInfo *ofpinfo = fdw_relinfo_get(fpinfo->outerrel);
-    AggClauseCosts aggcosts;
-    double input_rows;
-    int num_group_cols;
-    double num_groups = 1;
-
-    /* Make sure the core code set the pathtarget. */
-    Assert(rel->reltarget != NULL);
-
-    /*
-     * This cost model is a mixture of the costing done for sorted and
-     * hashed aggregates in cost_agg(). We are not sure which
-     * strategy will be considered at the remote side, so for
-     * simplicity we put all startup-related costs in startup_cost
-     * and add all finalization and run costs to total_cost.
-     *
-     * Also, the core code does not cost HAVING expressions, so
-     * similarly we do not consider remote and local conditions
-     * for costing here.
-     */
-
-    /* Get rows from input rel */
-    input_rows = ofpinfo->rows;
-
-    /* Collect statistics about aggregates for estimating costs. */
-    MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
-
-    if (root->parse->hasAggs)
-    {
-        /* Get the aggsplit to use in order to support push-down of partial
-         * aggregation */
-        AggSplit aggsplit = get_aggsplit(root, rel);
-
-        get_agg_clause_costs_compat(root, (Node *) fpinfo->grouped_tlist, aggsplit, &aggcosts);
-    }
-
-    /*
-     * Get the number of grouping columns and the possible number of groups. We don't
-     * have per-column ndistinct statistics on the access node for the root
-     * distributed hypertable, so in this case hardcode it as 1/10 of all rows
-     * to incentivize grouping push down.
-     * We do have per-column per-chunk statistics, so we could do better by
-     * combining these statistics for the participating chunks.
This probably - * should be done together with correcting the join selectivity estimation - * in add_data_node_scan_paths. - */ - num_group_cols = list_length(root->parse->groupClause); - if (fpinfo->type == TS_FDW_RELINFO_HYPERTABLE_DATA_NODE) - { - num_groups = clamp_row_est(input_rows / 10.); - } - else - { - num_groups = estimate_num_groups_compat(root, - get_sortgrouplist_exprs(root->parse->groupClause, - fpinfo->grouped_tlist), - input_rows, - NULL, - NULL); - } - - /* - * Get the retrieved_rows and rows estimates. If there are HAVING - * quals, account for their selectivity. - */ - if (root->parse->havingQual) - { - /* Factor in the selectivity of the remotely-checked quals */ - ce->retrieved_rows = clamp_row_est( - num_groups * clauselist_selectivity(root, fpinfo->remote_conds, 0, JOIN_INNER, NULL)); - /* Factor in the selectivity of the locally-checked quals */ - ce->rows = clamp_row_est(ce->retrieved_rows * fpinfo->local_conds_sel); - } - else - { - /* - * Number of rows expected from data node will be same as - * that of number of groups. - */ - ce->rows = ce->retrieved_rows = num_groups; - } - - /* Use width estimate made by the core code. */ - ce->width = rel->reltarget->width; - - /*----- - * Startup cost includes: - * 1. Startup cost for underneath input * relation - * 2. Cost of performing aggregation, per cost_agg() - * 3. Startup cost for PathTarget eval - *----- - */ - ce->startup_cost = ofpinfo->rel_startup_cost; - ce->startup_cost += rel->reltarget->cost.startup; - ce->startup_cost += aggcosts.transCost.startup; - ce->startup_cost += aggcosts.transCost.per_tuple * input_rows; - ce->startup_cost += aggcosts.finalCost.startup; - ce->startup_cost += (cpu_operator_cost * num_group_cols) * input_rows; - - /*----- - * Run time cost includes: - * 1. Run time cost of underneath input relation, adjusted for - * tlist replacement by apply_scanjoin_target_to_paths() - * 2. Run time cost of performing aggregation, per cost_agg() - *----- - */ - ce->run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost; - ce->run_cost += rel->reltarget->cost.per_tuple * input_rows; - ce->run_cost += aggcosts.finalCost.per_tuple * num_groups; - ce->run_cost += cpu_tuple_cost * num_groups; - - /* Account for the eval cost of HAVING quals, if any */ - if (root->parse->havingQual) - { - QualCost remote_cost; - - /* Add in the eval cost of the remotely-checked quals */ - cost_qual_eval(&remote_cost, fpinfo->remote_conds, root); - ce->startup_cost += remote_cost.startup; - ce->run_cost += remote_cost.per_tuple * num_groups; - /* Add in the eval cost of the locally-checked quals */ - ce->startup_cost += fpinfo->local_conds_cost.startup; - ce->run_cost += fpinfo->local_conds_cost.per_tuple * ce->retrieved_rows; - } - - /* Add in tlist eval cost for each output row */ - ce->startup_cost += rel->reltarget->cost.startup; - ce->run_cost += rel->reltarget->cost.per_tuple * ce->rows; -} - -static void -get_base_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel); - - ce->rows = rel->rows; - ce->width = rel->reltarget->width; - - /* Back into an estimate of the number of retrieved rows. */ - ce->retrieved_rows = clamp_row_est(ce->rows / fpinfo->local_conds_sel); - - /* Clamp retrieved rows estimates to at most rel->tuples. */ - ce->retrieved_rows = Min(ce->retrieved_rows, rel->tuples); - - /* - * Cost as though this were a seqscan, which is pessimistic. We - * effectively imagine the local_conds are being evaluated - * remotely, too. 
- */ - ce->startup_cost = 0; - ce->run_cost = 0; - ce->run_cost += seq_page_cost * rel->pages; - - ce->startup_cost += rel->baserestrictcost.startup; - ce->cpu_per_tuple = cpu_tuple_cost + rel->baserestrictcost.per_tuple; - ce->run_cost += ce->cpu_per_tuple * rel->tuples; - - /* Add in tlist eval cost for each output row */ - ce->startup_cost += rel->reltarget->cost.startup; - ce->run_cost += rel->reltarget->cost.per_tuple * ce->rows; -} - -/* - * This code is from estimate_path_cost_size() in postgres_fdw. - */ -static void -get_join_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel); - - TsFdwRelInfo *fpinfo_i; - TsFdwRelInfo *fpinfo_o; - QualCost join_cost; - QualCost remote_conds_cost; - int width; - double nrows; - double rows; - double retrieved_rows; - Cost startup_cost; - Cost run_cost; - - /* Use rows/width estimates made by the core code. */ - rows = rel->rows; - width = rel->reltarget->width; - - /* For join we expect inner and outer relations set */ - Assert(fpinfo->innerrel && fpinfo->outerrel); - - fpinfo_i = fdw_relinfo_get(fpinfo->innerrel); - fpinfo_o = fdw_relinfo_get(fpinfo->outerrel); - - /* Estimate of number of rows in cross product */ - nrows = fpinfo_i->rows * fpinfo_o->rows; - - /* - * Back into an estimate of the number of retrieved rows. Just in - * case this is nuts, clamp to at most nrows. - */ - retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel); - retrieved_rows = Min(retrieved_rows, nrows); - - /* - * The cost of foreign join is estimated as cost of generating - * rows for the joining relations + cost for applying quals on the - * rows. - */ - - /* - * Calculate the cost of clauses pushed down to the foreign server - */ - cost_qual_eval(&remote_conds_cost, fpinfo->remote_conds, root); - /* Calculate the cost of applying join clauses */ - cost_qual_eval(&join_cost, fpinfo->joinclauses, root); - - /* - * Startup cost includes startup cost of joining relations and the - * startup cost for join and other clauses. We do not include the - * startup cost specific to join strategy (e.g. setting up hash - * tables) since we do not know what strategy the foreign server - * is going to use. - */ - startup_cost = fpinfo_i->rel_startup_cost + fpinfo_o->rel_startup_cost; - startup_cost += join_cost.startup; - startup_cost += remote_conds_cost.startup; - startup_cost += fpinfo->local_conds_cost.startup; - - /* - * Run time cost includes: - * - * 1. Run time cost (total_cost - startup_cost) of relations being - * joined - * - * 2. Run time cost of applying join clauses on the cross product - * of the joining relations. - * - * 3. Run time cost of applying pushed down other clauses on the - * result of join - * - * 4. Run time cost of applying nonpushable other clauses locally - * on the result fetched from the foreign server. - */ - run_cost = fpinfo_i->rel_total_cost - fpinfo_i->rel_startup_cost; - run_cost += fpinfo_o->rel_total_cost - fpinfo_o->rel_startup_cost; - run_cost += nrows * join_cost.per_tuple; - nrows = clamp_row_est(nrows * fpinfo->joinclause_sel); - run_cost += nrows * remote_conds_cost.per_tuple; - - /* - * Discount the paths that are likely to be index scans on the remote, the - * same way we do for parameterized data node scan. 
- */ - bool index_matches_parameterization = false; - ListCell *lc; - foreach (lc, fpinfo->indexed_parameterizations) - { - Bitmapset *item = lfirst(lc); - if (bms_equal(item, fpinfo->innerrel->relids)) - { - index_matches_parameterization = true; - break; - } - } - - if (index_matches_parameterization) - { - run_cost /= 10.; - } - - run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows; - - /* Add in tlist eval cost for each output row */ - startup_cost += rel->reltarget->cost.startup; - run_cost += rel->reltarget->cost.per_tuple * rows; - - /* Return results. */ - ce->rows = rows; - ce->width = width; - ce->startup_cost = startup_cost; - ce->run_cost = run_cost; - ce->retrieved_rows = retrieved_rows; -} - -#define REL_HAS_CACHED_COSTS(fpinfo) \ - ((fpinfo)->rel_startup_cost >= 0 && (fpinfo)->rel_total_cost >= 0 && \ - (fpinfo)->rel_retrieved_rows >= 0) - -/* - * Adjust the cost estimates of a foreign grouping path to include the cost of - * generating properly-sorted output. - */ -static void -adjust_foreign_grouping_path_cost(PlannerInfo *root, List *pathkeys, double retrieved_rows, - double width, double limit_tuples, Cost *p_startup_cost, - Cost *p_run_cost) -{ - /* - * If the GROUP BY clause isn't sort-able, the plan chosen by the remote - * side is unlikely to generate properly-sorted output, so it would need - * an explicit sort; adjust the given costs with cost_sort(). Likewise, - * if the GROUP BY clause is sort-able but isn't a superset of the given - * pathkeys, adjust the costs with that function. Otherwise, adjust the - * costs by applying the same heuristic as for the scan or join case. - */ - if (!grouping_is_sortable(root->parse->groupClause) || - !pathkeys_contained_in(pathkeys, root->group_pathkeys)) - { - Path sort_path; /* dummy for result of cost_sort */ - - cost_sort(&sort_path, - root, - pathkeys, - *p_startup_cost + *p_run_cost, - retrieved_rows, - width, - 0.0, - work_mem, - limit_tuples); - - *p_startup_cost = sort_path.startup_cost; - *p_run_cost = sort_path.total_cost - sort_path.startup_cost; - } - else - { - /* - * The default extra cost seems too large for foreign-grouping cases; - * add 1/4th of that default. - */ - double sort_multiplier = 1.0 + (DEFAULT_FDW_SORT_MULTIPLIER - 1.0) * 0.25; - - *p_startup_cost *= sort_multiplier; - *p_run_cost *= sort_multiplier; - } -} - -/* - * fdw_estimate_path_cost_size - * Get cost and size estimates for a foreign scan on given foreign - * relation either a base relation or an upper relation containing - * foreign relations. Estimate rows using whatever statistics we have - * locally, in a way similar to ordinary tables. - * - * pathkeys specify the expected sort order if any for given path being costed. - * - * The function returns the cost and size estimates in p_row, p_width, - * p_startup_cost and p_total_cost variables. - */ -void -fdw_estimate_path_cost_size(PlannerInfo *root, RelOptInfo *rel, List *pathkeys, double *p_rows, - int *p_width, Cost *p_startup_cost, Cost *p_total_cost) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel); - CostEstimate ce = { - /* - * Use rows/width estimates made by set_baserel_size_estimates() for - * base foreign relations. - */ - .rows = rel->rows, - .width = rel->reltarget->width, - }; - - /* - * We will come here again and again with different set of pathkeys - * that caller wants to cost. We don't need to calculate the cost of - * bare scan each time. Instead, use the costs if we have cached them - * already. 
- */ - if (REL_HAS_CACHED_COSTS(fpinfo)) - { - ce.rows = fpinfo->rows; - ce.width = fpinfo->width; - ce.startup_cost = fpinfo->rel_startup_cost; - ce.run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost; - ce.retrieved_rows = fpinfo->rel_retrieved_rows; - } - else if (IS_JOIN_REL(rel) && fpinfo->outerrel != NULL && fpinfo->innerrel != NULL) - get_join_rel_estimate(root, rel, &ce); - else if (IS_UPPER_REL(rel)) - get_upper_rel_estimate(root, rel, &ce); - else - get_base_rel_estimate(root, rel, &ce); - - /* - * Without remote estimates, we have no real way to estimate the cost - * of generating sorted output. It could be free if the query plan - * the remote side would have chosen generates properly-sorted output - * anyway, but in most cases it will cost something. Estimate a value - * high enough that we won't pick the sorted path when the ordering - * isn't locally useful, but low enough that we'll err on the side of - * pushing down the ORDER BY clause when it's useful to do so. - */ - if (pathkeys != NIL) - { - if (IS_UPPER_REL(rel)) - { - Assert(rel->reloptkind == RELOPT_UPPER_REL || - rel->reloptkind == RELOPT_OTHER_UPPER_REL); - - /* FIXME: Currently don't have a way to pass on limit here */ - const double limit_tuples = -1; - - adjust_foreign_grouping_path_cost(root, - pathkeys, - ce.retrieved_rows, - ce.width, - limit_tuples, - &ce.startup_cost, - &ce.run_cost); - } - else - { - ce.startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER; - ce.run_cost *= DEFAULT_FDW_SORT_MULTIPLIER; - } - } - - ce.total_cost = ce.startup_cost + ce.run_cost; - - /* - * Cache the costs for scans without any pathkeys - * before adding the costs for transferring data from the data node. - * These costs are useful for costing the join between this relation and - * another foreign relation or to calculate the costs of paths with - * pathkeys for this relation, when the costs can not be obtained from the - * data node. This function will be called at least once for every - * foreign relation without pathkeys. - */ - if (!REL_HAS_CACHED_COSTS(fpinfo) && pathkeys == NIL) - { - fpinfo->rel_startup_cost = ce.startup_cost; - fpinfo->rel_total_cost = ce.total_cost; - fpinfo->rel_retrieved_rows = ce.retrieved_rows; - } - - /* - * Add some additional cost factors to account for connection overhead - * (fdw_startup_cost), transferring data across the network - * (fdw_tuple_cost per retrieved row), and local manipulation of the data - * (cpu_tuple_cost per retrieved row). - */ - ce.startup_cost += fpinfo->fdw_startup_cost; - ce.total_cost += fpinfo->fdw_startup_cost; - ce.total_cost += fpinfo->fdw_tuple_cost * ce.retrieved_rows; - ce.total_cost += cpu_tuple_cost * ce.retrieved_rows; - - /* Return results. */ - *p_rows = ce.rows; - *p_width = ce.width; - *p_startup_cost = ce.startup_cost; - *p_total_cost = ce.total_cost; -} diff --git a/tsl/src/fdw/estimate.h b/tsl/src/fdw/estimate.h deleted file mode 100644 index e91eaa61a65..00000000000 --- a/tsl/src/fdw/estimate.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#pragma once - -#include -#include -#include - -extern void fdw_estimate_path_cost_size(PlannerInfo *root, RelOptInfo *rel, List *pathkeys, - double *p_rows, int *p_width, Cost *p_startup_cost, - Cost *p_total_cost); diff --git a/tsl/src/fdw/fdw.c b/tsl/src/fdw/fdw.c deleted file mode 100644 index 773c757b279..00000000000 --- a/tsl/src/fdw/fdw.c +++ /dev/null @@ -1,434 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "data_node_scan_plan.h" -#include "debug_guc.h" -#include "debug.h" -#include "fdw.h" -#include "fdw_utils.h" -#include "modify_exec.h" -#include "modify_plan.h" -#include "option.h" -#include "relinfo.h" -#include "scan_exec.h" -#include "scan_plan.h" - -/* - * Parse options from foreign table and apply them to fpinfo. - * - * New options might also require tweaking merge_fdw_options(). - */ -static void -apply_table_options(ForeignTable *table, TsFdwRelInfo *fpinfo) -{ - ListCell *lc; - - foreach (lc, table->options) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "fetch_size") == 0) - fpinfo->fetch_size = strtol(defGetString(def), NULL, 10); - } -} - -/* This creates the fdw_relation_info object for hypertables and foreign table - * type objects. */ -static void -get_foreign_rel_size(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) -{ - RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); - - /* A base hypertable is a regular table and not a foreign table. It is the only - * kind of regular table that will ever have this callback called on it. */ - if (RELKIND_RELATION == rte->relkind) - { - fdw_relinfo_create(root, baserel, InvalidOid, foreigntableid, TS_FDW_RELINFO_HYPERTABLE); - } - else - { - ForeignTable *table = GetForeignTable(foreigntableid); - - fdw_relinfo_create(root, - baserel, - table->serverid, - foreigntableid, - TS_FDW_RELINFO_FOREIGN_TABLE); - - apply_table_options(table, fdw_relinfo_get(baserel)); - } -} - -static void -get_foreign_paths(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(baserel); - Path *path; - - Assert(fpinfo->type != TS_FDW_RELINFO_HYPERTABLE_DATA_NODE); - - if (fpinfo->type == TS_FDW_RELINFO_HYPERTABLE) - { - if (ts_guc_enable_per_data_node_queries) - data_node_scan_add_node_paths(root, baserel); - return; - } - - if (baserel->reloptkind == RELOPT_JOINREL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("foreign joins are not supported"))); - - /* - * Create simplest ForeignScan path node and add it to baserel. This path - * corresponds to SeqScan path of regular tables (though depending on what - * baserestrict conditions we were able to send to remote, there might - * actually be an indexscan happening there). We already did all the work - * to estimate cost and size of this path. 
- */ - path = (Path *) create_foreignscan_path(root, - baserel, - NULL, /* default pathtarget */ - fpinfo->rows, - fpinfo->startup_cost, - fpinfo->total_cost, - NIL, /* no pathkeys */ - NULL, /* no outer rel either */ - NULL, /* no extra plan */ - NIL); /* no fdw_private list */ - fdw_utils_add_path(baserel, path); - - /* Add paths with pathkeys */ - fdw_add_paths_with_pathkeys_for_rel(root, - baserel, - NULL, - (CreatePathFunc) create_foreignscan_path); -#ifdef TS_DEBUG - if (ts_debug_optimizer_flags.show_rel) - tsl_debug_log_rel_with_paths(root, baserel, NULL); -#endif -} - -static ForeignScan * -get_foreign_plan(PlannerInfo *root, RelOptInfo *foreignrel, Oid foreigntableid, - ForeignPath *best_path, List *tlist, List *scan_clauses, Plan *outer_plan) -{ - ScanInfo info; - - memset(&info, 0, sizeof(ScanInfo)); - - fdw_scan_info_init(&info, root, foreignrel, &best_path->path, scan_clauses, outer_plan); - - /* - * Create the ForeignScan node for the given relation. - * - * Note that the remote parameter expressions are stored in the fdw_exprs - * field of the finished plan node; we can't keep them in private state - * because then they wouldn't be subject to later planner processing. - */ - return make_foreignscan(tlist, - info.local_exprs, - info.scan_relid, - info.params_list, - info.fdw_private, - info.fdw_scan_tlist, - info.fdw_recheck_quals, - outer_plan); -} - -static void -begin_foreign_scan(ForeignScanState *node, int eflags) -{ - ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan; - - if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) && !ts_guc_enable_remote_explain) - return; - - node->fdw_state = (TsFdwScanState *) palloc0(sizeof(TsFdwScanState)); - - /* - * This is a per-chunk FDW scan, not per-data-node scan, so we're going to - * scan multiple tables per data node, so we only can use the cursor data - * fetcher. 
- */ - ((TsFdwScanState *) node->fdw_state)->planned_fetcher_type = CursorFetcherType; - - fdw_scan_init(&node->ss, - node->fdw_state, - fsplan->fs_relids, - fsplan->fdw_private, - fsplan->fdw_exprs, - eflags); -} - -static TupleTableSlot * -iterate_foreign_scan(ForeignScanState *node) -{ - TsFdwScanState *fsstate = (TsFdwScanState *) node->fdw_state; - - return fdw_scan_iterate(&node->ss, fsstate); -} - -static void -end_foreign_scan(ForeignScanState *node) -{ - fdw_scan_end((TsFdwScanState *) node->fdw_state); -} - -static void -rescan_foreign_scan(ForeignScanState *node) -{ - fdw_scan_rescan(&node->ss, (TsFdwScanState *) node->fdw_state); -} - -static TupleTableSlot * -exec_foreign_update(EState *estate, ResultRelInfo *rri, TupleTableSlot *slot, - TupleTableSlot *plan_slot) -{ - TsFdwModifyState *fmstate = (TsFdwModifyState *) rri->ri_FdwState; - - return fdw_exec_foreign_update_or_delete(fmstate, estate, slot, plan_slot, UPDATE_CMD); -} - -static TupleTableSlot * -exec_foreign_delete(EState *estate, ResultRelInfo *rri, TupleTableSlot *slot, - TupleTableSlot *plan_slot) -{ - TsFdwModifyState *fmstate = (TsFdwModifyState *) rri->ri_FdwState; - - return fdw_exec_foreign_update_or_delete(fmstate, estate, slot, plan_slot, DELETE_CMD); -} - -static void -end_foreign_modify(EState *estate, ResultRelInfo *rri) -{ - TsFdwModifyState *fmstate = (TsFdwModifyState *) rri->ri_FdwState; - - /* If fmstate is NULL, we are in EXPLAIN; nothing to do */ - if (fmstate == NULL) - return; - - /* Destroy the execution state */ - fdw_finish_foreign_modify(fmstate); -} - -/* - * Add resjunk column(s) needed for update/delete on a foreign table - */ -#if PG14_LT -static void -add_foreign_update_targets(Query *parsetree, RangeTblEntry *target_rte, Relation target_relation) -{ - Var *var; - const char *attrname; - TargetEntry *tle; - - /* - * In timescaledb_fdw, what we need is the ctid, same as for a regular - * table. - */ - - /* Make a Var representing the desired value */ - var = makeVar(parsetree->resultRelation, - SelfItemPointerAttributeNumber, - TIDOID, - -1, - InvalidOid, - 0); - - /* Wrap it in a resjunk TLE with the right name ... */ - attrname = "ctid"; - - tle = makeTargetEntry((Expr *) var, - list_length(parsetree->targetList) + 1, - pstrdup(attrname), - true); - - /* ... and add it to the query's targetlist */ - parsetree->targetList = lappend(parsetree->targetList, tle); -} -#else -static void -add_foreign_update_targets(PlannerInfo *root, Index rtindex, RangeTblEntry *target_rte, - Relation target_relation) -{ - /* - * In timescaledb_fdw, what we need is the ctid, same as for a regular - * table. 
- */ - - /* Make a Var representing the desired value */ - Var *var = makeVar(rtindex, SelfItemPointerAttributeNumber, TIDOID, -1, InvalidOid, 0); - - /* Register it as a row-identity column needed by this target rel */ - add_row_identity_var(root, var, rtindex, "ctid"); -} -#endif - -static TupleTableSlot * -exec_foreign_insert(EState *estate, ResultRelInfo *rri, TupleTableSlot *slot, - TupleTableSlot *planslot) -{ - TsFdwModifyState *fmstate = (TsFdwModifyState *) rri->ri_FdwState; - - return fdw_exec_foreign_insert(fmstate, estate, slot, planslot); -} - -static int -is_foreign_rel_updatable(Relation rel) -{ - return (1 << CMD_INSERT) | (1 << CMD_DELETE) | (1 << CMD_UPDATE); -} - -static void -explain_foreign_scan(ForeignScanState *node, struct ExplainState *es) -{ - List *fdw_private = ((ForeignScan *) node->ss.ps.plan)->fdw_private; - - fdw_scan_explain(&node->ss, fdw_private, es, (TsFdwScanState *) node->fdw_state); -} - -static void -begin_foreign_modify(ModifyTableState *mtstate, ResultRelInfo *rri, List *fdw_private, - int subplan_index, int eflags) -{ -#if PG14_LT - Plan *subplan = mtstate->mt_plans[subplan_index]->plan; -#else - Plan *subplan = outerPlanState(mtstate)->plan; -#endif - - /* - * Do nothing in EXPLAIN (no ANALYZE) case. rri->ri_FdwState stays NULL. - */ - if (eflags & EXEC_FLAG_EXPLAIN_ONLY) - return; - - fdw_begin_foreign_modify(&mtstate->ps, rri, mtstate->operation, fdw_private, subplan); -} - -static void -explain_foreign_modify(ModifyTableState *mtstate, ResultRelInfo *rri, List *fdw_private, - int subplan_index, struct ExplainState *es) -{ - fdw_explain_modify(&mtstate->ps, rri, fdw_private, subplan_index, es); -} - -static List * -plan_foreign_modify(PlannerInfo *root, ModifyTable *plan, Index result_relation, int subplan_index) -{ - return fdw_plan_foreign_modify(root, plan, result_relation, subplan_index); -} - -/* - * get_foreign_upper_paths - * Add paths for post-join operations like aggregation, grouping etc. if - * corresponding operations are safe to push down. - * - * Right now, we only support aggregate, grouping and having clause pushdown. - */ -static void -get_foreign_upper_paths(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra) -{ - TsFdwRelInfo *fpinfo = input_rel->fdw_private ? fdw_relinfo_get(input_rel) : NULL; - - if (fpinfo == NULL) - return; - - /* We abuse the FDW API's GetForeignUpperPaths callback because, for some - * reason, the regular create_upper_paths_hook is never called for - * partially grouped rels, so we cannot use if for server rels. See end of - * PostgreSQL planner.c:create_partial_grouping_paths(). */ - if (fpinfo->type == TS_FDW_RELINFO_HYPERTABLE_DATA_NODE) - data_node_scan_create_upper_paths(root, stage, input_rel, output_rel, extra); - else - fdw_create_upper_paths(fpinfo, - root, - stage, - input_rel, - output_rel, - extra, - (CreateUpperPathFunc) create_foreign_upper_path); - -#ifdef TS_DEBUG - if (ts_debug_optimizer_flags.show_upper & (1 << stage)) - tsl_debug_log_rel_with_paths(root, output_rel, &stage); -#endif -} - -/* - * get_foreign_join_paths - * Add possible ForeignPath to joinrel, if join is safe to push down. 
- */ -void -tsl_mn_get_foreign_join_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra) -{ - data_node_generate_pushdown_join_paths(root, joinrel, outerrel, innerrel, jointype, extra); -} - -static FdwRoutine timescaledb_fdw_routine = { - .type = T_FdwRoutine, - /* scan (mandatory) */ - .GetForeignRelSize = get_foreign_rel_size, - .GetForeignPaths = get_foreign_paths, - .GetForeignPlan = get_foreign_plan, - .BeginForeignScan = begin_foreign_scan, - .IterateForeignScan = iterate_foreign_scan, - .EndForeignScan = end_foreign_scan, - .ReScanForeignScan = rescan_foreign_scan, - .GetForeignUpperPaths = get_foreign_upper_paths, - /* update */ - .IsForeignRelUpdatable = is_foreign_rel_updatable, - .PlanForeignModify = plan_foreign_modify, - .BeginForeignModify = begin_foreign_modify, - .ExecForeignInsert = exec_foreign_insert, - .ExecForeignDelete = exec_foreign_delete, - .ExecForeignUpdate = exec_foreign_update, - .EndForeignModify = end_foreign_modify, - .AddForeignUpdateTargets = add_foreign_update_targets, - /* explain/analyze */ - .ExplainForeignScan = explain_foreign_scan, - .ExplainForeignModify = explain_foreign_modify, - .AnalyzeForeignTable = NULL, -}; - -Datum -timescaledb_fdw_handler(PG_FUNCTION_ARGS) -{ - PG_RETURN_POINTER(×caledb_fdw_routine); -} - -Datum -timescaledb_fdw_validator(PG_FUNCTION_ARGS) -{ - List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); - Oid catalog = PG_GETARG_OID(1); - - option_validate(options_list, catalog); - - PG_RETURN_VOID(); -} diff --git a/tsl/src/fdw/fdw.h b/tsl/src/fdw/fdw.h deleted file mode 100644 index d9bb08d4690..00000000000 --- a/tsl/src/fdw/fdw.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include - -extern void tsl_mn_get_foreign_join_paths(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - JoinType jointype, JoinPathExtraData *extra); - -extern Datum timescaledb_fdw_handler(PG_FUNCTION_ARGS); -extern Datum timescaledb_fdw_validator(PG_FUNCTION_ARGS); diff --git a/tsl/src/fdw/fdw_utils.c b/tsl/src/fdw/fdw_utils.c deleted file mode 100644 index 01f7ff4e872..00000000000 --- a/tsl/src/fdw/fdw_utils.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include -#include -#include - -#include "fdw_utils.h" -#include "fdw/relinfo.h" - -#ifdef TS_DEBUG - -/* - * Copy a path. - * - * The returned path is a shallow copy that includes deep copies of a few - * fields of interest when printing debug information. Doing a deep copy of a - * Path is a lot of work so we only copy the fields we need. - * - * The copied path is intended to be used only in debug.c. - * - * Note that PostgreSQL's copyObject does not support copying Path(s) so we - * have our own copy function. 
- */ -static Path * -copy_path(Path *in) -{ - Path *path; - RelOptInfo *parent = makeNode(RelOptInfo); - - *parent = *in->parent; - - switch (nodeTag(in)) - { - case T_CustomPath: - { - CustomPath *cp_copy = makeNode(CustomPath); - CustomPath *cp = castNode(CustomPath, in); - ListCell *lc; - - *cp_copy = *cp; - cp_copy->custom_paths = NIL; - - foreach (lc, cp->custom_paths) - { - Path *p = copy_path(lfirst_node(Path, lc)); - cp_copy->custom_paths = lappend(cp_copy->custom_paths, p); - } - path = &cp_copy->path; - break; - } - case T_ForeignPath: - { - ForeignPath *fp = makeNode(ForeignPath); - *fp = *castNode(ForeignPath, in); - path = &fp->path; - break; - } - default: - /* Not supported */ - Assert(false); - pg_unreachable(); - return in; - } - - path->parent = parent; - - return path; -} - -static ConsideredPath * -create_considered_path(Path *path) -{ - ConsideredPath *cp = palloc(sizeof(ConsideredPath)); - - cp->path = copy_path(path); - cp->origin = (uintptr_t) path; - - return cp; -} - -void -fdw_utils_add_path(RelOptInfo *rel, Path *new_path) -{ - TsFdwRelInfo *fdw_info = fdw_relinfo_get(rel); - ConsideredPath *cp = create_considered_path(new_path); - - /* Since add_path will deallocate thrown paths we need to create a copy here so we can print it - * later on */ - fdw_info->considered_paths = lappend(fdw_info->considered_paths, cp); - add_path(rel, new_path); -} - -static void -free_path(Path *path) -{ - pfree(path->parent); - - if (nodeTag(path) == T_CustomPath) - { - CustomPath *cp = (CustomPath *) path; - ListCell *lc; - - foreach (lc, cp->custom_paths) - { - Path *p = lfirst(lc); - cp->custom_paths = list_delete_ptr(cp->custom_paths, p); - free_path(p); - } - } - pfree(path); -} - -/* - * Deallocate path copy - */ -void -fdw_utils_free_path(ConsideredPath *cpath) -{ - free_path(cpath->path); - pfree(cpath); -} - -#endif /* TS_DEBUG */ diff --git a/tsl/src/fdw/fdw_utils.h b/tsl/src/fdw/fdw_utils.h deleted file mode 100644 index 61d9a356984..00000000000 --- a/tsl/src/fdw/fdw_utils.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include - -#include "relinfo.h" - -#ifdef TS_DEBUG - -extern void fdw_utils_add_path(RelOptInfo *rel, Path *new_path); -extern void fdw_utils_free_path(ConsideredPath *path); -#else - -#define fdw_utils_add_path(rel, path) add_path(rel, path); - -#endif /* TS_DEBUG */ diff --git a/tsl/src/fdw/modify_exec.c b/tsl/src/fdw/modify_exec.c deleted file mode 100644 index 9d3a5c38be6..00000000000 --- a/tsl/src/fdw/modify_exec.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ts_catalog/chunk_data_node.h" - -#include - -#include "scan_plan.h" -#include "modify_exec.h" -#include "modify_plan.h" -#include "tsl/src/chunk.h" - -/* - * This enum describes what's kept in the fdw_private list for a ModifyTable - * node referencing a timescaledb_fdw foreign table. 
We store: - * - * 1) INSERT/UPDATE/DELETE statement text to be sent to the data node - * 2) Integer list of target attribute numbers for INSERT/UPDATE - * (NIL for a DELETE) - * 3) Boolean flag showing if the remote query has a RETURNING clause - * 4) Integer list of attribute numbers retrieved by RETURNING, if any - */ -enum FdwModifyPrivateIndex -{ - /* SQL statement to execute remotely (as a String node) */ - FdwModifyPrivateUpdateSql, - /* Integer list of target attribute numbers for INSERT/UPDATE */ - FdwModifyPrivateTargetAttnums, - /* has-returning flag (as an integer Value node) */ - FdwModifyPrivateHasReturning, - /* Integer list of attribute numbers retrieved by RETURNING */ - FdwModifyPrivateRetrievedAttrs, - /* The data nodes for the current chunk */ - FdwModifyPrivateDataNodes, - /* Insert state for the current chunk */ - FdwModifyPrivateChunkInsertState, -}; - -typedef struct TsFdwDataNodeState -{ - TSConnectionId id; - /* for remote query execution */ - TSConnection *conn; /* connection for the scan */ - PreparedStmt *p_stmt; /* prepared statement handle, if created */ -} TsFdwDataNodeState; - -/* - * Execution state of a foreign insert/update/delete operation. - */ -typedef struct TsFdwModifyState -{ - Relation rel; /* relcache entry for the foreign table */ - AttConvInMetadata *att_conv_metadata; /* attribute datatype conversion metadata for converting - result to tuples */ - - /* extracted fdw_private data */ - char *query; /* text of INSERT/UPDATE/DELETE command */ - List *target_attrs; /* list of target attribute numbers */ - bool has_returning; /* is there a RETURNING clause? */ - TupleFactory *tupfactory; - - AttrNumber ctid_attno; /* attnum of input resjunk ctid column */ - - bool prepared; - int num_data_nodes; /* number of "available" datanodes */ - int num_all_data_nodes; /* number of all datanodes assigned to this "rel" */ - List *stale_data_nodes; /* DNs marked stale for this chunk */ - StmtParams *stmt_params; /* prepared statement paremeters */ - TsFdwDataNodeState data_nodes[FLEXIBLE_ARRAY_MEMBER]; -} TsFdwModifyState; - -#define TS_FDW_MODIFY_STATE_SIZE(num_data_nodes) \ - (sizeof(TsFdwModifyState) + (sizeof(TsFdwDataNodeState) * (num_data_nodes))) - -static void -initialize_fdw_data_node_state(TsFdwDataNodeState *fdw_data_node, TSConnectionId id) -{ - fdw_data_node->id = id; - fdw_data_node->conn = remote_dist_txn_get_connection(id, REMOTE_TXN_USE_PREP_STMT); - fdw_data_node->p_stmt = NULL; -} - -/* - * create_foreign_modify - * Construct an execution state of a foreign insert/update/delete - * operation - */ -static TsFdwModifyState * -create_foreign_modify(EState *estate, Relation rel, CmdType operation, Oid check_as_user, - Plan *subplan, char *query, List *target_attrs, bool has_returning, - List *retrieved_attrs, List *server_id_list) -{ - TsFdwModifyState *fmstate; - TupleDesc tupdesc = RelationGetDescr(rel); - ListCell *lc; - Oid user_id = OidIsValid(check_as_user) ? 
check_as_user : GetUserId(); - int i = 0; - int num_data_nodes, num_all_data_nodes; - int32 hypertable_id = ts_chunk_get_hypertable_id_by_reloid(rel->rd_id); - List *all_replicas = NIL, *avail_replicas = NIL; - - if (hypertable_id == INVALID_HYPERTABLE_ID) - { - num_data_nodes = num_all_data_nodes = 1; - } - else - { - int32 chunk_id = ts_chunk_get_id_by_relid(rel->rd_id); - - all_replicas = ts_chunk_data_node_scan_by_chunk_id(chunk_id, CurrentMemoryContext); - avail_replicas = ts_chunk_data_node_scan_by_chunk_id_filter(chunk_id, CurrentMemoryContext); - num_all_data_nodes = list_length(all_replicas); - } - - /* - * Identify which user to do the remote access as. This should match what - * ExecCheckRTEPerms() does. - */ - - if (NIL != server_id_list) - { - /* - * This is either (1) an INSERT on a hypertable chunk, or (2) an - * UPDATE or DELETE on a chunk. In the former case (1), the data nodes - * were passed on from the INSERT path via the chunk insert state, and - * in the latter case (2), the data nodes were resolved at planning time - * in the FDW planning callback. - */ - - fmstate = - (TsFdwModifyState *) palloc0(TS_FDW_MODIFY_STATE_SIZE(list_length(server_id_list))); - foreach (lc, server_id_list) - { - Oid server_id = lfirst_oid(lc); - TSConnectionId id = remote_connection_id(server_id, user_id); - - initialize_fdw_data_node_state(&fmstate->data_nodes[i++], id); - } - num_data_nodes = list_length(server_id_list); - Assert(num_data_nodes == list_length(avail_replicas)); - } - else - { - /* - * If there is no chunk insert state and no data nodes from planning, - * this is an INSERT, UPDATE, or DELETE on a standalone foreign table. - * - * If it's a regular foreign table then we must get the data node from - * the foreign table's metadata. - * - * Otherwise, we use the list of "available" DNs from earlier - */ - if (hypertable_id == INVALID_HYPERTABLE_ID) - { - ForeignTable *table = GetForeignTable(rel->rd_id); - TSConnectionId id = remote_connection_id(table->serverid, user_id); - - Assert(num_data_nodes == 1 && num_all_data_nodes == 1); - fmstate = (TsFdwModifyState *) palloc0(TS_FDW_MODIFY_STATE_SIZE(num_data_nodes)); - initialize_fdw_data_node_state(&fmstate->data_nodes[0], id); - } - else - { - /* we use only the available replicas */ - fmstate = - (TsFdwModifyState *) palloc0(TS_FDW_MODIFY_STATE_SIZE(list_length(avail_replicas))); - foreach (lc, avail_replicas) - { - ChunkDataNode *node = lfirst(lc); - TSConnectionId id = remote_connection_id(node->foreign_server_oid, user_id); - - initialize_fdw_data_node_state(&fmstate->data_nodes[i++], id); - } - num_data_nodes = list_length(avail_replicas); - } - } - - /* Set up remote query information. */ - fmstate->rel = rel; - fmstate->query = query; - fmstate->target_attrs = target_attrs; - fmstate->has_returning = has_returning; - fmstate->prepared = false; /* PREPARE will happen later */ - fmstate->num_data_nodes = num_data_nodes; - fmstate->num_all_data_nodes = num_all_data_nodes; - - /* Prepare for input conversion of RETURNING results. 
*/ - if (fmstate->has_returning) - fmstate->att_conv_metadata = data_format_create_att_conv_in_metadata(tupdesc, false); - - if (operation == CMD_UPDATE || operation == CMD_DELETE) - { - Assert(subplan != NULL); - - /* Find the ctid resjunk column in the subplan's result */ - fmstate->ctid_attno = ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); - if (!AttributeNumberIsValid(fmstate->ctid_attno)) - elog(ERROR, "could not find junk ctid column"); - } - - fmstate->stmt_params = stmt_params_create(fmstate->target_attrs, - operation == CMD_UPDATE || operation == CMD_DELETE, - tupdesc, - 1); - - fmstate->tupfactory = tuplefactory_create_for_rel(rel, retrieved_attrs); - - return fmstate; -} - -/* - * Convert a relation's attribute numbers to the corresponding numbers for - * another relation. - * - * Conversions are necessary when, e.g., a (new) chunk's attribute numbers do - * not match the root table's numbers after a column has been removed. - */ -static List * -convert_attrs(TupleConversionMap *map, List *attrs) -{ - List *new_attrs = NIL; - ListCell *lc; - - foreach (lc, attrs) - { - AttrNumber attnum = lfirst_int(lc); - int i; - - for (i = 0; i < map->outdesc->natts; i++) - { - if (map->attrMap->attnums[i] == attnum) - { - new_attrs = lappend_int(new_attrs, AttrOffsetGetAttrNumber(i)); - break; - } - } - - /* Assert that we found the attribute */ - Assert(i != map->outdesc->natts); - } - - Assert(list_length(attrs) == list_length(new_attrs)); - - return new_attrs; -} - -static List * -get_chunk_server_id_list(const List *chunk_data_nodes) -{ - List *list = NIL; - ListCell *lc; - - foreach (lc, chunk_data_nodes) - { - ChunkDataNode *cdn = lfirst(lc); - - list = lappend_oid(list, cdn->foreign_server_oid); - } - - return list; -} - -void -fdw_begin_foreign_modify(PlanState *pstate, ResultRelInfo *rri, CmdType operation, - List *fdw_private, Plan *subplan) -{ - TsFdwModifyState *fmstate; - EState *estate = pstate->state; - char *query; - List *target_attrs; - bool has_returning; - List *retrieved_attrs; - List *server_id_list = NIL; - ChunkInsertState *cis = NULL; - RangeTblEntry *rte; - - /* Deconstruct fdw_private data. */ - query = strVal(list_nth(fdw_private, FdwModifyPrivateUpdateSql)); - target_attrs = (List *) list_nth(fdw_private, FdwModifyPrivateTargetAttnums); - has_returning = intVal(list_nth(fdw_private, FdwModifyPrivateHasReturning)); - retrieved_attrs = (List *) list_nth(fdw_private, FdwModifyPrivateRetrievedAttrs); - - /* Find RTE. 
*/ - rte = rt_fetch(rri->ri_RangeTableIndex, estate->es_range_table); - - Assert(NULL != rte); - - if (list_length(fdw_private) > FdwModifyPrivateDataNodes) - { - List *data_nodes = (List *) list_nth(fdw_private, FdwModifyPrivateDataNodes); - ListCell *lc; - - foreach (lc, data_nodes) - server_id_list = lappend_oid(server_id_list, lfirst_oid(lc)); - } - - if (list_length(fdw_private) > FdwModifyPrivateChunkInsertState) - { - cis = (ChunkInsertState *) list_nth(fdw_private, FdwModifyPrivateChunkInsertState); - - /* - * A chunk may have different attribute numbers than the root relation - * that we planned the attribute lists for - */ - if (NULL != cis->hyper_to_chunk_map) - { - /* - * Convert the target attributes (the inserted or updated - * attributes) - */ - target_attrs = convert_attrs(cis->hyper_to_chunk_map, target_attrs); - - /* - * Convert the retrieved attributes, if there is a RETURNING - * statement - */ - if (NIL != retrieved_attrs) - retrieved_attrs = convert_attrs(cis->hyper_to_chunk_map, retrieved_attrs); - } - - /* - * If there's a chunk insert state, then it has the authoritative - * data node list. - */ - server_id_list = get_chunk_server_id_list(cis->chunk_data_nodes); - } - - /* Construct an execution state. */ - Oid checkAsUser = InvalidOid; -#if PG16_LT - checkAsUser = rte->checkAsUser; -#else - if (rte->perminfoindex > 0) - { - RTEPermissionInfo *perminfo = getRTEPermissionInfo(estate->es_rteperminfos, rte); - checkAsUser = perminfo->checkAsUser; - } -#endif - fmstate = create_foreign_modify(estate, - rri->ri_RelationDesc, - operation, - checkAsUser, - subplan, - query, - target_attrs, - has_returning, - retrieved_attrs, - server_id_list); - - rri->ri_FdwState = fmstate; -} - -static PreparedStmt * -prepare_foreign_modify_data_node(TsFdwModifyState *fmstate, TsFdwDataNodeState *fdw_data_node) -{ - AsyncRequest *req; - - Assert(NULL == fdw_data_node->p_stmt); - - req = async_request_send_prepare(fdw_data_node->conn, - fmstate->query, - stmt_params_num_params(fmstate->stmt_params)); - - Assert(NULL != req); - - /* - * Async request interface doesn't seem to allow waiting for multiple - * prepared statements in an AsyncRequestSet. Should fix async API - */ - return async_request_wait_prepared_statement(req); -} - -/* - * prepare_foreign_modify - * Establish a prepared statement for execution of INSERT/UPDATE/DELETE - */ -static void -prepare_foreign_modify(TsFdwModifyState *fmstate) -{ - int i; - - for (i = 0; i < fmstate->num_data_nodes; i++) - { - TsFdwDataNodeState *fdw_data_node = &fmstate->data_nodes[i]; - - fdw_data_node->p_stmt = prepare_foreign_modify_data_node(fmstate, fdw_data_node); - } - - fmstate->prepared = true; -} - -/* - * store_returning_result - * Store the result of a RETURNING clause - * - * On error, be sure to release the PGresult on the way out. Callers do not - * have PG_TRY blocks to ensure this happens. - */ -static void -store_returning_result(TsFdwModifyState *fmstate, TupleTableSlot *slot, PGresult *res) -{ - PG_TRY(); - { - HeapTuple newtup = - tuplefactory_make_tuple(fmstate->tupfactory, res, 0, PQbinaryTuples(res)); - - /* tuple will be deleted when it is cleared from the slot */ - ExecStoreHeapTuple(newtup, slot, true); - } - PG_CATCH(); - { - if (res) - PQclear(res); - PG_RE_THROW(); - } - PG_END_TRY(); -} - -static int -response_type(AttConvInMetadata *att_conv_metadata) -{ - if (!ts_guc_enable_connection_binary_data) - return FORMAT_TEXT; - return att_conv_metadata == NULL || att_conv_metadata->binary ? 
FORMAT_BINARY : FORMAT_TEXT; -} - -static void -fdw_chunk_update_stale_metadata(TsFdwModifyState *fmstate) -{ - List *all_data_nodes; - Relation rel = fmstate->rel; - - if (fmstate->num_all_data_nodes == fmstate->num_data_nodes) - return; - - if (fmstate->num_all_data_nodes > fmstate->num_data_nodes) - { - Chunk *chunk = ts_chunk_get_by_relid(rel->rd_id, true); - /* get filtered list */ - List *serveroids = get_chunk_data_nodes(rel->rd_id); - ListCell *lc; - bool chunk_is_locked = false; - - Assert(list_length(serveroids) == fmstate->num_data_nodes); - - all_data_nodes = ts_chunk_data_node_scan_by_chunk_id(chunk->fd.id, CurrentMemoryContext); - Assert(list_length(all_data_nodes) == fmstate->num_all_data_nodes); - - foreach (lc, all_data_nodes) - { - ChunkDataNode *cdn = lfirst(lc); - /* - * check if this DN is a part of serveroids. If not - * found in serveroids, then we need to remove this - * chunk id to node name mapping and also update the primary - * foreign server if necessary. It's possible that this metadata - * might have been already cleared earlier but we have no way of - * knowing that here. - */ - if (!list_member_oid(serveroids, cdn->foreign_server_oid) && - !list_member_oid(fmstate->stale_data_nodes, cdn->foreign_server_oid)) - { - if (!chunk_is_locked) - { - LockRelationOid(chunk->table_id, ShareUpdateExclusiveLock); - chunk_is_locked = true; - } - - chunk_update_foreign_server_if_needed(chunk, cdn->foreign_server_oid, false); - ts_chunk_data_node_delete_by_chunk_id_and_node_name(cdn->fd.chunk_id, - NameStr(cdn->fd.node_name)); - - /* append this DN serveroid to the list of DNs marked stale for this chunk */ - fmstate->stale_data_nodes = - lappend_oid(fmstate->stale_data_nodes, cdn->foreign_server_oid); - } - } - } -} - -TupleTableSlot * -fdw_exec_foreign_insert(TsFdwModifyState *fmstate, EState *estate, TupleTableSlot *slot, - TupleTableSlot *planslot) -{ - StmtParams *params = fmstate->stmt_params; - AsyncRequestSet *reqset; - AsyncResponseResult *rsp; - int n_rows = -1; - int i; - - if (!fmstate->prepared) - prepare_foreign_modify(fmstate); - - reqset = async_request_set_create(); - - stmt_params_convert_values(params, slot, NULL); - - for (i = 0; i < fmstate->num_data_nodes; i++) - { - TsFdwDataNodeState *fdw_data_node = &fmstate->data_nodes[i]; - AsyncRequest *req = NULL; - int type = response_type(fmstate->att_conv_metadata); - req = async_request_send_prepared_stmt_with_params(fdw_data_node->p_stmt, params, type); - Assert(NULL != req); - async_request_set_add(reqset, req); - } - - while ((rsp = async_request_set_wait_any_result(reqset))) - { - PGresult *res = async_response_result_get_pg_result(rsp); - - if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - async_response_report_error((AsyncResponse *) rsp, ERROR); - - /* - * If we insert into multiple replica chunks, we should only return - * the results from the first one - */ - if (n_rows == -1) - { - /* Check number of rows affected, and fetch RETURNING tuple if any */ - if (fmstate->has_returning) - { - n_rows = PQntuples(res); - - if (n_rows > 0) - store_returning_result(fmstate, slot, res); - } - else - n_rows = atoi(PQcmdTuples(res)); - } - - /* And clean up */ - async_response_result_close(rsp); - stmt_params_reset(params); - } - - /* - * Currently no way to do a deep cleanup of all request in the request - * set. 
The worry here is that since this runs in a per-chunk insert state - * memory context, the async API will accumulate a lot of cruft during - * inserts - */ - pfree(reqset); - - /* - * If rows are affected on DNs and a DN was excluded because of being - * "unavailable" then we need to update metadata on the AN to mark - * this chunk as "stale" for that "unavailable" DN - */ - if (n_rows > 0 && fmstate->num_all_data_nodes > fmstate->num_data_nodes) - fdw_chunk_update_stale_metadata(fmstate); - - /* Return NULL if nothing was inserted on the remote end */ - return (n_rows > 0) ? slot : NULL; -} - -/* - * Execute either an UPDATE or DELETE. - */ -TupleTableSlot * -fdw_exec_foreign_update_or_delete(TsFdwModifyState *fmstate, EState *estate, TupleTableSlot *slot, - TupleTableSlot *planslot, ModifyCommand cmd) -{ - StmtParams *params = fmstate->stmt_params; - AsyncRequestSet *reqset; - AsyncResponseResult *rsp; - Datum datum; - bool is_null; - int n_rows = -1; - int i; - - /* Set up the prepared statement on the data node, if we didn't yet */ - if (!fmstate->prepared) - prepare_foreign_modify(fmstate); - - /* Get the ctid that was passed up as a resjunk column */ - datum = ExecGetJunkAttribute(planslot, fmstate->ctid_attno, &is_null); - - /* shouldn't ever get a null result... */ - if (is_null) - elog(ERROR, "ctid is NULL"); - - stmt_params_convert_values(params, - (cmd == UPDATE_CMD ? slot : NULL), - (ItemPointer) DatumGetPointer(datum)); - reqset = async_request_set_create(); - - for (i = 0; i < fmstate->num_data_nodes; i++) - { - AsyncRequest *req = NULL; - TsFdwDataNodeState *fdw_data_node = &fmstate->data_nodes[i]; - int type = response_type(fmstate->att_conv_metadata); - req = async_request_send_prepared_stmt_with_params(fdw_data_node->p_stmt, params, type); - - Assert(NULL != req); - - async_request_attach_user_data(req, fdw_data_node); - async_request_set_add(reqset, req); - } - - while ((rsp = async_request_set_wait_any_result(reqset))) - { - PGresult *res = async_response_result_get_pg_result(rsp); - - if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - remote_result_elog(res, ERROR); - - /* - * If we update multiple replica chunks, we should only return the - * results from the first one. - */ - if (n_rows == -1) - { - /* Check number of rows affected, and fetch RETURNING tuple if any */ - if (fmstate->has_returning) - { - n_rows = PQntuples(res); - - if (n_rows > 0) - store_returning_result(fmstate, slot, res); - } - else - n_rows = atoi(PQcmdTuples(res)); - } - - /* And clean up */ - async_response_result_close(rsp); - } - - /* - * Currently no way to do a deep cleanup of all request in the request - * set. The worry here is that since this runs in a per-chunk insert state - * memory context, the async API will accumulate a lot of cruft during - * inserts - */ - pfree(reqset); - stmt_params_reset(params); - - /* - * If rows are affected on DNs and a DN was excluded because of being - * "unavailable" then we need to update metadata on the AN to mark - * this chunk as "stale" for that "unavailable" DN - */ - if (n_rows > 0 && fmstate->num_all_data_nodes > fmstate->num_data_nodes) - fdw_chunk_update_stale_metadata(fmstate); - - /* Return NULL if nothing was updated on the remote end */ - return (n_rows > 0) ? 
slot : NULL; -} - -/* - * finish_foreign_modify - * Release resources for a foreign insert/update/delete operation - */ -void -fdw_finish_foreign_modify(TsFdwModifyState *fmstate) -{ - int i; - - Assert(fmstate != NULL); - - for (i = 0; i < fmstate->num_data_nodes; i++) - { - TsFdwDataNodeState *fdw_data_node = &fmstate->data_nodes[i]; - - /* If we created a prepared statement, destroy it */ - if (NULL != fdw_data_node->p_stmt) - { - prepared_stmt_close(fdw_data_node->p_stmt); - fdw_data_node->p_stmt = NULL; - } - - fdw_data_node->conn = NULL; - } - - stmt_params_free(fmstate->stmt_params); -} - -void -fdw_explain_modify(PlanState *ps, ResultRelInfo *rri, List *fdw_private, int subplan_index, - ExplainState *es) -{ - if (es->verbose) - { - const char *sql = strVal(list_nth(fdw_private, FdwModifyPrivateUpdateSql)); - - ExplainPropertyText("Remote SQL", sql, es); - } -} diff --git a/tsl/src/fdw/modify_exec.h b/tsl/src/fdw/modify_exec.h deleted file mode 100644 index 39263e32e45..00000000000 --- a/tsl/src/fdw/modify_exec.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include -#include -#include - -typedef struct TsFdwModifyState TsFdwModifyState; - -typedef enum ModifyCommand -{ - UPDATE_CMD, - DELETE_CMD, -} ModifyCommand; - -extern void fdw_begin_foreign_modify(PlanState *pstate, ResultRelInfo *rri, CmdType operation, - List *fdw_private, Plan *subplan); - -extern TupleTableSlot *fdw_exec_foreign_insert(TsFdwModifyState *fmstate, EState *estate, - TupleTableSlot *slot, TupleTableSlot *planslot); -extern TupleTableSlot *fdw_exec_foreign_update_or_delete(TsFdwModifyState *fmstate, EState *estate, - TupleTableSlot *slot, - TupleTableSlot *planslot, - ModifyCommand cmd); -extern void fdw_finish_foreign_modify(TsFdwModifyState *fmstate); -extern void fdw_explain_modify(PlanState *ps, ResultRelInfo *rri, List *fdw_private, - int subplan_index, ExplainState *es); diff --git a/tsl/src/fdw/modify_plan.c b/tsl/src/fdw/modify_plan.c deleted file mode 100644 index 5a9dc8fafd2..00000000000 --- a/tsl/src/fdw/modify_plan.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#include -#include -#include -#include -#include - -#include -#include "deparse.h" -#include "errors.h" -#include "modify_plan.h" -#include "ts_catalog/chunk_data_node.h" - -static List * -get_insert_attrs(Relation rel) -{ - TupleDesc tupdesc = RelationGetDescr(rel); - List *attrs = NIL; - int i; - - for (i = 0; i < tupdesc->natts; i++) - { - Form_pg_attribute attr = TupleDescAttr(tupdesc, i); - - if (!attr->attisdropped) - attrs = lappend_int(attrs, AttrOffsetGetAttrNumber(i)); - } - - return attrs; -} - -static List * -get_update_attrs(Bitmapset *updatedCols) -{ - List *attrs = NIL; - int col = -1; - - while ((col = bms_next_member(updatedCols, col)) >= 0) - { - /* bit numbers are offset by FirstLowInvalidHeapAttributeNumber */ - AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber; - - if (attno <= InvalidAttrNumber) /* shouldn't happen */ - elog(ERROR, "system-column update is not supported"); - - attrs = lappend_int(attrs, attno); - } - - return attrs; -} - -/* get a list of "live" DNs associated with this chunk */ -List * -get_chunk_data_nodes(Oid relid) -{ - int32 chunk_id = ts_chunk_get_id_by_relid(relid); - Assert(chunk_id != 0); - - List *chunk_data_nodes = - ts_chunk_data_node_scan_by_chunk_id_filter(chunk_id, CurrentMemoryContext); - List *serveroids = NIL; - ListCell *lc; - - /* check that alteast one data node is available for this chunk */ - if (chunk_data_nodes == NIL) - { - Hypertable *ht = ts_hypertable_get_by_id(ts_chunk_get_hypertable_id_by_reloid(relid)); - - ereport(ERROR, - (errcode(ERRCODE_TS_INSUFFICIENT_NUM_DATA_NODES), - (errmsg("insufficient number of available data nodes"), - errhint("Increase the number of available data nodes on hypertable \"%s\".", - get_rel_name(ht->main_table_relid))))); - } - - foreach (lc, chunk_data_nodes) - { - ChunkDataNode *data_node = lfirst(lc); - - serveroids = lappend_oid(serveroids, data_node->foreign_server_oid); - } - - list_free(chunk_data_nodes); - - return serveroids; -} - -/* - * Plan INSERT, UPDATE, and DELETE. - * - * The main task of this function is to generate (deparse) the SQL statement - * for the corresponding tables on data nodes. - * - * If the planning involves a hypertable, the function is called differently - * depending on the command: - * - * 1. INSERT - called only once during hypertable planning and the given - * result relation is the hypertable root relation. This is due to - * TimescaleDBs unique INSERT path. We'd like to plan the INSERT as if it - * would happen on the root of the hypertable. This is useful because INSERTs - * should occur via the top-level hypertables on the data nodes - * (preferrably batched), and not once per individual remote chunk - * (inefficient and won't go through the standard INSERT path on the data - * node). - * - * 2. UPDATE and DELETE - called once per chunk and the given result relation - * is the chunk relation. - * - * For non-hypertables, which are foreign tables using the timescaledb_fdw, - * this function is called the way it normally would be for the FDW API, i.e., - * once during planning. - * - * For the TimescaleDB insert path, we actually call - * this function only once on the hypertable's root table instead of once per - * chunk. This is because we want to send INSERT statements to each remote - * hypertable rather than each remote chunk. - * - * UPDATEs and DELETEs work slightly different since we have no "optimized" - * path for such operations. Instead, they happen once per chunk. 
- */ -List * -fdw_plan_foreign_modify(PlannerInfo *root, ModifyTable *plan, Index result_relation, - int subplan_index) -{ - CmdType operation = plan->operation; - RangeTblEntry *rte = planner_rt_fetch(result_relation, root); - Relation rel; - StringInfoData sql; - List *returning_list = NIL; - List *retrieved_attrs = NIL; - List *target_attrs = NIL; - List *data_nodes = NIL; - bool do_nothing = false; - - initStringInfo(&sql); - - /* - * Extract the relevant RETURNING list if any. - */ - if (plan->returningLists) - returning_list = (List *) list_nth(plan->returningLists, subplan_index); - - /* - * ON CONFLICT DO UPDATE and DO NOTHING case with inference specification - * should have already been rejected in the optimizer, as presently there - * is no way to recognize an arbiter index on a foreign table. Only DO - * NOTHING is supported without an inference specification. - */ - if (plan->onConflictAction == ONCONFLICT_NOTHING) - do_nothing = true; - else if (plan->onConflictAction != ONCONFLICT_NONE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ON CONFLICT DO UPDATE not supported" - " on distributed hypertables"))); - - /* - * Core code already has some lock on each rel being planned, so we can - * use NoLock here. - */ - rel = table_open(rte->relid, NoLock); - - /* - * Construct the SQL command string - * - * In an INSERT, we transmit all columns that are defined in the foreign - * table. In an UPDATE, we transmit only columns that were explicitly - * targets of the UPDATE, so as to avoid unnecessary data transmission. - * (We can't do that for INSERT since we would miss sending default values - * for columns not listed in the source statement.) - */ - switch (operation) - { - case CMD_INSERT: - target_attrs = get_insert_attrs(rel); - deparseInsertSql(&sql, - rte, - result_relation, - rel, - target_attrs, - 1, - do_nothing, - returning_list, - &retrieved_attrs); - break; - case CMD_UPDATE: - { -#if PG16_LT - Bitmapset *updatedCols = rte->updatedCols; -#else - Bitmapset *updatedCols = NULL; - if (rte->perminfoindex > 0) - { - RTEPermissionInfo *perminfo = getRTEPermissionInfo(root->parse->rteperminfos, rte); - updatedCols = perminfo->updatedCols; - } -#endif - target_attrs = get_update_attrs(updatedCols); - deparseUpdateSql(&sql, - rte, - result_relation, - rel, - target_attrs, - returning_list, - &retrieved_attrs); - data_nodes = get_chunk_data_nodes(rel->rd_id); - break; - } - case CMD_DELETE: - deparseDeleteSql(&sql, rte, result_relation, rel, returning_list, &retrieved_attrs); - data_nodes = get_chunk_data_nodes(rel->rd_id); - break; - default: - elog(ERROR, "unexpected operation: %d", (int) operation); - break; - } - - table_close(rel, NoLock); - - /* - * Build the fdw_private list that will be available to the executor. - * Items in the list must match enum FdwModifyPrivateIndex, above. - */ - return list_make5(makeString(sql.data), - target_attrs, - makeInteger((retrieved_attrs != NIL)), - retrieved_attrs, - data_nodes); -} diff --git a/tsl/src/fdw/modify_plan.h b/tsl/src/fdw/modify_plan.h deleted file mode 100644 index 54babea9f37..00000000000 --- a/tsl/src/fdw/modify_plan.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#pragma once - -#include - -extern List *fdw_plan_foreign_modify(PlannerInfo *root, ModifyTable *plan, Index result_relation, - int subplan_index); -extern List *get_chunk_data_nodes(Oid relid); diff --git a/tsl/src/fdw/option.c b/tsl/src/fdw/option.c deleted file mode 100644 index 43f88edcf98..00000000000 --- a/tsl/src/fdw/option.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ - -/* - * This file contains source code that was copied and/or modified from - * the PostgreSQL database, which is licensed under the open-source - * PostgreSQL License. Please see the NOTICE at the top level - * directory for a copy of the PostgreSQL License. - */ -/*------------------------------------------------------------------------- - * - * option.c - * FDW option handling for timescaledb_fdw - * - * Portions Copyright (c) 2012-2018, PostgreSQL Global Development Group - * - *------------------------------------------------------------------------- - */ -#include - -#include "scan_plan.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "option.h" -#include "chunk.h" - -/* - * Describes the valid options for objects that this wrapper uses. - */ -typedef struct TsFdwOption -{ - const char *keyword; - Oid optcontext; /* OID of catalog in which option may appear */ -} TsFdwOption; - -/* - * Valid options for timescaledb_fdw. - * Allocated and filled in init_ts_fdw_options - */ -static TsFdwOption *timescaledb_fdw_options = NULL; - -/* - * Helper functions - */ -static void init_ts_fdw_options(void); -static bool is_valid_option(const char *keyword, Oid context); - -/* - * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER, - * USER MAPPING or FOREIGN TABLE that uses timescaledb_fdw. - * - * Raise an ERROR if the option or its value is considered invalid. - */ -void -option_validate(List *options_list, Oid catalog) -{ - ListCell *cell; - - /* Build our options lists if we didn't yet. */ - init_ts_fdw_options(); - - /* - * Check that only options supported by timescaledb_fdw, and allowed for - * the current object type, are given. - */ - foreach (cell, options_list) - { - DefElem *def = lfirst_node(DefElem, cell); - - if (!is_valid_option(def->defname, catalog)) - { - /* - * Unknown option specified, complain about it. Provide a hint - * with list of valid options for the object. - */ - TsFdwOption *opt; - StringInfoData buf; - - initStringInfo(&buf); - for (opt = timescaledb_fdw_options; opt->keyword; opt++) - { - if (catalog == opt->optcontext) - appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "", opt->keyword); - } - - ereport(ERROR, - (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), - errmsg("invalid option \"%s\"", def->defname), - errhint("Valid options in this context are: %s", buf.data))); - } - - /* - * Validate option value, when we can do so without any context. 
- */ - if (strcmp(def->defname, "fdw_startup_cost") == 0 || - strcmp(def->defname, "fdw_tuple_cost") == 0) - { - /* these must have a non-negative numeric value */ - double val; - char *endp; - - val = strtod(defGetString(def), &endp); - if (*endp || val < 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a non-negative numeric value", def->defname))); - } - else if (strcmp(def->defname, "extensions") == 0) - { - /* check list syntax, warn about uninstalled extensions */ - (void) option_extract_extension_list(defGetString(def), true); - } - else if (strcmp(def->defname, "fetch_size") == 0) - { - int fetch_size; - - fetch_size = strtol(defGetString(def), NULL, 10); - if (fetch_size <= 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a non-zero and positive integer value", - def->defname))); - } - else if (strcmp(def->defname, "available") == 0) - { - /* This will throw an error if not a boolean */ - defGetBoolean(def); - } - else if (strcmp(def->defname, "reference_tables") == 0) - { - /* check and store list, warn about non existing tables */ - (void) option_extract_join_ref_table_list(defGetString(def)); - } - else if (strcmp(def->defname, "copy_rows_per_message") == 0) - { - int copy_rows_per_message; - - copy_rows_per_message = strtol(defGetString(def), NULL, 10); - - if (copy_rows_per_message <= 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a non-zero and positive integer value", - def->defname))); - } - } -} - -/* - * Initialize option lists. - */ -static void -init_ts_fdw_options(void) -{ - /* non-libpq FDW-specific FDW options */ - static const TsFdwOption non_libpq_options[] = { - /* cost factors */ - { "fdw_startup_cost", ForeignDataWrapperRelationId }, - { "fdw_startup_cost", ForeignServerRelationId }, - { "fdw_tuple_cost", ForeignDataWrapperRelationId }, - { "fdw_tuple_cost", ForeignServerRelationId }, - /* shippable extensions */ - { "extensions", ForeignDataWrapperRelationId }, - { "extensions", ForeignServerRelationId }, - /* fetch_size is available on both foreign data wrapper and server */ - { "fetch_size", ForeignDataWrapperRelationId }, - { "fetch_size", ForeignServerRelationId }, - { "available", ForeignServerRelationId }, - /* join reference tables */ - { "reference_tables", ForeignDataWrapperRelationId }, - /* Rows per CopyData when ingesting with COPY */ - { "copy_rows_per_message", ForeignDataWrapperRelationId }, - { NULL, InvalidOid } - }; - - /* Prevent redundant initialization. */ - if (timescaledb_fdw_options) - return; - - /* - * Construct an array which consists of the FDW-specific options. - * - * We use plain malloc here to allocate timescaledb_fdw_options because it - * lives as long as the backend process does. - */ - timescaledb_fdw_options = - (TsFdwOption *) malloc(sizeof(TsFdwOption) * sizeof(non_libpq_options)); - - if (timescaledb_fdw_options == NULL) - ereport(ERROR, (errcode(ERRCODE_FDW_OUT_OF_MEMORY), errmsg("out of memory"))); - - /* Append FDW-specific options and dummy terminator. */ - memcpy(timescaledb_fdw_options, non_libpq_options, sizeof(non_libpq_options)); -} - -/* - * Check whether the given option is one of the valid timescaledb_fdw options. - * context is the Oid of the catalog holding the object the option is for. 
- */ -static bool -is_valid_option(const char *keyword, Oid context) -{ - TsFdwOption *opt; - - Assert(timescaledb_fdw_options); /* must be initialized already */ - - switch (remote_connection_option_type(keyword)) - { - case CONN_OPTION_TYPE_NODE: - return true; - case CONN_OPTION_TYPE_USER: - return true; - case CONN_OPTION_TYPE_NONE: - for (opt = timescaledb_fdw_options; opt->keyword; opt++) - { - if (context == opt->optcontext && strcmp(opt->keyword, keyword) == 0) - return true; - } - } - - return false; -} - -/* - * Parse a comma-separated string and return a List of the OIDs of the - * extensions named in the string. If any names in the list cannot be found, - * report a warning if warn_on_missing is true, else just silently ignore - * them. - */ -List * -option_extract_extension_list(const char *extensions_string, bool warn_on_missing) -{ - List *extension_oids = NIL; - List *extlist; - ListCell *lc; - - /* SplitIdentifierString scribbles on its input, so pstrdup first */ - if (!SplitIdentifierString(pstrdup(extensions_string), ',', &extlist)) - { - /* syntax error in name list */ - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("parameter \"%s\" must be a list of extension names", "extensions"))); - } - - foreach (lc, extlist) - { - const char *extension_name = (const char *) lfirst(lc); - Oid extension_oid = get_extension_oid(extension_name, true); - - if (OidIsValid(extension_oid)) - extension_oids = lappend_oid(extension_oids, extension_oid); - else if (warn_on_missing) - ereport(WARNING, - (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("extension \"%s\" is not installed", extension_name))); - } - - list_free(extlist); - - return extension_oids; -} - -bool -option_get_from_options_list_int(List *options, const char *optionname, int *value) -{ - ListCell *lc; - bool found = false; - - Assert(NULL != value); - - foreach (lc, options) - { - DefElem *def = lfirst_node(DefElem, lc); - - if (strcmp(def->defname, optionname) == 0) - { - *value = strtol(defGetString(def), NULL, 10); - found = true; - break; - } - } - - return found; -} - -List * -option_extract_join_ref_table_list(const char *join_tables) -{ - List *ref_table_oids = NIL; - List *ref_table_list; - ListCell *lc; - - /* SplitIdentifierString scribbles on its input, so pstrdup first */ - if (!SplitIdentifierString(pstrdup(join_tables), ',', &ref_table_list)) - { - /* syntax error in name list */ - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("parameter \"reference_tables\" must be a comma-separated list of " - "reference table names"))); - } - - foreach (lc, ref_table_list) - { - char *tablename = (char *) lfirst(lc); - -#if PG16_LT - RangeVar *rangevar = makeRangeVarFromNameList(stringToQualifiedNameList(tablename)); -#else - RangeVar *rangevar = makeRangeVarFromNameList(stringToQualifiedNameList(tablename, NULL)); -#endif - - Oid relOid = RangeVarGetRelidExtended(rangevar, - AccessShareLock, - RVR_MISSING_OK, - NULL /* callback */, - NULL /* callback args*/); - - if (!OidIsValid(relOid)) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_TABLE_DEFINITION), - errmsg("table \"%s\" does not exist", tablename))); - } - - /* Validate the relation type */ - Relation rel = table_open(relOid, NoLock); - - if (rel->rd_rel->relkind != RELKIND_RELATION) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TABLE_DEFINITION), - errmsg("relation \"%s\" is not an ordinary table. 
Only ordinary tables can be " - "used as reference tables", - tablename))); - - ref_table_oids = lappend_oid(ref_table_oids, relOid); - table_close(rel, NoLock); - } - - list_free(ref_table_list); - - return ref_table_oids; -} diff --git a/tsl/src/fdw/option.h b/tsl/src/fdw/option.h deleted file mode 100644 index a565d18865d..00000000000 --- a/tsl/src/fdw/option.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include - -extern void option_validate(List *options_list, Oid catalog); -extern List *option_extract_extension_list(const char *extensions_string, bool warn_on_missing); -extern List *option_extract_join_ref_table_list(const char *join_tables); -extern bool option_get_from_options_list_int(List *options, const char *optionname, int *value); diff --git a/tsl/src/fdw/relinfo.c b/tsl/src/fdw/relinfo.c deleted file mode 100644 index 8e34a906537..00000000000 --- a/tsl/src/fdw/relinfo.c +++ /dev/null @@ -1,543 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "cache.h" -#include "chunk.h" -#include "chunk_adaptive.h" -#include "deparse.h" -#include "dimension.h" -#include "errors.h" -#include "estimate.h" -#include "extension.h" -#include "hypercube.h" -#include "hypertable.h" -#include "hypertable_cache.h" -#include "option.h" -#include "relinfo.h" -#include "remote/connection.h" -#include "scan_exec.h" -#include "planner.h" - -/* - * Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). - * - * Note that postgres_fdw sets this to 0.01, but we want to penalize - * transferring many tuples in order to make it more attractive to push down - * aggregates and thus transfer/process less tuples. Postgres settings do not - * make much sense, because transferring the tuples over network is surely - * more expensive than transferring them to the local parallel executor. - */ -#define DEFAULT_FDW_TUPLE_COST (DEFAULT_PARALLEL_TUPLE_COST * 10) - -/* Default CPU cost to start up a foreign query. */ -#define DEFAULT_FDW_STARTUP_COST (DEFAULT_PARALLEL_SETUP_COST * 10) - -#define DEFAULT_FDW_FETCH_SIZE 10000 - -#define DEFAULT_CHUNK_LOOKBACK_WINDOW 10 - -/* - * Parse options from the foreign data wrapper and foreign server and apply - * them to fpinfo. The server options take precedence over the data wrapper - * ones. - * - * New options might also require tweaking merge_fdw_options(). 
- */ -void -apply_fdw_and_server_options(TsFdwRelInfo *fpinfo) -{ - ListCell *lc; - ForeignDataWrapper *fdw = GetForeignDataWrapper(fpinfo->server->fdwid); - List *options[] = { fdw->options, fpinfo->server->options }; - - for (size_t i = 0; i < lengthof(options); i++) - { - foreach (lc, options[i]) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "fdw_startup_cost") == 0) - fpinfo->fdw_startup_cost = strtod(defGetString(def), NULL); - else if (strcmp(def->defname, "fdw_tuple_cost") == 0) - fpinfo->fdw_tuple_cost = strtod(defGetString(def), NULL); - else if (strcmp(def->defname, "extensions") == 0) - fpinfo->shippable_extensions = - list_concat(fpinfo->shippable_extensions, - option_extract_extension_list(defGetString(def), false)); - else if (strcmp(def->defname, "fetch_size") == 0) - fpinfo->fetch_size = strtol(defGetString(def), NULL, 10); - else if (strcmp(def->defname, "reference_tables") == 0) - { - /* This option can only be defined per FDW. So, no list_concat of - * FDW and server options is needed. */ - fpinfo->join_reference_tables = - option_extract_join_ref_table_list(defGetString(def)); - } - } - } -} - -TsFdwRelInfo * -fdw_relinfo_get(RelOptInfo *rel) -{ - TimescaleDBPrivate *rel_private = rel->fdw_private; - Assert(rel_private); - TsFdwRelInfo *fdw_relation_info = rel_private->fdw_relation_info; - - /* - * This function is expected to return either null or a fully initialized - * fdw_relation_info struct. - */ - Assert(!fdw_relation_info || fdw_relation_info->type != TS_FDW_RELINFO_UNINITIALIZED); - - return fdw_relation_info; -} - -TsFdwRelInfo * -fdw_relinfo_alloc_or_get(RelOptInfo *rel) -{ - TimescaleDBPrivate *rel_private = rel->fdw_private; - if (rel_private == NULL) - { - rel_private = ts_create_private_reloptinfo(rel); - } - - if (rel_private->fdw_relation_info == NULL) - { - rel_private->fdw_relation_info = (TsFdwRelInfo *) palloc0(sizeof(TsFdwRelInfo)); - } - - return rel_private->fdw_relation_info; -} - -static const double FILL_FACTOR_CURRENT_CHUNK = 0.5; -static const double FILL_FACTOR_HISTORICAL_CHUNK = 1; - -static const DimensionSlice * -get_chunk_time_slice(const Chunk *chunk, const Hyperspace *space) -{ - int32 time_dim_id = hyperspace_get_open_dimension(space, 0)->fd.id; - return ts_hypercube_get_slice_by_dimension_id(chunk->cube, time_dim_id); -} - -/* - * Sums of slices belonging to closed dimensions - */ -static int -get_total_number_of_slices(Hyperspace *space) -{ - int dim_idx; - int total_slices = 0; - - for (dim_idx = 0; dim_idx < space->num_dimensions; dim_idx++) - { - Dimension *dim = &space->dimensions[dim_idx]; - if (IS_CLOSED_DIMENSION(dim)) - total_slices += dim->fd.num_slices; - } - - return total_slices; -} - -/* - * Estimate fill factor for the chunks that don't have ANALYZE statistics. - * Fill factor values are between 0 and 1. It's an indication of how much data is - * in the chunk, expressed as a fraction of its estimated final size. - * - * Fill factor estimation assumes that data written is 'recent' in regards to - * time dimension (eg. almost real-time). For the case when writing historical - * data, given estimates might be more off as we assume that historical chunks - * have fill factor 1. Even for writing historical data we might not be totally - * wrong since most probably data has monotonically increasing time. - * - * Estimation handles two possible hypertable configurations: - * 1. time dimension is of timestamp type - * 2. time dimension is of integer type. 
- * - * If hypertable uses timestamp type to partition data then there are three - * possible scenarios here: we are beyond chunk end time (historical chunk), we - * are somewhere in between chunk time boundaries (current chunk) or chunk start - * time is in the future (highly unlikely, also treated as current chunk). - * - * For integer type we assume that all chunks w/o ANALYZE stats are current. - * We could use the user-specified integer time function here - * (set_integer_now_func()), but this logic is a fallback so we're keeping it - * simple for now. - * - * Earlier, this function used chunk ids to guess which chunks are created later, - * and treated such chunks as current. Unfortunately, the chunk ids are global - * for all hypertables, so this approach didn't really work if there was more - * than one hypertable. - */ -static double -estimate_chunk_fillfactor(Chunk *chunk, Hyperspace *space) -{ - const Dimension *time_dim = hyperspace_get_open_dimension(space, 0); - const DimensionSlice *time_slice = get_chunk_time_slice(chunk, space); - Oid time_dim_type = ts_dimension_get_partition_type(time_dim); - - if (IS_TIMESTAMP_TYPE(time_dim_type)) - { - TimestampTz now = GetSQLCurrentTimestamp(-1); -#ifdef TS_DEBUG - if (ts_current_timestamp_override_value >= 0) - now = ts_current_timestamp_override_value; -#endif - int64 now_internal_time = - ts_time_value_to_internal(TimestampTzGetDatum(now), TIMESTAMPTZOID); - - /* if we are beyond end range then chunk can possibly be totally filled */ - if (time_slice->fd.range_end <= now_internal_time) - { - /* - * Current time is later than the end of the chunk time range, which - * means it is a historical chunk. - */ - return FILL_FACTOR_HISTORICAL_CHUNK; - } - - /* - * The chunk time range starts later than current time, so we treat it - * as a current chunk. - */ - if (time_slice->fd.range_start >= now_internal_time) - return FILL_FACTOR_CURRENT_CHUNK; - - /* - * Current time falls within chunk time constraints. The fill factor is - * interpolated linearly based on where the current time is inside the - * range, from 'current chunk fill factor' at the start of the range, to - * 'historical chunk fill factor' at the end of the range. - */ - double elapsed = (now_internal_time - time_slice->fd.range_start); - double interval = (time_slice->fd.range_end - time_slice->fd.range_start); - Assert(interval > 0); - Assert(elapsed <= interval); - - Assert(FILL_FACTOR_HISTORICAL_CHUNK >= FILL_FACTOR_CURRENT_CHUNK); - double fill_factor = - FILL_FACTOR_CURRENT_CHUNK + - (FILL_FACTOR_HISTORICAL_CHUNK - FILL_FACTOR_CURRENT_CHUNK) * (elapsed / interval); - - Assert(fill_factor >= 0.); - Assert(fill_factor <= 1.); - return fill_factor; - } - - /* - * This chunk doesn't have the ANALYZE data, so it's more likely to be a - * recently created, current chunk, not an old historical chunk. 
- */ - return FILL_FACTOR_CURRENT_CHUNK; -} - -static void -estimate_tuples_and_pages_using_shared_buffers(PlannerInfo *root, Hypertable *ht, RelOptInfo *rel) -{ - int64 chunk_size_estimate = ts_chunk_calculate_initial_chunk_target_size(); - const int result_width = rel->reltarget->width; - - if (ht != NULL) - { - int total_slices = get_total_number_of_slices(ht->space); - if (total_slices > 0) - chunk_size_estimate /= total_slices; - } - else - /* half-size seems to be the safest bet */ - chunk_size_estimate /= 2; - - rel->tuples = chunk_size_estimate / (result_width + MAXALIGN(SizeofHeapTupleHeader)); - rel->pages = chunk_size_estimate / BLCKSZ; -} - -/* - * Estimate the chunk size if we don't have ANALYZE statistics, and update the - * moving average of chunk sizes used for estimation. - */ -static void -estimate_chunk_size(PlannerInfo *root, RelOptInfo *chunk_rel) -{ - const int parent_relid = bms_next_member(chunk_rel->top_parent_relids, -1); - if (parent_relid < 0) - { - /* - * In some cases (e.g., UPDATE stmt) top_parent_relids is not set so the - * best we can do is using shared buffers size without partitioning - * information. Since updates are not something we generaly optimize - * for, this should be fine. - */ - if (chunk_rel->pages == 0) - { - /* Can't have nonzero tuples in zero pages */ - Assert(chunk_rel->tuples <= 0); - estimate_tuples_and_pages_using_shared_buffers(root, NULL, chunk_rel); - } - return; - } - - /* - * Check if we have the chunk info cached for this chunk relation. For - * SELECTs, we should have cached it when we performed chunk exclusion. - * The UPDATEs use a completely different code path that doesn't do chunk - * exclusion, so we'll have to look up this info now. - */ - TimescaleDBPrivate *chunk_private = ts_get_private_reloptinfo(chunk_rel); - if (chunk_private->cached_chunk_struct == NULL) - { - RangeTblEntry *chunk_rte = planner_rt_fetch(chunk_rel->relid, root); - chunk_private->cached_chunk_struct = - ts_chunk_get_by_relid(chunk_rte->relid, true /* fail_if_not_found */); - } - - RelOptInfo *parent_info = root->simple_rel_array[parent_relid]; - /* - * The parent FdwRelInfo might not be allocated and initialized here, because - * it happens later in tsl_set_pathlist callback. We don't care about this - * because we only need it for chunk size estimates, so allocate it ourselves. - */ - TsFdwRelInfo *parent_private = fdw_relinfo_alloc_or_get(parent_info); - RangeTblEntry *parent_rte = planner_rt_fetch(parent_relid, root); - Cache *hcache = ts_hypertable_cache_pin(); - Hypertable *ht = ts_hypertable_cache_get_entry(hcache, parent_rte->relid, CACHE_FLAG_NONE); - Hyperspace *hyperspace = ht->space; - - const double fillfactor = - estimate_chunk_fillfactor(chunk_private->cached_chunk_struct, hyperspace); - - /* Can't have nonzero tuples in zero pages */ - Assert(parent_private->average_chunk_pages != 0 || parent_private->average_chunk_tuples <= 0); - Assert(chunk_rel->pages != 0 || chunk_rel->tuples <= 0); - - const bool have_chunk_statistics = chunk_rel->pages != 0; - const bool have_moving_average = - parent_private->average_chunk_pages != 0 || parent_private->average_chunk_tuples > 0; - if (!have_chunk_statistics) - { - /* - * If we don't have the statistics from ANALYZE for this chunk, - * use the moving average of chunk sizes. If we don't have even - * that, use an estimate based on the default shared buffers - * size for a chunk. 
- */ - if (have_moving_average) - { - chunk_rel->pages = parent_private->average_chunk_pages * fillfactor; - chunk_rel->tuples = parent_private->average_chunk_tuples * fillfactor; - } - else - { - estimate_tuples_and_pages_using_shared_buffers(root, ht, chunk_rel); - chunk_rel->pages *= fillfactor; - chunk_rel->tuples *= fillfactor; - } - } - - if (!have_moving_average) - { - /* - * Initialize the moving average data if we don't have any yet. - * Use even a bad estimate from shared buffers, to save on - * recalculating the same bad estimate for the subsequent chunks - * that are likely to not have the statistics as well. - */ - parent_private->average_chunk_pages = chunk_rel->pages; - parent_private->average_chunk_tuples = chunk_rel->tuples; - } - else if (have_chunk_statistics) - { - /* - * We have the moving average of chunk sizes and a good estimate - * of this chunk size from ANALYZE. Update the moving average. - */ - const double f = 0.1; - parent_private->average_chunk_pages = - (1 - f) * parent_private->average_chunk_pages + f * chunk_rel->pages / fillfactor; - parent_private->average_chunk_tuples = - (1 - f) * parent_private->average_chunk_tuples + f * chunk_rel->tuples / fillfactor; - } - else - { - /* - * Already have some moving average data, but don't have good - * statistics for this chunk. Do nothing. - */ - } - - ts_cache_release(hcache); -} - -TsFdwRelInfo * -fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local_table_id, - TsFdwRelInfoType type) -{ - TsFdwRelInfo *fpinfo; - ListCell *lc; - RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); - const char *refname; - - /* - * We use TsFdwRelInfo to pass various information to subsequent - * functions. It might be already partially initialized for a data node - * hypertable, because we use it to maintain the chunk size estimates when - * planning. - */ - fpinfo = fdw_relinfo_alloc_or_get(rel); - Assert(fpinfo->type == TS_FDW_RELINFO_UNINITIALIZED || fpinfo->type == TS_FDW_RELINFO_JOIN || - fpinfo->type == type); - fpinfo->type = type; - - if (type == TS_FDW_RELINFO_UNINITIALIZED || type == TS_FDW_RELINFO_JOIN) - return fpinfo; - - /* - * Set the name of relation in fpinfo, while we are constructing it here. - * It will be used to build the string describing the join relation in - * EXPLAIN output. We can't know whether VERBOSE option is specified or - * not, so always schema-qualify the foreign table name. - */ - - fpinfo->relation_name = makeStringInfo(); - - if (rte != NULL) - { - refname = rte->eref->aliasname; - appendStringInfo(fpinfo->relation_name, - "%s.%s", - quote_identifier(get_namespace_name(get_rel_namespace(rte->relid))), - quote_identifier(get_rel_name(rte->relid))); - if (*refname && strcmp(refname, get_rel_name(rte->relid)) != 0) - appendStringInfo(fpinfo->relation_name, " %s", quote_identifier(rte->eref->aliasname)); - } - - /* - * Set the default values for startup cost, tuple cost, fetch size and shippable_extensions. - * Note that the per-server settings (applied in apply_fdw_and_server_options()) can override - * these values. - */ - fpinfo->fdw_startup_cost = DEFAULT_FDW_STARTUP_COST; - fpinfo->fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST; - fpinfo->fetch_size = DEFAULT_FDW_FETCH_SIZE; - fpinfo->shippable_extensions = list_make1_oid(ts_extension_get_oid()); - - /* Look up foreign-table catalog info. 
*/ - if (OidIsValid(server_oid)) - { - fpinfo->server = GetForeignServer(server_oid); - apply_fdw_and_server_options(fpinfo); - } - - /* - * Identify which baserestrictinfo clauses can be sent to the data - * node and which can't. - */ - classify_conditions(root, - rel, - rel->baserestrictinfo, - &fpinfo->remote_conds, - &fpinfo->local_conds); - - if (type == TS_FDW_RELINFO_HYPERTABLE) - { - /* nothing more to do for hypertables */ - Assert(!OidIsValid(server_oid)); - - return fpinfo; - } - /* Base foreign tables need to be pushed down always. */ - fpinfo->pushdown_safe = true; - - /* - * Identify which attributes will need to be retrieved from the data - * node. These include all attrs needed for joins or final output, plus - * all attrs used in the local_conds. (Note: if we end up using a - * parameterized scan, it's possible that some of the join clauses will be - * sent to the remote and thus we wouldn't really need to retrieve the - * columns used in them. Doesn't seem worth detecting that case though.) - */ - fpinfo->attrs_used = NULL; - pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &fpinfo->attrs_used); - foreach (lc, fpinfo->local_conds) - { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - - pull_varattnos((Node *) rinfo->clause, rel->relid, &fpinfo->attrs_used); - } - - /* - * Compute the selectivity and cost of the local and remote conditions, so - * that we don't have to do it over again for each path. The best we can do - * for these conditions is to estimate selectivity on the basis of local - * statistics. - */ - fpinfo->local_conds_sel = - clauselist_selectivity(root, fpinfo->local_conds, rel->relid, JOIN_INNER, NULL); - - cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root); - - fpinfo->remote_conds_sel = - clauselist_selectivity(root, fpinfo->remote_conds, rel->relid, JOIN_INNER, NULL); - - cost_qual_eval(&fpinfo->remote_conds_cost, fpinfo->remote_conds, root); - - /* - * Set cached relation costs to some negative value, so that we can detect - * when they are set to some sensible costs during one (usually the first) - * of the calls to fdw_estimate_path_cost_size(). - */ - fpinfo->rel_startup_cost = -1; - fpinfo->rel_total_cost = -1; - fpinfo->rel_retrieved_rows = -1; - - if (type == TS_FDW_RELINFO_FOREIGN_TABLE) - { - /* - * For a chunk, estimate its size if we don't know it, and update the - * moving average of chunk sizes used for this estimation. - */ - estimate_chunk_size(root, rel); - } - - /* Estimate rel size as best we can with local statistics. There are - * no local statistics for data node rels since they aren't real base - * rels (there's no corresponding table in the system to associate - * stats with). Instead, data node rels already have basic stats set - * at creation time based on data-node-chunk assignment. */ - if (fpinfo->type != TS_FDW_RELINFO_HYPERTABLE_DATA_NODE && OidIsValid(rel->relid)) - set_baserel_size_estimates(root, rel); - - /* Fill in basically-bogus cost estimates for use later. */ - fdw_estimate_path_cost_size(root, - rel, - NIL, - &fpinfo->rows, - &fpinfo->width, - &fpinfo->startup_cost, - &fpinfo->total_cost); - - /* No outer and inner relations. */ - fpinfo->make_outerrel_subquery = false; - fpinfo->make_innerrel_subquery = false; - fpinfo->lower_subquery_rels = NULL; - /* Set the relation index. 
*/ - fpinfo->relation_index = rel->relid; - - return fpinfo; -} diff --git a/tsl/src/fdw/relinfo.h b/tsl/src/fdw/relinfo.h deleted file mode 100644 index e6c6fdc0e21..00000000000 --- a/tsl/src/fdw/relinfo.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "remote/connection.h" -#include "data_node_chunk_assignment.h" - -/* - * FDW-specific planner information kept in RelOptInfo.fdw_private. For a - * baserel, this struct is created by GetForeignRelSize, although some fields - * are not filled till later. GetForeignJoinPaths creates it for a joinrel, - * and GetForeignUpperPaths creates it for an upperrel. - */ - -typedef enum -{ - TS_FDW_RELINFO_UNINITIALIZED = 0, - TS_FDW_RELINFO_HYPERTABLE_DATA_NODE, - TS_FDW_RELINFO_HYPERTABLE, - TS_FDW_RELINFO_FOREIGN_TABLE, - TS_FDW_RELINFO_REFERENCE_JOIN_PARTITION, - TS_FDW_RELINFO_REFERENCE_TABLE, - TS_FDW_RELINFO_JOIN -} TsFdwRelInfoType; - -#ifdef TS_DEBUG -/* A path considered during planning but which may have been pruned. Used for - * debugging purposes. */ -typedef struct ConsideredPath -{ - Path *path; - uintptr_t origin; /* The pointer value of the original path */ -} ConsideredPath; -#endif /* TS_DEBUG */ - -typedef struct TsFdwRelInfo -{ - TsFdwRelInfoType type; - /* - * True means that the relation can be pushed down. Always true for simple - * foreign scan. - */ - bool pushdown_safe; - - /* - * Restriction clauses, divided into safe and unsafe to pushdown subsets. - * All entries in these lists should have RestrictInfo wrappers; that - * improves efficiency of selectivity and cost estimation. - */ - List *remote_conds; - List *local_conds; - - /* Actual remote restriction clauses for scan (sans RestrictInfos) */ - List *final_remote_exprs; - - /* Bitmap of attr numbers we need to fetch from the remote data node. */ - Bitmapset *attrs_used; - - /* Cost and selectivity of local_conds. */ - QualCost local_conds_cost; - Selectivity local_conds_sel; - - /* Cost and selectivity of remote_conds. */ - QualCost remote_conds_cost; - Selectivity remote_conds_sel; - - /* Selectivity of join conditions */ - Selectivity joinclause_sel; - - /* Estimated size and cost for a scan or join. */ - double rows; - int width; - Cost startup_cost; - Cost total_cost; - - /* Costs excluding costs for transferring data from the data node */ - Cost rel_startup_cost; - Cost rel_total_cost; - double rel_retrieved_rows; - - /* Costs for transferring data across the network */ - Cost fdw_startup_cost; - Cost fdw_tuple_cost; - List *shippable_extensions; /* OIDs of whitelisted extensions */ - - /* Cached catalog information. */ - ForeignTable *table; - ForeignServer *server; - - int fetch_size; /* fetch size for this remote table */ - - /* - * Name of the relation while EXPLAINing ForeignScan. It is used for join - * relations but is set for all relations. For join relation, the name - * indicates which foreign tables are being joined and the join type used. 
- */ - StringInfo relation_name; - - /* Join information */ - RelOptInfo *outerrel; - RelOptInfo *innerrel; - JoinType jointype; - /* joinclauses contains only JOIN/ON conditions for an outer join */ - List *joinclauses; /* List of RestrictInfo */ - - /* Grouping information */ - List *grouped_tlist; - - /* Subquery information */ - bool make_outerrel_subquery; /* do we deparse outerrel as a - * subquery? */ - bool make_innerrel_subquery; /* do we deparse innerrel as a - * subquery? */ - Relids lower_subquery_rels; /* all relids appearing in lower - * subqueries */ - - /* - * Index of the relation. It is used to create an alias to a subquery - * representing the relation. - */ - int relation_index; - DataNodeChunkAssignment *sca; -#ifdef TS_DEBUG - List *considered_paths; /* List of ConsideredPath objects of all the paths - that planner has considered. This is intended - to be only used for printing cost debug - output */ -#endif - - /* - * Moving averages of chunk size, valid for the hypertable relinfo. - * We use them to compute the size for remote chunks that don't have local - * statistics, e.g. because ANALYZE haven't been run. Note that these values - * are adjusted for fill factor, i.e. they correspond to a fill factor of - * 1.0. The fill factor for a particular chunk is estimated separately. - */ - double average_chunk_pages; - double average_chunk_tuples; - - /* Cached chunk data for the chunk relinfo. */ - struct Chunk *chunk; - - /* OIDs of join reference tables. */ - List *join_reference_tables; - - /* - * Parameterizations of this relation for which we can perform an index scan. - * Encoded by requied_outer bitmapsets. - */ - List *indexed_parameterizations; -} TsFdwRelInfo; - -extern TsFdwRelInfo *fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, - Oid local_table_id, TsFdwRelInfoType type); -extern TsFdwRelInfo *fdw_relinfo_alloc_or_get(RelOptInfo *rel); -extern TsFdwRelInfo *fdw_relinfo_get(RelOptInfo *rel); -extern void apply_fdw_and_server_options(TsFdwRelInfo *fpinfo); diff --git a/tsl/src/fdw/scan_exec.c b/tsl/src/fdw/scan_exec.c deleted file mode 100644 index c6f799df6d0..00000000000 --- a/tsl/src/fdw/scan_exec.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "scan_exec.h" -#include "utils.h" -#include "remote/data_fetcher.h" -#include "remote/copy_fetcher.h" -#include "remote/prepared_statement_fetcher.h" -#include "remote/cursor_fetcher.h" -#include "guc.h" -#include "planner.h" - -/* - * Indexes of FDW-private information stored in fdw_private lists. - * - * These items are indexed with the enum FdwScanPrivateIndex, so an item - * can be fetched with list_nth(). 
For example, to get the SELECT statement: - * sql = strVal(list_nth(fdw_private, FdwScanPrivateSelectSql)); - */ -enum FdwScanPrivateIndex -{ - /* SQL statement to execute remotely (as a String node) */ - FdwScanPrivateSelectSql, - /* Integer list of attribute numbers retrieved by the SELECT */ - FdwScanPrivateRetrievedAttrs, - /* Integer representing the desired fetch_size */ - FdwScanPrivateFetchSize, - - /* Integer for the OID of the foreign server, used by EXPLAIN */ - FdwScanPrivateServerId, - /* OID list of chunk oids, used by EXPLAIN */ - FdwScanPrivateChunkOids, - /* - * String describing join i.e. names of relations being joined and types - * of join, added when the scan is join - */ - FdwScanPrivateRelations -}; - -/* - * Fill an array with query parameter values in text format. - */ -static void -fill_query_params_array(ExprContext *econtext, FmgrInfo *param_flinfo, List *param_exprs, - const char **param_values) -{ - int nestlevel; - int i; - ListCell *lc; - - nestlevel = set_transmission_modes(); - - i = 0; - foreach (lc, param_exprs) - { - ExprState *expr_state = (ExprState *) lfirst(lc); - Datum expr_value; - bool is_null; - - /* Evaluate the parameter expression */ - expr_value = ExecEvalExpr(expr_state, econtext, &is_null); - - /* - * Get string representation of each parameter value by invoking - * type-specific output function, unless the value is null. - */ - if (is_null) - param_values[i] = NULL; - else - param_values[i] = OutputFunctionCall(¶m_flinfo[i], expr_value); - - i++; - } - - reset_transmission_modes(nestlevel); -} - -/* - * Create data fetcher for node's query with current parameter values. - */ -DataFetcher * -create_data_fetcher(ScanState *ss, TsFdwScanState *fsstate) -{ - ExprContext *econtext = ss->ps.ps_ExprContext; - int num_params = fsstate->num_params; - const char **values = fsstate->param_values; - MemoryContext oldcontext; - StmtParams *params = NULL; - DataFetcher *fetcher = NULL; - - if (NULL != fsstate->fetcher) - return fsstate->fetcher; - - /* - * Construct array of query parameter values in text format. We do the - * conversions in the short-lived per-tuple context, so as not to cause a - * memory leak over repeated scans. - */ - if (num_params > 0) - { - oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); - fill_query_params_array(econtext, fsstate->param_flinfo, fsstate->param_exprs, values); - MemoryContextSwitchTo(oldcontext); - - /* - * Notice that we do not specify param types, thus forcing the data - * node to infer types for all parameters. Since we explicitly cast - * every parameter (see deparse.c), the "inference" is trivial and - * will produce the desired result. This allows us to avoid assuming - * that the data node has the same OIDs we do for the parameters' - * types. - */ - params = stmt_params_create_from_values(values, num_params); - } - - oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory); - - if (fsstate->planned_fetcher_type == CursorFetcherType) - { - fetcher = - cursor_fetcher_create_for_scan(fsstate->conn, fsstate->query, params, fsstate->tf); - } - else if (fsstate->planned_fetcher_type == PreparedStatementFetcherType) - { - fetcher = prepared_statement_fetcher_create_for_scan(fsstate->conn, - fsstate->query, - params, - fsstate->tf); - } - else - { - /* - * The fetcher type must have been determined by the planner at this - * point, so we shouldn't see 'auto' here. 
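One way to picture the parameter handling above: each value is rendered to text with its type's output function and shipped without a type OID, which works only because deparse.c wraps every parameter reference in an explicit cast. For a timestamptz parameter, for example, the deparsed condition would read something like "time" >= $1::timestamptz (the exact spelling is an assumption here; deparse.c is the authority), while the value itself travels as plain text such as 2023-12-11 19:57:44+01, leaving the data node to do the parsing.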
- */ - Assert(fsstate->planned_fetcher_type == CopyFetcherType); - fetcher = copy_fetcher_create_for_scan(fsstate->conn, fsstate->query, params, fsstate->tf); - } - - fsstate->fetcher = fetcher; - MemoryContextSwitchTo(oldcontext); - - fetcher->funcs->set_fetch_size(fetcher, fsstate->fetch_size); - - return fetcher; -} - -/* - * Prepare for processing of parameters used in remote query. - */ -static void -prepare_query_params(PlanState *node, List *fdw_exprs, int num_params, FmgrInfo **param_flinfo, - List **param_exprs, const char ***param_values) -{ - int i; - ListCell *lc; - - Assert(num_params > 0); - - /* Prepare for output conversion of parameters used in remote query. */ - *param_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * num_params); - - i = 0; - foreach (lc, fdw_exprs) - { - Node *param_expr = (Node *) lfirst(lc); - Oid typefnoid; - bool isvarlena; - - getTypeOutputInfo(exprType(param_expr), &typefnoid, &isvarlena); - fmgr_info(typefnoid, &(*param_flinfo)[i]); - i++; - } - - /* - * Prepare remote-parameter expressions for evaluation. (Note: in - * practice, we expect that all these expressions will be just Params, so - * we could possibly do something more efficient than using the full - * expression-eval machinery for this. But probably there would be little - * benefit, and it'd require the foreign data wrapper to know more than is - * desirable about Param evaluation.) - */ - *param_exprs = ExecInitExprList(fdw_exprs, node); - - /* Allocate buffer for text form of query parameters. */ - *param_values = (const char **) palloc0(num_params * sizeof(char *)); -} - -#ifdef TS_DEBUG -/* Allow tests to specify the time to push down in place of now() */ -TimestampTz ts_current_timestamp_override_value = -1; - -extern void -fdw_scan_debug_override_current_timestamp(TimestampTz time) -{ - ts_current_timestamp_override_value = time; -} -#endif - -static TSConnection * -get_connection(ScanState *ss, Oid const server_id, Bitmapset *scanrelids, List *exprs) -{ - Scan *scan = (Scan *) ss->ps.plan; - EState *estate = ss->ps.state; - RangeTblEntry *rte; - TSConnectionId id; - int rtindex; - Oid user_oid; - - /* - * Identify which user to do the remote access as. This should match what - * ExecCheckRTEPerms() does. In case of a join or aggregate, use the - * lowest-numbered member RTE as a representative; we would get the same - * result from any. - */ - if (scan->scanrelid > 0) - rtindex = scan->scanrelid; - else - rtindex = bms_next_member(scanrelids, -1); - - rte = rt_fetch(rtindex, estate->es_range_table); - -#if PG16_LT - user_oid = OidIsValid(rte->checkAsUser) ? rte->checkAsUser : GetUserId(); -#else - if (rte->perminfoindex > 0) - { - RTEPermissionInfo *perminfo = getRTEPermissionInfo(estate->es_rteperminfos, rte); - user_oid = OidIsValid(perminfo->checkAsUser) ? perminfo->checkAsUser : GetUserId(); - } - else - { - user_oid = GetUserId(); - } -#endif - - remote_connection_id_set(&id, server_id, user_oid); - - return remote_dist_txn_get_connection(id, - list_length(exprs) ? 
REMOTE_TXN_USE_PREP_STMT : - REMOTE_TXN_NO_PREP_STMT); -} - -void -fdw_scan_init(ScanState *ss, TsFdwScanState *fsstate, Bitmapset *scanrelids, List *fdw_private, - List *fdw_exprs, int eflags) -{ - int num_params; - Oid server_oid; - ForeignServer *server; - - if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) && !ts_guc_enable_remote_explain) - return; - - /* Check if the server is "available" for use before setting up a connection to it */ - server_oid = intVal(list_nth(fdw_private, FdwScanPrivateServerId)); - server = GetForeignServer(server_oid); - if (!ts_data_node_is_available_by_server(server)) - ereport(ERROR, (errmsg("data node \"%s\" is not available", server->servername))); - - /* - * Get connection to the foreign server. Connection manager will - * establish new connection if necessary. - */ - fsstate->conn = get_connection(ss, server_oid, scanrelids, fdw_exprs); - - /* Get private info created by planner functions. */ - fsstate->query = strVal(list_nth(fdw_private, FdwScanPrivateSelectSql)); - fsstate->retrieved_attrs = (List *) list_nth(fdw_private, FdwScanPrivateRetrievedAttrs); - fsstate->fetch_size = intVal(list_nth(fdw_private, FdwScanPrivateFetchSize)); - - /* - * Prepare for processing of parameters used in remote query, if any. - */ - num_params = list_length(fdw_exprs); - fsstate->num_params = num_params; - - if (num_params > 0) - prepare_query_params(&ss->ps, - fdw_exprs, - num_params, - &fsstate->param_flinfo, - &fsstate->param_exprs, - &fsstate->param_values); - - fsstate->fetcher = NULL; - - fsstate->tf = tuplefactory_create_for_scan(ss, fsstate->retrieved_attrs); - - Assert(fsstate->planned_fetcher_type != AutoFetcherType); - - /* - * If the planner tells us to use the cursor fetcher because there are - * multiple distributed hypertables per query, we have no other option. - */ - if (fsstate->planned_fetcher_type == CursorFetcherType) - { - return; - } - - if (!tuplefactory_is_binary(fsstate->tf) && fsstate->planned_fetcher_type == CopyFetcherType) - { - if (ts_guc_remote_data_fetcher == AutoFetcherType) - { - /* - * The user-set fetcher type was auto, and the planner decided to - * use COPY fetcher, but at execution time (now) we found out - * there is no binary serialization for some data types. In this - * case we can revert to cursor fetcher which supports text - * serialization. - */ - fsstate->planned_fetcher_type = CursorFetcherType; - } - else - { - ereport(ERROR, - (errmsg("cannot use COPY fetcher because some of the column types do not " - "have binary serialization"))); - } - } - - /* - * COPY fetcher uses COPY statement that don't work with prepared - * statements. We only end up here in case the COPY fetcher was chosen by - * the user, so error out. - * Note that this can be optimized for parameters coming from initplans, - * where the parameter takes only one value and technically we could deparse - * it into the query string and use a non-parameterized COPY statement. 
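The execution-time adjustments above, together with the parameterized-plan check that follows, amount to a small decision table. A standalone sketch of that logic; the enum, the function name and the use of a sentinel return value to signal the error cases are mine, not the extension's API:

#include <stdio.h>

typedef enum { FETCHER_AUTO, FETCHER_COPY, FETCHER_CURSOR, FETCHER_PREPARED } FetcherType;

/* Returns the fetcher actually used, or FETCHER_AUTO to signal an error case. */
static FetcherType
resolve_fetcher(FetcherType planned, FetcherType guc_setting,
                int num_params, int have_binary_serialization)
{
    if (planned == FETCHER_CURSOR)
        return FETCHER_CURSOR;      /* forced by the planner, nothing to adjust */

    if (planned == FETCHER_COPY && !have_binary_serialization)
    {
        if (guc_setting == FETCHER_AUTO)
            return FETCHER_CURSOR;  /* fall back to the text-mode cursor fetcher */
        return FETCHER_AUTO;        /* user explicitly asked for COPY: error out */
    }

    if (planned == FETCHER_COPY && num_params > 0)
        return FETCHER_AUTO;        /* COPY cannot carry query parameters: error out */

    return planned;                 /* COPY or prepared statement, as planned */
}

int
main(void)
{
    /* COPY planned, auto GUC, no binary serialization: falls back to a cursor. */
    printf("%d\n", resolve_fetcher(FETCHER_COPY, FETCHER_AUTO, 0, 0) == FETCHER_CURSOR);
    return 0;
}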
- */ - if (num_params > 0 && fsstate->planned_fetcher_type == CopyFetcherType) - { - Assert(ts_guc_remote_data_fetcher == CopyFetcherType); - ereport(ERROR, - (errmsg("cannot use COPY fetcher because the plan is parameterized"), - errhint("Set \"timescaledb.remote_data_fetcher\" to \"cursor\" to explicitly " - "set the fetcher type or use \"auto\" to select the fetcher type " - "automatically."))); - } -} - -TupleTableSlot * -fdw_scan_iterate(ScanState *ss, TsFdwScanState *fsstate) -{ - TupleTableSlot *slot = ss->ss_ScanTupleSlot; - DataFetcher *fetcher = fsstate->fetcher; - - if (NULL == fetcher) - fetcher = create_data_fetcher(ss, fsstate); - - fetcher->funcs->store_next_tuple(fetcher, slot); - - return slot; -} - -void -fdw_scan_rescan(ScanState *ss, TsFdwScanState *fsstate) -{ - DataFetcher *fetcher = fsstate->fetcher; - - /* If we haven't created the cursor yet, nothing to do. */ - if (NULL == fsstate->fetcher) - return; - - /* - * If any internal parameters affecting this node have changed, we'd - * better destroy and recreate the cursor. Otherwise, rewinding it should - * be good enough. If we've only fetched zero or one batch, we needn't - * even rewind the cursor, just rescan what we have. - */ - if (ss->ps.chgParam != NULL) - { - int num_params = fsstate->num_params; - Assert(num_params > 0); - - ExprContext *econtext = ss->ps.ps_ExprContext; - - /* - * Construct array of query parameter values in text format. - */ - const char **values = fsstate->param_values; - fill_query_params_array(econtext, fsstate->param_flinfo, fsstate->param_exprs, values); - - /* - * Notice that we do not specify param types, thus forcing the data - * node to infer types for all parameters. Since we explicitly cast - * every parameter (see deparse.c), the "inference" is trivial and - * will produce the desired result. This allows us to avoid assuming - * that the data node has the same OIDs we do for the parameters' - * types. - */ - StmtParams *params = stmt_params_create_from_values(values, num_params); - - fetcher->funcs->rescan(fsstate->fetcher, params); - } - else - { - fetcher->funcs->rewind(fsstate->fetcher); - } -} - -void -fdw_scan_end(TsFdwScanState *fsstate) -{ - /* if fsstate is NULL, we are in EXPLAIN; nothing to do */ - if (fsstate == NULL) - return; - - /* Close the cursor if open, to prevent accumulation of cursors */ - if (NULL != fsstate->fetcher) - { - data_fetcher_free(fsstate->fetcher); - fsstate->fetcher = NULL; - } - - /* Release remote connection */ - fsstate->conn = NULL; - - /* MemoryContexts will be deleted automatically. 
*/ -} - -static char * -get_data_node_explain(const char *sql, TSConnection *conn, ExplainState *es) -{ - AsyncRequest *volatile req = NULL; - AsyncResponseResult *volatile res = NULL; - StringInfo explain_sql = makeStringInfo(); - StringInfo buf = makeStringInfo(); - - appendStringInfo(explain_sql, "%s", "EXPLAIN (VERBOSE "); - if (es->analyze) - appendStringInfo(explain_sql, "%s", ", ANALYZE"); - if (!es->costs) - appendStringInfo(explain_sql, "%s", ", COSTS OFF"); - if (es->buffers) - appendStringInfo(explain_sql, "%s", ", BUFFERS ON"); - if (!es->timing) - appendStringInfo(explain_sql, "%s", ", TIMING OFF"); - if (es->summary) - appendStringInfo(explain_sql, "%s", ", SUMMARY ON"); - else - appendStringInfo(explain_sql, "%s", ", SUMMARY OFF"); - - appendStringInfoChar(explain_sql, ')'); - - appendStringInfo(explain_sql, " %s", sql); - - PG_TRY(); - { - PGresult *pg_res; - int i; - - req = async_request_send(conn, explain_sql->data); - res = async_request_wait_ok_result(req); - pg_res = async_response_result_get_pg_result(res); - appendStringInfoChar(buf, '\n'); - - for (i = 0; i < PQntuples(pg_res); i++) - { - appendStringInfoSpaces(buf, (es->indent + 1) * 2); - appendStringInfo(buf, "%s\n", PQgetvalue(pg_res, i, 0)); - } - - pfree(req); - async_response_result_close(res); - } - PG_CATCH(); - { - if (req != NULL) - pfree(req); - if (res != NULL) - async_response_result_close(res); - - PG_RE_THROW(); - } - PG_END_TRY(); - - return buf->data; -} - -static char * -explain_fetcher_type(DataFetcherType type) -{ - switch (type) - { - case AutoFetcherType: - return "Auto"; - case CopyFetcherType: - return "COPY"; - case CursorFetcherType: - return "Cursor"; - case PreparedStatementFetcherType: - return "Prepared statement"; - default: - Assert(false); - return ""; - } -} - -void -fdw_scan_explain(ScanState *ss, List *fdw_private, ExplainState *es, TsFdwScanState *fsstate) -{ - const char *relations; - - /* - * Add names of relation handled by the foreign scan when the scan is an - * upper rel. - */ - if (list_length(fdw_private) > FdwScanPrivateRelations) - { - relations = strVal(list_nth(fdw_private, FdwScanPrivateRelations)); - ExplainPropertyText("Relations", relations, es); - } - - /* - * Add remote query, data node name, and chunks when VERBOSE option is specified. 
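For a sense of what get_data_node_explain above actually sends: with analyze, buffers and summary disabled, costs enabled and timing disabled, the assembled statement comes out as EXPLAIN (VERBOSE , TIMING OFF, SUMMARY OFF) followed by the remote SELECT. The stray space after VERBOSE comes from the fixed prefix and is harmless to the remote parser; each returned line is then re-indented and attached to the local plan under the Remote EXPLAIN property.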
- */ - if (es->verbose) - { - Oid server_id = intVal(list_nth(fdw_private, FdwScanPrivateServerId)); - ForeignServer *server = GetForeignServer(server_id); - List *chunk_oids = (List *) list_nth(fdw_private, FdwScanPrivateChunkOids); - char *sql; - - ExplainPropertyText("Data node", server->servername, es); - - /* fsstate or fetcher can be NULL, so check that first */ - if (fsstate && fsstate->fetcher) - ExplainPropertyText("Fetcher Type", explain_fetcher_type(fsstate->fetcher->type), es); - - if (chunk_oids != NIL) - { - StringInfoData chunk_names; - ListCell *lc; - bool first = true; - - initStringInfo(&chunk_names); - - foreach (lc, chunk_oids) - { - if (!first) - appendStringInfoString(&chunk_names, ", "); - else - first = false; - appendStringInfoString(&chunk_names, get_rel_name(lfirst_oid(lc))); - } - ExplainPropertyText("Chunks", chunk_names.data, es); - } - - sql = strVal(list_nth(fdw_private, FdwScanPrivateSelectSql)); - - ExplainPropertyText("Remote SQL", sql, es); - - /* fsstate should be set up but better check again to avoid crashes */ - if (ts_guc_enable_remote_explain && fsstate) - { - char *data_node_explain; - - /* EXPLAIN barfs on parameterized queries, so check that first */ - if (fsstate->num_params >= 1) - data_node_explain = "Unavailable due to parameterized query"; - else - data_node_explain = get_data_node_explain(fsstate->query, fsstate->conn, es); - ExplainPropertyText("Remote EXPLAIN", data_node_explain, es); - } - } -} diff --git a/tsl/src/fdw/scan_exec.h b/tsl/src/fdw/scan_exec.h deleted file mode 100644 index ba3fcbcf7c1..00000000000 --- a/tsl/src/fdw/scan_exec.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "remote/data_fetcher.h" -#include "guc.h" - -/* - * Execution state of a foreign scan using timescaledb_fdw. - */ -typedef struct TsFdwScanState -{ - Relation rel; /* relcache entry for the foreign table. NULL - * for a foreign join scan. */ - TupleDesc tupdesc; /* tuple descriptor of scan */ - struct AttConvInMetadata *att_conv_metadata; /* attribute datatype conversion metadata */ - - /* extracted fdw_private data */ - char *query; /* text of SELECT command */ - List *retrieved_attrs; /* list of retrieved attribute numbers */ - - /* for remote query execution */ - struct TSConnection *conn; /* connection for the scan */ - TupleFactory *tf; - struct DataFetcher *fetcher; /* fetches tuples from data node */ - int num_params; /* number of parameters passed to query */ - FmgrInfo *param_flinfo; /* output conversion functions for them */ - List *param_exprs; /* executable expressions for param values */ - const char **param_values; /* textual values of query parameters */ - int fetch_size; /* number of tuples per fetch */ - /* - * The type of data fetcher to use as determined by the planner. Can be - * either Cursor when there are multiple distributed hypertables, or COPY. - * Note that we still can revert to cursor fetcher if binary serialization - * is unavailable for some data types. We can also prefer the prepared - * statement data fetcher when the query is parameterized. We only check - * this when we execute the query. 
- * - */ - DataFetcherType planned_fetcher_type; - int row_counter; -} TsFdwScanState; - -extern void fdw_scan_init(ScanState *ss, TsFdwScanState *fsstate, Bitmapset *scanrelids, - List *fdw_private, List *fdw_exprs, int eflags); -extern TupleTableSlot *fdw_scan_iterate(ScanState *ss, TsFdwScanState *fsstate); -extern void fdw_scan_rescan(ScanState *ss, TsFdwScanState *fsstate); -extern void fdw_scan_end(TsFdwScanState *fsstate); -extern void fdw_scan_explain(ScanState *ss, List *fdw_private, ExplainState *es, - TsFdwScanState *fsstate); - -extern DataFetcher *create_data_fetcher(ScanState *ss, TsFdwScanState *fsstate); - -#ifdef TS_DEBUG - -extern TimestampTz ts_current_timestamp_override_value; -/* Allow tests to specify the time to push down in place of now() */ -extern void fdw_scan_debug_override_current_timestamp(TimestampTz time); -#endif diff --git a/tsl/src/fdw/scan_plan.c b/tsl/src/fdw/scan_plan.c deleted file mode 100644 index c9a22484bfe..00000000000 --- a/tsl/src/fdw/scan_plan.c +++ /dev/null @@ -1,1074 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "estimate.h" -#include "relinfo.h" -#include "utils.h" -#include "deparse.h" -#include "scan_plan.h" -#include "debug.h" -#include "fdw_utils.h" -#include "scan_exec.h" -#include "chunk.h" - -/* - * get_useful_pathkeys_for_relation - * Determine which orderings of a relation might be useful. - * - * Getting data in sorted order can be useful either because the requested - * order matches the final output ordering for the overall query we're - * planning, or because it enables an efficient merge join. Here, we try - * to figure out which pathkeys to consider. - */ -static List * -get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) -{ - List *useful_pathkeys_list = NIL; - ListCell *lc; - - /* - * Pushing the query_pathkeys to the data node is always worth - * considering, because it might let us avoid a local sort. - */ - if (root->query_pathkeys) - { - bool query_pathkeys_ok = true; - - foreach (lc, root->query_pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceClass *pathkey_ec = pathkey->pk_eclass; - Expr *em_expr; - - /* - * The planner and executor don't have any clever strategy for - * taking data sorted by a prefix of the query's pathkeys and - * getting it to be sorted by all of those pathkeys. We'll just - * end up resorting the entire data set. So, unless we can push - * down all of the query pathkeys, forget it. - * - * is_foreign_expr would detect volatile expressions as well, but - * checking ec_has_volatile here saves some cycles. 
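A concrete reading of the all-or-nothing rule above: a query ending in ORDER BY "time" DESC, where "time" is a plain shippable column of the relation, makes the full query_pathkeys list a candidate for remote sorting; but if even one ORDER BY item is volatile (say it involves random()) or otherwise not safe to ship, none of the pathkeys are offered, since a remote sort on a mere prefix would still force a full local re-sort.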
- */ - if (pathkey_ec->ec_has_volatile || !(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) || - !ts_is_foreign_expr(root, rel, em_expr)) - { - query_pathkeys_ok = false; - break; - } - } - - if (query_pathkeys_ok) - useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys)); - } - - return useful_pathkeys_list; -} - -static void -add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, Path *epq_path, - CreatePathFunc create_scan_path, - CreateUpperPathFunc create_upper_path) -{ - List *useful_pathkeys_list = NIL; /* List of all pathkeys */ - ListCell *lc; - - Assert((create_scan_path || create_upper_path) && !(create_scan_path && create_upper_path)); - - useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); - - /* Create one path for each set of pathkeys we found above. */ - foreach (lc, useful_pathkeys_list) - { - double rows; - int width; - Cost startup_cost; - Cost total_cost; - List *useful_pathkeys = lfirst(lc); - Path *sorted_epq_path; - Path *scan_path; - - fdw_estimate_path_cost_size(root, - rel, - useful_pathkeys, - &rows, - &width, - &startup_cost, - &total_cost); - - /* - * The EPQ path must be at least as well sorted as the path itself, in - * case it gets used as input to a mergejoin. - */ - sorted_epq_path = epq_path; - if (sorted_epq_path != NULL && - !pathkeys_contained_in(useful_pathkeys, sorted_epq_path->pathkeys)) - sorted_epq_path = - (Path *) create_sort_path(root, rel, sorted_epq_path, useful_pathkeys, -1.0); - - if (create_scan_path) - { - Assert(IS_SIMPLE_REL(rel) || IS_JOIN_REL(rel)); - scan_path = create_scan_path(root, - rel, - NULL, - rows, - startup_cost, - total_cost, - useful_pathkeys, - NULL, - sorted_epq_path, - NIL); - } - else - { - Assert(IS_UPPER_REL(rel)); - scan_path = create_upper_path(root, - rel, - NULL, - rows, - startup_cost, - total_cost, - useful_pathkeys, - sorted_epq_path, - NIL); - } - - fdw_utils_add_path(rel, scan_path); - } -} - -void -fdw_add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, Path *epq_path, - CreatePathFunc create_scan_path) -{ - add_paths_with_pathkeys_for_rel(root, rel, epq_path, create_scan_path, NULL); -} - -void -fdw_add_upper_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, Path *epq_path, - CreateUpperPathFunc create_upper_path) -{ - add_paths_with_pathkeys_for_rel(root, rel, epq_path, NULL, create_upper_path); -} - -typedef struct -{ - ParamListInfo boundParams; - PlannerInfo *root; - List *active_fns; - Node *case_val; - bool estimate; -} eval_stable_functions_context; - -static Node *eval_stable_functions_mutator(Node *node, void *context); - -static Expr * -evaluate_stable_function(Oid funcid, Oid result_type, int32 result_typmod, Oid result_collid, - Oid input_collid, List *args, bool funcvariadic, Form_pg_proc funcform) -{ - bool has_nonconst_input = false; - PG_USED_FOR_ASSERTS_ONLY bool has_null_input = false; - ListCell *arg; - FuncExpr *newexpr; - -#ifdef TS_DEBUG - /* Allow tests to specify the time to push down in place of now() */ - if (funcid == F_NOW && ts_current_timestamp_override_value != -1) - { - return (Expr *) makeConst(TIMESTAMPTZOID, - -1, - InvalidOid, - sizeof(TimestampTz), - TimestampTzGetDatum(ts_current_timestamp_override_value), - false, - FLOAT8PASSBYVAL); - } -#endif - - /* - * Can't simplify if it returns a set or a RECORD. See the comments for - * eval_const_expressions(). We should only see the whitelisted functions - * here, no sets or RECORDS among them. 
- */ - Assert(!funcform->proretset); - Assert(funcform->prorettype != RECORDOID); - - /* - * Check for constant inputs and especially constant-NULL inputs. - */ - foreach (arg, args) - { - if (IsA(lfirst(arg), Const)) - has_null_input |= ((Const *) lfirst(arg))->constisnull; - else - has_nonconst_input = true; - } - - /* - * The simplification of strict functions with constant NULL inputs must - * have been already performed by eval_const_expressions(). - */ - Assert(!(funcform->proisstrict && has_null_input)); - - /* - * Otherwise, can simplify only if all inputs are constants. (For a - * non-strict function, constant NULL inputs are treated the same as - * constant non-NULL inputs.) - */ - if (has_nonconst_input) - return NULL; - - /* - * This is called on the access node for the expressions that will be pushed - * down to data nodes. These expressions can contain only whitelisted stable - * functions, so we shouldn't see volatile functions here. Immutable - * functions can also occur here for expressions like - * `immutable(stable(....))`, after we evaluate the stable function. - */ - Assert(funcform->provolatile != PROVOLATILE_VOLATILE); - - /* - * OK, looks like we can simplify this operator/function. - * - * Build a new FuncExpr node containing the already-simplified arguments. - */ - newexpr = makeNode(FuncExpr); - newexpr->funcid = funcid; - newexpr->funcresulttype = result_type; - newexpr->funcretset = false; - newexpr->funcvariadic = funcvariadic; - newexpr->funcformat = COERCE_EXPLICIT_CALL; /* doesn't matter */ - newexpr->funccollid = result_collid; /* doesn't matter */ - newexpr->inputcollid = input_collid; - newexpr->args = args; - newexpr->location = -1; - - return evaluate_expr((Expr *) newexpr, result_type, result_typmod, result_collid); -} - -/* - * Execute the function to deliver a constant result. - */ -static Expr * -simplify_stable_function(Oid funcid, Oid result_type, int32 result_typmod, Oid result_collid, - Oid input_collid, List **args_p, bool funcvariadic) -{ - List *args = *args_p; - HeapTuple func_tuple; - Form_pg_proc funcform; - Expr *newexpr; - - func_tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(func_tuple)) - elog(ERROR, "cache lookup failed for function %u", funcid); - funcform = (Form_pg_proc) GETSTRUCT(func_tuple); - - /* - * Process the function arguments. Here we must deal with named or defaulted - * arguments, and then recursively apply eval_stable_functions to the whole - * argument list. - */ - args = expand_function_arguments_compat(args, result_type, func_tuple); - args = (List *) expression_tree_mutator((Node *) args, eval_stable_functions_mutator, NULL); - /* Argument processing done, give it back to the caller */ - *args_p = args; - - /* Now attempt simplification of the function call proper. */ - newexpr = evaluate_stable_function(funcid, - result_type, - result_typmod, - result_collid, - input_collid, - args, - funcvariadic, - funcform); - - ReleaseSysCache(func_tuple); - - return newexpr; -} - -/* - * Recursive guts of eval_stable_functions. - * We don't use 'context' here but it is required by the signature of - * expression_tree_mutator. - */ -static Node * -eval_stable_functions_mutator(Node *node, void *context) -{ - if (node == NULL) - return NULL; - switch (nodeTag(node)) - { - case T_FuncExpr: - { - FuncExpr *expr = (FuncExpr *) node; - List *args = expr->args; - Expr *simple; - FuncExpr *newexpr; - - /* - * Code for op/func reduction is pretty bulky, so split it out - * as a separate function. 
Note: exprTypmod normally returns - * -1 for a FuncExpr, but not when the node is recognizably a - * length coercion; we want to preserve the typmod in the - * eventual Const if so. - */ - simple = simplify_stable_function(expr->funcid, - expr->funcresulttype, - exprTypmod(node), - expr->funccollid, - expr->inputcollid, - &args, - expr->funcvariadic); - if (simple) /* successfully simplified it */ - return (Node *) simple; - - /* - * The expression cannot be simplified any further, so build - * and return a replacement FuncExpr node using the - * possibly-simplified arguments. Note that we have also - * converted the argument list to positional notation. - */ - newexpr = makeNode(FuncExpr); - newexpr->funcid = expr->funcid; - newexpr->funcresulttype = expr->funcresulttype; - newexpr->funcretset = expr->funcretset; - newexpr->funcvariadic = expr->funcvariadic; - newexpr->funcformat = expr->funcformat; - newexpr->funccollid = expr->funccollid; - newexpr->inputcollid = expr->inputcollid; - newexpr->args = args; - newexpr->location = expr->location; - return (Node *) newexpr; - } - case T_OpExpr: - { - OpExpr *expr = (OpExpr *) node; - List *args = expr->args; - Expr *simple; - OpExpr *newexpr; - - /* - * Need to get OID of underlying function. Okay to scribble - * on input to this extent. - */ - set_opfuncid(expr); - - /* - * Code for op/func reduction is pretty bulky, so split it out - * as a separate function. - */ - simple = simplify_stable_function(expr->opfuncid, - expr->opresulttype, - -1, - expr->opcollid, - expr->inputcollid, - &args, - false); - if (simple) /* successfully simplified it */ - return (Node *) simple; - - /* - * The expression cannot be simplified any further, so build - * and return a replacement OpExpr node using the - * possibly-simplified arguments. - */ - newexpr = makeNode(OpExpr); - newexpr->opno = expr->opno; - newexpr->opfuncid = expr->opfuncid; - newexpr->opresulttype = expr->opresulttype; - newexpr->opretset = expr->opretset; - newexpr->opcollid = expr->opcollid; - newexpr->inputcollid = expr->inputcollid; - newexpr->args = args; - newexpr->location = expr->location; - return (Node *) newexpr; - } - default: - break; - } - /* - * For any node type not handled above, copy the node unchanged but - * const-simplify its subexpressions. This is the correct thing for node - * types whose behavior might change between planning and execution, such - * as CurrentOfExpr. It's also a safe default for new node types not - * known to this routine. - */ - return expression_tree_mutator((Node *) node, eval_stable_functions_mutator, NULL); -} - -/* - * Try to evaluate stable functions and operators on the access node. This - * function is similar to eval_const_expressions, but much simpler, because it - * only evaluates the functions and doesn't have to perform any additional - * canonicalizations. 
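Concretely, for a pushed-down condition such as "time" > now(), the stable and whitelisted now() call is folded into a timestamptz constant on the access node before the query is deparsed (or into the fixed test timestamp when the TS_DEBUG override above is set), so the data node receives a literal value rather than a function call, and each execution of a prepared statement re-derives it.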
- */ -static Node * -eval_stable_functions(PlannerInfo *root, Node *node) -{ - return eval_stable_functions_mutator(node, NULL); -} - -void -fdw_scan_info_init(ScanInfo *scaninfo, PlannerInfo *root, RelOptInfo *rel, Path *best_path, - List *scan_clauses, Plan *outer_plan) -{ - TsFdwRelInfo *fpinfo = fdw_relinfo_get(rel); - List *remote_having = NIL; - List *remote_exprs = NIL; - List *local_exprs = NIL; - List *params_list = NIL; - List *fdw_scan_tlist = NIL; - List *fdw_recheck_quals = NIL; - List *retrieved_attrs; - List *fdw_private; - Index scan_relid; - StringInfoData sql; - ListCell *lc; - - if (IS_SIMPLE_REL(rel)) - { - /* - * For base relations, set scan_relid as the relid of the relation. - */ - scan_relid = rel->relid; - - /* - * In a base-relation scan, we must apply the given scan_clauses. - * - * Separate the scan_clauses into those that can be executed remotely - * and those that can't. baserestrictinfo clauses that were - * previously determined to be safe or unsafe by classifyConditions - * are found in fpinfo->remote_conds and fpinfo->local_conds. Anything - * else in the scan_clauses list will be a join clause, which we have - * to check for remote-safety. - * - * Note: the join clauses we see here should be the exact same ones - * previously examined by GetForeignPaths. Possibly it'd be worth - * passing forward the classification work done then, rather than - * repeating it here. - * - * This code must match "extract_actual_clauses(scan_clauses, false)" - * except for the additional decision about remote versus local - * execution. - */ - foreach (lc, scan_clauses) - { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - - /* Ignore any pseudoconstants, they're dealt with elsewhere */ - if (rinfo->pseudoconstant) - continue; - - if (list_member_ptr(fpinfo->remote_conds, rinfo)) - remote_exprs = lappend(remote_exprs, rinfo->clause); - else if (list_member_ptr(fpinfo->local_conds, rinfo)) - local_exprs = lappend(local_exprs, rinfo->clause); - else if (ts_is_foreign_expr(root, rel, rinfo->clause)) - remote_exprs = lappend(remote_exprs, rinfo->clause); - else - local_exprs = lappend(local_exprs, rinfo->clause); - } - - /* - * For a base-relation scan, we have to support EPQ recheck, which - * should recheck all the remote quals. - */ - fdw_recheck_quals = remote_exprs; - } - else if (IS_JOIN_REL(rel)) - { - /* - * Join relation or upper relation - set scan_relid to 0. - */ - scan_relid = 0; - - /* - * For a join rel, baserestrictinfo is NIL and we are not considering - * parameterization right now, so there should be no scan_clauses for - * a joinrel or an upper rel either. - */ - Assert(!scan_clauses); - - /* - * Instead we get the conditions to apply from the fdw_private - * structure. - */ - remote_exprs = extract_actual_clauses(fpinfo->remote_conds, false); - local_exprs = extract_actual_clauses(fpinfo->local_conds, false); - - /* - * We leave fdw_recheck_quals empty in this case, since we never need - * to apply EPQ recheck clauses. In the case of a joinrel, EPQ - * recheck is handled elsewhere --- see postgresGetForeignJoinPaths(). - * If we're planning an upperrel (ie, remote grouping or aggregation) - * then there's no EPQ to do because SELECT FOR UPDATE wouldn't be - * allowed, and indeed we *can't* put the remote clauses into - * fdw_recheck_quals because the unaggregated Vars won't be available - * locally. - */ - - /* Build the list of columns to be fetched from the foreign server. 
*/ - fdw_scan_tlist = build_tlist_to_deparse(rel); - - /* - * Ensure that the outer plan produces a tuple whose descriptor - * matches our scan tuple slot. Also, remove the local conditions - * from outer plan's quals, lest they be evaluated twice, once by the - * local plan and once by the scan. - */ - if (outer_plan) - { - ListCell *lc; - - /* - * Right now, we only consider grouping and aggregation beyond - * joins. Queries involving aggregates or grouping do not require - * EPQ mechanism, hence should not have an outer plan here. - */ - Assert(!IS_UPPER_REL(rel)); - - /* - * First, update the plan's qual list if possible. In some cases - * the quals might be enforced below the topmost plan level, in - * which case we'll fail to remove them; it's not worth working - * harder than this. - */ - foreach (lc, local_exprs) - { - Node *qual = lfirst(lc); - - outer_plan->qual = list_delete(outer_plan->qual, qual); - - /* - * For an inner join the local conditions of foreign scan plan - * can be part of the joinquals as well. (They might also be - * in the mergequals or hashquals, but we can't touch those - * without breaking the plan.) - */ - if (IsA(outer_plan, NestLoop) || IsA(outer_plan, MergeJoin) || - IsA(outer_plan, HashJoin)) - { - Join *join_plan = (Join *) outer_plan; - - if (join_plan->jointype == JOIN_INNER) - join_plan->joinqual = list_delete(join_plan->joinqual, qual); - } - } - - /* - * Now fix the subplan's tlist --- this might result in inserting - * a Result node atop the plan tree. - */ - outer_plan = - change_plan_targetlist(outer_plan, fdw_scan_tlist, best_path->parallel_safe); - } - } - else - { - /* - * Upper relation - set scan_relid to 0. - */ - scan_relid = 0; - - /* - * For a join rel, baserestrictinfo is NIL and we are not considering - * parameterization right now, so there should be no scan_clauses for - * a joinrel or an upper rel either. - */ - Assert(!scan_clauses); - - /* - * Instead we get the conditions to apply from the fdw_private - * structure. - * For upper relations, the WHERE clause is built from the remote - * conditions of the underlying scan relation. - */ - TsFdwRelInfo *ofpinfo; - ofpinfo = fdw_relinfo_get(fpinfo->outerrel); - remote_exprs = extract_actual_clauses(ofpinfo->remote_conds, false); - remote_having = extract_actual_clauses(fpinfo->remote_conds, false); - local_exprs = extract_actual_clauses(fpinfo->local_conds, false); - - /* - * We leave fdw_recheck_quals empty in this case, since we never need - * to apply EPQ recheck clauses. In the case of a joinrel, EPQ - * recheck is handled elsewhere --- see GetForeignJoinPaths(). If - * we're planning an upperrel (ie, remote grouping or aggregation) - * then there's no EPQ to do because SELECT FOR UPDATE wouldn't be - * allowed, and indeed we *can't* put the remote clauses into - * fdw_recheck_quals because the unaggregated Vars won't be available - * locally. - */ - - /* Build the list of columns to be fetched from the data node. */ - fdw_scan_tlist = build_tlist_to_deparse(rel); - } - - /* - * Try to locally evaluate the stable functions such as now() before pushing - * them to the remote node. - * We have to do this at the execution stage as oppossed to the planning stage, because stable - * functions must be recalculated with each execution of a prepared - * statement. - * Note that the query planner currently only pushes down to remote side - * the whitelisted stable functions, see `function_is_whitelisted()`. So - * this code only has to deal with such functions. 
- */ - remote_exprs = (List *) eval_stable_functions(root, (Node *) remote_exprs); - remote_having = (List *) eval_stable_functions(root, (Node *) remote_having); - - /* - * Build the query string to be sent for execution, and identify - * expressions to be sent as parameters. - */ - initStringInfo(&sql); - deparseSelectStmtForRel(&sql, - root, - rel, - fdw_scan_tlist, - remote_exprs, - remote_having, - best_path->pathkeys, - false, - &retrieved_attrs, - ¶ms_list, - fpinfo->sca); - - /* Remember remote_exprs for possible use by PlanDirectModify */ - fpinfo->final_remote_exprs = remote_exprs; - - /* Build the chunk oid list for use by EXPLAIN. */ - List *chunk_oids = NIL; - if (fpinfo->sca) - { - foreach (lc, fpinfo->sca->chunks) - { - Chunk *chunk = (Chunk *) lfirst(lc); - chunk_oids = lappend_oid(chunk_oids, chunk->table_id); - } - } - - /* - * Build the fdw_private list that will be available to the executor. - * Items in the list must match order in enum FdwScanPrivateIndex. - */ - fdw_private = list_make5(makeString(sql.data), - retrieved_attrs, - makeInteger(fpinfo->fetch_size), - makeInteger(fpinfo->server->serverid), - chunk_oids); - - if (IS_UPPER_REL(rel)) - fdw_private = lappend(fdw_private, makeString(fpinfo->relation_name->data)); - - scaninfo->fdw_private = fdw_private; - scaninfo->fdw_scan_tlist = fdw_scan_tlist; - scaninfo->fdw_recheck_quals = fdw_recheck_quals; - scaninfo->local_exprs = local_exprs; - scaninfo->params_list = params_list; - scaninfo->scan_relid = scan_relid; - scaninfo->data_node_serverid = rel->serverid; -} - -/* - * Merge FDW options from input relations into a new set of options for a join - * or an upper rel. - * - * For a join relation, FDW-specific information about the inner and outer - * relations is provided using fpinfo_i and fpinfo_o. For an upper relation, - * fpinfo_o provides the information for the input relation; fpinfo_i is - * expected to be NULL. - */ -static void -merge_fdw_options(TsFdwRelInfo *fpinfo, const TsFdwRelInfo *fpinfo_o, const TsFdwRelInfo *fpinfo_i) -{ - /* We must always have fpinfo_o. */ - Assert(fpinfo_o); - - /* fpinfo_i may be NULL, but if present the servers must both match. */ - Assert(!fpinfo_i || fpinfo_i->server->serverid == fpinfo_o->server->serverid); - - /* Currently, we don't support JOINs, so Asserting fpinfo_i is NULL here - * in the meantime. */ - Assert(fpinfo_i == NULL); - - /* - * Copy the server specific FDW options. (For a join, both relations come - * from the same server, so the server options should have the same value - * for both relations.) - */ - fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; - fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; - fpinfo->shippable_extensions = fpinfo_o->shippable_extensions; - fpinfo->fetch_size = fpinfo_o->fetch_size; -} - -/* - * Assess whether the aggregation, grouping and having operations can be pushed - * down to the data node. As a side effect, save information we obtain in - * this function to TsFdwRelInfo of the input relation. 
- */ -static bool -foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel, GroupPathExtraData *extra) -{ - Query *query = root->parse; - Node *having_qual = extra->havingQual; - TsFdwRelInfo *fpinfo = fdw_relinfo_get(grouped_rel); - PathTarget *grouping_target = grouped_rel->reltarget; - bool ispartial = extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL; - TsFdwRelInfo *ofpinfo; - List *aggvars; - ListCell *lc; - int i; - List *tlist = NIL; - - /* Cannot have grouping sets since that wouldn't be a distinct coverage of - * all partition keys */ - Assert(query->groupingSets == NIL); - - /* Get the fpinfo of the underlying scan relation. */ - ofpinfo = (TsFdwRelInfo *) fdw_relinfo_get(fpinfo->outerrel); - - /* - * If underlying scan relation has any local conditions, those conditions - * are required to be applied before performing aggregation. Hence the - * aggregate cannot be pushed down. - */ - if (ofpinfo->local_conds) - return false; - - /* - * Examine grouping expressions, as well as other expressions we'd need to - * compute, and check whether they are safe to push down to the data - * node. All GROUP BY expressions will be part of the grouping target - * and thus there is no need to search for them separately. Add grouping - * expressions into target list which will be passed to data node. - */ - i = 0; - foreach (lc, grouping_target->exprs) - { - Expr *expr = (Expr *) lfirst(lc); - Index sgref = get_pathtarget_sortgroupref(grouping_target, i); - ListCell *l; - - /* Check whether this expression is part of GROUP BY clause */ - if (sgref && get_sortgroupref_clause_noerr(sgref, query->groupClause)) - { - TargetEntry *tle; - - /* - * If any GROUP BY expression is not shippable, then we cannot - * push down aggregation to the data node. - */ - if (!ts_is_foreign_expr(root, grouped_rel, expr)) - return false; - - /* - * Pushable, so add to tlist. We need to create a TLE for this - * expression and apply the sortgroupref to it. We cannot use - * add_to_flat_tlist() here because that avoids making duplicate - * entries in the tlist. If there are duplicate entries with - * distinct sortgrouprefs, we have to duplicate that situation in - * the output tlist. - */ - tle = makeTargetEntry(expr, list_length(tlist) + 1, NULL, false); - tle->ressortgroupref = sgref; - tlist = lappend(tlist, tle); - } - else - { - /* - * Non-grouping expression we need to compute. Is it shippable? - */ - if (ts_is_foreign_expr(root, grouped_rel, expr)) - { - /* Yes, so add to tlist as-is; OK to suppress duplicates */ - tlist = add_to_flat_tlist(tlist, list_make1(expr)); - } - else - { - /* Not pushable as a whole; extract its Vars and aggregates */ - aggvars = pull_var_clause((Node *) expr, PVC_INCLUDE_AGGREGATES); - - /* - * If any aggregate expression is not shippable, then we - * cannot push down aggregation to the data node. - */ - if (!ts_is_foreign_expr(root, grouped_rel, (Expr *) aggvars)) - return false; - - /* - * Add aggregates, if any, into the targetlist. Plain Vars - * outside an aggregate can be ignored, because they should be - * either same as some GROUP BY column or part of some GROUP - * BY expression. In either case, they are already part of - * the targetlist and thus no need to add them again. In fact - * including plain Vars in the tlist when they do not match a - * GROUP BY column would cause the data node to complain - * that the shipped query is invalid. 
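As a worked example of the target-list construction above (the HAVING clauses are classified in the same spirit just below): for SELECT device_id, avg(temp) ... GROUP BY device_id, the device_id expression enters the pushed-down tlist with its sortgroupref and the shippable avg(temp) Aggref is added as-is; if the select list instead wraps the aggregate in a hypothetical non-shippable local function, the wrapper's avg(temp) Aggref is pulled out and shipped on its own, the wrapper is evaluated on the access node, and pushdown fails only if one of the extracted aggregates is itself not shippable.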
- */ - foreach (l, aggvars) - { - Expr *expr = (Expr *) lfirst(l); - - if (IsA(expr, Aggref)) - tlist = add_to_flat_tlist(tlist, list_make1(expr)); - } - } - } - - i++; - } - - /* - * For non-partial aggregations, classify the pushable and non-pushable - * HAVING clauses and save them in remote_conds and local_conds of the - * grouped rel's fpinfo. - * - * For partial agggregations, we never push-down the HAVING clause since - * it either has (1) been reduced by the planner to a simple filter on the - * base rel, or, in case of aggregates, the aggregates must be partials - * and have therefore been pulled up into the target list (unless they're - * already there). Any partial aggregates in the HAVING clause must be - * finalized on the access node and applied there. - */ - if (having_qual && !ispartial) - { - ListCell *lc; - - foreach (lc, (List *) having_qual) - { - Expr *expr = (Expr *) lfirst(lc); - RestrictInfo *rinfo; - - /* - * Currently, the core code doesn't wrap havingQuals in - * RestrictInfos, so we must make our own. - */ - Assert(!IsA(expr, RestrictInfo)); - rinfo = make_restrictinfo_compat(root, - expr, - true, - false, - false, - false, - false, - root->qual_security_level, - grouped_rel->relids, - NULL, - NULL, - NULL); - if (ts_is_foreign_expr(root, grouped_rel, expr)) - fpinfo->remote_conds = lappend(fpinfo->remote_conds, rinfo); - else - fpinfo->local_conds = lappend(fpinfo->local_conds, rinfo); - } - } - - /* - * If there are any local conditions, pull Vars and aggregates from it and - * check whether they are safe to pushdown or not. - */ - if (fpinfo->local_conds) - { - List *aggvars = NIL; - ListCell *lc; - - foreach (lc, fpinfo->local_conds) - { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - - aggvars = list_concat(aggvars, - pull_var_clause((Node *) rinfo->clause, PVC_INCLUDE_AGGREGATES)); - } - - foreach (lc, aggvars) - { - Expr *expr = (Expr *) lfirst(lc); - - /* - * If aggregates within local conditions are not safe to push - * down, then we cannot push down the query. Vars are already - * part of GROUP BY clause which are checked above, so no need to - * access them again here. - */ - if (IsA(expr, Aggref)) - { - if (!ts_is_foreign_expr(root, grouped_rel, expr)) - return false; - - tlist = add_to_flat_tlist(tlist, list_make1(expr)); - } - } - } - - /* Store generated targetlist */ - fpinfo->grouped_tlist = tlist; - - /* Safe to pushdown */ - fpinfo->pushdown_safe = true; - - /* - * Set cached relation costs to some negative value, so that we can detect - * when they are set to some sensible costs, during one (usually the - * first) of the calls to fdw_estimate_path_cost_size(). - */ - fpinfo->rel_startup_cost = -1; - fpinfo->rel_total_cost = -1; - - /* - * Set the string describing this grouped relation to be used in EXPLAIN - * output of corresponding ForeignScan. - */ - fpinfo->relation_name = makeStringInfo(); - appendStringInfo(fpinfo->relation_name, "Aggregate on (%s)", ofpinfo->relation_name->data); - - return true; -} - -/* - * add_foreign_grouping_paths - * Add foreign path for grouping and/or aggregation. - * - * Given input_rel represents the underlying scan. The paths are added to the - * given grouped_rel. 
- */ -static void -add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, - GroupPathExtraData *extra, CreateUpperPathFunc create_path) -{ - Query *parse = root->parse; - TsFdwRelInfo *ifpinfo = fdw_relinfo_get(input_rel); - TsFdwRelInfo *fpinfo = fdw_relinfo_get(grouped_rel); - Path *grouppath; - double rows; - int width; - Cost startup_cost; - Cost total_cost; - - /* Nothing to be done, if there is no grouping or aggregation required. */ - if (!parse->groupClause && !parse->groupingSets && !parse->hasAggs && !root->hasHavingQual) - return; - - /* save the input_rel as outerrel in fpinfo */ - fpinfo->outerrel = input_rel; - - /* - * Copy foreign table, data node, user mapping, FDW options etc. - * details from the input relation's fpinfo. - */ - fpinfo->table = ifpinfo->table; - fpinfo->server = ifpinfo->server; - fpinfo->sca = ifpinfo->sca; - merge_fdw_options(fpinfo, ifpinfo, NULL); - - /* - * Assess if it is safe to push down aggregation and grouping. - * - * Use HAVING qual from extra. In case of child partition, it will have - * translated Vars. - */ - if (!foreign_grouping_ok(root, grouped_rel, extra)) - return; - - /* Estimate the cost of push down */ - fdw_estimate_path_cost_size(root, grouped_rel, NIL, &rows, &width, &startup_cost, &total_cost); - - /* Now update this information in the fpinfo */ - fpinfo->rows = rows; - fpinfo->width = width; - fpinfo->startup_cost = startup_cost; - fpinfo->total_cost = total_cost; - - /* Create and add path to the grouping relation. */ - grouppath = (Path *) create_path(root, - grouped_rel, - grouped_rel->reltarget, - rows, - startup_cost, - total_cost, - NIL, /* no pathkeys */ - NULL, - NIL); /* no fdw_private */ - - /* Add generated path into grouped_rel by add_path(). */ - fdw_utils_add_path(grouped_rel, grouppath); - - /* Add paths with pathkeys if there's an order by clause */ - if (root->sort_pathkeys != NIL) - fdw_add_upper_paths_with_pathkeys_for_rel(root, grouped_rel, NULL, create_path); -} - -void -fdw_create_upper_paths(TsFdwRelInfo *input_fpinfo, PlannerInfo *root, UpperRelationKind stage, - RelOptInfo *input_rel, RelOptInfo *output_rel, void *extra, - CreateUpperPathFunc create_path) -{ - Assert(input_fpinfo != NULL); - - TsFdwRelInfo *output_fpinfo = NULL; - - /* - * If input rel is not safe to pushdown, then simply return as we cannot - * perform any post-join operations on the data node. - */ - if (!input_fpinfo->pushdown_safe) - return; - - /* Skip any duplicate calls (i.e., output_rel->fdw_private has already - * been set by a previous call to this function). */ - if (output_rel->fdw_private) - return; - - switch (stage) - { - case UPPERREL_GROUP_AGG: - case UPPERREL_PARTIAL_GROUP_AGG: - output_fpinfo = fdw_relinfo_alloc_or_get(output_rel); - output_fpinfo->type = input_fpinfo->type; - output_fpinfo->pushdown_safe = false; - add_foreign_grouping_paths(root, - input_rel, - output_rel, - (GroupPathExtraData *) extra, - create_path); - break; - /* Currently not handled (or received) */ - case UPPERREL_DISTINCT: - case UPPERREL_ORDERED: - case UPPERREL_SETOP: - case UPPERREL_WINDOW: - case UPPERREL_FINAL: -#if PG15_GE - case UPPERREL_PARTIAL_DISTINCT: -#endif - break; - } -} diff --git a/tsl/src/fdw/scan_plan.h b/tsl/src/fdw/scan_plan.h deleted file mode 100644 index 8e2671effd4..00000000000 --- a/tsl/src/fdw/scan_plan.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. 
- * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "data_node_chunk_assignment.h" - -typedef struct TsFdwRelInfo TsFdwRelInfo; - -typedef struct ScanInfo -{ - Oid data_node_serverid; - Index scan_relid; - List *local_exprs; - List *fdw_private; - List *fdw_scan_tlist; - List *fdw_recheck_quals; - List *params_list; - bool systemcol; -} ScanInfo; - -typedef Path *(*CreatePathFunc)(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, double rows, - Cost startup_cost, Cost total_cost, List *pathkeys, - Relids required_outer, Path *fdw_outerpath, List *fdw_private); - -typedef Path *(*CreateUpperPathFunc)(PlannerInfo *root, RelOptInfo *rel, PathTarget *target, - double rows, Cost startup_cost, Cost total_cost, - List *pathkeys, Path *fdw_outerpath, List *fdw_private); - -extern void fdw_scan_info_init(ScanInfo *scaninfo, PlannerInfo *root, RelOptInfo *rel, - Path *best_path, List *scan_clauses, Plan *outer_plan); - -extern void fdw_add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, Path *epq_path, - CreatePathFunc create_scan_path); -extern void fdw_add_upper_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, - Path *epq_path, - CreateUpperPathFunc create_upper_path); - -extern void fdw_create_upper_paths(TsFdwRelInfo *input_fpinfo, PlannerInfo *root, - UpperRelationKind stage, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra, - CreateUpperPathFunc create_paths); diff --git a/tsl/src/fdw/shippable.c b/tsl/src/fdw/shippable.c deleted file mode 100644 index 8c0cdec9a93..00000000000 --- a/tsl/src/fdw/shippable.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ - -/* - * This file contains source code that was copied and/or modified from - * the PostgreSQL database, which is licensed under the open-source - * PostgreSQL License. Please see the NOTICE at the top level - * directory for a copy of the PostgreSQL License. - */ - -/*------------------------------------------------------------------------- - * - * shippable.c - * Determine which database objects are shippable to a data node. - * - * We need to determine whether particular functions, operators, and indeed - * data types are shippable to a data node for execution --- that is, - * do they exist and have the same behavior remotely as they do locally? - * Built-in objects are generally considered shippable. Other objects can - * be shipped if they are white-listed by the user. - * - * Note: there are additional filter rules that prevent shipping mutable - * functions or functions using nonportable collations. Those considerations - * need not be accounted for here. - * - * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group - * - * IDENTIFICATION - * contrib/postgres_fdw/shippable.c - * - *------------------------------------------------------------------------- - */ - -#include -#include -#include -#include -#include -#include - -#include "scan_plan.h" -#include "shippable.h" -#include "utils.h" -#include "relinfo.h" - -/* Hash table for caching the results of shippability lookups */ -static HTAB *ShippableCacheHash = NULL; - -/* - * Hash key for shippability lookups. 
We include the FDW server OID because - * decisions may differ per-server. Otherwise, objects are identified by - * their (local!) OID and catalog OID. - */ -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - Oid objid; /* function/operator/type OID */ - Oid classid; /* OID of its catalog (pg_proc, etc) */ - Oid serverid; /* FDW server we are concerned with */ -} ShippableCacheKey; - -typedef struct -{ - ShippableCacheKey key; /* hash key - must be first */ - bool shippable; -} ShippableCacheEntry; - -/* - * Flush cache entries when pg_foreign_server is updated. - * - * We do this because of the possibility of ALTER SERVER being used to change - * a server's extensions option. We do not currently bother to check whether - * objects' extension membership changes once a shippability decision has been - * made for them, however. - */ -static void -InvalidateShippableCacheCallback(Datum arg, int cacheid, uint32 hashvalue) -{ - HASH_SEQ_STATUS status; - ShippableCacheEntry *entry; - - /* - * In principle we could flush only cache entries relating to the - * pg_foreign_server entry being outdated; but that would be more - * complicated, and it's probably not worth the trouble. So for now, just - * flush all entries. - */ - hash_seq_init(&status, ShippableCacheHash); - while ((entry = (ShippableCacheEntry *) hash_seq_search(&status)) != NULL) - { - if (hash_search(ShippableCacheHash, (void *) &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "hash table corrupted"); - } -} - -/* - * Initialize the backend-lifespan cache of shippability decisions. - */ -static void -InitializeShippableCache(void) -{ - HASHCTL ctl = { - .keysize = sizeof(ShippableCacheKey), - .entrysize = sizeof(ShippableCacheEntry), - .hcxt = TopMemoryContext, - }; - - /* Create the hash table. */ - ShippableCacheHash = - hash_create("Shippability cache", 256, &ctl, HASH_BLOBS | HASH_CONTEXT | HASH_ELEM); - - /* Set up invalidation callback on pg_foreign_server. */ - CacheRegisterSyscacheCallback(FOREIGNSERVEROID, InvalidateShippableCacheCallback, (Datum) 0); -} - -/* - * Returns true if given object (operator/function/type) is shippable - * according to the server options. - * - * Right now "shippability" is exclusively a function of whether the object - * belongs to an extension declared by the user. In the future we could - * additionally have a whitelist of functions/operators declared one at a time. - */ -static bool -lookup_shippable(Oid objectId, Oid classId, TsFdwRelInfo *fpinfo) -{ - Oid extensionOid; - - /* - * Is object a member of some extension? (Note: this is a fairly - * expensive lookup, which is why we try to cache the results.) - */ - extensionOid = getExtensionOfObject(classId, objectId); - - /* If so, is that extension in fpinfo->shippable_extensions? */ - if (OidIsValid(extensionOid) && list_member_oid(fpinfo->shippable_extensions, extensionOid)) - return true; - -#ifndef NDEBUG - /* Special debug functions that we want to ship to data nodes. */ - const char debug_func_prefix[] = "ts_debug_shippable_"; - char *func_name = get_func_name(objectId); - if (func_name != NULL && strncmp(func_name, debug_func_prefix, strlen(debug_func_prefix)) == 0) - { - return true; - } -#endif - - return false; -} - -/* - * Return true if given object is one of PostgreSQL's built-in objects. - * - * We use FirstBootstrapObjectId as the cutoff, so that we only consider - * objects with hand-assigned OIDs to be "built in", not for instance any - * function or type defined in the information_schema. 
- * - * Our constraints for dealing with types are tighter than they are for - * functions or operators: we want to accept only types that are in pg_catalog, - * else deparse_type_name might incorrectly fail to schema-qualify their names. - * Thus we must exclude information_schema types. - * - * XXX there is a problem with this, which is that the set of built-in - * objects expands over time. Something that is built-in to us might not - * be known to the data node, if it's of an older version. But keeping - * track of that would be a huge exercise. - */ -bool -is_builtin(Oid objectId) -{ - return (objectId < FirstBootstrapObjectIdCompat); -} - -/* - * is_shippable - * Is this object (function/operator/type) shippable to data node? - */ -bool -is_shippable(Oid objectId, Oid classId, TsFdwRelInfo *fpinfo) -{ - ShippableCacheKey key; - ShippableCacheEntry *entry; - - /* Built-in objects are presumed shippable. */ - if (is_builtin(objectId)) - return true; - - /* Otherwise, give up if user hasn't specified any shippable extensions. */ - if (fpinfo->shippable_extensions == NIL) - return false; - - /* Give up if we don't have a remote server. */ - if (fpinfo->server == NULL) - return false; - - /* Initialize cache if first time through. */ - if (!ShippableCacheHash) - InitializeShippableCache(); - - /* Set up cache hash key */ - key.objid = objectId; - key.classid = classId; - key.serverid = fpinfo->server->serverid; - - /* See if we already cached the result. */ - entry = (ShippableCacheEntry *) hash_search(ShippableCacheHash, (void *) &key, HASH_FIND, NULL); - - if (!entry) - { - /* Not found in cache, so perform shippability lookup. */ - bool shippable = lookup_shippable(objectId, classId, fpinfo); - - /* - * Don't create a new hash entry until *after* we have the shippable - * result in hand, as the underlying catalog lookups might trigger a - * cache invalidation. - */ - entry = (ShippableCacheEntry *) - hash_search(ShippableCacheHash, (void *) &key, HASH_ENTER, NULL); - - entry->shippable = shippable; - } - - return entry->shippable; -} diff --git a/tsl/src/fdw/shippable.h b/tsl/src/fdw/shippable.h deleted file mode 100644 index ab4ed90ed2c..00000000000 --- a/tsl/src/fdw/shippable.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. 
- */ -#pragma once - -#include - -typedef struct TsFdwRelInfo TsFdwRelInfo; - -extern bool is_builtin(Oid objectId); -extern bool is_shippable(Oid objectId, Oid classId, TsFdwRelInfo *fpinfo); diff --git a/tsl/src/hypertable.c b/tsl/src/hypertable.c index d895a7668d1..5e9962333fb 100644 --- a/tsl/src/hypertable.c +++ b/tsl/src/hypertable.c @@ -29,7 +29,6 @@ #include #include -#include "fdw/fdw.h" #include "data_node.h" #include "deparse.h" #include "remote/dist_commands.h" diff --git a/tsl/src/init.c b/tsl/src/init.c index faf66747965..4e0fa021e1f 100644 --- a/tsl/src/init.c +++ b/tsl/src/init.c @@ -36,8 +36,6 @@ #include "data_node.h" #include "dist_util.h" #include "export.h" -#include "fdw/fdw.h" -#include "fdw/relinfo.h" #include "hypertable.h" #include "license_guc.h" #include "nodes/decompress_chunk/planner.h" @@ -214,8 +212,6 @@ CrossModuleFunctions tsl_cm_functions = { .chunks_drop_stale = chunk_drop_stale_chunks, .hypertable_make_distributed = hypertable_make_distributed, .get_and_validate_data_node_list = hypertable_get_and_validate_data_nodes, - .timescaledb_fdw_handler = timescaledb_fdw_handler, - .timescaledb_fdw_validator = timescaledb_fdw_validator, .remote_txn_id_in = remote_txn_id_in_pg, .remote_txn_id_out = remote_txn_id_out_pg, .remote_txn_heal_data_node = remote_txn_heal_data_node, @@ -244,7 +240,6 @@ CrossModuleFunctions tsl_cm_functions = { .hypertable_distributed_set_replication_factor = hypertable_set_replication_factor, .cache_syscache_invalidate = cache_syscache_invalidate, .health_check = ts_dist_health_check, - .mn_get_foreign_join_paths = tsl_mn_get_foreign_join_paths, .recompress_chunk_segmentwise = tsl_recompress_chunk_segmentwise, .get_compressed_chunk_index_for_recompression = tsl_get_compressed_chunk_index_for_recompression, diff --git a/tsl/src/nodes/async_append.c b/tsl/src/nodes/async_append.c index f01c6193c39..06c72f712a2 100644 --- a/tsl/src/nodes/async_append.c +++ b/tsl/src/nodes/async_append.c @@ -16,9 +16,6 @@ #include #include "async_append.h" -#include "fdw/scan_plan.h" -#include "fdw/scan_exec.h" -#include "fdw/data_node_scan_plan.h" #include "planner.h" #include "cache.h" #include "hypertable.h" @@ -351,15 +348,7 @@ static CustomPathMethods async_append_path_methods = { static bool is_data_node_scan_path(Path *path) { - CustomPath *cpath; - if (!IsA(path, CustomPath)) - return false; - - cpath = castNode(CustomPath, path); - if (strcmp(cpath->methods->CustomName, DATA_NODE_SCAN_PATH_NAME) != 0) - return false; - - return true; + return false; } static void diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 49650c68773..8e2741f3250 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -17,9 +17,6 @@ #include "compat/compat.h" #include "debug_guc.h" #include "debug.h" -#include "fdw/data_node_scan_plan.h" -#include "fdw/fdw.h" -#include "fdw/relinfo.h" #include "guc.h" #include "hypertable_cache.h" #include "hypertable.h" @@ -70,19 +67,6 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn RelOptInfo *output_rel, TsRelType input_reltype, Hypertable *ht, void *extra) { - bool dist_ht = false; - switch (input_reltype) - { - case TS_REL_HYPERTABLE: - case TS_REL_HYPERTABLE_CHILD: - dist_ht = hypertable_is_distributed(ht); - if (dist_ht) - data_node_scan_create_upper_paths(root, stage, input_rel, output_rel, extra); - break; - default: - break; - } - switch (stage) { case UPPERREL_GROUP_AGG: @@ -226,24 +210,4 @@ tsl_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntr */ return; } - - 
Cache *hcache; - Hypertable *ht = - ts_hypertable_cache_get_cache_and_entry(rte->relid, CACHE_FLAG_MISSING_OK, &hcache); - - if (rel->fdw_private != NULL && ht != NULL && hypertable_is_distributed(ht)) - { - FdwRoutine *fdw = (FdwRoutine *) DatumGetPointer( - DirectFunctionCall1(timescaledb_fdw_handler, PointerGetDatum(NULL))); - - fdw->GetForeignRelSize(root, rel, rte->relid); - fdw->GetForeignPaths(root, rel, rte->relid); - -#ifdef TS_DEBUG - if (ts_debug_optimizer_flags.show_rel) - tsl_debug_log_rel_with_paths(root, rel, (UpperRelationKind *) NULL); -#endif - } - - ts_cache_release(hcache); } diff --git a/tsl/test/shared/expected/extension.out b/tsl/test/shared/expected/extension.out index 1b1dfbc9ed0..38c01376f6e 100644 --- a/tsl/test/shared/expected/extension.out +++ b/tsl/test/shared/expected/extension.out @@ -325,8 +325,6 @@ ORDER BY pronamespace::regnamespace::text COLLATE "C", p.oid::regprocedure::text time_bucket_gapfill(interval,timestamp with time zone,timestamp with time zone,timestamp with time zone) time_bucket_gapfill(interval,timestamp without time zone,timestamp without time zone,timestamp without time zone) time_bucket_gapfill(smallint,smallint,smallint,smallint) - timescaledb_fdw_handler() - timescaledb_fdw_validator(text[],oid) timescaledb_post_restore() timescaledb_pre_restore() timescaledb_experimental.add_policies(regclass,boolean,"any","any","any","any")
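
With the wrapper removed, the expected function list in tsl/test/shared/expected/extension.out above no longer carries the handler/validator entries. A minimal sanity check against the standard PostgreSQL catalogs (hypothetical, not shipped with this patch) on an existing installation that has already been updated could be:

    -- Hypothetical post-update check, not part of the applied diff;
    -- both queries are expected to return 0 once the FDW objects are gone.
    SELECT count(*) FROM pg_foreign_data_wrapper WHERE fdwname = 'timescaledb_fdw';
    SELECT count(*) FROM pg_proc
     WHERE proname IN ('timescaledb_fdw_handler', 'timescaledb_fdw_validator');

pg_foreign_data_wrapper and pg_proc are core PostgreSQL catalogs, so the check needs no TimescaleDB-specific helpers.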