From 7413296e67b972265c84b0e380ca1f631e45417b Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Mon, 11 Sep 2023 23:27:47 +0100 Subject: [PATCH 001/106] Docs: new two gucs for timestamp9 module (#16398) * Docs: new two gucs for timestamp9 module * Update gpdb-doc/markdown/ref_guide/modules/timestamp9.html.md Co-authored-by: Xing Guo --------- Co-authored-by: David Yozie Co-authored-by: Xing Guo --- .../ref_guide/modules/timestamp9.html.md | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/gpdb-doc/markdown/ref_guide/modules/timestamp9.html.md b/gpdb-doc/markdown/ref_guide/modules/timestamp9.html.md index 6d7e6a48ee7a..89cbaf587111 100644 --- a/gpdb-doc/markdown/ref_guide/modules/timestamp9.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/timestamp9.html.md @@ -408,6 +408,61 @@ testdb=# SELECT now()::timestamp9; (1 row) ``` +## Support For Date/Time Functions + +The `timestamp9` module defines two server configuration parameters that you set to enable date/time functions defined in the `pg_catalog` schema on `timestamp` types. Visit the [PostgreSQL Documentation](https://www.postgresql.org/docs/12/functions-datetime.html#:~:text=Table%C2%A09.31.%C2%A0Date/Time%20Functions) for a list of the supported date/time functions. The parameters are: + +- `timestamp9.enable_implicit_cast_timestamp9_ltz_to_timestamptz`: when enabled, casting a `timestamp9_ltz` value to `timestamp with time zone` becomes implicit. +- `timestamp9.enable_implicit_cast_timestamp9_ntz_to_timestamp`: when enabled, casting a `timestamp9_ntz` value to `timestamp without time zone` becomes implicit. + +The default value for both configuration parameters is `off`. For example, if you try use the `date` function with `timestamp9` and `timestamp9.enable_implicit_cast_timestamp9_ltz_to_timestamptz` is set to `off`: + +``` +postgres=# SELECT date('2022-01-01'::timestamp9_ltz); +ERROR: implicitly cast timestamp9_ltz to timestamptz is not allowed +HINT: either set 'timestamp9.enable_implicit_cast_timestamp9_ltz_to_timestamptz' to 'on' or do it explicitly +``` +Enable the configuration parameter in order to use the `date` function: + +``` +postgres=# SET timestamp9.enable_implicit_cast_timestamp9_ltz_to_timestamptz TO 'ON'; +SET +postgres=# SELECT date('2022-01-01'::timestamp9_ltz); + date +------------ + 01-01-2022 +(1 row) +``` + +Note that enabling these configuration parameters will also result in multiple casting paths from `timestamp9` types and built-in `timestamp` types. You may encounter error messages such as: + +``` +postgres=# select '2019-09-19'::timestamp9_ltz <= '2019-09-20'::timestamptz; +ERROR: operator is not unique: timestamp9_ltz <= timestamp with time zone +LINE 1: select '2019-09-19'::timestamp9_ltz <= '2019-09-20'::timesta... +HINT: Could not choose a best candidate operator. You might need to add explicit type casts. +``` + +In this situation, cast the type explicitly: + +``` +postgres=# select '2019-09-19'::timestamp9_ntz <= '2019-09-20'::timestamptz::timestamp9_ntz; +?column? +---------- + t +(1 row) +``` + +Alternatively, cast the `timestamp9_ntz` value to the `timestamptz` value: + +``` +postgres=# select '2019-09-19'::timestamp9_ntz::timestamptz <= '2019-09-20'::timestamptz; +?column? 
+---------- + t +(1 row) +``` + ## Examples ### `TIMESTAMP9_LTZ` Examples From 21e7c1038279df0512e2c72834eb21b7ffc98cef Mon Sep 17 00:00:00 2001 From: Shaoqi Bai Date: Tue, 12 Sep 2023 12:58:52 -0700 Subject: [PATCH 002/106] Fix wrong var name for resource reduced-frequency-trigger when os_type is ubuntu20.04, the reduced-frequency-trigger-start-[[ os_type ]] reduced-frequency-trigger-stop-[[ os_type ]] will become reduced-frequency-trigger-start-ubuntu20.04 reduced-frequency-trigger-stop-ubuntu20.04 For concourse, when there is a dot in var's name, it will look for the field 04 for var reduced-frequency-trigger-start-ubuntu20 and filed 04 for var reduced-frequency-trigger-stop-ubuntu20 refer: https://concourse-ci.org/vars.html#var-syntax To workaround this, add double quote to the var [GPR-1532] Authored-by: Shaoqi Bai --- concourse/pipelines/gpdb_6X_STABLE-generated.yml | 2 +- concourse/pipelines/templates/gpdb-tpl.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/concourse/pipelines/gpdb_6X_STABLE-generated.yml b/concourse/pipelines/gpdb_6X_STABLE-generated.yml index ba6ddeedd009..7244feb51fa5 100644 --- a/concourse/pipelines/gpdb_6X_STABLE-generated.yml +++ b/concourse/pipelines/gpdb_6X_STABLE-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpdb-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -t prod). ## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2023-09-06 12:50:28.297086 +## Generated by gen_pipeline.py at: 2023-09-12 12:58:30.539301 ## Template file: gpdb-tpl.yml ## OS Types: ## Test Sections: ['icw', 'cli', 'aa', 'release'] diff --git a/concourse/pipelines/templates/gpdb-tpl.yml b/concourse/pipelines/templates/gpdb-tpl.yml index fc09efb4dd30..0680d627f14b 100644 --- a/concourse/pipelines/templates/gpdb-tpl.yml +++ b/concourse/pipelines/templates/gpdb-tpl.yml @@ -849,8 +849,8 @@ resources: type: time source: location: America/Los_Angeles - start: ((reduced-frequency-trigger-start-[[ os_type ]])) - stop: ((reduced-frequency-trigger-stop-[[ os_type ]])) + start: (("reduced-frequency-trigger-start-[[ os_type ]]")) + stop: (("reduced-frequency-trigger-stop-[[ os_type ]]")) {% if os_type != "centos7" %} days: [Monday] {% else %} From a7e1ea63f3db805be35c26e49636804b429586a5 Mon Sep 17 00:00:00 2001 From: David Yozie Date: Tue, 12 Sep 2023 14:24:08 -0700 Subject: [PATCH 003/106] Revert "docs - open parallel retrieve cursor on behalf of current user (#16027)" This reverts commit ee6e25bfdeda467cc8c5fd210a6dd23f67becbe9. --- .../parallel_retrieve_cursor.html.md | 361 ------------------ 1 file changed, 361 deletions(-) delete mode 100644 gpdb-doc/markdown/admin_guide/parallel_retrieve_cursor.html.md diff --git a/gpdb-doc/markdown/admin_guide/parallel_retrieve_cursor.html.md b/gpdb-doc/markdown/admin_guide/parallel_retrieve_cursor.html.md deleted file mode 100644 index 430bd8662f03..000000000000 --- a/gpdb-doc/markdown/admin_guide/parallel_retrieve_cursor.html.md +++ /dev/null @@ -1,361 +0,0 @@ ---- -title: Retrieving Query Results with a Parallel Retrieve Cursor ---- - -A *parallel retrieve cursor* is an enhanced cursor implementation that you can use to create a special kind of cursor on the Greenplum Database coordinator node, and retrieve query results, on demand and in parallel, directly from the Greenplum segments. - -## About Parallel Retrieve Cursors - -You use a cursor to retrieve a smaller number of rows at a time from a larger - query. 
When you declare a parallel retrieve cursor, the Greenplum - Database Query Dispatcher (QD) dispatches the query plan to each Query Executor - (QE), and creates an *endpoint* on each QE before it executes the query. - An endpoint is a query result source for a parallel retrieve cursor on a specific - QE. Instead of returning the query result to the QD, an endpoint retains the - query result for retrieval via a different process: a direct connection to the - endpoint. You open a special retrieve mode connection, called a *retrieve - session*, and use the new `RETRIEVE` SQL command to retrieve - query results from each parallel retrieve cursor endpoint. You can retrieve - from parallel retrieve cursor endpoints on demand and in parallel. - -You can use the following functions and views to examine and manage parallel retrieve cursors and endpoints: - -|Function, View Name|Description| -|-------------------|-----------| -|gp\_get\_endpoints\(\)

[gp\_endpoints](../ref_guide/system_catalogs/catalog_ref-views.html#gp_endpoints)|List the endpoints associated with all active parallel retrieve cursors declared by the current user in the current database. When the Greenplum Database superuser invokes this function, it returns a list of all endpoints for all parallel retrieve cursors declared by all users in the current database.| -|gp\_get\_session\_endpoints\(\)

[gp\_session\_endpoints](../ref_guide/system_catalogs/catalog_ref-views.html#gp_session_endpoints)|List the endpoints associated with all parallel retrieve cursors declared in the current session for the current user.| -|gp\_get\_segment\_endpoints\(\)

[gp\_segment\_endpoints](../ref_guide/system_catalogs/catalog_ref-views.html#gp_segment_endpoints)|List the endpoints created in the QE for all active parallel retrieve cursors declared by the current user. When the Greenplum Database superuser accesses this view, it returns a list of all endpoints on the QE created for all parallel retrieve cursors declared by all users.| -|gp\_wait\_parallel\_retrieve\_cursor\(cursorname text, timeout\_sec int4 \)|Return cursor status or block and wait for results to be retrieved from all endpoints associated with the specified parallel retrieve cursor.| - -
Each of these functions and views is located in the pg_catalog schema, and each RETURNS TABLE.
- -## Using a Parallel Retrieve Cursor - -You will perform the following tasks when you use a Greenplum Database parallel retrieve cursor to read query results in parallel from Greenplum segments: - -1. [Declare the parallel retrieve cursor](#declare_cursor). -1. [List the endpoints of the parallel retrieve cursor](#list_endpoints). -1. [Open a retrieve connection to each endpoint](#open_retrieve_conn). -1. [Retrieve data from each endpoint](#retrieve_data). -1. [Wait for data retrieval to complete](#wait). -1. [Handle data retrieval errors](#error_handling). -1. [Close the parallel retrieve cursor](#close). - -In addition to the above, you may optionally choose to [List all parallel retrieve cursors](#list_all_prc) in the system or [List segment-specific retrieve session information](#utility_endpoints). - -### Declaring a Parallel Retrieve Cursor - -You [DECLARE](../ref_guide/sql_commands/DECLARE.html#topic1) a cursor to retrieve a smaller number of rows at a time from a larger query. When you declare a parallel retrieve cursor, you can retrieve the query results directly from the Greenplum Database segments. - -The syntax for declaring a parallel retrieve cursor is similar to that of declaring a regular cursor; you must additionally include the `PARALLEL RETRIEVE` keywords in the command. You can declare a parallel retrieve cursor only within a transaction, and the cursor name that you specify when you declare the cursor must be unique within the transaction. - -For example, the following commands begin a transaction and declare a parallel retrieve cursor named `prc1` to retrieve the results from a specific query: - -``` sql -BEGIN; -DECLARE prc1 PARALLEL RETRIEVE CURSOR FOR query; -``` - -Greenplum Database creates the endpoint(s) on the QD or QEs, depending on the *query* parameters: - -- Greenplum Database creates an endpoint on the QD when the query results must be gathered by the coordinator. For example, this `DECLARE` statement requires that the coordinator gather the query results: - - ``` sql - DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 ORDER BY a; - ``` -
You may choose to run the EXPLAIN command on the parallel retrieve cursor query to identify when motion is involved. Consider using a regular cursor for such queries.
- -- When the query involves direct dispatch to a segment (the query is filtered on the distribution key), Greenplum Database creates the endpoint(s) on specific segment host(s). For example, this `DECLARE` statement may result in the creation of single endpoint: - - ``` sql - DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 WHERE a=1; - ``` - -- Greenplum Database creates the endpoints on all segment hosts when all hosts contribute to the query results. This example `DECLARE` statement results in all segments contributing query results: - - ``` sql - DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; - ``` - -The `DECLARE` command returns when the endpoints are ready and query execution has begun. - -### Listing a Parallel Retrieve Cursor's Endpoints - -You can obtain the information that you need to initiate a retrieve - connection to an endpoint by invoking the `gp_get_endpoints()` - function or examining the `gp_endpoints` view in a session on - the Greenplum Database coordinator host: - -``` sql -SELECT * FROM gp_get_endpoints(); -SELECT * FROM gp_endpoints; -``` - -These commands return the list of endpoints in a table with the following columns: - -|Column Name|Description| -|-----------|-----------| -|gp\_segment\_id|The QE's endpoint `gp_segment_id`.| -|auth\_token|The authentication token for a retrieve session.| -|cursorname|The name of the parallel retrieve cursor.| -|sessionid|The identifier of the session in which the parallel retrieve cursor was created.| -|hostname|The name of the host from which to retrieve the data for the endpoint.| -|port|The port number from which to retrieve the data for the endpoint.| -|username|The name of the current user; *you must initiate the retrieve session as this user*.| -|state|The state of the endpoint; the valid states are:

READY: The endpoint is ready to be retrieved.

ATTACHED: The endpoint is attached to a retrieve connection.

RETRIEVING: A retrieve session is retrieving data from the endpoint at this moment.

FINISHED: The endpoint has been fully retrieved.

RELEASED: Due to an error, the endpoint has been released and the connection closed.| -|endpointname|The endpoint identifier; you provide this identifier to the `RETRIEVE` command.| - -Refer to the [gp_endpoints](../ref_guide/system_catalogs/catalog_ref-views.html#gp_endpoints) view reference page for more information about the endpoint attributes returned by these commands. - -You can similarly invoke the `gp_get_session_endpoints()` function or examine the `gp_session_endpoints` view to list the endpoints created for the parallel retrieve cursors declared in the current session and by the current user. - -### Opening a Retrieve Session - -After you declare a parallel retrieve cursor, you can open a retrieve session to each endpoint. Only a single retrieve session may be open to an endpoint at any given time. - -
A retrieve session is independent of the parallel retrieve cursor itself and the endpoints.
- -Retrieve session authentication does not depend on the `pg_hba.conf` file, but rather on an authentication token (`auth_token`) generated by Greenplum Database. - -
Because Greenplum Database skips pg_hba.conf-controlled authentication for a retrieve session, for security purposes you may invoke only the RETRIEVE command in the session.
- -When you initiate a retrieve session to an endpoint: - -- The user that you specify for the retrieve session must be the user that declared the parallel retrieve cursor (the `username` returned by `gp_endpoints`). This user must have Greenplum Database login privileges. - -- You specify the `hostname` and `port` returned by `gp_endpoints` for the endpoint. - -- You authenticate the retrieve session by specifying the `auth_token` returned for the endpoint via the `PGPASSWORD` environment variable, or when prompted for the retrieve session `Password`. - -- You must specify the [gp_retrieve_conn](../ref_guide/config_params/guc-list.html#gp_retrieve_conn) server configuration parameter on the connection request, and set the value to `true` . - -For example, if you are initiating a retrieve session via `psql`: - -``` shell -PGOPTIONS='-c gp_retrieve_conn=true' psql -h -p -U -d -``` - -To distinguish a retrieve session from other sessions running on a segment host, Greenplum Database includes the `[retrieve]` tag on the `ps` command output display for the process. - -### Retrieving Data From the Endpoint - -Once you establish a retrieve session, you retrieve the tuples associated with a query result on that endpoint using the [RETRIEVE](../ref_guide/sql_commands/RETRIEVE.html#topic1) command. - -You can specify a (positive) number of rows to retrieve, or `ALL` rows: - -``` sql -RETRIEVE 7 FROM ENDPOINT prc10000003300000003; -RETRIEVE ALL FROM ENDPOINT prc10000003300000003; -``` - -Greenplum Database returns an empty set if there are no more rows to retrieve from the endpoint. - -
You can retrieve from multiple parallel retrieve cursors within the same retrieve session only when their auth_tokens match.
- -### Waiting for Data Retrieval to Complete - -Use the `gp_wait_parallel_retrieve_cursor()` function to display the the status of data retrieval from a parallel retrieve cursor, or to wait for all endpoints to finishing retrieving the data. You invoke this function in the transaction block in which you declared the parallel retrieve cursor. - -`gp_wait_parallel_retrieve_cursor()` returns `true` only when all tuples are fully retrieved from all endpoints. In all other cases, the function returns `false` and may additionally throw an error. - -The function signatures of `gp_wait_parallel_retrieve_cursor()` follow: - -``` sql -gp_wait_parallel_retrieve_cursor( cursorname text ) -gp_wait_parallel_retrieve_cursor( cursorname text, timeout_sec int4 ) -``` - -You must identify the name of the cursor when you invoke this function. The timeout argument is optional: - -- The default timeout is `0` seconds: Greenplum Database checks the retrieval status of all endpoints and returns the result immediately. - -- A timeout value of `-1` seconds instructs Greenplum to block until all data from all endpoints has been retrieved, or block until an error occurs. - -- The function reports the retrieval status after a timeout occurs for any other positive timeout value that you specify. - -`gp_wait_parallel_retrieve_cursor()` returns when it encounters one of the following conditions: - -- All data has been retrieved from all endpoints. -- A timeout has occurred. -- An error has occurred. - -### Handling Data Retrieval Errors - -An error can occur in a retrieve sesson when: - -- You cancel or interrupt the retrieve operation. -- The endpoint is only partially retrieved when the retrieve session quits. - -When an error occurs in a specific retrieve session, Greenplum Database removes the endpoint from the QE. Other retrieve sessions continue to function as normal. - -If you close the transaction before fully retrieving from all endpoints, or if `gp_wait_parallel_retrieve_cursor()` returns an error, Greenplum Database terminates all remaining open retrieve sessions. - -### Closing the Cursor - -When you have completed retrieving data from the parallel retrieve cursor, close the cursor and end the transaction: - -``` sql -CLOSE prc1; -END; -``` - -
When you close a parallel retrieve cursor, Greenplum Database terminates any open retrieve sessions associated with the cursor.
- -On closing, Greenplum Database frees all resources associated with the parallel retrieve cursor and its endpoints. - -### Listing All Parallel Retrieve Cursors - -The [pg_cursors](../ref_guide/system_catalogs/catalog_ref-views.html#pg_cursors) view lists all declared cursors that are currently available in the system. You can obtain information about all parallel retrieve cursors by running the following command: - -``` sql -SELECT * FROM pg_cursors WHERE is_parallel = true; -``` - -### Listing Segment-Specific Retrieve Session Information - -You can obtain information about all retrieve sessions to a specific QE endpoint by invoking the `gp_get_segment_endpoints()` function or examining the `gp_segment_endpoints` view: - -``` sql -SELECT * FROM gp_get_segment_endpoints(); -SELECT * FROM gp_segment_endpoints; -``` - -These commands provide information about the retrieve sessions associated with a QE endpoint for all active parallel retrieve cursors declared by the current user. When the Greenplum Database superuser invokes the command, it returns the retrieve session information for all endpoints on the QE created for all parallel retrieve cursors declared by all users. - -You can obtain segment-specific retrieve session information in two ways: from the QD, or via a utility-mode connection to the endpoint: - -- QD example: - - ``` sql - SELECT * from gp_dist_random('gp_segment_endpoints'); - ``` - - Display the information filtered to a specific segment: - - ``` sql - SELECT * from gp_dist_random('gp_segment_endpoints') WHERE gp_segment_id = 0; - ``` - -- Example utilizing a utility-mode connection to the endpoint: - - ``` sql - $ PGOPTIONS='-c gp_session_role=utility' psql -h sdw3 -U localuser -p 6001 -d testdb - - testdb=> SELECT * FROM gp_segment_endpoints; - ``` - -The commands return endpoint and retrieve session information in a table with the following columns: - -|Column Name|Description| -|-----------|-----------| -|auth\_token|The authentication token for a the retrieve session.| -|databaseid|The identifier of the database in which the parallel retrieve cursor was created.| -|senderpid|The identifier of the process sending the query results.| -|receiverpid|The process identifier of the retrieve session that is receiving the query results.| -|state|The state of the endpoint; the valid states are:

READY: The endpoint is ready to be retrieved.

ATTACHED: The endpoint is attached to a retrieve connection.

RETRIEVING: A retrieve session is retrieving data from the endpoint at this moment.

FINISHED: The endpoint has been fully retrieved.

RELEASED: Due to an error, the endpoint has been released and the connection closed.| -|gp\_segment\_id|The QE's endpoint `gp_segment_id`.| -|sessionid|The identifier of the session in which the parallel retrieve cursor was created.| -|username|The name of the user that initiated the retrieve session.| -|endpointname|The endpoint identifier.| -|cursorname|The name of the parallel retrieve cursor.| - -Refer to the [gp_segment_endpoints](../ref_guide/system_catalogs/catalog_ref-views.html#gp_segment_endpoints) view reference page for more information about the endpoint attributes returned by these commands. - - -## Limiting the Number of Concurrently Open Cursors - -By default, Greenplum Database does not limit the number of parallel retrieve cursors that are active in the cluster \(up to the maximum value of 1024\). The Greenplum Database superuser can set the [gp\_max\_parallel\_cursors](../ref_guide/config_params/guc-list.html#gp_max_parallel_cursors) server configuration parameter to limit the number of open cursors. - - -## Known Issues and Limitations - -The parallel retrieve cursor implementation has the following limitations: - -- The VMware Greenplum Query Optimizer (GPORCA) does not support queries on a parallel retrieve cursor. -- Greenplum Database ignores the `BINARY` clause when you declare a parallel retrieve cursor. -- Parallel retrieve cursors cannot be declared `WITH HOLD`. -- Parallel retrieve cursors do not support the `FETCH` and `MOVE` cursor operations. -- Parallel retrieve cursors are not supported in SPI; you cannot declare a parallel retrieve cursor in a PL/pgSQL function. - - -## Additional Documentation - -Refer to the [README](https://github.com/greenplum-db/gpdb/tree/main/src/backend/cdb/endpoint/README) in the Greenplum Database `github` repository for additional information about the parallel retrieve cursor implementation. You can also find parallel retrieve cursor [programming examples](https://github.com/greenplum-db/gpdb/tree/main/src/test/examples/) in the repository. - - -## Example - -Create a parallel retrieve cursor and use it to pull query results from a Greenplum Database cluster: - -1. Open a `psql` session to the Greenplum Database coordinator host: - - ``` shell - psql -d testdb - ``` - -1. Start the transaction: - - ``` sql - BEGIN; - ``` - -1. Declare a parallel retrieve cursor named `prc1` for a `SELECT *` query on a table: - - ``` sql - DECLARE prc1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; - ``` - -1. 
Obtain the endpoints for this parallel retrieve cursor: - - ``` sql - SELECT * FROM gp_endpoints WHERE cursorname='prc1'; - gp_segment_id | auth_token | cursorname | sessionid | hostname | port | username | state | endpointname - ---------------+----------------------------------+------------+-----------+----------+------+----------+-------+---------------------- - 2 | 39a2dc90a82fca668e04d04e0338f105 | prc1 | 51 | sdw1 | 6000 | bill | READY | prc10000003300000003 - 3 | 1a6b29f0f4cad514a8c3936f9239c50d | prc1 | 51 | sdw1 | 6001 | bill | READY | prc10000003300000003 - 4 | 1ae948c8650ebd76bfa1a1a9fa535d93 | prc1 | 51 | sdw2 | 6000 | bill | READY | prc10000003300000003 - 5 | f10f180133acff608275d87966f8c7d9 | prc1 | 51 | sdw2 | 6001 | bill | READY | prc10000003300000003 - 6 | dda0b194f74a89ed87b592b27ddc0e39 | prc1 | 51 | sdw3 | 6000 | bill | READY | prc10000003300000003 - 7 | 037f8c747a5dc1b75fb10524b676b9e8 | prc1 | 51 | sdw3 | 6001 | bill | READY | prc10000003300000003 - 8 | c43ac67030dbc819da9d2fd8b576410c | prc1 | 51 | sdw4 | 6000 | bill | READY | prc10000003300000003 - 9 | e514ee276f6b2863142aa2652cbccd85 | prc1 | 51 | sdw4 | 6001 | bill | READY | prc10000003300000003 - (8 rows) - ``` - -1. Wait until all endpoints are fully retrieved: - - ``` sql - SELECT gp_wait_parallel_retrieve_cursor( 'prc1', -1 ); - ``` - -1. For each endpoint: - - 1. Open a retrieve session. For example, to open a retrieve session to the segment instance running on `sdw3`, port number `6001`, run the following command in a *different terminal window*; when prompted for the password, provide the `auth_token` identified in row 7 of the `gp_endpoints` output: - - ``` sql - $ PGOPTIONS='-c gp_retrieve_conn=true' psql -h sdw3 -U localuser -p 6001 -d testdb - Password: - ```` - - 1. Retrieve data from the endpoint: - - ``` sql - -- Retrieve 7 rows of data from this session - RETRIEVE 7 FROM ENDPOINT prc10000003300000003 - -- Retrieve the remaining rows of data from this session - RETRIEVE ALL FROM ENDPOINT prc10000003300000003 - ``` - - 1. Exit the retrieve session: - - ``` sql - \q - ``` - -1. In the original `psql` session (the session in which you declared the parallel retrieve cursor), verify that the `gp_wait_parallel_retrieve_cursor()` function returned `t`. Then close the cursor and complete the transaction: - - ``` sql - CLOSE prc1; - END; - ``` - From acc5310e8b4156c8c3857fc21571fe013c169f98 Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Tue, 12 Sep 2023 16:13:48 -0600 Subject: [PATCH 004/106] docs - open parallel retrieve cursor for current user (6x) (#16427) --- .../modules/gp_parallel_retrieve_cursor.html.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gpdb-doc/markdown/ref_guide/modules/gp_parallel_retrieve_cursor.html.md b/gpdb-doc/markdown/ref_guide/modules/gp_parallel_retrieve_cursor.html.md index f798b83121d1..f6d37c8c2529 100644 --- a/gpdb-doc/markdown/ref_guide/modules/gp_parallel_retrieve_cursor.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/gp_parallel_retrieve_cursor.html.md @@ -34,9 +34,9 @@ The `gp_parallel_retrieve_cursor` module provides the following functions and vi |Function, View Name|Description| |-------------------|-----------| -|gp\_get\_endpoints\(\)

[gp\_endpoints](../system_catalogs/gp_endpoints.html#topic1)|List the endpoints associated with all active parallel retrieve cursors declared by the current session user in the current database. When the Greenplum Database superuser invokes this function, it returns a list of all endpoints for all parallel retrieve cursors declared by all users in the current database.| -|gp\_get\_session\_endpoints\(\)

[gp\_session\_endpoints](../system_catalogs/gp_session_endpoints.html#topic1)|List the endpoints associated with all parallel retrieve cursors declared in the current session for the current session user.| -|gp\_get\_segment\_endpoints\(\)

[gp\_segment\_endpoints](../system_catalogs/gp_segment_endpoints.html#topic1)|List the endpoints created in the QE for all active parallel retrieve cursors declared by the current session user. When the Greenplum Database superuser accesses this view, it returns a list of all endpoints on the QE created for all parallel retrieve cursors declared by all users.| +|gp\_get\_endpoints\(\)

[gp\_endpoints](../system_catalogs/gp_endpoints.html#topic1)|List the endpoints associated with all active parallel retrieve cursors declared by the current user in the current database. When the Greenplum Database superuser invokes this function, it returns a list of all endpoints for all parallel retrieve cursors declared by all users in the current database.| +|gp\_get\_session\_endpoints\(\)

[gp\_session\_endpoints](../system_catalogs/gp_session_endpoints.html#topic1)|List the endpoints associated with all parallel retrieve cursors declared in the current session for the current user.| +|gp\_get\_segment\_endpoints\(\)

[gp\_segment\_endpoints](../system_catalogs/gp_segment_endpoints.html#topic1)|List the endpoints created in the QE for all active parallel retrieve cursors declared by the current user. When the Greenplum Database superuser accesses this view, it returns a list of all endpoints on the QE created for all parallel retrieve cursors declared by all users.| |gp\_wait\_parallel\_retrieve\_cursor\(cursorname text, timeout\_sec int4 \)|Return cursor status or block and wait for results to be retrieved from all endpoints associated with the specified parallel retrieve cursor.| > **Note** Each of these functions and views is located in the `pg_catalog` schema, and each `RETURNS TABLE`. @@ -112,7 +112,7 @@ These commands return the list of endpoints in a table with the following column |sessionid|The identifier of the session in which the parallel retrieve cursor was created.| |hostname|The name of the host from which to retrieve the data for the endpoint.| |port|The port number from which to retrieve the data for the endpoint.| -|username|The name of the session user \(not the current user\); *you must initiate the retrieve session as this user*.| +|username|The name of the current user; *you must initiate the retrieve session as this user*.| |state|The state of the endpoint; the valid states are:

READY: The endpoint is ready to be retrieved.

ATTACHED: The endpoint is attached to a retrieve connection.

RETRIEVING: A retrieve session is retrieving data from the endpoint at this moment.

FINISHED: The endpoint has been fully retrieved.

RELEASED: Due to an error, the endpoint has been released and the connection closed.| |endpointname|The endpoint identifier; you provide this identifier to the `RETRIEVE` command.| @@ -132,7 +132,7 @@ Retrieve session authentication does not depend on the `pg_hba.conf` file, but r When you initiate a retrieve session to an endpoint: -- The user that you specify for the retrieve session must be the session user that declared the parallel retrieve cursor \(the `username` returned by `gp_endpoints`\). This user must have Greenplum Database login privileges. +- The user that you specify for the retrieve session must be the user that declared the parallel retrieve cursor \(the `username` returned by `gp_endpoints`\). This user must have Greenplum Database login privileges. - You specify the `hostname` and `port` returned by `gp_endpoints` for the endpoint. - You authenticate the retrieve session by specifying the `auth_token` returned for the endpoint via the `PGPASSWORD` environment variable, or when prompted for the retrieve session `Password`. - You must specify the [gp\_retrieve\_conn](../config_params/guc-list.html#gp_retrieve_conn) server configuration parameter on the connection request, and set the value to `true` . @@ -218,7 +218,7 @@ SELECT * FROM gp_get_segment_endpoints(); SELECT * FROM gp_segment_endpoints; ``` -These commands provide information about the retrieve sessions associated with a QE endpoint for all active parallel retrieve cursors declared by the current session user. When the Greenplum Database superuser invokes the command, it returns the retrieve session information for all endpoints on the QE created for all parallel retrieve cursors declared by all users. +These commands provide information about the retrieve sessions associated with a QE endpoint for all active parallel retrieve cursors declared by the current user. When the Greenplum Database superuser invokes the command, it returns the retrieve session information for all endpoints on the QE created for all parallel retrieve cursors declared by all users. You can obtain segment-specific retrieve session information in two ways: from the QD, or via a utility-mode connection to the endpoint: @@ -254,7 +254,7 @@ The commands return endpoint and retrieve session information in a table with th |state|The state of the endpoint; the valid states are:

READY: The endpoint is ready to be retrieved.

ATTACHED: The endpoint is attached to a retrieve connection.

RETRIEVING: A retrieve session is retrieving data from the endpoint at this moment.

FINISHED: The endpoint has been fully retrieved.

RELEASED: Due to an error, the endpoint has been released and the connection closed.| |gp\_segment\_id|The QE's endpoint `gp_segment_id`.| |sessionid|The identifier of the session in which the parallel retrieve cursor was created.| -|username|The name of the session user that initiated the retrieve session.| +|username|The name of the user that initiated the retrieve session.| |endpointname|The endpoint identifier.| |cursorname|The name of the parallel retrieve cursor.| From ac2e000c35b74907cb130aa7b95c2e25ddf765e6 Mon Sep 17 00:00:00 2001 From: Debosri Dhar Brahma <87465345+ddharbrahma@users.noreply.github.com> Date: Wed, 13 Sep 2023 19:25:50 +0530 Subject: [PATCH 005/106] Added documentation for gpcheckperf option (#16395) * Added documentation for gpcheckperf option * Changed buffer-size default size to 8KB --- gpdb-doc/markdown/utility_guide/ref/gpcheckperf.html.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/utility_guide/ref/gpcheckperf.html.md b/gpdb-doc/markdown/utility_guide/ref/gpcheckperf.html.md index 8ed4a55ddc9c..9a682b49f1c2 100644 --- a/gpdb-doc/markdown/utility_guide/ref/gpcheckperf.html.md +++ b/gpdb-doc/markdown/utility_guide/ref/gpcheckperf.html.md @@ -7,7 +7,7 @@ Verifies the baseline hardware performance of the specified hosts. ``` gpcheckperf -d [-d ...]     {-f  | - h [-h hostname ...]} -    [-r ds] [-B ] [-S ] [-D] [-v|-V] +    [-r ds] [-B ] [-S ] [--buffer-size ] [-D] [-v|-V] gpcheckperf -d     {-f  | - h [-h< hostname> ...]} @@ -37,6 +37,9 @@ Before using `gpcheckperf`, you must have a trusted host setup between the hosts -B block\_size : Specifies the block size \(in KB or MB\) to use for disk I/O test. The default is 32KB, which is the same as the Greenplum Database page size. The maximum block size is 1 MB. +--buffer-size buffer_size +: Specifies the size of the send buffer in kilobytes. Default size is 32 kilobytes. + -d test\_directory : For the disk I/O test, specifies the file system directory locations to test. You must have write access to the test directory on all hosts involved in the performance test. You can use the `-d` option multiple times to specify multiple test directories \(for example, to test disk I/O of your primary and mirror data directories\). 
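
The `--buffer-size` option documented in the patch above takes a value in kilobytes. The short sketch below is only an illustration of driving `gpcheckperf` with that option from a script; the pairing with network-test style flags (`-r N`, `-d /tmp`) and the host file name are assumptions for the example, not details taken from the patch itself.

``` python
# Hypothetical invocation of gpcheckperf using the newly documented
# --buffer-size option; the flag combination and host file are assumed.
import subprocess

cmd = [
    "gpcheckperf",
    "-f", "hostfile_gpchecknet",   # assumed host file listing the segment hosts
    "-r", "N",                     # assumed network test mode
    "-d", "/tmp",                  # scratch directory used by the test
    "--buffer-size", "64",         # send buffer size in kilobytes (option described above)
]

result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)
if result.returncode != 0:
    print("gpcheckperf failed:", result.stderr)
```
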
From 54a34372f673844f14ee4bcabb2415d5eba1fbc1 Mon Sep 17 00:00:00 2001 From: Marcus Robb Date: Tue, 19 Sep 2023 00:22:24 -0700 Subject: [PATCH 006/106] Backport - Add materialized views to list of tables to be analyzed (#16450) --- gpMgmt/bin/analyzedb | 10 +++++----- gpMgmt/test/behave/mgmt_utils/analyzedb.feature | 8 ++++++++ .../mgmt_utils/steps/analyzedb_mgmt_utils.py | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/gpMgmt/bin/analyzedb b/gpMgmt/bin/analyzedb index 3e81d7283ada..d0ffb22542a0 100755 --- a/gpMgmt/bin/analyzedb +++ b/gpMgmt/bin/analyzedb @@ -57,7 +57,7 @@ WHERE pp.paristemplate = false AND pp.parrelid = cl.oid AND pr1.paroid = pp.oid GET_ALL_DATA_TABLES_SQL = """ select n.nspname as schemaname, c.relname as tablename from pg_class c, pg_namespace n where -c.relnamespace = n.oid and c.relkind='r'::char and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select reloid from pg_exttable) +c.relnamespace = n.oid and (c.relkind='r'::char or c.relkind = 'm'::char) and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select reloid from pg_exttable) EXCEPT select distinct schemaname, tablename from (%s) AS pps1 EXCEPT @@ -66,7 +66,7 @@ select distinct partitionschemaname, parentpartitiontablename from (%s) AS pps2 GET_VALID_DATA_TABLES_SQL = """ select n.nspname as schemaname, c.relname as tablename from pg_class c, pg_namespace n where -c.relnamespace = n.oid and c.oid in (%s) and c.relkind='r'::char and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select reloid from pg_exttable) +c.relnamespace = n.oid and c.oid in (%s) and (c.relkind='r'::char or c.relkind = 'm'::char) and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select reloid from pg_exttable) """ GET_REQUESTED_AO_DATA_TABLE_INFO_SQL = """ @@ -90,7 +90,7 @@ GET_REQUESTED_LAST_OP_INFO_SQL = """ GET_ALL_DATA_TABLES_IN_SCHEMA_SQL = """ select n.nspname as schemaname, c.relname as tablename from pg_class c, pg_namespace n where -c.relnamespace = n.oid and c.relkind='r'::char and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select reloid from pg_exttable) +c.relnamespace = n.oid and (c.relkind='r'::char or c.relkind = 'm'::char) and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select reloid from pg_exttable) and n.nspname = '%s' EXCEPT select distinct schemaname, tablename from (%s) AS pps1 @@ -111,7 +111,7 @@ select distinct partitionschemaname, parentpartitiontablename from (%s) AS pps1 GET_REQUESTED_NON_AO_TABLES_SQL = """ select n.nspname as schemaname, c.relname as tablename from pg_class c, pg_namespace n where -c.relnamespace = n.oid and c.relkind='r'::char and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') +c.relnamespace = n.oid and (c.relkind='r'::char or c.relkind = 'm'::char) and (c.relnamespace >= 16384 or n.nspname = 'public' or n.nspname = 'pg_catalog') and c.oid not in (select relid from pg_appendonly) and c.oid in (%s) and c.oid not in (select reloid from pg_exttable) EXCEPT select distinct schemaname, tablename from (%s) AS pps1 @@ -564,7 +564,7 @@ class AnalyzeDb(Operation): At the same time, parse the requested columns and populate the col_dict. If a requested table is partitioned, expand all the leaf partitions. 
""" - logger.info("Getting and verifying input tables...") + logger.info("Getting and verifying input tables and materialized views...") if self.single_table: # Check that the table name given on the command line is schema-qualified. diff --git a/gpMgmt/test/behave/mgmt_utils/analyzedb.feature b/gpMgmt/test/behave/mgmt_utils/analyzedb.feature index 4165310d6426..a673f88a0897 100644 --- a/gpMgmt/test/behave/mgmt_utils/analyzedb.feature +++ b/gpMgmt/test/behave/mgmt_utils/analyzedb.feature @@ -1777,3 +1777,11 @@ Feature: Incrementally analyze the database And the user runs "dropdb schema_with_temp_table" And the user drops the named connection "default" + Scenario: analyzedb finds materialized views + Given a materialized view "public.mv_test_view" exists on table "pg_class" + And the user runs "analyzedb -a -d incr_analyze" + Then analyzedb should print "-public.mv_test_view" to stdout + And the user runs "analyzedb -a -s public -d incr_analyze" + Then analyzedb should print "-public.mv_test_view" to stdout + And the user runs "analyzedb -a -t public.mv_test_view -d incr_analyze" + Then analyzedb should print "-public.mv_test_view" to stdout diff --git a/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py index f418cdbd703f..2ce4aacce7d6 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py @@ -35,7 +35,6 @@ """ - @given('there is a regular "{storage_type}" table "{tablename}" with column name list "{col_name_list}" and column type list "{col_type_list}" in schema "{schemaname}"') def impl(context, storage_type, tablename, col_name_list, col_type_list, schemaname): schemaname_no_quote = schemaname @@ -93,6 +92,12 @@ def impl(context, view_name, table_name): create_view_on_table(context.conn, view_name, table_name) +@given('a materialized view "{view_name}" exists on table "{table_name}"') +def impl(context, view_name, table_name): + create_materialized_view_on_table_in_schema(context.conn, viewname=view_name, + tablename=table_name) + + @given('"{qualified_table}" appears in the latest state files') @then('"{qualified_table}" should appear in the latest state files') def impl(context, qualified_table): @@ -448,3 +453,11 @@ def create_view_on_table(conn, viewname, tablename): " AS SELECT * FROM " + tablename dbconn.execSQL(conn, query) conn.commit() + + +def create_materialized_view_on_table_in_schema(conn, tablename, viewname): + query = "DROP MATERIALIZED VIEW IF EXISTS " + viewname + ";" \ + "CREATE MATERIALIZED VIEW " + viewname + \ + " AS SELECT * FROM " + tablename + dbconn.execSQL(conn, query) + conn.commit() From 4b2ff712bc8d22201c21f6f5c05cfa7faeade4b3 Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Tue, 19 Sep 2023 09:52:47 -0600 Subject: [PATCH 007/106] docs - use resource groups to limit greenplum_fdw concurrency (#16382) * docs - use resource groups to limit greenplum_fdw concurrency * review edits requested * below -> above --- .../ref_guide/modules/greenplum_fdw.html.md | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md b/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md index 125d010624aa..0da8f73d906e 100644 --- a/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md @@ -74,7 +74,7 @@ num_segments option, the default value is the number of segments on the 
local Greenplum Database cluster. -The following example command creates a server named `gpc1_testdb` that will be used to access tables residing in the database named `testdb` on the remote `8`-segment Greenplum Database cluster whose master is running on the host `gpc1_master`, port `5432`: +The following example command creates a server named `gpc1_testdb` that will be used to access tables residing in the database named `testdb` on the remote 8-segment Greenplum Database cluster whose master is running on the host `gpc1_master`, port `5432`: ``` CREATE SERVER gpc1_testdb FOREIGN DATA WRAPPER greenplum_fdw @@ -164,6 +164,55 @@ Setting this option at the foreign table-level overrides a foreign server-level `greenplum_fdw` manages transactions as described in the [Transaction Management](https://www.postgresql.org/docs/9.4/postgres-fdw.html) topic in the PostgreSQL `postgres_fdw` documentation. +## About Using Resource Groups to Limit Concurrency + +You can create a dedicated user and resource group to manage `greenplum_fdw` concurrency on the remote Greenplum clusters. In the following example scenario, local cluster 2 reads data from remote cluster 1. + +Remote cluster (1) configuration: + +1. Create a dedicated Greenplum Database user/role to represent the `greenplum_fdw` users on cluster 2 that initiate queries. For example, to create a role named `gpcluster2_users`: + + ``` + CREATE ROLE gpcluster2_users; + ``` + +1. Create a dedicated resource group to manage resources for these users: + + ``` + CREATE RESOURCE GROUP rg_gpcluster2_users with (concurrency=2, cpu_max_percent=20); + ALTER ROLE gpcluster2_users RESOURCE GROUP rg_gpcluster2_users; + ``` + + When you configure the remote cluster as described above, the `rg_gpcluster2_users` resource group manages the resources used by all queries that are initiated by `gpcluster2_users`. + +Local cluster (2) configuration: + +1. Create a `greenplum_fdw` foreign server to access the remote cluster. For example, to create a server named `gpc1_testdb` that accesses the `testdb` database: + + ``` + CREATE SERVER gpc1_testdb FOREIGN DATA WRAPPER greenplum_fdw + OPTIONS (host 'gpc1_master', port '5432', dbname 'testdb', mpp_execute 'all segments', ); + ``` + +1. Map local users of the `greenplum_fdw` foreign server to the remote role. For example, to map specific users of the `gpc1_testdb` server on the local cluster to the `gpcluster2_users` role on the remote cluster: + + ``` + CREATE USER MAPPING FOR greenplum_fdw_user1 SERVER gpc1_testdb + OPTIONS (user ‘gpcluster2_users’, password ‘changeme’); + CREATE USER MAPPING FOR greenplum_fdw_user2 SERVER gpc1_testdb + OPTIONS (user ‘gpcluster2_users’, password ‘changeme’); + ``` + +1. Create a foreign table referencing a table on the remote cluster. For example to create a foreign table that references table `t1` on the remote cluster: + + ``` + CREATE FOREIGN TABLE table_on_cluster1 ( tc1 int ) + SERVER gpc1_testdb + OPTIONS (schema_name 'public', table_name 't1', mpp_execute 'all segments'); + ``` + +All local queries on foreign table `table_on_cluster1` are bounded on the remote cluster by the `rg_gpcluster2_users` resource group limits. 
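
To complement the SQL steps above, the following minimal Python sketch illustrates the intended effect of the remote resource group: it starts several concurrent local sessions that each query the foreign table, so at most `concurrency=2` of the forwarded queries run on the remote cluster at once while the rest should queue there. It assumes the `psycopg2` driver and placeholder local connection settings; only the role, foreign table, and resource group names come from the example above.

``` python
# Illustrative sketch: concurrent local queries against the foreign table.
# The DSN below is a placeholder; adjust host, port, dbname, and password.
import threading
import time
import psycopg2

LOCAL_DSN = "host=local_coordinator port=5432 dbname=testdb user=greenplum_fdw_user1"

def run_query(worker_id):
    start = time.time()
    conn = psycopg2.connect(LOCAL_DSN)
    try:
        with conn.cursor() as cur:
            # Each query is forwarded to the remote cluster as gpcluster2_users,
            # so admission there is governed by the rg_gpcluster2_users resource group.
            cur.execute("SELECT count(*) FROM table_on_cluster1;")
            count = cur.fetchone()[0]
        print(f"worker {worker_id}: {count} remote rows in {time.time() - start:.1f}s")
    finally:
        conn.close()

threads = [threading.Thread(target=run_query, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```

Workers beyond the remote group's `concurrency` setting should simply wait in the remote group's queue rather than fail, which is the behavior the dedicated role and resource group are meant to provide.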
+ ## Known Issues and Limitations The `greenplum_fdw` module has the following known issues and limitations: From aa01025a04da23aebc76c15b445ab14ebecc2619 Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Tue, 19 Sep 2023 10:34:15 -0600 Subject: [PATCH 008/106] docs - add install instructs for madlib 2.0.0 for 6x (#16383) --- gpdb-doc/markdown/analytics/madlib.html.md | 68 ++++++++++++++----- .../platform-requirements-overview.md.hbs | 2 +- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/gpdb-doc/markdown/analytics/madlib.html.md b/gpdb-doc/markdown/analytics/madlib.html.md index 2104d9d0ae3e..cdbbdd374df0 100644 --- a/gpdb-doc/markdown/analytics/madlib.html.md +++ b/gpdb-doc/markdown/analytics/madlib.html.md @@ -4,14 +4,6 @@ title: Machine Learning and Deep Learning using MADlib Apache MADlib is an open-source library for scalable in-database analytics. The Greenplum MADlib extension provides the ability to run machine learning and deep learning workloads in a Greenplum Database. -This chapter includes the following information: - -- [Installing MADlib](#topic3) -- [Upgrading MADlib](#topic_eqm_klx_hw) -- [Uninstalling MADlib](#topic6) -- [Examples](#topic9) -- [References](#topic10) - You can install it as an extension in a Greenplum Database system you can run data-parallel implementations of mathematical, statistical, graph, machine learning, and deep learning methods on structured and unstructured data. For Greenplum and MADlib version compatibility, refer to [MADlib FAQ](https://cwiki.apache.org/confluence/display/MADLIB/FAQ#FAQ-Q1-2WhatdatabaseplatformsdoesMADlibsupportandwhatistheupgradematrix?). MADlib’s suite of SQL-based algorithms run at scale within a single Greenplum Database engine without needing to transfer data between the database and other tools. @@ -53,9 +45,22 @@ For information about PivotalR, including supported MADlib functionality, see [h The R package for PivotalR can be found at [https://cran.r-project.org/web/packages/PivotalR/index.html](https://cran.r-project.org/web/packages/PivotalR/index.html). +## Prerequisites + +> **Important** Greenplum Database supports MADlib version 2.x for VMware Greenplum 6.x on RHEL8 platforms only. Upgrading from MADlib version 1.x to version 2.x is not supported. + +MADlib requires the `m4` macro processor version 1.4.13 or later. Ensure that you have access to, or superuser permissions to install, this package on each Greenplum Database host. + +MADlib 2.x requires Python 3. If you are installing version 2.x, you must also set up the Python 3 environment by registering the `python3u` extension in all databases that will use MADlib: + +``` +CREATE EXTENSION python3u; +``` + +You must register the extension before you install MADlib 2.x. + ## Installing MADlib -> **Note** MADlib requires the `m4` macro processor version 1.4.13 or later. To install MADlib on Greenplum Database, you first install a compatible Greenplum MADlib package and then install the MADlib function libraries on all databases that will use MADlib. @@ -65,23 +70,38 @@ If you have GPUs installed on some or across all hosts in the cluster, then the ### Installing the Greenplum Database MADlib Package -Before you install the MADlib package, make sure that your Greenplum database is running, you have sourced `greenplum_path.sh`, and that the`$MASTER_DATA_DIRECTORY` and `$GPHOME` variables are set. 
+Before you install the MADlib package, make sure that your Greenplum database is running, you have sourced `greenplum_path.sh`, and that the `$MASTER_DATA_DIRECTORY` and `$GPHOME` environment variables are set. -1. Download the MADlib extension package from [VMware Tanzu Network](https://network.pivotal.io/products/pivotal-gpdb). +1. Download the MADlib extension package from [VMware Tanzu Network](https://network.tanzu.vmware.com/products/vmware-greenplum/). 2. Copy the MADlib package to the Greenplum Database master host. 3. Follow the instructions in [Verifying the Greenplum Database Software Download](../install_guide/verify_sw.html) to verify the integrity of the **Greenplum Advanced Analytics MADlib** software. 4. Unpack the MADlib distribution package. For example: + + To unpack version 1.21: ``` $ tar xzvf madlib-1.21.0+1-gp6-rhel7-x86_64.tar.gz ``` + To unpack version 2.0.0: + + ``` + $ tar xzvf madlib-2.0.0-gp6-rhel8-x86_64.tar.gz + ``` + 5. Install the software package by running the `gppkg` command. For example: + To install version 1.21: + ``` $ gppkg -i ./madlib-1.21.0+1-gp6-rhel7-x86_64/madlib-1.21.0+1-gp6-rhel7-x86_64.gppkg ``` + To install version 2.0.0: + + ``` + $ gppkg -i ./madlib-2.0.0-gp6-rhel8-x86_64/madlib-2.0.0-gp6-rhel8-x86_64.gppkg + ``` ### Adding MADlib Functions to a Database @@ -107,15 +127,19 @@ $ madpack -s madlib -p greenplum -c gpadmin@mdw:5432/testdb install-check > **Note** The command `madpack -h` displays information for the utility. -## Upgrading MADlib +## Upgrading MADlib + +> **Important** Greenplum Database does not support directly upgrading from MADlib 1.x to version 2.x. You must back up your MADlib models, uninstall version 1.x, install version 2.x, and reload the models. -You upgrade an installed MADlib package with the Greenplum Database `gppkg` utility and the MADlib `madpack` command. +You upgrade an installed MADlib version 1.x package with the Greenplum Database `gppkg` utility and the MADlib `madpack` command. For information about the upgrade paths that MADlib supports, see the MADlib support and upgrade matrix in the [MADlib FAQ page](https://cwiki.apache.org/confluence/display/MADLIB/FAQ#FAQ-Q1-2WhatdatabaseplatformsdoesMADlibsupportandwhatistheupgradematrix?). -### Upgrading a MADlib Package +### Upgrading a MADlib 1.x Package -To upgrade MADlib, run the `gppkg` utility with the `-u` option. This command upgrades an installed MADlib package to MADlib 1.21.0+1. +> **Important** Greenplum Database does not support upgrading from MADlib version 1.x to version 2.x. + +To upgrade MADlib, run the `gppkg` utility with the `-u` option. This command upgrades an installed MADlib 1.x package to MADlib 1.21.0+1. ``` $ gppkg -u madlib-1.21.0+1-gp6-rhel7-x86_64.gppkg @@ -123,9 +147,9 @@ $ gppkg -u madlib-1.21.0+1-gp6-rhel7-x86_64.gppkg ### Upgrading MADlib Functions -After you upgrade the MADlib package from one major version to another, run `madpack upgrade` to upgrade the MADlib functions in a database schema. +After you upgrade the MADlib package from one minor version to another, run `madpack upgrade` to upgrade the MADlib functions in a database schema. -> **Note** Use `madpack upgrade` only if you upgraded a major MADlib package version, for example from 1.19.0 to 1.21.0. You do not need to update the functions within a patch version upgrade, for example from 1.16+1 to 1.16+3. +> **Note** Use `madpack upgrade` only if you upgraded a minor MADlib package version, for example from 1.19.0 to 1.21.0. 
You do not need to update the functions within a patch version upgrade, for example from 1.16+1 to 1.16+3. This example command upgrades the MADlib functions in the schema `madlib` of the Greenplum Database `test`. @@ -150,12 +174,20 @@ $ madpack -s madlib -p greenplum -c gpadmin@mdw:5432/testdb uninstall ### Uninstall the Greenplum Database MADlib Package -If no databases use the MADlib functions, use the Greenplum `gppkg` utility with the `-r` option to uninstall the MADlib package. When removing the package you must specify the package and version. This example uninstalls MADlib package version 1.21. +If no databases use the MADlib functions, use the Greenplum `gppkg` utility with the `-r` option to uninstall the MADlib package. When removing the package you must specify the package and version. For example: + +To uninstall MADlib package version 1.21.0: ``` $ gppkg -r madlib-1.21.0+1-gp6-rhel7-x86_64 ``` +To uninstall MADlib package version 2.0.0: + +``` +$ gppkg -r madlib-2.0.0-gp6-rhel8-x86_64 +``` + You can run the `gppkg` utility with the options `-q --all` to list the installed extensions and their versions. After you uninstall the package, restart the database. diff --git a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs index 2d6b0bbfbc29..526b34da466d 100644 --- a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs +++ b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs @@ -191,7 +191,7 @@ This table lists the versions of the Greenplum Extensions that are compatible wi MADlib Machine Learning -1.21, 1.20, 1.19, 1.18, 1.17, 1.16 +2.0, 1.21, 1.20, 1.19, 1.18, 1.17, 1.16 Support matrix at MADlib FAQ. From 312bb0ad554ef95a878148bd0785a0cdf1d9139d Mon Sep 17 00:00:00 2001 From: Marbin Tan Date: Fri, 8 Sep 2023 14:49:18 -0700 Subject: [PATCH 009/106] Remove `getaddrinfo` in `SendDummyPacket()` to address malloc deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `SendDummyPacket` eventually calls `getaddrinfo` (which is a reentrant), however, `getaddrinfo` is not an async-signal-safe function. `getaddrinfo` internally calls `malloc`, which is strongly advised to not do within a signal handler as it may cause deadlocks. Cache the accepted socket information for the listener, so that it can be reused in `SendDummyPacket()`. The purpose of `SendDummyPacket` is to exit more quickly; it circumvents the polling that happens, which eventually times out after 250ms. Without `SendDummyPacket()`, there will be multiple test failures since some tests expects the backend connection to terminate almost immediately. To view all the async-signal-safe functions, please view the signal-safety(7) — Linux manual page. Reviewed-by: Soumyadeep Chakraborty Reviewed-by: Andrew Repp This commit is inspired by 91a9a57eb131e21c96cccbac16f0a5ab024e2215. This is not a direct cherry-pick as there were conflicts, so I did most of the changes manually. --- src/backend/cdb/motion/ic_udpifc.c | 108 ++++++++++------------------- 1 file changed, 38 insertions(+), 70 deletions(-) diff --git a/src/backend/cdb/motion/ic_udpifc.c b/src/backend/cdb/motion/ic_udpifc.c index f007bb8c94c7..003136e488dd 100644 --- a/src/backend/cdb/motion/ic_udpifc.c +++ b/src/backend/cdb/motion/ic_udpifc.c @@ -635,6 +635,9 @@ typedef struct ICStatistics /* Statistics for UDP interconnect. 
*/ static ICStatistics ic_statistics; +static struct addrinfo udp_dummy_packet_addrinfo; +static struct sockaddr udp_dummy_packet_sockaddr; + /*========================================================================= * STATIC FUNCTIONS declarations */ @@ -659,7 +662,8 @@ static void SendDummyPacket(void); static void getSockAddr(struct sockaddr_storage *peer, socklen_t *peer_len, const char *listenerAddr, int listenerPort); static void setXmitSocketOptions(int txfd); static uint32 setSocketBufferSize(int fd, int type, int expectedSize, int leastSize); -static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily); +static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, + int *txFamily, struct addrinfo *listenerAddrinfo, struct sockaddr *listenerSockaddr); static ChunkTransportStateEntry *startOutgoingUDPConnections(ChunkTransportState *transportStates, Slice *sendSlice, int *pOutgoingCount); @@ -1155,7 +1159,7 @@ resetRxThreadError() * Setup udp listening socket. */ static void -setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily) +setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily, struct addrinfo *listenerAddrinfo, struct sockaddr *listenerSockaddr) { int errnoSave; int fd = -1; @@ -1300,6 +1304,16 @@ setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFami goto error; } + /* + * cache the successful addrinfo and sockaddr of the listening socket, so + * we can use this information to connect to the listening socket. + */ + if (listenerAddrinfo != NULL && listenerSockaddr != NULL ) + { + memcpy(listenerAddrinfo, rp, sizeof(udp_dummy_packet_addrinfo)); + memcpy(listenerSockaddr, rp->ai_addr, sizeof(udp_dummy_packet_sockaddr)); + } + freeaddrinfo(addrs); /* No longer needed */ /* @@ -1438,8 +1452,9 @@ InitMotionUDPIFC(int *listenerSocketFd, uint16 *listenerPort) /* * setup listening socket and sending socket for Interconnect. */ - setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily); - setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily); + setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily, + &udp_dummy_packet_addrinfo, &udp_dummy_packet_sockaddr); + setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily, NULL, NULL); /* Initialize receive control data. 
*/ resetMainThreadWaiting(&rx_control_info.mainWaitingState); @@ -1540,6 +1555,9 @@ CleanupMotionUDPIFC(void) ICSenderPort = 0; ICSenderFamily = 0; + memset(&udp_dummy_packet_addrinfo, 0, sizeof(udp_dummy_packet_addrinfo)); + memset(&udp_dummy_packet_sockaddr, 0, sizeof(udp_dummy_packet_sockaddr)); + #ifdef USE_ASSERT_CHECKING /* @@ -6875,74 +6893,37 @@ WaitInterconnectQuitUDPIFC(void) static void SendDummyPacket(void) { - int sockfd = -1; int ret; struct addrinfo *addrs = NULL; - struct addrinfo *rp; - struct addrinfo hint; - uint16 udp_listener; - char port_str[32] = {0}; + uint16 udp_listener_port; char *dummy_pkt = "stop it"; int counter; + struct sockaddr_in *addr_in = NULL; + struct sockaddr_in dest_addr; + /* * Get address info from interconnect udp listener port */ - udp_listener = (Gp_listener_port >> 16) & 0x0ffff; - snprintf(port_str, sizeof(port_str), "%d", udp_listener); - - MemSet(&hint, 0, sizeof(hint)); - hint.ai_socktype = SOCK_DGRAM; - hint.ai_family = AF_UNSPEC; /* Allow for IPv4 or IPv6 */ - - /* Never do name resolution */ -#ifdef AI_NUMERICSERV - hint.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV; -#else - hint.ai_flags = AI_NUMERICHOST; -#endif + udp_listener_port = (Gp_listener_port >> 16) & 0x0ffff; - ret = pg_getaddrinfo_all(interconnect_address, port_str, &hint, &addrs); - if (ret || !addrs) - { - elog(LOG, "send dummy packet failed, pg_getaddrinfo_all(): %m"); - goto send_error; - } - - for (rp = addrs; rp != NULL; rp = rp->ai_next) - { - /* Create socket according to pg_getaddrinfo_all() */ - sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); - if (sockfd < 0) - continue; - - if (!pg_set_noblock(sockfd)) - { - if (sockfd >= 0) - { - closesocket(sockfd); - sockfd = -1; - } - continue; - } - break; - } - - if (rp == NULL) - { - elog(LOG, "send dummy packet failed, create socket failed: %m"); - goto send_error; - } + addr_in = (struct sockaddr_in *) &udp_dummy_packet_sockaddr; + memset(&dest_addr, 0, sizeof(dest_addr)); + dest_addr.sin_family = addr_in->sin_family; + dest_addr.sin_port = htons(udp_listener_port); + dest_addr.sin_addr.s_addr = addr_in->sin_addr.s_addr; /* - * Send a dummy package to the interconnect listener, try 10 times + * Send a dummy package to the interconnect listener, try 10 times. + * We don't want to close the socket at the end of this function, since + * the socket will eventually close during the motion layer cleanup. 
*/ counter = 0; while (counter < 10) { counter++; - ret = sendto(sockfd, dummy_pkt, strlen(dummy_pkt), 0, rp->ai_addr, rp->ai_addrlen); + ret = sendto(ICSenderSocket, dummy_pkt, strlen(dummy_pkt), 0, (struct sockaddr *) &dest_addr, sizeof(dest_addr)); if (ret < 0) { if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) @@ -6950,7 +6931,7 @@ SendDummyPacket(void) else { elog(LOG, "send dummy packet failed, sendto failed: %m"); - goto send_error; + return; } } break; @@ -6958,21 +6939,8 @@ SendDummyPacket(void) if (counter >= 10) { - elog(LOG, "send dummy packet failed, sendto failed: %m"); - goto send_error; + elog(LOG, "send dummy packet failed, sendto failed with 10 times: %m"); } - - pg_freeaddrinfo_all(hint.ai_family, addrs); - closesocket(sockfd); - return; - -send_error: - - if (addrs) - pg_freeaddrinfo_all(hint.ai_family, addrs); - if (sockfd != -1) - closesocket(sockfd); - return; } uint32 From c3811491d84944eae455dcb8d9119858deae31c2 Mon Sep 17 00:00:00 2001 From: Chandan Kunal Date: Wed, 20 Sep 2023 15:24:23 +0530 Subject: [PATCH 010/106] Fix derived distribution spec for CPhysicalJoin (#16423) Problem: Wrong results generated for subquery in projection list for replicated tables. Analysis: To derive distribution for any join operator, CPhysicalJoin::PdsDerive() is invoked. For deriving distribution it checks DistributionSpec for outer and inner children. when we have DistributionSpec for outer child as replicate and inner child as universal then we return universal as derived distribution. Eventually "Gather Motion" is not created and as data is not there with coordinator so it gives no rows as output. Backport of https://github.com/greenplum-db/gpdb/commit/5c36a44ab03c43e82ef08006ad6021773c6176b6 --- .../minidump/JoinOnReplicatedUniversal.mdp | 294 ++++++++++++++++++ .../data/dxl/minidump/SubqueryOuterRefTVF.mdp | 4 +- .../libgpopt/src/operators/CPhysicalJoin.cpp | 10 +- src/backend/gporca/server/CMakeLists.txt | 3 +- src/test/regress/expected/rpt.out | 39 ++- src/test/regress/expected/rpt_optimizer.out | 43 +++ src/test/regress/sql/rpt.sql | 9 + 7 files changed, 394 insertions(+), 8 deletions(-) create mode 100644 src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp diff --git a/src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp b/src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp new file mode 100644 index 000000000000..cef2133ef16b --- /dev/null +++ b/src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp @@ -0,0 +1,294 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp b/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp index 3f1c092385db..edabce95f90c 100644 --- a/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp +++ b/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp @@ -274,7 +274,7 @@ - + @@ -290,7 +290,7 @@ - + diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp 
b/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp index b92ce281a8f5..3267296e50aa 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp @@ -458,11 +458,13 @@ CPhysicalJoin::PdsDerive(CMemoryPool *mp, CExpressionHandle &exprhdl) const CDistributionSpec *pds; - if (CDistributionSpec::EdtStrictReplicated == pdsOuter->Edt() || - CDistributionSpec::EdtTaintedReplicated == pdsOuter->Edt() || - CDistributionSpec::EdtUniversal == pdsOuter->Edt()) + if ((CDistributionSpec::EdtStrictReplicated == pdsOuter->Edt() || + CDistributionSpec::EdtTaintedReplicated == pdsOuter->Edt() || + CDistributionSpec::EdtUniversal == pdsOuter->Edt()) && + CDistributionSpec::EdtUniversal != pdsInner->Edt()) { - // if outer is replicated/universal, return inner distribution + // if outer is replicated/universal and inner is not universal + // then return inner distribution pds = pdsInner; } else diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt index 737825722603..0fa4d6ac7b07 100644 --- a/src/backend/gporca/server/CMakeLists.txt +++ b/src/backend/gporca/server/CMakeLists.txt @@ -335,7 +335,8 @@ ReplicatedJoinRandomDistributedTable ReplicatedLOJHashDistributedTable ReplicatedLOJRandomDistributedTable ReplicatedLOJReplicated ReplicatedNLJReplicated ReplicatedTableAggregate ReplicatedTableCTE ReplicatedTableGroupBy ReplicatedJoinPartitionedTable -ReplicatedTableInClause ReplicatedTableSequenceInsert; +ReplicatedTableInClause ReplicatedTableSequenceInsert +JoinOnReplicatedUniversal; CTaintedReplicatedTest: InsertNonSingleton NonSingleton TaintedReplicatedAgg TaintedReplicatedWindowAgg TaintedReplicatedLimit TaintedReplicatedFilter diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index 3f69ca175a3c..c71bcd1d0b13 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -1341,7 +1341,7 @@ explain (costs off) select * from rep_tab; set optimizer_enable_replicated_table=off; set optimizer_trace_fallback=on; explain (costs off) select * from rep_tab; - QUERY PLAN + QUERY PLAN ------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) -> Seq Scan on rep_tab @@ -1350,6 +1350,43 @@ explain (costs off) select * from rep_tab; reset optimizer_trace_fallback; reset optimizer_enable_replicated_table; +-- Ensure plan with Gather Motion node is generated. 
+drop table if exists t; +NOTICE: table "t" does not exist, skipping +create table t (i int, j int) distributed replicated; +insert into t values (1, 2); +explain (costs off) select j, (select j) AS "Correlated Field" from t; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t + SubPlan 1 (slice1; segments: 1) + -> Result + Optimizer: Postgres query optimizer +(5 rows) + +select j, (select j) AS "Correlated Field" from t; + j | Correlated Field +---+------------------ + 2 | 2 +(1 row) + +explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t; + QUERY PLAN +------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t + InitPlan 1 (returns $0) (slice2) + -> Result + Optimizer: Postgres query optimizer +(5 rows) + +select j, (select 5) AS "Uncorrelated Field" from t; + j | Uncorrelated Field +---+-------------------- + 2 | 5 +(1 row) + -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 13 other objects diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index b9b770382279..4f098d02720f 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -1348,6 +1348,49 @@ WARNING: relcache reference leak: relation "rep_tab" not closed reset optimizer_trace_fallback; reset optimizer_enable_replicated_table; +-- Ensure plan with Gather Motion node is generated. +drop table if exists t; +NOTICE: table "t" does not exist, skipping +create table t (i int, j int) distributed replicated; +insert into t values (1, 2); +explain (costs off) select j, (select j) AS "Correlated Field" from t; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Result + -> Seq Scan on t + SubPlan 1 (slice1; segments: 1) + -> Result + -> Result + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +select j, (select j) AS "Correlated Field" from t; + j | Correlated Field +---+------------------ + 2 | 2 +(1 row) + +explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Result + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on t + -> Materialize + -> Result + -> Result + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +select j, (select 5) AS "Uncorrelated Field" from t; + j | Uncorrelated Field +---+-------------------- + 2 | 5 +(1 row) + -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 13 other objects diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index e7793c3f436d..561fb2794373 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -549,6 +549,15 @@ explain (costs off) select * from rep_tab; reset optimizer_trace_fallback; reset optimizer_enable_replicated_table; +-- Ensure plan with Gather Motion node is generated. 
+drop table if exists t; +create table t (i int, j int) distributed replicated; +insert into t values (1, 2); +explain (costs off) select j, (select j) AS "Correlated Field" from t; +select j, (select j) AS "Correlated Field" from t; +explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t; +select j, (select 5) AS "Uncorrelated Field" from t; + -- start_ignore drop schema rpt cascade; -- end_ignore From 2addf9d2d842b8ef3c41c1b79243c20e474821d2 Mon Sep 17 00:00:00 2001 From: Marbin Tan Date: Wed, 20 Sep 2023 15:29:42 -0700 Subject: [PATCH 011/106] Revert "Remove `getaddrinfo` in `SendDummyPacket()` to address malloc deadlock" This reverts commit 312bb0ad554ef95a878148bd0785a0cdf1d9139d. --- src/backend/cdb/motion/ic_udpifc.c | 108 +++++++++++++++++++---------- 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/src/backend/cdb/motion/ic_udpifc.c b/src/backend/cdb/motion/ic_udpifc.c index 003136e488dd..f007bb8c94c7 100644 --- a/src/backend/cdb/motion/ic_udpifc.c +++ b/src/backend/cdb/motion/ic_udpifc.c @@ -635,9 +635,6 @@ typedef struct ICStatistics /* Statistics for UDP interconnect. */ static ICStatistics ic_statistics; -static struct addrinfo udp_dummy_packet_addrinfo; -static struct sockaddr udp_dummy_packet_sockaddr; - /*========================================================================= * STATIC FUNCTIONS declarations */ @@ -662,8 +659,7 @@ static void SendDummyPacket(void); static void getSockAddr(struct sockaddr_storage *peer, socklen_t *peer_len, const char *listenerAddr, int listenerPort); static void setXmitSocketOptions(int txfd); static uint32 setSocketBufferSize(int fd, int type, int expectedSize, int leastSize); -static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, - int *txFamily, struct addrinfo *listenerAddrinfo, struct sockaddr *listenerSockaddr); +static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily); static ChunkTransportStateEntry *startOutgoingUDPConnections(ChunkTransportState *transportStates, Slice *sendSlice, int *pOutgoingCount); @@ -1159,7 +1155,7 @@ resetRxThreadError() * Setup udp listening socket. */ static void -setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily, struct addrinfo *listenerAddrinfo, struct sockaddr *listenerSockaddr) +setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily) { int errnoSave; int fd = -1; @@ -1304,16 +1300,6 @@ setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFami goto error; } - /* - * cache the successful addrinfo and sockaddr of the listening socket, so - * we can use this information to connect to the listening socket. - */ - if (listenerAddrinfo != NULL && listenerSockaddr != NULL ) - { - memcpy(listenerAddrinfo, rp, sizeof(udp_dummy_packet_addrinfo)); - memcpy(listenerSockaddr, rp->ai_addr, sizeof(udp_dummy_packet_sockaddr)); - } - freeaddrinfo(addrs); /* No longer needed */ /* @@ -1452,9 +1438,8 @@ InitMotionUDPIFC(int *listenerSocketFd, uint16 *listenerPort) /* * setup listening socket and sending socket for Interconnect. */ - setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily, - &udp_dummy_packet_addrinfo, &udp_dummy_packet_sockaddr); - setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily, NULL, NULL); + setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily); + setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily); /* Initialize receive control data. 
*/ resetMainThreadWaiting(&rx_control_info.mainWaitingState); @@ -1555,9 +1540,6 @@ CleanupMotionUDPIFC(void) ICSenderPort = 0; ICSenderFamily = 0; - memset(&udp_dummy_packet_addrinfo, 0, sizeof(udp_dummy_packet_addrinfo)); - memset(&udp_dummy_packet_sockaddr, 0, sizeof(udp_dummy_packet_sockaddr)); - #ifdef USE_ASSERT_CHECKING /* @@ -6893,37 +6875,74 @@ WaitInterconnectQuitUDPIFC(void) static void SendDummyPacket(void) { + int sockfd = -1; int ret; struct addrinfo *addrs = NULL; - uint16 udp_listener_port; + struct addrinfo *rp; + struct addrinfo hint; + uint16 udp_listener; + char port_str[32] = {0}; char *dummy_pkt = "stop it"; int counter; - struct sockaddr_in *addr_in = NULL; - struct sockaddr_in dest_addr; - /* * Get address info from interconnect udp listener port */ - udp_listener_port = (Gp_listener_port >> 16) & 0x0ffff; + udp_listener = (Gp_listener_port >> 16) & 0x0ffff; + snprintf(port_str, sizeof(port_str), "%d", udp_listener); + + MemSet(&hint, 0, sizeof(hint)); + hint.ai_socktype = SOCK_DGRAM; + hint.ai_family = AF_UNSPEC; /* Allow for IPv4 or IPv6 */ + + /* Never do name resolution */ +#ifdef AI_NUMERICSERV + hint.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV; +#else + hint.ai_flags = AI_NUMERICHOST; +#endif - addr_in = (struct sockaddr_in *) &udp_dummy_packet_sockaddr; - memset(&dest_addr, 0, sizeof(dest_addr)); - dest_addr.sin_family = addr_in->sin_family; - dest_addr.sin_port = htons(udp_listener_port); - dest_addr.sin_addr.s_addr = addr_in->sin_addr.s_addr; + ret = pg_getaddrinfo_all(interconnect_address, port_str, &hint, &addrs); + if (ret || !addrs) + { + elog(LOG, "send dummy packet failed, pg_getaddrinfo_all(): %m"); + goto send_error; + } + + for (rp = addrs; rp != NULL; rp = rp->ai_next) + { + /* Create socket according to pg_getaddrinfo_all() */ + sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); + if (sockfd < 0) + continue; + + if (!pg_set_noblock(sockfd)) + { + if (sockfd >= 0) + { + closesocket(sockfd); + sockfd = -1; + } + continue; + } + break; + } + + if (rp == NULL) + { + elog(LOG, "send dummy packet failed, create socket failed: %m"); + goto send_error; + } /* - * Send a dummy package to the interconnect listener, try 10 times. - * We don't want to close the socket at the end of this function, since - * the socket will eventually close during the motion layer cleanup. 
+ * Send a dummy package to the interconnect listener, try 10 times */ counter = 0; while (counter < 10) { counter++; - ret = sendto(ICSenderSocket, dummy_pkt, strlen(dummy_pkt), 0, (struct sockaddr *) &dest_addr, sizeof(dest_addr)); + ret = sendto(sockfd, dummy_pkt, strlen(dummy_pkt), 0, rp->ai_addr, rp->ai_addrlen); if (ret < 0) { if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) @@ -6931,7 +6950,7 @@ SendDummyPacket(void) else { elog(LOG, "send dummy packet failed, sendto failed: %m"); - return; + goto send_error; } } break; @@ -6939,8 +6958,21 @@ SendDummyPacket(void) if (counter >= 10) { - elog(LOG, "send dummy packet failed, sendto failed with 10 times: %m"); + elog(LOG, "send dummy packet failed, sendto failed: %m"); + goto send_error; } + + pg_freeaddrinfo_all(hint.ai_family, addrs); + closesocket(sockfd); + return; + +send_error: + + if (addrs) + pg_freeaddrinfo_all(hint.ai_family, addrs); + if (sockfd != -1) + closesocket(sockfd); + return; } uint32 From 58fbd68b8981b6c600d9408daefa68b70e1571bf Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Mon, 25 Sep 2023 19:22:16 +0100 Subject: [PATCH 012/106] Docs: add configuration parameter work_mem (#16467) * Docs: add configuration parameter work_mem * Small copyedit --------- Co-authored-by: David Yozie --- .../ref_guide/config_params/guc-list.html.md | 12 ++++++++++++ .../config_params/guc_category-list.html.md | 1 + 2 files changed, 13 insertions(+) diff --git a/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md b/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md index e06dd079a808..7fd95f055555 100644 --- a/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md +++ b/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md @@ -3251,6 +3251,18 @@ The value of [wal\_sender\_timeout](#replication_timeout) controls the time that |-----------|-------|-------------------| |integer 0- INT\_MAX/1000|10 sec|master, system, reload, superuser| +## work_mem + +Sets the maximum amount of memory to be used by a query operation (such as a sort or hash table) before writing to temporary disk files. If this value is specified without units, it is taken as kilobytes. The default value is 32 MB. Note that for a complex query, several sort or hash operations might be running in parallel; each operation will be allowed to use as much memory as this value specifies before it starts to write data into temporary files. In addition, several running sessions may be performing such operations concurrently. Therefore, the total memory used could be many times the value of `work_mem`; keep this fact in mind when choosing the value for this parameter. Sort operations are used for `ORDER BY`, `DISTINCT`, and merge joins. Hash tables are used in hash joins, hash-based aggregation, and hash-based processing of `IN` subqueries. Apart from sorting and hashing, bitmap index scans also rely on `work_mem`. Operations relying on tuplestores such as function scans, CTEs, PL/pgSQL and administration UDFs also rely on `work_mem`. + +Apart from assigning memory to specific execution operators, setting `work_mem` also influences certain query plans over others, when the Postgres-based planner is used as the optimizer. + +`work_mem` is a distinct memory management concept that does not interact with resource queue or resource group memory controls, which are imposed at the query level. 
+ +|Value Range|Default|Set Classifications| +|-----------|-------|-------------------| +|number of kilobytes|32MB|coordinator, session, reload| + ## writable\_external\_table\_bufsize Size of the buffer that Greenplum Database uses for network communication, such as the `gpfdist` utility and external web tables \(that use http\). Valid units are `KB` \(as in `128KB`\), `MB`, `GB`, and `TB`. Greenplum Database stores data in the buffer before writing the data out. For information about `gpfdist`, see the *Greenplum Database Utility Guide*. diff --git a/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md b/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md index 7386f014ba33..42e15321a9d5 100644 --- a/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md +++ b/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md @@ -76,6 +76,7 @@ These parameters control system memory usage. - [max_stack_depth](guc-list.html#max_stack_depth) - [shared_buffers](guc-list.html#shared_buffers) - [temp_buffers](guc-list.html#temp_buffers) +- [work_mem](guc-list.html#work_mem) ### OS Resource Parameters From 57afa86db61d9d98bb2475c147907cfad6c11314 Mon Sep 17 00:00:00 2001 From: Praveen Kumar Date: Tue, 26 Sep 2023 11:43:48 +0530 Subject: [PATCH 013/106] [6x]: Fix gpcheckperf discrepancy in network test results (v6.23 vs v6.25) (#16389) Fixes - https://github.com/greenplum-db/gpdb/issues/16356 * Fix gpcheckperf discrepancy in network test results (v6.23 vs v6.25) Problem - gpcheckperf provides different results when executing a sequential network test with a specified hostfile between versions 6.23 and 6.25. RCA - The problem arises from the getHostList function, which assigns GV.opt['-h'] with a list of hosts because of which the code interprets that the -h option is set with the host list.As GV.opt['-h'] is already assigned during network testing, the code incorrectly combines hosts from both the GV.opt['-f'] option and GV.opt['-h'], when it should exclusively retrieve hosts from either GV.opt['-f'] or GV.opt['-h']. It leads to redundant host list data, resulting in the observed issue Solution - Omit the assignment of the global variable GV.opt['-h'] within the "getHostList" function. Instead, utilize a function call to retrieve the host list when it is required in other parts of the code. Test - Added behave test case for the fix. 
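For reference, the user-visible behaviour this change restores can be exercised with a sequential network test driven by a hostfile. The host names and data directory below are the ones used by the new behave scenario and stand in for real cluster hosts; the utility's summary output is abbreviated:

```
$ echo -e "cdw\nsdw1" > /tmp/hostfile_gpchecknet
$ gpcheckperf -f /tmp/hostfile_gpchecknet -d /data/gpdata/ -r n
...
cdw -> sdw1
sdw1 -> cdw
...
```

With the redundant host list produced by the old getHostList(), these per-pair lines could be repeated; the behave scenario added by this change checks that each pair appears exactly once.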
--- gpMgmt/bin/gpcheckperf | 19 ++++---- .../test/unit/test_unit_gpcheckperf.py | 46 +++++++++++++++++-- .../behave/mgmt_utils/gpcheckperf.feature | 13 ++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 13 ++++++ 4 files changed, 78 insertions(+), 13 deletions(-) diff --git a/gpMgmt/bin/gpcheckperf b/gpMgmt/bin/gpcheckperf index 520b94272481..82e89703a0c0 100755 --- a/gpMgmt/bin/gpcheckperf +++ b/gpMgmt/bin/gpcheckperf @@ -149,7 +149,8 @@ def gpsync(src, dst): -P : print information showing the progress of the transfer """ proc = [] - for peer in GV.opt['-h']: + host_list = getHostList() + for peer in host_list: cmd = 'rsync -P -a -c -e "ssh -o BatchMode=yes -o StrictHostKeyChecking=no" {0} {1}:{2}' \ .format(src, unix.canonicalize(peer), dst) if GV.opt['-v']: @@ -665,10 +666,11 @@ def setupNetPerfTest(): print '-------------------' hostlist = ssh_utils.HostList() - for h in GV.opt['-h']: - hostlist.add(h) if GV.opt['-f']: hostlist.parseFile(GV.opt['-f']) + else: + for h in GV.opt['-h']: + hostlist.add(h) h = hostlist.get() if len(h) == 0: @@ -999,20 +1001,21 @@ def getHostList(): :return: returns a list of hosts """ hostlist = ssh_utils.HostList() - for h in GV.opt['-h']: - hostlist.add(h) if GV.opt['-f']: hostlist.parseFile(GV.opt['-f']) + else: + for h in GV.opt['-h']: + hostlist.add(h) try: hostlist.checkSSH() except ssh_utils.SSHError, e: sys.exit('[Error] {0}' .format(str(e))) - GV.opt['-h'] = hostlist.filterMultiHomedHosts() - if len(GV.opt['-h']) == 0: + host_list = hostlist.filterMultiHomedHosts() + if len(host_list) == 0: usage('Error: missing hosts in -h and/or -f arguments') - return GV.opt['-h'] + return host_list def main(): diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py index 644790e41634..c7efb3e99081 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckperf.py @@ -1,13 +1,17 @@ import imp import os import sys -from mock import patch +from mock import patch, MagicMock from gppylib.test.unit.gp_unittest import GpTestCase,run_tests +from gppylib.util import ssh_utils class GpCheckPerf(GpTestCase): def setUp(self): - gpcheckcat_file = os.path.abspath(os.path.dirname(__file__) + "/../../../gpcheckperf") - self.subject = imp.load_source('gpcheckperf', gpcheckcat_file) + gpcheckperf_file = os.path.abspath(os.path.dirname(__file__) + "/../../../gpcheckperf") + self.subject = imp.load_source('gpcheckperf', gpcheckperf_file) + self.mocked_hostlist = MagicMock() + ssh_utils.HostList = MagicMock(return_value=self.mocked_hostlist) + def tearDown(self): super(GpCheckPerf, self).tearDown() @@ -83,13 +87,45 @@ def test_scp_enabled(self, mock_hostlist, mock_gpscp, mock_isScpEnabled): self.subject.main() mock_gpscp.assert_called_with(src, target) - def test_gpsync_failed_to_copy(self): + @patch('gpcheckperf.getHostList', return_value=['localhost', "invalid_host"]) + def test_gpsync_failed_to_copy(self, mock_hostlist): src = '%s/lib/multidd' % os.path.abspath(os.path.dirname(__file__) + "/../../../") target = '=:tmp/' - self.subject.GV.opt['-h'] = ['localhost', "invalid_host"] with self.assertRaises(SystemExit) as e: self.subject.gpsync(src, target) self.assertIn('[Error] command failed for host:invalid_host', e.exception.code) + + def test_get_host_list_with_host_file(self): + self.subject.GV.opt = {'-f': 'hostfile.txt', '-h': ['host1', 'host2']} + self.mocked_hostlist.filterMultiHomedHosts.return_value = ['host3', 'host4'] + + result = 
self.subject.getHostList() + + self.assertEqual(result, ['host3', 'host4']) + self.mocked_hostlist.parseFile.assert_called_with('hostfile.txt') + self.mocked_hostlist.checkSSH.assert_called() + + + def test_get_host_list_without_host_file(self): + self.subject.GV.opt = {'-f': '', '-h': ['host1', 'host2']} + self.mocked_hostlist.filterMultiHomedHosts.return_value = ['host1', 'host2'] + + result = self.subject.getHostList() + + self.assertEqual(result, ['host1', 'host2']) + self.mocked_hostlist.add.assert_any_call('host1') + self.mocked_hostlist.add.assert_any_call('host2') + self.mocked_hostlist.checkSSH.assert_called() + + + def test_get_host_list_with_ssh_error(self): + self.mocked_hostlist.checkSSH.side_effect = ssh_utils.SSHError("Test ssh error") + + with self.assertRaises(SystemExit) as e: + self.subject.getHostList() + + self.assertEqual(e.exception.code, '[Error] Test ssh error') + if __name__ == '__main__': run_tests() diff --git a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature index 7f1764c25502..a39977350b21 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature @@ -126,3 +126,16 @@ Feature: Tests for gpcheckperf Then gpcheckperf should return a return code of 0 And gpcheckperf should print "--buffer-size value is not specified or invalid. Using default \(32 kilobytes\)" to stdout And gpcheckperf should print "avg = " to stdout + + + @concourse_cluster + Scenario: gpcheckperf runs sequential network test with hostfile + Given the database is running + Given the user runs command "echo -e "cdw\nsdw1" > /tmp/hostfile_gpchecknet" + When the user runs "gpcheckperf -f /tmp/hostfile_gpchecknet -d /data/gpdata/ -r n" + Then gpcheckperf should return a return code of 0 + And gpcheckperf should print the following lines 1 times to stdout + """ + cdw -> sdw1 + sdw1 -> cdw + """ diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index d4e0ddb20b74..662280896577 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -4280,3 +4280,16 @@ def impl(context, table, dbname, count): if int(count) != sum(current_row_count): raise Exception( "%s table in %s has %d rows, expected %d rows." % (table, dbname, sum(current_row_count), int(count))) + +@then('{command} should print the following lines {num} times to stdout') +def impl(context, command, num): + """ + Verify that each pattern occurs a specific number of times in the output. + """ + expected_lines = context.text.strip().split('\n') + for expected_pattern in expected_lines: + match_count = len(re.findall(re.escape(expected_pattern), context.stdout_message)) + if match_count != int(num): + raise Exception( + "Expected %s to occur %s times but Found %d times" .format(expected_pattern, num, match_count)) + From 9c7983918bdce34287fb495f0ce746f476236f73 Mon Sep 17 00:00:00 2001 From: Rakesh Sharma Date: Tue, 26 Sep 2023 14:40:02 +0530 Subject: [PATCH 014/106] Fix utilities do not honor -d flag when MASTER_DATA_DIRECTORY is not set. (#16433) (#16478) Issue: Following are the gpMgmt utilities that do not honor the -d flag, when MASTER_DATA_DIRECTORY is not set. 1. gpstart 2. gpstop 3. gpstate 4. gprecoverseg 5. gpaddmirror RCA: to get the master data directory gp.getmaster_datadir() function is called from the above-listed utilities. 
the function does not have any provision to return the master data directory which is provided with the -d flag. currently, it looks for MASTER_DATA_DIRECTORY and MASTER_DATA_DIRECTORY env variable. also in some of the utilities we were creating lock files before parsing the provided options which looks like the design flow that was causing the utilities to crash when looking for master data directory. Fix: Added a global flag which holds the data directory provided with -d option. so when we run the utility and do parsing it sets the flag with the provided datadirectory and the same will be returned when we call gp.gp.getmaster_datadir(). Test: Added behave test cases to use the provided data directory if MASTER_DATA_DIRECTORY is not set. Added behave test case to check if the provided master data directory is preferred over the already set master_data_dir env variable. it is tested by setting a wrong MASTER_DATA_DIRECTORY env variable and when we run the utility with the correct data directory using the -d option then the utility should execute successfully. --- gpMgmt/bin/gppylib/commands/gp.py | 17 ++++++- gpMgmt/bin/gppylib/mainUtils.py | 37 ++++++++------ .../behave/mgmt_utils/gprecoverseg.feature | 31 +++++++++++- gpMgmt/test/behave/mgmt_utils/gpstart.feature | 26 ++++++++++ gpMgmt/test/behave/mgmt_utils/gpstate.feature | 49 +++++++++++++++++++ gpMgmt/test/behave/mgmt_utils/gpstop.feature | 20 ++++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 11 ++++- 7 files changed, 171 insertions(+), 20 deletions(-) diff --git a/gpMgmt/bin/gppylib/commands/gp.py b/gpMgmt/bin/gppylib/commands/gp.py index 97f288e5c528..1b26ac04905d 100644 --- a/gpMgmt/bin/gppylib/commands/gp.py +++ b/gpMgmt/bin/gppylib/commands/gp.py @@ -1177,12 +1177,27 @@ def get_gphome(): raise GpError('Environment Variable GPHOME not set') return gphome +''' +gprecoverseg, gpstart, gpstate, gpstop, gpaddmirror have -d option to give the master data directory. +but its value was not used throughout the utilities. to fix this the best possible way is +to set and retrieve that set master dir when we call get_masterdatadir(). +''' +option_master_datadir = None +def set_masterdatadir(master_datadir=None): + global option_master_datadir + option_master_datadir = master_datadir ###### +# if -d is provided with utility, it will be prioritiese over other options. def get_masterdatadir(): - master_datadir = os.environ.get('MASTER_DATA_DIRECTORY') + if option_master_datadir is not None: + master_datadir = option_master_datadir + else: + master_datadir = os.environ.get('MASTER_DATA_DIRECTORY') + if not master_datadir: raise GpError("Environment Variable MASTER_DATA_DIRECTORY not set!") + return master_datadir ###### diff --git a/gpMgmt/bin/gppylib/mainUtils.py b/gpMgmt/bin/gppylib/mainUtils.py index 17ab20afde93..c43b31d41602 100644 --- a/gpMgmt/bin/gppylib/mainUtils.py +++ b/gpMgmt/bin/gppylib/mainUtils.py @@ -174,7 +174,7 @@ def acquire(self): # If the process is already killed, remove the lock directory. if not unix.check_pid(self.pidfilepid): shutil.rmtree(self.ppath) - + # try and acquire the lock try: self.pidlockfile.acquire() @@ -264,6 +264,18 @@ def simple_main(createOptionParserFn, createCommandFn, mainOptions=None): def simple_main_internal(createOptionParserFn, createCommandFn, mainOptions): + + """ + if -d option is provided in that case doing parsing after creating + lock file would not be a good idea therefore handling -d option before lock. 
+ """ + parser = createOptionParserFn() + (parserOptions, parserArgs) = parser.parse_args() + + if parserOptions.ensure_value("masterDataDirectory", None) is not None: + parserOptions.master_data_directory = os.path.abspath(parserOptions.masterDataDirectory) + gp.set_masterdatadir(parserOptions.master_data_directory) + """ If caller specifies 'pidlockpath' in mainOptions then we manage the specified pid file within the MASTER_DATA_DIRECTORY before proceeding @@ -282,13 +294,13 @@ def simple_main_internal(createOptionParserFn, createCommandFn, mainOptions): # at this point we have whatever lock we require try: - simple_main_locked(createOptionParserFn, createCommandFn, mainOptions) + simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions) finally: if sml is not None: sml.release() -def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): +def simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions): """ Not to be called externally -- use simple_main instead """ @@ -304,7 +316,6 @@ def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): parser = None forceQuiet = mainOptions is not None and mainOptions.get("forceQuietOutput") - options = None if mainOptions is not None and mainOptions.get("programNameOverride"): global gProgramName @@ -320,30 +331,24 @@ def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): hostname = unix.getLocalHostname() username = unix.getUserName() - parser = createOptionParserFn() - (options, args) = parser.parse_args() - if useHelperToolLogging: gplog.setup_helper_tool_logging(execname, hostname, username) else: gplog.setup_tool_logging(execname, hostname, username, - logdir=options.ensure_value("logfileDirectory", None), nonuser=nonuser) + logdir=parserOptions.ensure_value("logfileDirectory", None), nonuser=nonuser) if forceQuiet: gplog.quiet_stdout_logging() else: - if options.ensure_value("verbose", False): + if parserOptions.ensure_value("verbose", False): gplog.enable_verbose_logging() - if options.ensure_value("quiet", False): + if parserOptions.ensure_value("quiet", False): gplog.quiet_stdout_logging() - if options.ensure_value("masterDataDirectory", None) is not None: - options.master_data_directory = os.path.abspath(options.masterDataDirectory) - if not suppressStartupLogMessage: logger.info("Starting %s with args: %s" % (gProgramName, ' '.join(sys.argv[1:]))) - commandObject = createCommandFn(options, args) + commandObject = createCommandFn(parserOptions, parserArgs) exitCode = commandObject.run() exit_status = exitCode @@ -365,10 +370,10 @@ def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): e.cmd.results.stderr)) exit_status = 2 except Exception, e: - if options is None: + if parserOptions is None: logger.exception("%s failed. exiting...", gProgramName) else: - if options.ensure_value("verbose", False): + if parserOptions.ensure_value("verbose", False): logger.exception("%s failed. exiting...", gProgramName) else: logger.fatal("%s failed. (Reason='%s') exiting..." 
% (gProgramName, e)) diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index 25e1edb9ae09..fca67a2ac6b4 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -1,7 +1,7 @@ @gprecoverseg Feature: gprecoverseg tests - Scenario Outline: recovery works with tablespaces + Scenario Outline: recovery works with tablespaces Given the database is running And a tablespace is created with data And user stops all primary processes @@ -296,7 +296,33 @@ Feature: gprecoverseg tests And all the segments are running And the segments are synchronized - Scenario: gprecoverseg differential recovery displays rsync progress to the user + Scenario: gprecoverseg runs with given master data directory option + Given the database is running + And all the segments are running + And the segments are synchronized + And user stops all mirror processes + And user can start transactions + And "MASTER_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gprecoverseg" with master data directory and "-F -a" + And gprecoverseg should return a return code of 0 + And "MASTER_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + And the segments are synchronized + + Scenario: gprecoverseg priorities given master data directory over env option + Given the database is running + And all the segments are running + And the segments are synchronized + And user stops all mirror processes + And user can start transactions + And the environment variable "MASTER_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gprecoverseg" with master data directory and "-F -a" + And gprecoverseg should return a return code of 0 + And "MASTER_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + And the segments are synchronized + + Scenario: gprecoverseg differential recovery displays rsync progress to the user Given the database is running And all the segments are running And the segments are synchronized @@ -1410,6 +1436,7 @@ Feature: gprecoverseg tests And the segments are synchronized And the backup pid file is deleted on "primary" segment And the background pid is killed on "primary" segment + Examples: | scenario | args | | differential | -a --differential | diff --git a/gpMgmt/test/behave/mgmt_utils/gpstart.feature b/gpMgmt/test/behave/mgmt_utils/gpstart.feature index ce6787ebac22..4c7c21c74a8d 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstart.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstart.feature @@ -27,6 +27,32 @@ Feature: gpstart behave tests And gpstart should return a return code of 0 And all the segments are running + @demo_cluster + Scenario: gpstart runs with given master data directory option + Given the database is running + And running postgres processes are saved in context + And the user runs "gpstop -a" + And gpstop should return a return code of 0 + And verify no postgres process is running on all hosts + And "MASTER_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gpstart" with master data directory and "-a" + And gpstart should return a return code of 0 + And "MASTER_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + + @demo_cluster + Scenario: gpstart priorities given master data directory over env option + Given the database is running + And running postgres processes are saved in context + And the user 
runs "gpstop -a" + And gpstop should return a return code of 0 + And verify no postgres process is running on all hosts + And the environment variable "MASTER_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gpstart" with master data directory and "-a" + And gpstart should return a return code of 0 + And "MASTER_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + @concourse_cluster @demo_cluster Scenario: gpstart starts even if a segment host is unreachable diff --git a/gpMgmt/test/behave/mgmt_utils/gpstate.feature b/gpMgmt/test/behave/mgmt_utils/gpstate.feature index d126cf8d191d..6df5e71861e1 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstate.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstate.feature @@ -596,6 +596,55 @@ Feature: gpstate tests And the pg_log files on primary segments should not contain "connections to primary segments are not allowed" And the user drops log_timestamp table + Scenario: gpstate runs with given master data directory option + Given the cluster is generated with "3" primaries only + And "MASTER_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gpstate" with master data directory and "-a -b" + And gpstate should return a return code of 0 + And gpstate output has rows with keys values + | Master instance = Active | + | Master standby = No master standby configured | + | Total segment instance count from metadata = 3 | + | Primary Segment Status | + | Total primary segments = 3 | + | Total primary segment valid \(at master\) = 3 | + | Total primary segment failures \(at master\) = 0 | + | Total number of postmaster.pid files missing = 0 | + | Total number of postmaster.pid files found = 3 | + | Total number of postmaster.pid PIDs missing = 0 | + | Total number of postmaster.pid PIDs found = 3 | + | Total number of /tmp lock files missing = 0 | + | Total number of /tmp lock files found = 3 | + | Total number postmaster processes missing = 0 | + | Total number postmaster processes found = 3 | + | Mirror Segment Status | + | Mirrors not configured on this array + And "MASTER_DATA_DIRECTORY" environment variable should be restored + + Scenario: gpstate priorities given master data directory over env option + Given the cluster is generated with "3" primaries only + And the environment variable "MASTER_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gpstate" with master data directory and "-a -b" + And gpstate should return a return code of 0 + And gpstate output has rows with keys values + | Master instance = Active | + | Master standby = No master standby configured | + | Total segment instance count from metadata = 3 | + | Primary Segment Status | + | Total primary segments = 3 | + | Total primary segment valid \(at master\) = 3 | + | Total primary segment failures \(at master\) = 0 | + | Total number of postmaster.pid files missing = 0 | + | Total number of postmaster.pid files found = 3 | + | Total number of postmaster.pid PIDs missing = 0 | + | Total number of postmaster.pid PIDs found = 3 | + | Total number of /tmp lock files missing = 0 | + | Total number of /tmp lock files found = 3 | + | Total number postmaster processes missing = 0 | + | Total number postmaster processes found = 3 | + | Mirror Segment Status | + | Mirrors not configured on this array + And "MASTER_DATA_DIRECTORY" environment variable should be restored ########################### @concourse_cluster tests ########################### # The @concourse_cluster tag denotes the scenario that 
requires a remote cluster diff --git a/gpMgmt/test/behave/mgmt_utils/gpstop.feature b/gpMgmt/test/behave/mgmt_utils/gpstop.feature index 410ff6625c2c..626eb672eb18 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstop.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstop.feature @@ -10,6 +10,26 @@ Feature: gpstop behave tests Then gpstop should return a return code of 0 And verify no postgres process is running on all hosts + @demo_cluster + Scenario: gpstop runs with given master data directory option + Given the database is running + And running postgres processes are saved in context + And "MASTER_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gpstop" with master data directory and "-a" + And gpstop should return a return code of 0 + And "MASTER_DATA_DIRECTORY" environment variable should be restored + And verify no postgres process is running on all hosts + + @demo_cluster + Scenario: gpstop priorities given master data directory over env option + Given the database is running + And running postgres processes are saved in context + And the environment variable "MASTER_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gpstop" with master data directory and "-a" + And gpstop should return a return code of 0 + And "MASTER_DATA_DIRECTORY" environment variable should be restored + And verify no postgres process is running on all hosts + @concourse_cluster @demo_cluster Scenario: when there are user connections gpstop waits to shutdown until user switches to fast mode diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 662280896577..bf4cd5cd8bd1 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -362,7 +362,7 @@ def impl(context, dbname): drop_database(context, dbname) -@given('{env_var} environment variable is not set') +@given('"{env_var}" environment variable is not set') def impl(context, env_var): if not hasattr(context, 'orig_env'): context.orig_env = dict() @@ -1200,6 +1200,15 @@ def impl(context, options): context.execute_steps(u'''Then the user runs command "gpactivatestandby -a %s" from standby master''' % options) context.standby_was_activated = True + +@given('the user runs utility "{utility}" with master data directory and "{options}"') +@when('the user runs utility "{utility}" with master data directory and "{options}"') +@then('the user runs utility "{utility}" with master data directory and "{options}"') +def impl(context, utility, options): + cmd = "{} -d {} {}".format(utility, master_data_dir, options) + context.execute_steps(u'''then the user runs command "%s"''' % cmd ) + + @then('gpintsystem logs should {contain} lines about running backout script') def impl(context, contain): string_to_find = 'Run command bash .*backout_gpinitsystem.* on master to remove these changes$' From 7ba6531efb1eb075f3711384207059c24b4fd4b8 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Tue, 26 Sep 2023 18:14:35 +0800 Subject: [PATCH 015/106] fix REFRESH MATERIALIZED VIEW on AO table with index (#16485) This is the backport of #16465, to fix the issue #16447. The resolution is quite simple and direct: if an AO materialized view has indexes, create the block directory for it. 
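Distilled from the regression test added below, the sequence covered by this fix is the following minimal sketch (object names mirror the test; the final lookup is expected to return the single row '10'):

```
CREATE TABLE base_table (idn character varying(10) NOT NULL);
INSERT INTO base_table SELECT i FROM generate_series(1, 5000) i;

-- An append-only materialized view that carries a btree index.
CREATE MATERIALIZED VIEW base_view WITH (appendonly=true) AS
    SELECT tt1.idn AS idn_ban FROM base_table tt1;
CREATE INDEX test_id1 ON base_view USING btree(idn_ban);

-- Because the view has an index, the transient relation built by the
-- refresh now gets an AO block directory as well, so the index lookup
-- afterwards returns the expected row.
REFRESH MATERIALIZED VIEW base_view;
SELECT * FROM base_view WHERE idn_ban = '10';

DROP MATERIALIZED VIEW base_view;
DROP TABLE base_table;
```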
--- src/backend/commands/matview.c | 13 +++++++++++-- src/test/regress/expected/matview.out | 19 +++++++++++++++++++ .../regress/expected/matview_optimizer.out | 18 ++++++++++++++++++ src/test/regress/sql/matview.sql | 12 ++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 283e4c8ef894..c8084bb7de3f 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -174,6 +174,7 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, Oid save_userid; int save_sec_context; int save_nestlevel; + bool createAoBlockDirectory; RefreshClause *refreshClause; /* MATERIALIZED_VIEW_FIXME: Refresh MatView is not MPP-fied. */ @@ -332,13 +333,16 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, else tableSpace = matviewRel->rd_rel->reltablespace; + /* If an AO temp table has index, we need to create it. */ + createAoBlockDirectory = matviewRel->rd_rel->relhasindex; + /* * Create the transient table that will receive the regenerated data. Lock * it against access by any other process until commit (by which time it * will be gone). */ OIDNewHeap = make_new_heap(matviewOid, tableSpace, concurrent, - ExclusiveLock, false, true); + ExclusiveLock, createAoBlockDirectory, true); LockRelationOid(OIDNewHeap, AccessExclusiveLock); dest = CreateTransientRelDestReceiver(OIDNewHeap, matviewOid, concurrent, stmt->skipData); @@ -485,6 +489,7 @@ transientrel_init(QueryDesc *queryDesc) Oid OIDNewHeap; bool concurrent; LOCKMODE lockmode; + bool createAoBlockDirectory; RefreshClause *refreshClause; refreshClause = queryDesc->plannedstmt->refreshClause; @@ -515,13 +520,17 @@ transientrel_init(QueryDesc *queryDesc) { tableSpace = matviewRel->rd_rel->reltablespace; } + + /* If an AO temp table has index, we need to create it. */ + createAoBlockDirectory = matviewRel->rd_rel->relhasindex; + /* * Create the transient table that will receive the regenerated data. Lock * it against access by any other process until commit (by which time it * will be gone). */ OIDNewHeap = make_new_heap(matviewOid, tableSpace, concurrent, - ExclusiveLock, false, false); + ExclusiveLock, createAoBlockDirectory, false); LockRelationOid(OIDNewHeap, AccessExclusiveLock); queryDesc->dest = CreateTransientRelDestReceiver(OIDNewHeap, matviewOid, concurrent, diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out index 9227a113dc05..f1f67c78434c 100644 --- a/src/test/regress/expected/matview.out +++ b/src/test/regress/expected/matview.out @@ -649,3 +649,22 @@ distributed randomly; refresh materialized view mat_view_github_issue_11956; drop materialized view mat_view_github_issue_11956; drop table t_github_issue_11956; +-- test REFRESH MATERIALIZED VIEW on AO table with index +-- more details could be found at https://github.com/greenplum-db/gpdb/issues/16447 +CREATE TABLE base_table (idn character varying(10) NOT NULL); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'idn' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+INSERT INTO base_table select i from generate_series(1, 5000) i; +CREATE MATERIALIZED VIEW base_view WITH (APPENDONLY=true) AS SELECT tt1.idn AS idn_ban FROM base_table tt1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'idn_ban' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE INDEX test_id1 on base_view using btree(idn_ban); +REFRESH MATERIALIZED VIEW base_view ; +SELECT * FROM base_view where idn_ban = '10'; + idn_ban +--------- + 10 +(1 row) + +DROP MATERIALIZED VIEW base_view; +DROP TABLE base_table; diff --git a/src/test/regress/expected/matview_optimizer.out b/src/test/regress/expected/matview_optimizer.out index af498f2ef712..a4efb1caa6fd 100644 --- a/src/test/regress/expected/matview_optimizer.out +++ b/src/test/regress/expected/matview_optimizer.out @@ -650,3 +650,21 @@ distributed randomly; refresh materialized view mat_view_github_issue_11956; drop materialized view mat_view_github_issue_11956; drop table t_github_issue_11956; +-- test REFRESH MATERIALIZED VIEW on AO table with index +-- more details could be found at https://github.com/greenplum-db/gpdb/issues/16447 +CREATE TABLE base_table (idn character varying(10) NOT NULL); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'idn' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO base_table select i from generate_series(1, 5000) i; +CREATE MATERIALIZED VIEW base_view WITH (APPENDONLY=true) AS SELECT tt1.idn AS idn_ban FROM base_table tt1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. 
+CREATE INDEX test_id1 on base_view using btree(idn_ban); +REFRESH MATERIALIZED VIEW base_view ; +SELECT * FROM base_view where idn_ban = '10'; + idn_ban +--------- + 10 +(1 row) + +DROP MATERIALIZED VIEW base_view; +DROP TABLE base_table; diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql index 170e28d19ec8..e8dd415553a5 100644 --- a/src/test/regress/sql/matview.sql +++ b/src/test/regress/sql/matview.sql @@ -253,3 +253,15 @@ refresh materialized view mat_view_github_issue_11956; drop materialized view mat_view_github_issue_11956; drop table t_github_issue_11956; + +-- test REFRESH MATERIALIZED VIEW on AO table with index +-- more details could be found at https://github.com/greenplum-db/gpdb/issues/16447 +CREATE TABLE base_table (idn character varying(10) NOT NULL); +INSERT INTO base_table select i from generate_series(1, 5000) i; +CREATE MATERIALIZED VIEW base_view WITH (APPENDONLY=true) AS SELECT tt1.idn AS idn_ban FROM base_table tt1; +CREATE INDEX test_id1 on base_view using btree(idn_ban); +REFRESH MATERIALIZED VIEW base_view ; +SELECT * FROM base_view where idn_ban = '10'; + +DROP MATERIALIZED VIEW base_view; +DROP TABLE base_table; From 043b5377001d330568cdab7aa0149e67761656bc Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Tue, 26 Sep 2023 18:14:55 +0800 Subject: [PATCH 016/106] [6X] add a new GUC of gp_detect_data_correctness to detect data correctness during OS upgrade (#16367) This is the backport of #16333 During OS upgrades, such as an upgrade from CentOS 7 to CentOS 8, there could be some locale changes happen that lead to the data distribution or data partition position change. In order to detect it, we add a new GUC of gp_detect_data_correctness, if it sets to on, we will not insert data actually, we just check whether the data belongs to this segment or this partition table or not --- src/backend/cdb/cdbhash.c | 16 ++++++- src/backend/cdb/cdbvars.c | 2 + src/backend/executor/nodeModifyTable.c | 49 ++++++++++++++++++++ src/backend/utils/misc/guc_gp.c | 11 +++++ src/include/cdb/cdbhash.h | 1 + src/include/cdb/cdbvars.h | 3 ++ src/include/utils/sync_guc_name.h | 1 + src/test/isolation2/expected/guc_gp.out | 61 +++++++++++++++++++++++++ src/test/isolation2/sql/guc_gp.sql | 30 ++++++++++++ 9 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 src/test/isolation2/expected/guc_gp.out create mode 100644 src/test/isolation2/sql/guc_gp.sql diff --git a/src/backend/cdb/cdbhash.c b/src/backend/cdb/cdbhash.c index 114ed0cd1e93..dfd1a2afb57b 100644 --- a/src/backend/cdb/cdbhash.c +++ b/src/backend/cdb/cdbhash.c @@ -131,6 +131,7 @@ makeCdbHash(int numsegs, int natts, Oid *hashfuncs) CdbHash * makeCdbHashForRelation(Relation rel) { + CdbHash *h; GpPolicy *policy = rel->rd_cdbpolicy; Oid *hashfuncs; int i; @@ -149,7 +150,20 @@ makeCdbHashForRelation(Relation rel) hashfuncs[i] = cdb_hashproc_in_opfamily(opfamily, typeoid); } - return makeCdbHash(policy->numsegments, policy->nattrs, hashfuncs); + h = makeCdbHash(policy->numsegments, policy->nattrs, hashfuncs); + pfree(hashfuncs); + return h; +} + +/* release all memory of CdbHash */ +void freeCdbHash(CdbHash *hash) +{ + if (hash) + { + if (hash->hashfuncs) + pfree(hash->hashfuncs); + pfree(hash); + } } /* diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index aca9dd71db21..9b6557401b58 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -104,6 +104,8 @@ int gp_reject_percent_threshold; /* SREH reject % kicks off only bool gp_select_invisible = false; /* 
debug mode to allow select to * see "invisible" rows */ +bool gp_detect_data_correctness; /* Detect if the current data distribution is correct */ + /* * Configurable timeout for snapshot add: exceptionally busy systems may take * longer than our old hard-coded version -- so here is a tuneable version. diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 3cba0ee49db4..01bc9661a871 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -314,6 +314,55 @@ ExecInsert(TupleTableSlot *parentslot, rel_is_aorows = RelationIsAoRows(resultRelationDesc); rel_is_external = RelationIsExternal(resultRelationDesc); + /* + * if we set the GUC gp_detect_data_correctness to true, we just verify the data belongs + * to current partition and segment, we'll not insert the data really, so just return NULL. + * + * Above has already checked the partition correctness, so we just need check distribution + * correctness. + */ + if (gp_detect_data_correctness) + { + /* Initialize hash function and structure */ + CdbHash *hash; + GpPolicy *policy = resultRelationDesc->rd_cdbpolicy; + MemTuple memTuple = ExecFetchSlotMemTuple(parentslot); + + /* Skip randomly and replicated distributed relation */ + if (!GpPolicyIsHashPartitioned(policy)) + return NULL; + + hash = makeCdbHashForRelation(resultRelationDesc); + + cdbhashinit(hash); + + /* Add every attribute in the distribution policy to the hash */ + for (int i = 0; i < policy->nattrs; i++) + { + int attnum = policy->attrs[i]; + bool isNull; + Datum attr; + + attr = memtuple_getattr(memTuple, parentslot->tts_mt_bind, + attnum, &isNull); + + cdbhash(hash, i + 1, attr, isNull); + } + + /* End check if one tuple is in the wrong segment */ + if (cdbhashreduce(hash) != GpIdentity.segindex) + { + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("trying to insert row into wrong segment"))); + } + + freeCdbHash(hash); + + /* Do nothing */ + return NULL; + } + /* * Prepare the right kind of "insert desc". */ diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 841f89340059..20a63f02bca1 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3297,6 +3297,17 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"gp_detect_data_correctness", PGC_USERSET, UNGROUPED, + gettext_noop("Detect if the current partitioning of the table or data distribution is correct."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_detect_data_correctness, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL diff --git a/src/include/cdb/cdbhash.h b/src/include/cdb/cdbhash.h index 15bd957e15bb..40be9811f334 100644 --- a/src/include/cdb/cdbhash.h +++ b/src/include/cdb/cdbhash.h @@ -50,6 +50,7 @@ typedef struct CdbHash */ extern CdbHash *makeCdbHash(int numsegs, int natts, Oid *typeoids); extern CdbHash *makeCdbHashForRelation(Relation rel); +extern void freeCdbHash(CdbHash *h); /* * Initialize CdbHash for hashing the next tuple values. 
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index ba269c127776..a72d6a4202b2 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -183,6 +183,9 @@ extern int gp_reject_percent_threshold; */ extern bool gp_select_invisible; +/* Detect if the current partitioning of the table or data distribution is correct */ +extern bool gp_detect_data_correctness; + /* * Used to set the maximum length of the current query which is displayed * when the user queries pg_stat_activty table. diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 4b9a23665477..5621a9de1baa 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -18,6 +18,7 @@ "gp_blockdirectory_minipage_size", "gp_debug_linger", "gp_default_storage_options", + "gp_detect_data_correctness", "gp_disable_tuple_hints", "gp_enable_mk_sort", "gp_enable_motion_mk_sort", diff --git a/src/test/isolation2/expected/guc_gp.out b/src/test/isolation2/expected/guc_gp.out new file mode 100644 index 000000000000..70b788cd6498 --- /dev/null +++ b/src/test/isolation2/expected/guc_gp.out @@ -0,0 +1,61 @@ +-- case 1: test gp_detect_data_correctness +create table data_correctness_detect(a int, b int); +CREATE +create table data_correctness_detect_randomly(a int, b int) distributed randomly; +CREATE +create table data_correctness_detect_replicated(a int, b int) distributed replicated; +CREATE + +set gp_detect_data_correctness = on; +SET +-- should no data insert +insert into data_correctness_detect select i, i from generate_series(1, 100) i; +INSERT 0 +select count(*) from data_correctness_detect; + count +------- + 0 +(1 row) +insert into data_correctness_detect_randomly select i, i from generate_series(1, 100) i; +INSERT 0 +select count(*) from data_correctness_detect_randomly; + count +------- + 0 +(1 row) +insert into data_correctness_detect_replicated select i, i from generate_series(1, 100) i; +INSERT 0 +select count(*) from data_correctness_detect_replicated; + count +------- + 0 +(1 row) +set gp_detect_data_correctness = off; +SET + +-- insert some data that not belongs to it +1U: insert into data_correctness_detect select i, i from generate_series(1, 100) i; +INSERT 100 +1U: insert into data_correctness_detect_randomly select i, i from generate_series(1, 100) i; +INSERT 100 +1U: insert into data_correctness_detect_replicated select i, i from generate_series(1, 100) i; +INSERT 100 +set gp_detect_data_correctness = on; +SET +insert into data_correctness_detect select * from data_correctness_detect; +ERROR: trying to insert row into wrong segment (seg1 127.0.1.1:6003 pid=3027104) +insert into data_correctness_detect select * from data_correctness_detect_randomly; +INSERT 0 +insert into data_correctness_detect select * from data_correctness_detect_replicated; +INSERT 0 + +-- clean up +set gp_detect_data_correctness = off; +SET +drop table data_correctness_detect; +DROP +drop table data_correctness_detect_randomly; +DROP +drop table data_correctness_detect_replicated; +DROP + diff --git a/src/test/isolation2/sql/guc_gp.sql b/src/test/isolation2/sql/guc_gp.sql new file mode 100644 index 000000000000..2e5a560704ae --- /dev/null +++ b/src/test/isolation2/sql/guc_gp.sql @@ -0,0 +1,30 @@ +-- case 1: test gp_detect_data_correctness +create table data_correctness_detect(a int, b int); +create table data_correctness_detect_randomly(a int, b int) distributed randomly; +create table data_correctness_detect_replicated(a int, b int) distributed replicated; + +set 
gp_detect_data_correctness = on; +-- should no data insert +insert into data_correctness_detect select i, i from generate_series(1, 100) i; +select count(*) from data_correctness_detect; +insert into data_correctness_detect_randomly select i, i from generate_series(1, 100) i; +select count(*) from data_correctness_detect_randomly; +insert into data_correctness_detect_replicated select i, i from generate_series(1, 100) i; +select count(*) from data_correctness_detect_replicated; +set gp_detect_data_correctness = off; + +-- insert some data that not belongs to it +1U: insert into data_correctness_detect select i, i from generate_series(1, 100) i; +1U: insert into data_correctness_detect_randomly select i, i from generate_series(1, 100) i; +1U: insert into data_correctness_detect_replicated select i, i from generate_series(1, 100) i; +set gp_detect_data_correctness = on; +insert into data_correctness_detect select * from data_correctness_detect; +insert into data_correctness_detect select * from data_correctness_detect_randomly; +insert into data_correctness_detect select * from data_correctness_detect_replicated; + +-- clean up +set gp_detect_data_correctness = off; +drop table data_correctness_detect; +drop table data_correctness_detect_randomly; +drop table data_correctness_detect_replicated; + From 85e23aa122f9a37706e5a966e8136ca8faa2c664 Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Wed, 8 Mar 2023 07:17:31 -0800 Subject: [PATCH 017/106] [6X] Add views/functions to check missing and orphaned data files Backport from 6bbafd96d698740e3b8706de8c292ec0daa3a295. For 6X, we create a new extension called "gp_check_functions" instead of burn the function/views in to gp_toolkit. The main reason is to avoid requiring existing 6X users to reinstall gp_toolkit which might have many objects depending on. Correspondingly, the views will be created under the default namespace. To use: ``` create extension if not exists gp_check_missing_orphaned_files -- checking non-extended files select * from gp_check_missing_files; select * from gp_check_orphaned_files; -- checking all data files including the extended data files -- (e.g. 12345.1, 99999.2). These do not count supporting files -- such as .fsm .vm etc. And currently we only support checking -- extended data files for AO/CO tables, not heap. select * from gp_check_missing_files_ext; select * from gp_check_orphaned_files_ext; ``` Other adjustments: * In 6X, external, foreign and virtual tables could have valid relfilenode but they do not have datafiles stored in the common tablespaces. Skipping them by checking s.relstorage. * In 6X, it is known that extended datafiles created and truncated in the same transaction won't be removed (see #15342). Unfortunately, our orphaned file checking scripts could not differentiate such a false alarm case with other cases (but it is debatable whether such datafiles should really be counted as false alarm). So in order to not mess up with the test, now we drop the tables in test truncate_gp so that those datafiles will be removed. * We create function get_tablespace_version_directory_name() in the new extension. With that, we remove the same defition in regress test and adjust the tests accordingly. Original commit message: 1. Add views to get "existing" relation files in the database, including the default, global and user tablespaces. 
Note that this won't expose files outside of the data directory as we only use pg_ls_dir to get the file list, which won't have reach to any files outside the data directories (including the user tablespace directory). 2. Add views to get "expected" relation files in the database, using the knowledge from the catalog. 3. Using 1 and 2, construct views to get the missing files (i.e. files that are expected but not existed) and orphaned files (i.e. files that are there unexpectedly). 4. Create views to run the above views in MPP. Also, we support checking extended data files for AO/CO tables 5. Add regress tests. To use: ``` -- checking non-extended files select * from gp_toolkit.gp_check_missing_files; select * from gp_toolkit.gp_check_orphaned_files; -- checking all data files including the extended data files -- (e.g. 12345.1, 99999.2). These do not count supporting files -- such as .fsm .vm etc. And currently we only support checking -- extended data files for AO/CO tables, not heap. select * from gp_toolkit.gp_check_missing_files_ext; select * from gp_toolkit.gp_check_orphaned_files_ext; ``` Note: * As mentioned, currently support checking all the non-extended data files and the extended data files of AO/CO tables. The main reason to separate these two is performance: constructing expected file list for AO/CO segments runs dynamic SQL on each aoseg/aocsseg table and could be slow. So only do that if really required. * For heap tables, currently have no way to get the expected number of datafiles for a certain table: we cannot use pg_relation_size because that is in turn dependent on the number of datafiels itself. So always skip its extended files for now. --- gpcontrib/Makefile | 7 +- gpcontrib/gp_check_functions/Makefile | 15 + .../gp_check_functions--1.0.0.sql | 366 ++++++++++++++++++ .../gp_check_functions/gp_check_functions.c | 33 ++ .../gp_check_functions.control | 5 + src/test/regress/expected/truncate_gp.out | 6 + src/test/regress/greenplum_schedule | 1 + .../input/alter_db_set_tablespace.source | 6 +- src/test/regress/input/gp_check_files.source | 87 +++++ src/test/regress/input/gp_tablespace.source | 7 +- .../output/alter_db_set_tablespace.source | 8 +- src/test/regress/output/gp_check_files.source | 91 +++++ src/test/regress/output/gp_tablespace.source | 7 +- src/test/regress/regress_gp.c | 7 - src/test/regress/sql/truncate_gp.sql | 7 + 15 files changed, 627 insertions(+), 26 deletions(-) create mode 100644 gpcontrib/gp_check_functions/Makefile create mode 100644 gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql create mode 100644 gpcontrib/gp_check_functions/gp_check_functions.c create mode 100644 gpcontrib/gp_check_functions/gp_check_functions.control create mode 100644 src/test/regress/input/gp_check_files.source create mode 100644 src/test/regress/output/gp_check_files.source diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index badd038a5617..a78a52e213a8 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -24,7 +24,8 @@ ifeq "$(enable_debug_extensions)" "yes" gp_array_agg \ gp_percentile_agg \ gp_error_handling \ - gp_subtransaction_overflow + gp_subtransaction_overflow \ + gp_check_functions else recurse_targets = gp_sparse_vector \ gp_distribution_policy \ @@ -35,7 +36,8 @@ else gp_array_agg \ gp_percentile_agg \ gp_error_handling \ - gp_subtransaction_overflow + gp_subtransaction_overflow \ + gp_check_functions endif ifeq "$(with_zstd)" "yes" @@ -99,3 +101,4 @@ installcheck: $(MAKE) -C gp_sparse_vector installcheck $(MAKE) -C gp_percentile_agg 
installcheck $(MAKE) -C gp_subtransaction_overflow installcheck + $(MAKE) -C gp_check_functions installcheck diff --git a/gpcontrib/gp_check_functions/Makefile b/gpcontrib/gp_check_functions/Makefile new file mode 100644 index 000000000000..3052b65bb818 --- /dev/null +++ b/gpcontrib/gp_check_functions/Makefile @@ -0,0 +1,15 @@ +EXTENSION = gp_check_functions +DATA = gp_check_functions--1.0.0.sql +MODULES = gp_check_functions +# REGRESS testing is covered by the main suite test 'gp_check_files' as we need the custom tablespace directory support + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = gpcontrib/gp_check_functions +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql new file mode 100644 index 000000000000..f3ec81a45cfc --- /dev/null +++ b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql @@ -0,0 +1,366 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION gp_check_functions" to load this file. \quit + +CREATE OR REPLACE FUNCTION get_tablespace_version_directory_name() +RETURNS text +AS '$libdir/gp_check_functions' +LANGUAGE C; + +-------------------------------------------------------------------------------- +-- @function: +-- __get_ao_segno_list +-- +-- @in: +-- +-- @out: +-- oid - relation oid +-- int - segment number +-- +-- @doc: +-- UDF to retrieve AO segment file numbers for each ao_row table +-- +-------------------------------------------------------------------------------- + +CREATE OR REPLACE FUNCTION __get_ao_segno_list() +RETURNS TABLE (relid oid, segno int) AS +$$ +DECLARE + table_name text; + rec record; + cur refcursor; + row record; +BEGIN + -- iterate over the aoseg relations + FOR rec IN SELECT sc.relname segrel, tc.oid tableoid + FROM pg_appendonly a + JOIN pg_class tc ON a.relid = tc.oid + JOIN pg_class sc ON a.segrelid = sc.oid + WHERE tc.relstorage = 'a' + LOOP + table_name := rec.segrel; + -- Fetch and return each row from the aoseg table + BEGIN + OPEN cur FOR EXECUTE format('SELECT segno FROM pg_aoseg.%I', table_name); + SELECT rec.tableoid INTO relid; + LOOP + FETCH cur INTO row; + EXIT WHEN NOT FOUND; + segno := row.segno; + IF segno <> 0 THEN -- there's no '.0' file, it means the file w/o extension + RETURN NEXT; + END IF; + END LOOP; + CLOSE cur; + EXCEPTION + -- If failed to open the aoseg table (e.g. 
the table itself is missing), continue + WHEN OTHERS THEN + RAISE WARNING 'Failed to read %: %', table_name, SQLERRM; + END; + END LOOP; + RETURN; +END; +$$ +LANGUAGE plpgsql; + +GRANT EXECUTE ON FUNCTION __get_ao_segno_list() TO public; + +-------------------------------------------------------------------------------- +-- @function: +-- __get_aoco_segno_list +-- +-- @in: +-- +-- @out: +-- oid - relation oid +-- int - segment number +-- +-- @doc: +-- UDF to retrieve AOCO segment file numbers for each ao_column table +-- +-------------------------------------------------------------------------------- + +CREATE OR REPLACE FUNCTION __get_aoco_segno_list() +RETURNS TABLE (relid oid, segno int) AS +$$ +DECLARE + table_name text; + rec record; + cur refcursor; + row record; +BEGIN + -- iterate over the aocoseg relations + FOR rec IN SELECT sc.relname segrel, tc.oid tableoid + FROM pg_appendonly a + JOIN pg_class tc ON a.relid = tc.oid + JOIN pg_class sc ON a.segrelid = sc.oid + WHERE tc.relstorage = 'c' + LOOP + table_name := rec.segrel; + -- Fetch and return each extended segno corresponding to attnum and segno in the aocoseg table + BEGIN + OPEN cur FOR EXECUTE format('SELECT ((a.attnum - 1) * 128 + s.segno) as segno ' + 'FROM (SELECT * FROM pg_attribute_encoding ' + 'WHERE attrelid = %s) a CROSS JOIN pg_aoseg.%I s', + rec.tableoid, table_name); + SELECT rec.tableoid INTO relid; + LOOP + FETCH cur INTO row; + EXIT WHEN NOT FOUND; + segno := row.segno; + IF segno <> 0 THEN -- there's no '.0' file, it means the file w/o extension + RETURN NEXT; + END IF; + END LOOP; + CLOSE cur; + EXCEPTION + -- If failed to open the aocoseg table (e.g. the table itself is missing), continue + WHEN OTHERS THEN + RAISE WARNING 'Failed to read %: %', table_name, SQLERRM; + END; + END LOOP; + RETURN; +END; +$$ +LANGUAGE plpgsql; + +GRANT EXECUTE ON FUNCTION __get_aoco_segno_list() TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __get_exist_files +-- +-- @doc: +-- Retrieve a list of all existing data files in the default +-- and user tablespaces. +-- +-------------------------------------------------------------------------------- +-- return the list of existing files in the database +CREATE OR REPLACE VIEW __get_exist_files AS +-- 1. List of files in the default tablespace +SELECT 0 AS tablespace, filename +FROM pg_ls_dir('base/' || ( + SELECT d.oid::text + FROM pg_database d + WHERE d.datname = current_database() +)) +AS filename +UNION +-- 2. List of files in the global tablespace +SELECT 1664 AS tablespace, filename +FROM pg_ls_dir('global/') +AS filename +UNION +-- 3. List of files in user-defined tablespaces +SELECT ts.oid AS tablespace, + pg_ls_dir('pg_tblspc/' || ts.oid::text || '/' || get_tablespace_version_directory_name() || '/' || + (SELECT d.oid::text FROM pg_database d WHERE d.datname = current_database()), true/*missing_ok*/,false/*include_dot*/) AS filename +FROM pg_tablespace ts +WHERE ts.oid > 1664; + +GRANT SELECT ON __get_exist_files TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __get_expect_files +-- +-- @doc: +-- Retrieve a list of expected data files in the database, +-- using the knowledge from catalogs. This does not include +-- any extended data files, nor does it include external, +-- foreign or virtual tables. 
+-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW __get_expect_files AS +SELECT s.reltablespace AS tablespace, s.relname, s.relstorage, + (CASE WHEN s.relfilenode != 0 THEN s.relfilenode ELSE pg_relation_filenode(s.oid) END)::text AS filename +FROM pg_class s +WHERE s.relstorage NOT IN ('x', 'v', 'f'); + +GRANT SELECT ON __get_expect_files TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __get_expect_files_ext +-- +-- @doc: +-- Retrieve a list of expected data files in the database, +-- using the knowledge from catalogs. This includes all +-- the extended data files for AO/CO tables, nor does it +-- include external, foreign or virtual tables. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW __get_expect_files_ext AS +SELECT s.reltablespace AS tablespace, s.relname, s.relstorage, + (CASE WHEN s.relfilenode != 0 THEN s.relfilenode ELSE pg_relation_filenode(s.oid) END)::text AS filename +FROM pg_class s +WHERE s.relstorage NOT IN ('x', 'v', 'f') +UNION +-- AO extended files +SELECT c.reltablespace AS tablespace, c.relname, c.relstorage, + format(c.relfilenode::text || '.' || s.segno::text) AS filename +FROM __get_ao_segno_list() s +JOIN pg_class c ON s.relid = c.oid +WHERE c.relstorage NOT IN ('x', 'v', 'f') +UNION +-- CO extended files +SELECT c.reltablespace AS tablespace, c.relname, c.relstorage, + format(c.relfilenode::text || '.' || s.segno::text) AS filename +FROM __get_aoco_segno_list() s +JOIN pg_class c ON s.relid = c.oid +WHERE c.relstorage NOT IN ('x', 'v', 'f'); + +GRANT SELECT ON __get_expect_files_ext TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __check_orphaned_files +-- +-- @doc: +-- Check orphaned data files on default and user tablespaces, +-- not including extended files. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW __check_orphaned_files AS +SELECT f1.tablespace, f1.filename +from __get_exist_files f1 +LEFT JOIN __get_expect_files f2 +ON f1.tablespace = f2.tablespace AND f1.filename = f2.filename +WHERE f2.tablespace IS NULL + AND f1.filename SIMILAR TO '[0-9]+'; + +GRANT SELECT ON __check_orphaned_files TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __check_orphaned_files_ext +-- +-- @doc: +-- Check orphaned data files on default and user tablespaces, +-- including extended files. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW __check_orphaned_files_ext AS +SELECT f1.tablespace, f1.filename +FROM __get_exist_files f1 +LEFT JOIN __get_expect_files_ext f2 +ON f1.tablespace = f2.tablespace AND f1.filename = f2.filename +WHERE f2.tablespace IS NULL + AND f1.filename SIMILAR TO '[0-9]+(\.[0-9]+)?' + AND NOT EXISTS ( + -- XXX: not supporting heap for now, do not count them + SELECT 1 FROM pg_class c + WHERE c.relfilenode::text = split_part(f1.filename, '.', 1) + AND c.relstorage = 'h' + ); + +GRANT SELECT ON __check_orphaned_files_ext TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __check_missing_files +-- +-- @doc: +-- Check missing data files on default and user tablespaces, +-- not including extended files. 
+-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW __check_missing_files AS +SELECT f1.tablespace, f1.relname, f1.filename +from __get_expect_files f1 +LEFT JOIN __get_exist_files f2 +ON f1.tablespace = f2.tablespace AND f1.filename = f2.filename +WHERE f2.tablespace IS NULL + AND f1.filename SIMILAR TO '[0-9]+'; + +GRANT SELECT ON __check_missing_files TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- __check_missing_files_ext +-- +-- @doc: +-- Check missing data files on default and user tablespaces, +-- including extended files. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW __check_missing_files_ext AS +SELECT f1.tablespace, f1.relname, f1.filename +FROM __get_expect_files_ext f1 +LEFT JOIN __get_exist_files f2 +ON f1.tablespace = f2.tablespace AND f1.filename = f2.filename +WHERE f2.tablespace IS NULL + AND f1.filename SIMILAR TO '[0-9]+(\.[0-9]+)?'; + +GRANT SELECT ON __check_missing_files_ext TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- gp_check_orphaned_files +-- +-- @doc: +-- User-facing view of __check_orphaned_files. +-- Gather results from coordinator and all segments. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW gp_check_orphaned_files AS +SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * +FROM gp_dist_random('__check_orphaned_files') +UNION ALL +SELECT -1 AS gp_segment_id, * +FROM __check_orphaned_files; + +GRANT SELECT ON gp_check_orphaned_files TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- gp_check_orphaned_files_ext +-- +-- @doc: +-- User-facing view of __check_orphaned_files_ext. +-- Gather results from coordinator and all segments. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW gp_check_orphaned_files_ext AS +SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * +FROM gp_dist_random('__check_orphaned_files_ext') +UNION ALL +SELECT -1 AS gp_segment_id, * +FROM __check_orphaned_files; -- not checking ext on coordinator + +GRANT SELECT ON gp_check_orphaned_files_ext TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- gp_check_missing_files +-- +-- @doc: +-- User-facing view of __check_missing_files. +-- Gather results from coordinator and all segments. +-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW gp_check_missing_files AS +SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * +FROM gp_dist_random('__check_missing_files') +UNION ALL +SELECT -1 AS gp_segment_id, * +FROM __check_missing_files; + +GRANT SELECT ON gp_check_missing_files TO public; + +-------------------------------------------------------------------------------- +-- @view: +-- gp_check_missing_files_ext +-- +-- @doc: +-- User-facing view of __check_missing_files_ext. +-- Gather results from coordinator and all segments. 
+-- +-------------------------------------------------------------------------------- +CREATE OR REPLACE VIEW gp_check_missing_files_ext AS +SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * +FROM gp_dist_random('__check_missing_files_ext') +UNION ALL +SELECT -1 AS gp_segment_id, * +FROM __check_missing_files; -- not checking ext on coordinator + +GRANT SELECT ON gp_check_missing_files_ext TO public; diff --git a/gpcontrib/gp_check_functions/gp_check_functions.c b/gpcontrib/gp_check_functions/gp_check_functions.c new file mode 100644 index 000000000000..6bd72747eeda --- /dev/null +++ b/gpcontrib/gp_check_functions/gp_check_functions.c @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * + * gp_check_functions.c + * GPDB helper functions for checking various system fact/status. + * + * + * Copyright (c) 2022-Present VMware Software, Inc. + * + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "catalog/catalog.h" +#include "utils/builtins.h" + +Datum get_tablespace_version_directory_name(PG_FUNCTION_ARGS); + +PG_MODULE_MAGIC; +PG_FUNCTION_INFO_V1(get_tablespace_version_directory_name); + +/* + * get the GPDB-specific directory name for user tablespace + */ +Datum +get_tablespace_version_directory_name(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(CStringGetTextDatum(GP_TABLESPACE_VERSION_DIRECTORY)); +} + diff --git a/gpcontrib/gp_check_functions/gp_check_functions.control b/gpcontrib/gp_check_functions/gp_check_functions.control new file mode 100644 index 000000000000..8fe18ab67cbd --- /dev/null +++ b/gpcontrib/gp_check_functions/gp_check_functions.control @@ -0,0 +1,5 @@ +# gp_check_functions extension + +comment = 'various GPDB helper views/functions' +default_version = '1.0.0' +relocatable = true diff --git a/src/test/regress/expected/truncate_gp.out b/src/test/regress/expected/truncate_gp.out index 771439e42eb9..4684a73247a0 100644 --- a/src/test/regress/expected/truncate_gp.out +++ b/src/test/regress/expected/truncate_gp.out @@ -109,3 +109,9 @@ select stat_table_segfile_size('regression', 'truncate_with_create_heap'); (4,segfile:16384/19220,0) (3 rows) +-- It is a known issue that extended datafiles won't be removed in such case (see #15342), +-- but since they have 0 size 0 it shouldn't really matter. +-- However, we do not want these file to create false alarms in the gp_check_files test +-- later, so drop them now. 
+drop table truncate_with_create_ao; +drop table truncate_with_create_aocs; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 4847772264f9..d33bc0b65c0e 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -46,6 +46,7 @@ test: shared_scan test: spi_processed64bit test: python_processed64bit test: gp_tablespace_with_faults +test: gp_check_files # below test(s) inject faults so each of them need to be in a separate group test: gp_tablespace diff --git a/src/test/regress/input/alter_db_set_tablespace.source b/src/test/regress/input/alter_db_set_tablespace.source index 56d83b18f275..61034f6dce6f 100644 --- a/src/test/regress/input/alter_db_set_tablespace.source +++ b/src/test/regress/input/alter_db_set_tablespace.source @@ -15,10 +15,8 @@ CREATE SCHEMA adst; SET search_path TO adst,public; -CREATE OR REPLACE FUNCTION get_tablespace_version_directory_name() - RETURNS TEXT -AS '@abs_builddir@/regress.so', 'get_tablespace_version_directory_name' - LANGUAGE C; +-- to get function get_tablespace_version_directory_name() +CREATE EXTENSION gp_check_functions; -- start_ignore CREATE LANGUAGE plpythonu; diff --git a/src/test/regress/input/gp_check_files.source b/src/test/regress/input/gp_check_files.source new file mode 100644 index 000000000000..31bcf40f88d1 --- /dev/null +++ b/src/test/regress/input/gp_check_files.source @@ -0,0 +1,87 @@ +-- Test views/functions to check missing/orphaned data files + +-- start_matchsubs +-- m/aoseg_\d+/ +-- s/aoseg_\d+/aoseg_xxx/g +-- m/aocsseg_\d+/ +-- s/aocsseg_\d+/aocsseg_xxx/g +-- m/aovisimap_\d+/ +-- s/aovisimap_\d+/aovisimap_xxx/g +-- end_matchsubs + +checkpoint; + +create extension gp_check_functions; + +-- we'll use a specific tablespace to test +CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@'; +set default_tablespace = checkfile_ts; + +-- create a table that we'll delete the files +CREATE TABLE checkmissing_heap(a int, b int, c int); +insert into checkmissing_heap select i,i,i from generate_series(1,100)i; + +-- go to seg1's data directory for the tablespace we just created +\cd @testtablespace@ +select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset +\cd :dbid +select get_tablespace_version_directory_name() as version_dir \gset +\cd :version_dir +select oid from pg_database where datname = current_database() \gset +\cd :oid + +-- Now remove the data file for the table we just created. +-- But check to see if the working directory is what we expect (under +-- the test tablespace). Also just delete one and only one file that +-- is number-named. +\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+' -exec rm {} \; -quit; fi + +-- now create AO/CO tables and delete only their extended files +CREATE TABLE checkmissing_ao(a int, b int, c int) WITH (appendonly=true, orientation=row); +CREATE TABLE checkmissing_co(a int, b int, c int) WITH (appendonly=true, orientation=column); +insert into checkmissing_ao select i,i,i from generate_series(1,100)i; +insert into checkmissing_co select i,i,i from generate_series(1,100)i; + +-- Now remove the extended data file '.1' for the AO/CO tables we just created. +-- Still, check to see if the working directory is what we expect, and only +-- delete exact two '.1' files. +\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi +\! if pwd | grep -q "^@testtablespace@/.*$"; then find . 
-maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi + +-- create some orphaned files +\! touch 987654 +\! touch 987654.3 + +-- create some normal tables +CREATE TABLE checknormal_heap(a int, b int, c int); +CREATE TABLE checknormal_ao(a int, b int, c int) WITH (appendonly=true, orientation=row); +CREATE TABLE checknormal_co(a int, b int, c int) WITH (appendonly=true, orientation=column); +insert into checknormal_heap select i,i,i from generate_series(1,100)i; +insert into checknormal_ao select i,i,i from generate_series(1,100)i; +insert into checknormal_co select i,i,i from generate_series(1,100)i; + +-- check non-extended files +select gp_segment_id, filename from gp_check_orphaned_files; +select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files; + +SET client_min_messages = ERROR; + +-- check extended files +select gp_segment_id, filename from gp_check_orphaned_files_ext; +select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files_ext; + +RESET client_min_messages; + +-- cleanup +drop table checkmissing_heap; +drop table checkmissing_ao; +drop table checkmissing_co; +drop table checknormal_heap; +drop table checknormal_ao; +drop table checknormal_co; + +\! rm -rf @testtablespace@/*; + +DROP TABLESPACE checkfile_ts; +DROP EXTENSION gp_check_functions; + diff --git a/src/test/regress/input/gp_tablespace.source b/src/test/regress/input/gp_tablespace.source index 07ab138e577f..c118dd56c6ab 100644 --- a/src/test/regress/input/gp_tablespace.source +++ b/src/test/regress/input/gp_tablespace.source @@ -35,10 +35,8 @@ BEGIN END; $$ language plpgsql; -CREATE OR REPLACE FUNCTION get_tablespace_version_directory_name() - RETURNS TEXT - AS '@abs_builddir@/regress.so', 'get_tablespace_version_directory_name' - LANGUAGE C; +-- to get function get_tablespace_version_directory_name() +CREATE EXTENSION gp_check_functions; -- create tablespaces we can use @@ -211,4 +209,5 @@ CREATE TABLE t_dir_empty(a int); \! 
rm -rf @testtablespace@/*; DROP TABLE IF EXISTS t_dir_empty; DROP TABLESPACE testspace_dir_empty; +DROP EXTENSION gp_check_functions; diff --git a/src/test/regress/output/alter_db_set_tablespace.source b/src/test/regress/output/alter_db_set_tablespace.source index c1f40d30fc84..ba330572f29e 100644 --- a/src/test/regress/output/alter_db_set_tablespace.source +++ b/src/test/regress/output/alter_db_set_tablespace.source @@ -11,10 +11,8 @@ -- end_ignore CREATE SCHEMA adst; SET search_path TO adst,public; -CREATE OR REPLACE FUNCTION get_tablespace_version_directory_name() - RETURNS TEXT -AS '@abs_builddir@/regress.so', 'get_tablespace_version_directory_name' - LANGUAGE C; +-- to get function get_tablespace_version_directory_name() +CREATE EXTENSION gp_check_functions; -- start_ignore CREATE LANGUAGE plpythonu; -- end_ignore @@ -1431,7 +1429,7 @@ DROP TABLESPACE adst_destination_tablespace; -- Final cleanup DROP SCHEMA adst CASCADE; NOTICE: drop cascades to 5 other objects -DETAIL: drop cascades to function get_tablespace_version_directory_name() +DETAIL: drop cascades to extension gp_check_functions drop cascades to function setup_tablespace_location_dir_for_test(text) drop cascades to function setup() drop cascades to function list_db_tablespace(text,text) diff --git a/src/test/regress/output/gp_check_files.source b/src/test/regress/output/gp_check_files.source new file mode 100644 index 000000000000..8f5fca89f8ac --- /dev/null +++ b/src/test/regress/output/gp_check_files.source @@ -0,0 +1,91 @@ +-- Test views/functions to check missing/orphaned data files +-- start_matchsubs +-- m/aoseg_\d+/ +-- s/aoseg_\d+/aoseg_xxx/g +-- m/aocsseg_\d+/ +-- s/aocsseg_\d+/aocsseg_xxx/g +-- m/aovisimap_\d+/ +-- s/aovisimap_\d+/aovisimap_xxx/g +-- end_matchsubs +checkpoint; +create extension gp_check_functions; +-- we'll use a specific tablespace to test +CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@'; +set default_tablespace = checkfile_ts; +-- create a table that we'll delete the files +CREATE TABLE checkmissing_heap(a int, b int, c int); +insert into checkmissing_heap select i,i,i from generate_series(1,100)i; +-- go to seg1's data directory for the tablespace we just created +\cd @testtablespace@ +select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset +\cd :dbid +select get_tablespace_version_directory_name() as version_dir \gset +\cd :version_dir +select oid from pg_database where datname = current_database() \gset +\cd :oid +-- Now remove the data file for the table we just created. +-- But check to see if the working directory is what we expect (under +-- the test tablespace). Also just delete one and only one file that +-- is number-named. +\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+' -exec rm {} \; -quit; fi +-- now create AO/CO tables and delete only their extended files +CREATE TABLE checkmissing_ao(a int, b int, c int) WITH (appendonly=true, orientation=row); +CREATE TABLE checkmissing_co(a int, b int, c int) WITH (appendonly=true, orientation=column); +insert into checkmissing_ao select i,i,i from generate_series(1,100)i; +insert into checkmissing_co select i,i,i from generate_series(1,100)i; +-- Now remove the extended data file '.1' for the AO/CO tables we just created. +-- Still, check to see if the working directory is what we expect, and only +-- delete exact two '.1' files. +\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi +\! 
if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi +-- create some orphaned files +\! touch 987654 +\! touch 987654.3 +-- create some normal tables +CREATE TABLE checknormal_heap(a int, b int, c int); +CREATE TABLE checknormal_ao(a int, b int, c int) WITH (appendonly=true, orientation=row); +CREATE TABLE checknormal_co(a int, b int, c int) WITH (appendonly=true, orientation=column); +insert into checknormal_heap select i,i,i from generate_series(1,100)i; +insert into checknormal_ao select i,i,i from generate_series(1,100)i; +insert into checknormal_co select i,i,i from generate_series(1,100)i; +-- check non-extended files +select gp_segment_id, filename from gp_check_orphaned_files; + gp_segment_id | filename +---------------+---------- + 1 | 987654 +(1 row) + +select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files; + gp_segment_id | regexp_replace | relname +---------------+----------------+------------------- + 1 | x | checkmissing_heap +(1 row) + +SET client_min_messages = ERROR; +-- check extended files +select gp_segment_id, filename from gp_check_orphaned_files_ext; + gp_segment_id | filename +---------------+---------- + 1 | 987654 + 1 | 987654.3 +(2 rows) + +select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files_ext; + gp_segment_id | regexp_replace | relname +---------------+----------------+------------------- + 1 | x | checkmissing_heap + 1 | x.1 | checkmissing_ao + 1 | x.1 | checkmissing_co +(3 rows) + +RESET client_min_messages; +-- cleanup +drop table checkmissing_heap; +drop table checkmissing_ao; +drop table checkmissing_co; +drop table checknormal_heap; +drop table checknormal_ao; +drop table checknormal_co; +\! rm -rf @testtablespace@/*; +DROP TABLESPACE checkfile_ts; +DROP EXTENSION gp_check_functions; diff --git a/src/test/regress/output/gp_tablespace.source b/src/test/regress/output/gp_tablespace.source index 982d4d531cd7..941c4c79a99a 100644 --- a/src/test/regress/output/gp_tablespace.source +++ b/src/test/regress/output/gp_tablespace.source @@ -33,10 +33,8 @@ BEGIN return has_init_file_for_oid(relation_id); END; $$ language plpgsql; -CREATE OR REPLACE FUNCTION get_tablespace_version_directory_name() - RETURNS TEXT - AS '@abs_builddir@/regress.so', 'get_tablespace_version_directory_name' - LANGUAGE C; +-- to get function get_tablespace_version_directory_name() +CREATE EXTENSION gp_check_functions; -- create tablespaces we can use CREATE TABLESPACE testspace LOCATION '@testtablespace@'; CREATE TABLESPACE ul_testspace LOCATION '@testtablespace@_unlogged'; @@ -385,3 +383,4 @@ CREATE TABLE t_dir_empty(a int); \! 
rm -rf @testtablespace@/*; DROP TABLE IF EXISTS t_dir_empty; DROP TABLESPACE testspace_dir_empty; +DROP EXTENSION gp_check_functions; diff --git a/src/test/regress/regress_gp.c b/src/test/regress/regress_gp.c index 43aba7ceead2..1f8321129d67 100644 --- a/src/test/regress/regress_gp.c +++ b/src/test/regress/regress_gp.c @@ -2149,13 +2149,6 @@ broken_int4out(PG_FUNCTION_ARGS) return DirectFunctionCall1(int4out, Int32GetDatum(arg)); } -PG_FUNCTION_INFO_V1(get_tablespace_version_directory_name); -Datum -get_tablespace_version_directory_name(PG_FUNCTION_ARGS) -{ - PG_RETURN_TEXT_P(CStringGetTextDatum(GP_TABLESPACE_VERSION_DIRECTORY)); -} - PG_FUNCTION_INFO_V1(gp_tablespace_temptablespaceOid); Datum gp_tablespace_temptablespaceOid(PG_FUNCTION_ARGS) diff --git a/src/test/regress/sql/truncate_gp.sql b/src/test/regress/sql/truncate_gp.sql index f0b8b3e2c6b5..3a2d3987212f 100644 --- a/src/test/regress/sql/truncate_gp.sql +++ b/src/test/regress/sql/truncate_gp.sql @@ -86,3 +86,10 @@ end; -- the heap table segment file size after truncate should be zero select stat_table_segfile_size('regression', 'truncate_with_create_heap'); + +-- It is a known issue that extended datafiles won't be removed in such case (see #15342), +-- but since they have 0 size 0 it shouldn't really matter. +-- However, we do not want these file to create false alarms in the gp_check_files test +-- later, so drop them now. +drop table truncate_with_create_ao; +drop table truncate_with_create_aocs; From 39f5027d53a9b319e992f1ff2dc28c662fb27fb1 Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Thu, 27 Apr 2023 08:53:51 -0700 Subject: [PATCH 018/106] Using __gp_aoseg/__gp_aocsseg in missing/orphaned file views The __gp_aoseg/__gp_aocsseg functions provide more details such as the eof of segments. Use them for the check missing/orphaned file views, and make two changes: * For checking missing files, ignore those w/ eof<=0. They might be recycled but their aoseg/aocsseg entries are still there. * For checking orphaned files, ignore those that still have base file (w/o any extension number) being present. Those might be the ones that have been truncated but not yet removed. They might be the ones that are left behind when column is rewritten during ALTER COLUMN. Now the checking logic becomes: only if the base file is orphaned too, we will report all the extensions along with it. Also run the regress test at the end of the schedule for more chance to catch abnormalies. 
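For reference, a minimal sketch of the per-table query that __get_ao_segno_list() now issues (the table name 'public.my_ao' is only an example, not part of this change):

```
-- list AO segment files that actually hold data; segments with eof <= 0 are
-- skipped so recycled-but-still-cataloged segments are not flagged as missing
SELECT segno
FROM gp_toolkit.__gp_aoseg('public.my_ao')
WHERE eof > 0;
```

The column-oriented case goes through gp_toolkit.__gp_aocsseg() in the same way, using physical_segno instead of segno.
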
--- .../gp_check_functions--1.0.0.sql | 33 +++++++++++-------- src/test/regress/expected/truncate_gp.out | 6 ---- src/test/regress/greenplum_schedule | 4 ++- src/test/regress/sql/truncate_gp.sql | 6 ---- 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql index f3ec81a45cfc..33a7b8756999 100644 --- a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql +++ b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql @@ -31,16 +31,19 @@ DECLARE row record; BEGIN -- iterate over the aoseg relations - FOR rec IN SELECT sc.relname segrel, tc.oid tableoid + FOR rec IN SELECT tc.oid tableoid, tc.relname, ns.nspname FROM pg_appendonly a JOIN pg_class tc ON a.relid = tc.oid - JOIN pg_class sc ON a.segrelid = sc.oid + JOIN pg_namespace ns ON tc.relnamespace = ns.oid WHERE tc.relstorage = 'a' LOOP - table_name := rec.segrel; + table_name := rec.relname; -- Fetch and return each row from the aoseg table BEGIN - OPEN cur FOR EXECUTE format('SELECT segno FROM pg_aoseg.%I', table_name); + OPEN cur FOR EXECUTE format('SELECT segno ' + 'FROM gp_toolkit.__gp_aoseg(''%I.%I'') ' + 'WHERE eof > 0', + rec.nspname, rec.relname); SELECT rec.tableoid INTO relid; LOOP FETCH cur INTO row; @@ -54,7 +57,7 @@ BEGIN EXCEPTION -- If failed to open the aoseg table (e.g. the table itself is missing), continue WHEN OTHERS THEN - RAISE WARNING 'Failed to read %: %', table_name, SQLERRM; + RAISE WARNING 'Failed to get aoseg info for %: %', table_name, SQLERRM; END; END LOOP; RETURN; @@ -89,19 +92,19 @@ DECLARE row record; BEGIN -- iterate over the aocoseg relations - FOR rec IN SELECT sc.relname segrel, tc.oid tableoid + FOR rec IN SELECT tc.oid tableoid, tc.relname, ns.nspname FROM pg_appendonly a JOIN pg_class tc ON a.relid = tc.oid - JOIN pg_class sc ON a.segrelid = sc.oid + JOIN pg_namespace ns ON tc.relnamespace = ns.oid WHERE tc.relstorage = 'c' LOOP - table_name := rec.segrel; + table_name := rec.relname; -- Fetch and return each extended segno corresponding to attnum and segno in the aocoseg table BEGIN - OPEN cur FOR EXECUTE format('SELECT ((a.attnum - 1) * 128 + s.segno) as segno ' - 'FROM (SELECT * FROM pg_attribute_encoding ' - 'WHERE attrelid = %s) a CROSS JOIN pg_aoseg.%I s', - rec.tableoid, table_name); + OPEN cur FOR EXECUTE format('SELECT physical_segno as segno ' + 'FROM gp_toolkit.__gp_aocsseg(''%I.%I'') ' + 'WHERE eof > 0', + rec.nspname, rec.relname); SELECT rec.tableoid INTO relid; LOOP FETCH cur INTO row; @@ -115,7 +118,7 @@ BEGIN EXCEPTION -- If failed to open the aocoseg table (e.g. 
the table itself is missing), continue WHEN OTHERS THEN - RAISE WARNING 'Failed to read %: %', table_name, SQLERRM; + RAISE WARNING 'Failed to get aocsseg info for %: %', table_name, SQLERRM; END; END LOOP; RETURN; @@ -251,7 +254,9 @@ WHERE f2.tablespace IS NULL SELECT 1 FROM pg_class c WHERE c.relfilenode::text = split_part(f1.filename, '.', 1) AND c.relstorage = 'h' - ); + ) + -- If the base file exists, do not count the extension files + AND substring(f1.filename from '[0-9]+') IN (SELECT filename FROM __check_orphaned_files); GRANT SELECT ON __check_orphaned_files_ext TO public; diff --git a/src/test/regress/expected/truncate_gp.out b/src/test/regress/expected/truncate_gp.out index 4684a73247a0..771439e42eb9 100644 --- a/src/test/regress/expected/truncate_gp.out +++ b/src/test/regress/expected/truncate_gp.out @@ -109,9 +109,3 @@ select stat_table_segfile_size('regression', 'truncate_with_create_heap'); (4,segfile:16384/19220,0) (3 rows) --- It is a known issue that extended datafiles won't be removed in such case (see #15342), --- but since they have 0 size 0 it shouldn't really matter. --- However, we do not want these file to create false alarms in the gp_check_files test --- later, so drop them now. -drop table truncate_with_create_ao; -drop table truncate_with_create_aocs; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index d33bc0b65c0e..01d5cb60258f 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -46,7 +46,6 @@ test: shared_scan test: spi_processed64bit test: python_processed64bit test: gp_tablespace_with_faults -test: gp_check_files # below test(s) inject faults so each of them need to be in a separate group test: gp_tablespace @@ -306,4 +305,7 @@ test: create_extension_fail # check syslogger (since GP syslogger code is divergent from upstream) test: syslogger_gp +# run this at the end of the schedule for more chance to catch abnormalies +test: gp_check_files + # end of tests diff --git a/src/test/regress/sql/truncate_gp.sql b/src/test/regress/sql/truncate_gp.sql index 3a2d3987212f..e79417cddc99 100644 --- a/src/test/regress/sql/truncate_gp.sql +++ b/src/test/regress/sql/truncate_gp.sql @@ -87,9 +87,3 @@ end; -- the heap table segment file size after truncate should be zero select stat_table_segfile_size('regression', 'truncate_with_create_heap'); --- It is a known issue that extended datafiles won't be removed in such case (see #15342), --- but since they have 0 size 0 it shouldn't really matter. --- However, we do not want these file to create false alarms in the gp_check_files test --- later, so drop them now. -drop table truncate_with_create_ao; -drop table truncate_with_create_aocs; From 8627cb6cf2fcce158d7e643aaab3f3c392cfd054 Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Thu, 22 Jun 2023 11:52:44 -0700 Subject: [PATCH 019/106] Align 6X missing/orphaned views with 7X This is to incorporate the stability improvement changes we made for the 7X views in #15480. Mainly three things were done in that PR: 1. Do not count AO/CO file segments with eof=0 as missing; 2. Do not count files for views as missing; 3. Do not count extended file segments as orphaned as long as its base refile is expected. The 6X views already count the first and second points. Now just make them more aligned with how 7X does it. The third point is not in 6X, include it. This doesn't bump the extension version because we haven't released it yet. 
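As a quick illustration of the third point (the relfilenode value here is made up), the orphaned-file view now strips the segment extension before matching against the expected file list, using the same regex that appears in the view definition:

```
-- '16385.1' is reported as orphaned only if base relfilenode 16385 is itself unexpected
SELECT substring('16385.1' from '[0-9]+');   -- returns 16385
```

So an extended data file never shows up as orphaned on its own while its base relfilenode is still owned by a relation.
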
--- .../gp_check_functions--1.0.0.sql | 85 ++++++------------- src/test/regress/expected/.gitignore | 1 + src/test/regress/input/gp_check_files.source | 4 +- src/test/regress/output/gp_check_files.source | 9 +- src/test/regress/sql/.gitignore | 1 + 5 files changed, 32 insertions(+), 68 deletions(-) diff --git a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql index 33a7b8756999..1aa76c404cdc 100644 --- a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql +++ b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql @@ -15,6 +15,7 @@ LANGUAGE C; -- @out: -- oid - relation oid -- int - segment number +-- eof - eof of the segment file -- -- @doc: -- UDF to retrieve AO segment file numbers for each ao_row table @@ -22,7 +23,7 @@ LANGUAGE C; -------------------------------------------------------------------------------- CREATE OR REPLACE FUNCTION __get_ao_segno_list() -RETURNS TABLE (relid oid, segno int) AS +RETURNS TABLE (relid oid, segno int, eof bigint) AS $$ DECLARE table_name text; @@ -40,15 +41,15 @@ BEGIN table_name := rec.relname; -- Fetch and return each row from the aoseg table BEGIN - OPEN cur FOR EXECUTE format('SELECT segno ' - 'FROM gp_toolkit.__gp_aoseg(''%I.%I'') ' - 'WHERE eof > 0', + OPEN cur FOR EXECUTE format('SELECT segno, eof ' + 'FROM gp_toolkit.__gp_aoseg(''%I.%I'') ', rec.nspname, rec.relname); SELECT rec.tableoid INTO relid; LOOP FETCH cur INTO row; EXIT WHEN NOT FOUND; segno := row.segno; + eof := row.eof; IF segno <> 0 THEN -- there's no '.0' file, it means the file w/o extension RETURN NEXT; END IF; @@ -76,6 +77,7 @@ GRANT EXECUTE ON FUNCTION __get_ao_segno_list() TO public; -- @out: -- oid - relation oid -- int - segment number +-- eof - eof of the segment file -- -- @doc: -- UDF to retrieve AOCO segment file numbers for each ao_column table @@ -83,7 +85,7 @@ GRANT EXECUTE ON FUNCTION __get_ao_segno_list() TO public; -------------------------------------------------------------------------------- CREATE OR REPLACE FUNCTION __get_aoco_segno_list() -RETURNS TABLE (relid oid, segno int) AS +RETURNS TABLE (relid oid, segno int, eof bigint) AS $$ DECLARE table_name text; @@ -101,15 +103,15 @@ BEGIN table_name := rec.relname; -- Fetch and return each extended segno corresponding to attnum and segno in the aocoseg table BEGIN - OPEN cur FOR EXECUTE format('SELECT physical_segno as segno ' - 'FROM gp_toolkit.__gp_aocsseg(''%I.%I'') ' - 'WHERE eof > 0', + OPEN cur FOR EXECUTE format('SELECT physical_segno as segno, eof ' + 'FROM gp_toolkit.__gp_aocsseg(''%I.%I'') ', rec.nspname, rec.relname); SELECT rec.tableoid INTO relid; LOOP FETCH cur INTO row; EXIT WHEN NOT FOUND; segno := row.segno; + eof := row.eof; IF segno <> 0 THEN -- there's no '.0' file, it means the file w/o extension RETURN NEXT; END IF; @@ -190,6 +192,12 @@ GRANT SELECT ON __get_expect_files TO public; -- using the knowledge from catalogs. This includes all -- the extended data files for AO/CO tables, nor does it -- include external, foreign or virtual tables. +-- Also ignore AO segments w/ eof=0. They might be created just for +-- modcount whereas no data has ever been inserted to the seg. +-- Or, they could be created when a seg has only aborted rows. +-- In both cases, we can ignore these segs, because no matter +-- whether the data files exist or not, the rest of the system +-- can handle them gracefully. 
-- -------------------------------------------------------------------------------- CREATE OR REPLACE VIEW __get_expect_files_ext AS @@ -203,14 +211,14 @@ SELECT c.reltablespace AS tablespace, c.relname, c.relstorage, format(c.relfilenode::text || '.' || s.segno::text) AS filename FROM __get_ao_segno_list() s JOIN pg_class c ON s.relid = c.oid -WHERE c.relstorage NOT IN ('x', 'v', 'f') +WHERE s.eof >0 AND c.relstorage NOT IN ('x', 'v', 'f') UNION -- CO extended files SELECT c.reltablespace AS tablespace, c.relname, c.relstorage, format(c.relfilenode::text || '.' || s.segno::text) AS filename FROM __get_aoco_segno_list() s JOIN pg_class c ON s.relid = c.oid -WHERE c.relstorage NOT IN ('x', 'v', 'f'); +WHERE s.eof > 0 AND c.relstorage NOT IN ('x', 'v', 'f'); GRANT SELECT ON __get_expect_files_ext TO public; @@ -219,47 +227,24 @@ GRANT SELECT ON __get_expect_files_ext TO public; -- __check_orphaned_files -- -- @doc: --- Check orphaned data files on default and user tablespaces, --- not including extended files. +-- Check orphaned data files on default and user tablespaces. +-- A file is considered orphaned if its main relfilenode is not expected +-- to exist. For example, '12345.1' is an orphaned file if there is no +-- table has relfilenode=12345, but not otherwise. +-- Therefore, this view counts for file extension as well and we do not +-- need a "_ext" view like the missing file view. -- -------------------------------------------------------------------------------- CREATE OR REPLACE VIEW __check_orphaned_files AS SELECT f1.tablespace, f1.filename from __get_exist_files f1 LEFT JOIN __get_expect_files f2 -ON f1.tablespace = f2.tablespace AND f1.filename = f2.filename +ON f1.tablespace = f2.tablespace AND substring(f1.filename from '[0-9]+') = f2.filename WHERE f2.tablespace IS NULL - AND f1.filename SIMILAR TO '[0-9]+'; + AND f1.filename SIMILAR TO '[0-9]+(\.)?(\_)?%'; GRANT SELECT ON __check_orphaned_files TO public; --------------------------------------------------------------------------------- --- @view: --- __check_orphaned_files_ext --- --- @doc: --- Check orphaned data files on default and user tablespaces, --- including extended files. --- --------------------------------------------------------------------------------- -CREATE OR REPLACE VIEW __check_orphaned_files_ext AS -SELECT f1.tablespace, f1.filename -FROM __get_exist_files f1 -LEFT JOIN __get_expect_files_ext f2 -ON f1.tablespace = f2.tablespace AND f1.filename = f2.filename -WHERE f2.tablespace IS NULL - AND f1.filename SIMILAR TO '[0-9]+(\.[0-9]+)?' - AND NOT EXISTS ( - -- XXX: not supporting heap for now, do not count them - SELECT 1 FROM pg_class c - WHERE c.relfilenode::text = split_part(f1.filename, '.', 1) - AND c.relstorage = 'h' - ) - -- If the base file exists, do not count the extension files - AND substring(f1.filename from '[0-9]+') IN (SELECT filename FROM __check_orphaned_files); - -GRANT SELECT ON __check_orphaned_files_ext TO public; - -------------------------------------------------------------------------------- -- @view: -- __check_missing_files @@ -316,24 +301,6 @@ FROM __check_orphaned_files; GRANT SELECT ON gp_check_orphaned_files TO public; --------------------------------------------------------------------------------- --- @view: --- gp_check_orphaned_files_ext --- --- @doc: --- User-facing view of __check_orphaned_files_ext. --- Gather results from coordinator and all segments. 
--- --------------------------------------------------------------------------------- -CREATE OR REPLACE VIEW gp_check_orphaned_files_ext AS -SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * -FROM gp_dist_random('__check_orphaned_files_ext') -UNION ALL -SELECT -1 AS gp_segment_id, * -FROM __check_orphaned_files; -- not checking ext on coordinator - -GRANT SELECT ON gp_check_orphaned_files_ext TO public; - -------------------------------------------------------------------------------- -- @view: -- gp_check_missing_files diff --git a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index a180015d21dd..cbfe088dca04 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -14,6 +14,7 @@ dispatch.out external_table.out filespace.out gpcopy.out +gp_check_files.out gptokencheck.out /gp_transactions.out /gp_tablespace_with_faults.out diff --git a/src/test/regress/input/gp_check_files.source b/src/test/regress/input/gp_check_files.source index 31bcf40f88d1..35898e3d5b1e 100644 --- a/src/test/regress/input/gp_check_files.source +++ b/src/test/regress/input/gp_check_files.source @@ -9,6 +9,7 @@ -- s/aovisimap_\d+/aovisimap_xxx/g -- end_matchsubs +-- start from a clean state checkpoint; create extension gp_check_functions; @@ -61,13 +62,12 @@ insert into checknormal_ao select i,i,i from generate_series(1,100)i; insert into checknormal_co select i,i,i from generate_series(1,100)i; -- check non-extended files -select gp_segment_id, filename from gp_check_orphaned_files; select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files; SET client_min_messages = ERROR; -- check extended files -select gp_segment_id, filename from gp_check_orphaned_files_ext; +select gp_segment_id, filename from gp_check_orphaned_files; select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files_ext; RESET client_min_messages; diff --git a/src/test/regress/output/gp_check_files.source b/src/test/regress/output/gp_check_files.source index 8f5fca89f8ac..82484d47124b 100644 --- a/src/test/regress/output/gp_check_files.source +++ b/src/test/regress/output/gp_check_files.source @@ -7,6 +7,7 @@ -- m/aovisimap_\d+/ -- s/aovisimap_\d+/aovisimap_xxx/g -- end_matchsubs +-- start from a clean state checkpoint; create extension gp_check_functions; -- we'll use a specific tablespace to test @@ -49,12 +50,6 @@ insert into checknormal_heap select i,i,i from generate_series(1,100)i; insert into checknormal_ao select i,i,i from generate_series(1,100)i; insert into checknormal_co select i,i,i from generate_series(1,100)i; -- check non-extended files -select gp_segment_id, filename from gp_check_orphaned_files; - gp_segment_id | filename ----------------+---------- - 1 | 987654 -(1 row) - select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files; gp_segment_id | regexp_replace | relname ---------------+----------------+------------------- @@ -63,7 +58,7 @@ select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_chec SET client_min_messages = ERROR; -- check extended files -select gp_segment_id, filename from gp_check_orphaned_files_ext; +select gp_segment_id, filename from gp_check_orphaned_files; gp_segment_id | filename ---------------+---------- 1 | 987654 diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index eced9e142fe2..7ec5f933e613 100644 --- a/src/test/regress/sql/.gitignore +++ 
b/src/test/regress/sql/.gitignore @@ -38,6 +38,7 @@ bb_mpph.sql transient_types.sql hooktest.sql gpcopy.sql +gp_check_files.sql trigger_sets_oid.sql query_info_hook_test.sql gp_tablespace.sql From 90b4548dd22d37d451573aa0f55eac65aa6f8e9f Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Tue, 19 Sep 2023 07:51:44 -0700 Subject: [PATCH 020/106] Improve gp_toolkit.gp_check_orphaned_files for more reliable results 6X backport of #16428. Mostly a clean merge except that in 6X the views are in gp_check_functions instead of gp_toolkit. Another difference is that 6X does not have cluster-wide gp_stat_activity and also pg_stat_activity does not show background backends (so we do not have the need to check 'backend_type'). So adjust accordingly. Original commit message: This commit mainly improves the gp_check_orphaned_files view in the sense that, since ultimately its user is likely going to remove the reported orphaned files, we would not like that to cause any potential issues due to causes like: * Main relation files that are associated with dropped tables are kept until the next CHECKPOINT in order to prevent potential issue with crash recovery (see comments for mdunlink()). So removing them would have issue. * Relation files that are created during an ongoing transactions could be recognized as orphaned: another session will see an old pg_class.relfilenode so it would think the new relfilenode is orphaned. So if one removes that, we might have data loss. So accordingly, the improvements are: * We should force a CHECKPOINT prior to collecting the orphaned file list. * We should exclude other activities that might change pg_class.relfilenode while running the view. This is done by locking pg_class in SHARE mode (which blocks writes but allows read) with "nowait" flag (which allows the lock attempt to immediately return so we are not blocked forever). We should also check pg_stat_activity to make sure that there is no idle transaction (because the idle transaction might've already modified pg_class and released the lock). In the new view we do that by simply making sure there's no concurrent client sessions. These steps will need to be written in a function. So the rewrite the gp_check_orphaned_files view to be SELECT'ing from a new UDF. Also improve the view results by adding relative path of each file being reported for convenience about further action of the files. For the test, adjusted a few places so that the new changes won't cause flakiness. --- .../gp_check_functions--1.0.0.sql | 109 +++++++++++++----- src/test/isolation2/expected/misc.out | 19 +++ src/test/isolation2/sql/misc.sql | 14 +++ src/test/regress/input/gp_check_files.source | 46 ++++++-- src/test/regress/output/gp_check_files.source | 52 +++++++-- 5 files changed, 194 insertions(+), 46 deletions(-) diff --git a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql index 1aa76c404cdc..ff5ff837665d 100644 --- a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql +++ b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql @@ -141,26 +141,24 @@ GRANT EXECUTE ON FUNCTION __get_aoco_segno_list() TO public; -------------------------------------------------------------------------------- -- return the list of existing files in the database CREATE OR REPLACE VIEW __get_exist_files AS --- 1. 
List of files in the default tablespace -SELECT 0 AS tablespace, filename -FROM pg_ls_dir('base/' || ( - SELECT d.oid::text - FROM pg_database d - WHERE d.datname = current_database() -)) -AS filename -UNION --- 2. List of files in the global tablespace -SELECT 1664 AS tablespace, filename -FROM pg_ls_dir('global/') -AS filename -UNION --- 3. List of files in user-defined tablespaces -SELECT ts.oid AS tablespace, - pg_ls_dir('pg_tblspc/' || ts.oid::text || '/' || get_tablespace_version_directory_name() || '/' || - (SELECT d.oid::text FROM pg_database d WHERE d.datname = current_database()), true/*missing_ok*/,false/*include_dot*/) AS filename -FROM pg_tablespace ts -WHERE ts.oid > 1664; +WITH Tablespaces AS ( +-- 1. The default tablespace + SELECT 0 AS tablespace, 'base/' || d.oid::text AS dirname + FROM pg_database d + WHERE d.datname = current_database() + UNION +-- 2. The global tablespace + SELECT 1664 AS tablespace, 'global/' AS dirname + UNION +-- 3. The user-defined tablespaces + SELECT ts.oid AS tablespace, + 'pg_tblspc/' || ts.oid::text || '/' || get_tablespace_version_directory_name() || '/' || + (SELECT d.oid::text FROM pg_database d WHERE d.datname = current_database()) AS dirname + FROM pg_tablespace ts + WHERE ts.oid > 1664 +) +SELECT tablespace, files.filename, dirname || '/' || files.filename AS filepath +FROM Tablespaces, pg_ls_dir(dirname) AS files(filename); GRANT SELECT ON __get_exist_files TO public; @@ -236,7 +234,7 @@ GRANT SELECT ON __get_expect_files_ext TO public; -- -------------------------------------------------------------------------------- CREATE OR REPLACE VIEW __check_orphaned_files AS -SELECT f1.tablespace, f1.filename +SELECT f1.tablespace, f1.filename, f1.filepath from __get_exist_files f1 LEFT JOIN __get_expect_files f2 ON f1.tablespace = f2.tablespace AND substring(f1.filename from '[0-9]+') = f2.filename @@ -245,6 +243,69 @@ WHERE f2.tablespace IS NULL GRANT SELECT ON __check_orphaned_files TO public; +-------------------------------------------------------------------------------- +-- @function: +-- __gp_check_orphaned_files_func +-- +-- @in: +-- +-- @out: +-- gp_segment_id int - segment content ID +-- tablespace oid - tablespace OID +-- filename text - name of the orphaned file +-- filepath text - relative path of the orphaned file in data directory +-- +-- @doc: +-- (Internal UDF, shouldn't be exposed) +-- UDF to retrieve orphaned files and their paths +-- +-------------------------------------------------------------------------------- + +CREATE OR REPLACE FUNCTION __gp_check_orphaned_files_func() +RETURNS TABLE ( + gp_segment_id int, + tablespace oid, + filename text, + filepath text +) +LANGUAGE plpgsql AS $$ +BEGIN + BEGIN + -- lock pg_class so that no one will be adding/altering relfilenodes + LOCK TABLE pg_class IN SHARE MODE NOWAIT; + + -- make sure no other active/idle transaction is running + IF EXISTS ( + SELECT 1 + FROM (SELECT * from pg_stat_activity UNION ALL SELECT * FROM gp_dist_random('pg_stat_activity'))q + WHERE + sess_id <> -1 + AND sess_id <> current_setting('gp_session_id')::int -- Exclude the current session + ) THEN + RAISE EXCEPTION 'There is a client session running on one or more segment. 
Aborting...'; + END IF; + + -- force checkpoint to make sure we do not include files that are normally pending delete + CHECKPOINT; + + RETURN QUERY + SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * + FROM gp_dist_random('__check_orphaned_files') + UNION ALL + SELECT -1 AS gp_segment_id, * + FROM __check_orphaned_files; + EXCEPTION + WHEN lock_not_available THEN + RAISE EXCEPTION 'cannot obtain SHARE lock on pg_class'; + WHEN OTHERS THEN + RAISE; + END; + + RETURN; +END; +$$; +GRANT EXECUTE ON FUNCTION __gp_check_orphaned_files_func() TO public; + -------------------------------------------------------------------------------- -- @view: -- __check_missing_files @@ -293,11 +354,7 @@ GRANT SELECT ON __check_missing_files_ext TO public; -- -------------------------------------------------------------------------------- CREATE OR REPLACE VIEW gp_check_orphaned_files AS -SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * -FROM gp_dist_random('__check_orphaned_files') -UNION ALL -SELECT -1 AS gp_segment_id, * -FROM __check_orphaned_files; +SELECT * FROM __gp_check_orphaned_files_func(); GRANT SELECT ON gp_check_orphaned_files TO public; diff --git a/src/test/isolation2/expected/misc.out b/src/test/isolation2/expected/misc.out index 0fc8dff0dee2..e2edac1654d0 100644 --- a/src/test/isolation2/expected/misc.out +++ b/src/test/isolation2/expected/misc.out @@ -51,3 +51,22 @@ CREATE -- 0U: create table utilitymode_pt_lt_tab (col1 int, col2 decimal) distributed by (col1) partition by list(col2) (partition part1 values(1)); ERROR: cannot create partition table in utility mode + +-- +-- gp_check_orphaned_files should not be running with concurrent transaction (even idle) +-- +-- use a different database to do the test, otherwise we might be reporting tons +-- of orphaned files produced by the many intential PANICs/restarts in the isolation2 tests. +create database check_orphaned_db; +CREATE +1:@db_name check_orphaned_db: create extension gp_check_functions; +CREATE +1:@db_name check_orphaned_db: begin; +BEGIN +2:@db_name check_orphaned_db: select * from gp_check_orphaned_files; +ERROR: There is a client session running on one or more segment. Aborting... +1q: ... +2q: ... + +drop database check_orphaned_db; +DROP diff --git a/src/test/isolation2/sql/misc.sql b/src/test/isolation2/sql/misc.sql index 9c02970b35fc..248a47fb5e34 100644 --- a/src/test/isolation2/sql/misc.sql +++ b/src/test/isolation2/sql/misc.sql @@ -38,3 +38,17 @@ -- 0U: create table utilitymode_pt_lt_tab (col1 int, col2 decimal) distributed by (col1) partition by list(col2) (partition part1 values(1)); + +-- +-- gp_check_orphaned_files should not be running with concurrent transaction (even idle) +-- +-- use a different database to do the test, otherwise we might be reporting tons +-- of orphaned files produced by the many intential PANICs/restarts in the isolation2 tests. 
+create database check_orphaned_db; +1:@db_name check_orphaned_db: create extension gp_check_functions; +1:@db_name check_orphaned_db: begin; +2:@db_name check_orphaned_db: select * from gp_check_orphaned_files; +1q: +2q: + +drop database check_orphaned_db; diff --git a/src/test/regress/input/gp_check_files.source b/src/test/regress/input/gp_check_files.source index 35898e3d5b1e..b4969f509700 100644 --- a/src/test/regress/input/gp_check_files.source +++ b/src/test/regress/input/gp_check_files.source @@ -9,11 +9,33 @@ -- s/aovisimap_\d+/aovisimap_xxx/g -- end_matchsubs --- start from a clean state -checkpoint; - create extension gp_check_functions; +-- helper function to repeatedly run gp_check_orphaned_files for up to 10 minutes, +-- in case any flakiness happens (like background worker makes LOCK pg_class unsuccessful etc.) +CREATE OR REPLACE FUNCTION run_orphaned_files_view() +RETURNS TABLE(gp_segment_id INT, filename TEXT) AS $$ +DECLARE + retry_counter INT := 0; +BEGIN + WHILE retry_counter < 120 LOOP + BEGIN + RETURN QUERY SELECT q.gp_segment_id, q.filename FROM gp_check_orphaned_files q; + RETURN; -- If successful + EXCEPTION + WHEN OTHERS THEN + RAISE LOG 'attempt failed % with error: %', retry_counter + 1, SQLERRM; + -- When an exception occurs, wait for 5 seconds and then retry + PERFORM pg_sleep(5); + retry_counter := retry_counter + 1; + END; + END LOOP; + + -- all retries failed + RAISE EXCEPTION 'failed to retrieve orphaned files after 10 minutes of retries.'; +END; +$$ LANGUAGE plpgsql; + -- we'll use a specific tablespace to test CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@'; set default_tablespace = checkfile_ts; @@ -31,6 +53,19 @@ select get_tablespace_version_directory_name() as version_dir \gset select oid from pg_database where datname = current_database() \gset \cd :oid +-- create some orphaned files +\! touch 987654 +\! touch 987654.3 + +-- check orphaned files, note that this forces a checkpoint internally. +set client_min_messages = ERROR; +select gp_segment_id, filename from run_orphaned_files_view(); +reset client_min_messages; + +-- remove the orphaned files so not affect subsequent tests +\! rm 987654 +\! rm 987654.3 + -- Now remove the data file for the table we just created. -- But check to see if the working directory is what we expect (under -- the test tablespace). Also just delete one and only one file that @@ -49,10 +84,6 @@ insert into checkmissing_co select i,i,i from generate_series(1,100)i; \! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi \! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi --- create some orphaned files -\! touch 987654 -\! 
touch 987654.3 - -- create some normal tables CREATE TABLE checknormal_heap(a int, b int, c int); CREATE TABLE checknormal_ao(a int, b int, c int) WITH (appendonly=true, orientation=row); @@ -67,7 +98,6 @@ select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_chec SET client_min_messages = ERROR; -- check extended files -select gp_segment_id, filename from gp_check_orphaned_files; select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files_ext; RESET client_min_messages; diff --git a/src/test/regress/output/gp_check_files.source b/src/test/regress/output/gp_check_files.source index 82484d47124b..4d638b8c24db 100644 --- a/src/test/regress/output/gp_check_files.source +++ b/src/test/regress/output/gp_check_files.source @@ -7,9 +7,31 @@ -- m/aovisimap_\d+/ -- s/aovisimap_\d+/aovisimap_xxx/g -- end_matchsubs --- start from a clean state -checkpoint; create extension gp_check_functions; +-- helper function to repeatedly run gp_check_orphaned_files for up to 10 minutes, +-- in case any flakiness happens (like background worker makes LOCK pg_class unsuccessful etc.) +CREATE OR REPLACE FUNCTION run_orphaned_files_view() +RETURNS TABLE(gp_segment_id INT, filename TEXT) AS $$ +DECLARE + retry_counter INT := 0; +BEGIN + WHILE retry_counter < 120 LOOP + BEGIN + RETURN QUERY SELECT q.gp_segment_id, q.filename FROM gp_check_orphaned_files q; + RETURN; -- If successful + EXCEPTION + WHEN OTHERS THEN + RAISE LOG 'attempt failed % with error: %', retry_counter + 1, SQLERRM; + -- When an exception occurs, wait for 5 seconds and then retry + PERFORM pg_sleep(5); + retry_counter := retry_counter + 1; + END; + END LOOP; + + -- all retries failed + RAISE EXCEPTION 'failed to retrieve orphaned files after 10 minutes of retries.'; +END; +$$ LANGUAGE plpgsql; -- we'll use a specific tablespace to test CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@'; set default_tablespace = checkfile_ts; @@ -24,6 +46,22 @@ select get_tablespace_version_directory_name() as version_dir \gset \cd :version_dir select oid from pg_database where datname = current_database() \gset \cd :oid +-- create some orphaned files +\! touch 987654 +\! touch 987654.3 +-- check orphaned files, note that this forces a checkpoint internally. +set client_min_messages = ERROR; +select gp_segment_id, filename from run_orphaned_files_view(); + gp_segment_id | filename +---------------+---------- + 1 | 987654.3 + 1 | 987654 +(2 rows) + +reset client_min_messages; +-- remove the orphaned files so not affect subsequent tests +\! rm 987654 +\! rm 987654.3 -- Now remove the data file for the table we just created. -- But check to see if the working directory is what we expect (under -- the test tablespace). Also just delete one and only one file that @@ -39,9 +77,6 @@ insert into checkmissing_co select i,i,i from generate_series(1,100)i; -- delete exact two '.1' files. \! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi \! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi --- create some orphaned files -\! touch 987654 -\! 
touch 987654.3 -- create some normal tables CREATE TABLE checknormal_heap(a int, b int, c int); CREATE TABLE checknormal_ao(a int, b int, c int) WITH (appendonly=true, orientation=row); @@ -58,13 +93,6 @@ select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_chec SET client_min_messages = ERROR; -- check extended files -select gp_segment_id, filename from gp_check_orphaned_files; - gp_segment_id | filename ----------------+---------- - 1 | 987654 - 1 | 987654.3 -(2 rows) - select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_check_missing_files_ext; gp_segment_id | regexp_replace | relname ---------------+----------------+------------------- From 625eb68d9e179765b86c0ca3320cc57c20dd22f9 Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Wed, 27 Sep 2023 18:34:30 +0100 Subject: [PATCH 021/106] Docs: updated the Software Dependencies page for 6 (#16515) --- .../platform-requirements-overview.md.hbs | 49 ++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs index 526b34da466d..0a404057b7c4 100644 --- a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs +++ b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs @@ -40,58 +40,73 @@ Greenplum Database 6 requires the following software packages on RHEL/CentOS 6/7 - bash - bzip2 - curl -- krb5 +- iproute +- krb5-devel - libcgroup (RHEL/CentOS 6) -- libcgroup-tools (RHEL/CentOS 7) +- libcgroup-tools (RHEL/CentOS 7 and RHEL/CentOS 8) - libcurl -- libevent +- libevent (RHEL/CentOS 7 and RHEL/CentOS 8) +- libeven2 (RHEL/CentOS 6) +- libuuid - libxml2 - libyaml -- zlib +- less +- net-tools (Debian/Fedora) - openldap +- openssh - openssh-client +- openssh-server - openssl -- openssl-libs \(RHEL7/Centos7\) +- openssl-libs (RHEL/CentOS 7 and RHEL/CentOS 8) - perl - readline - rsync -- R -- sed \(used by `gpinitsystem`\) +- sed - tar +- which - zip +- zlib VMware Greenplum Database 6 client software requires these operating system packages: - apr -- apr-util +- bzip2 +- libedit - libyaml -- libevent +- libevent (RHEL/CentOS 7 and RHEL/CentOS 8) +- libevent2 (RHEL/CentOS 6) +- openssh +- zlib On Ubuntu systems, Greenplum Database 6 requires the following software packages, which are installed automatically as dependencies when you install Greenplum Database with the Debian package installer: -- libapr1 -- libaprutil1 - bash - bzip2 +- iproute2 +- iputils-ping - krb5-multidev +- libapr1 +- libaprutil1 - libcurl3-gnutls - libcurl4 - libevent-2.1-6 +- libldap-2.4-2 +- libreadline7 or libreadline8 +- libuuid1 - libxml2 - libyaml-0-2 -- zlib1g -- libldap-2.4-2 +- less +- locales +- net-tools - openssh-client +- openssh-server - openssl - perl -- readline - rsync - sed - tar - zip -- net-tools -- less -- iproute2 +- zlib1g Greenplum Database 6 uses Python 2.7.18, which is included with the product installation \(and not installed as a package dependency\). 
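Taken together, the gp_check_functions changes in the patches above can be exercised by hand. The following is a minimal sketch, not part of any patch, assuming the `gp_check_functions` extension is registered in the current database and that no other client sessions are active (the reworked `gp_check_orphaned_files` forces a CHECKPOINT and takes a SHARE lock on `pg_class` with NOWAIT, so it errors out if it cannot do so):

```sql
-- Register the extension (6X ships these views in gp_check_functions).
CREATE EXTENSION IF NOT EXISTS gp_check_functions;

-- Orphaned files: files on disk with no owning relation. Columns follow the
-- reworked view: gp_segment_id, tablespace, filename, filepath.
SELECT gp_segment_id, tablespace, filename, filepath
FROM gp_check_orphaned_files;

-- Missing files: relations whose data files are absent on disk.
SELECT gp_segment_id, relname, filename
FROM gp_check_missing_files;

-- Missing extended (.n) files for append-optimized, column-oriented tables.
SELECT gp_segment_id, relname, filename
FROM gp_check_missing_files_ext;
```

As the regression test's `run_orphaned_files_view()` helper does, a script would likely retry the orphaned-files query, since a concurrent session or a background lock can make the SHARE lock attempt fail.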
From b55063283ba3616442bd64648d9922203a3f0366 Mon Sep 17 00:00:00 2001 From: David Yozie Date: Wed, 27 Sep 2023 10:37:51 -0700 Subject: [PATCH 022/106] Docs - remaining feedback for #16515 --- .../platform-requirements-overview.md.hbs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs index 0a404057b7c4..995d11f15964 100644 --- a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs +++ b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs @@ -43,10 +43,10 @@ Greenplum Database 6 requires the following software packages on RHEL/CentOS 6/7 - iproute - krb5-devel - libcgroup (RHEL/CentOS 6) -- libcgroup-tools (RHEL/CentOS 7 and RHEL/CentOS 8) +- libcgroup-tools (RHEL/CentOS 7 and RHEL/Rocky 8) - libcurl -- libevent (RHEL/CentOS 7 and RHEL/CentOS 8) -- libeven2 (RHEL/CentOS 6) +- libevent (RHEL/CentOS 7 and RHEL/Rocky 8) +- libevent2 (RHEL/CentOS 6) - libuuid - libxml2 - libyaml @@ -57,7 +57,7 @@ Greenplum Database 6 requires the following software packages on RHEL/CentOS 6/7 - openssh-client - openssh-server - openssl -- openssl-libs (RHEL/CentOS 7 and RHEL/CentOS 8) +- openssl-libs (RHEL/CentOS 7 and RHEL/Rocky 8) - perl - readline - rsync @@ -73,7 +73,7 @@ VMware Greenplum Database 6 client software requires these operating system pack - bzip2 - libedit - libyaml -- libevent (RHEL/CentOS 7 and RHEL/CentOS 8) +- libevent (RHEL/CentOS 7 and RHEL/Rocky 8) - libevent2 (RHEL/CentOS 6) - openssh - zlib From be48d55c3d636879e11945c5aa37a022d195fd8f Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 29 Jan 2020 15:43:32 +0100 Subject: [PATCH 023/106] Fail if recovery target is not reached Backported from GPDB7: https://github.com/greenplum-db/gpdb/commit/a93ab09e380a62240e8ea99d1c17f5385def8673 with the following changes: - In the TAP test framework added the `standby` parameter for the enable_restoring function to distinguish between standby and recovery which in GPDB7 is done via .signal files. - In the TAP test put recovery_target_name into recovery.conf file instead of postgresql.conf - In the TAP test added variables declaration. Original GPDB7 commit message: Backported from upstream with change in the test: diff: - run_log(['pg_ctl', '-D', $node_standby->data_dir, - '-l', $node_standby->logfile, 'start']); + run_log(['pg_ctl', '-D', $node_standby->data_dir, '-l', $node_standby->logfile, '-o', "-c gp_role=utility --gp_dbid=$node_standby->{_dbid} --gp_contentid=0 -c maintenance_mode=on", 'start']); Original Postgres commit on REL_13_BETA1: https://github.com/postgres/postgres/commit/dc788668bb269b10a108e87d14fefd1b9301b793 Original Postgres commit message: Before, if a recovery target is configured, but the archive ended before the target was reached, recovery would end and the server would promote without further notice. That was deemed to be pretty wrong. With this change, if the recovery target is not reached, it is a fatal error. 
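To make the new failure mode concrete: `recovery_target_name` refers to a named restore point created on the primary, and if that label is never created (as with the TAP test's `does_not_exist`), archive replay now ends with the new FATAL error instead of silently promoting. A brief sketch; the label here is hypothetical, not taken from the patch:

```sql
-- Create a named restore point that recovery_target_name can later refer to.
SELECT pg_create_restore_point('before_schema_change');
```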
Based-on-patch-by: Leif Gunnar Erlandsen Reviewed-by: Kyotaro Horiguchi Discussion: https://www.postgresql.org/message-id/flat/993736dd3f1713ec1f63fc3b653839f5@lako.no --- src/backend/access/transam/xlog.c | 19 ++++++++++++---- src/test/perl/PostgresNode.pm | 11 +++++++--- src/test/recovery/t/003_recovery_targets.pl | 24 ++++++++++++++++++++- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 50e0cbb0eff7..def5332d9b87 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6526,7 +6526,7 @@ StartupXLOG(void) XLogCtlInsert *Insert; CheckPoint checkPoint; bool wasShutdown; - bool reachedStopPoint = false; + bool reachedRecoveryTarget = false; bool haveBackupLabel = false; XLogRecPtr RecPtr, checkPointLoc, @@ -7353,7 +7353,7 @@ StartupXLOG(void) */ if (recoveryStopsBefore(record)) { - reachedStopPoint = true; /* see below */ + reachedRecoveryTarget = true; break; } @@ -7529,7 +7529,7 @@ StartupXLOG(void) /* Exit loop if we reached inclusive recovery target */ if (recoveryStopsAfter(record)) { - reachedStopPoint = true; + reachedRecoveryTarget = true; break; } @@ -7541,7 +7541,7 @@ StartupXLOG(void) * end of main redo apply loop */ - if (reachedStopPoint) + if (reachedRecoveryTarget) { if (!reachedConsistency) ereport(FATAL, @@ -7597,7 +7597,18 @@ StartupXLOG(void) /* there are no WAL records following the checkpoint */ ereport(LOG, (errmsg("redo is not required"))); + } + + /* + * This check is intentionally after the above log messages that + * indicate how far recovery went. + */ + if (ArchiveRecoveryRequested && + recoveryTarget != RECOVERY_TARGET_UNSET && + !reachedRecoveryTarget) + ereport(FATAL, + (errmsg("recovery ended before configured recovery target was reached"))); } else { diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 7a64a7663267..57f0cf351c24 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -609,6 +609,9 @@ Restoring WAL segments from archives using restore_command can be enabled by passing the keyword parameter has_restoring => 1. This is disabled by default. +If has_restoring is used, standby mode is used by default. To use +recovery mode instead, pass the keyword parameter standby => 0. + The backup is copied, leaving the original unmodified. pg_hba.conf is unconditionally set to enable replication connections. 
@@ -625,6 +628,7 @@ sub init_from_backup $params{has_streaming} = 0 unless defined $params{has_streaming}; $params{has_restoring} = 0 unless defined $params{has_restoring}; + $params{standby} = 1 unless defined $params{standby}; print "# Initializing node \"$node_name\" from backup \"$backup_name\" of node \"$root_name\"\n"; @@ -655,7 +659,7 @@ port = $port "unix_socket_directories = '$host'"); } $self->enable_streaming($root_node) if $params{has_streaming}; - $self->enable_restoring($root_node) if $params{has_restoring}; + $self->enable_restoring($root_node, $params{standby}) if $params{has_restoring}; } =pod @@ -849,7 +853,7 @@ standby_mode=on # Internal routine to enable archive recovery command on a standby node sub enable_restoring { - my ($self, $root_node) = @_; + my ($self, $root_node, $standby) = @_; my $path = TestLib::perl2host($root_node->archive_dir); my $name = $self->name; @@ -870,8 +874,9 @@ sub enable_restoring $self->append_conf( 'recovery.conf', qq( restore_command = '$copy_command' -standby_mode = on +standby_mode = $standby )); + return; } # Internal routine to enable archiving diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 2e967b6cbe4f..ffb50ecb0c86 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -3,7 +3,8 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests =>6; +use Test::More tests =>7; +use Time::HiRes qw(usleep); # Create and test a standby from given backup, with a certain recovery target. # Choose $until_lsn later than the transaction commit that causes the row @@ -128,3 +129,24 @@ sub test_recovery_standby "recovery_target_time = '$recovery_time'"); test_recovery_standby('multiple conflicting settings', 'standby_6', $node_master, \@recovery_params, "3000", $lsn3); + +# Check behavior when recovery ends before target is reached + +my $node_standby = get_new_node('standby_8'); +$node_standby->init_from_backup($node_master, 'my_backup', + has_restoring => 1, standby => 0); +$node_standby->append_conf('recovery.conf', + "recovery_target_name = 'does_not_exist'"); +run_log(['pg_ctl', '-w', '-D', $node_standby->data_dir, '-l', + $node_standby->logfile, '-o', "-c gp_role=utility --gp_dbid=$node_standby->{_dbid} --gp_contentid=0", + 'start']); + +# wait up to 10 seconds for postgres to terminate +foreach my $i (0..100) +{ + last if ! -f $node_standby->data_dir . '/postmaster.pid'; + usleep(100_000); +} +my $logfile = slurp_file($node_standby->logfile()); +ok($logfile =~ qr/FATAL: recovery ended before configured recovery target was reached/, + 'recovery end before target reached is a fatal error'); From 7289215ace954695daeee5d5fdb25b69fed46038 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 25 Apr 2020 18:45:27 -0700 Subject: [PATCH 024/106] Raise a timeout to 180s, in test 003_recovery_targets.pl. Backported from Postgres REL_13_BETA1 postgres/postgres@8961355 Original commit message: Buildfarm member chipmunk has failed twice due to taking >30s, and twenty-four runs of other members have used >5s. The test is new in v13, so no back-patch. 
--- src/test/recovery/t/003_recovery_targets.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index ffb50ecb0c86..659781245db8 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -141,8 +141,8 @@ sub test_recovery_standby $node_standby->logfile, '-o', "-c gp_role=utility --gp_dbid=$node_standby->{_dbid} --gp_contentid=0", 'start']); -# wait up to 10 seconds for postgres to terminate -foreach my $i (0..100) +# wait up to 180s for postgres to terminate +foreach my $i (0..1800) { last if ! -f $node_standby->data_dir . '/postmaster.pid'; usleep(100_000); From 0fa944d967598d26088914db1a737a6cecc59bd5 Mon Sep 17 00:00:00 2001 From: Wenru Yan <94830465+yanwr1@users.noreply.github.com> Date: Fri, 29 Sep 2023 08:45:26 +0800 Subject: [PATCH 025/106] print log message with write_stderr when reach vmem or resgroup limit. When memory usage have reached Vmem limit or resource group limit, it will loop in gp_malloc and gp_failed_to_alloc if new allocation happens, and then errors out with "ERRORDATA_STACK_SIZE exceeded". We are therefore printing the log message header using write_stderr. (cherry picked from commit a7210a4be2068abfe7c5e2ee135016d5b8c77cd8) --- src/backend/utils/mmgr/memprot.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/mmgr/memprot.c b/src/backend/utils/mmgr/memprot.c index 15e3aa01c67b..1c46b15b3718 100644 --- a/src/backend/utils/mmgr/memprot.c +++ b/src/backend/utils/mmgr/memprot.c @@ -315,7 +315,13 @@ static void gp_failed_to_alloc(MemoryAllocationStatus ec, int en, int sz) } else if (ec == MemoryFailure_VmemExhausted) { - elog(LOG, "Logging memory usage for reaching Vmem limit"); + /* + * The memory usage have reached Vmem limit, it will loop in gp_malloc + * and gp_failed_to_alloc if new allocation happens, and then errors out + * with "ERRORDATA_STACK_SIZE exceeded". We are therefore printing the + * log message header using write_stderr. + */ + write_stderr("Logging memory usage for reaching Vmem limit"); } else if (ec == MemoryFailure_SystemMemoryExhausted) { @@ -330,7 +336,10 @@ static void gp_failed_to_alloc(MemoryAllocationStatus ec, int en, int sz) } else if (ec == MemoryFailure_ResourceGroupMemoryExhausted) { - elog(LOG, "Logging memory usage for reaching resource group limit"); + /* + * The behavior in resgroup group mode is the same as MemoryFailure_VmemExhausted. 
+ */ + write_stderr("Logging memory usage for reaching resource group limit"); } else elog(ERROR, "Unknown memory failure error code"); From 1a027eb5163ce189f2e830317acfe293b3753b78 Mon Sep 17 00:00:00 2001 From: Shaoqi Bai Date: Tue, 3 Oct 2023 12:51:16 -0700 Subject: [PATCH 026/106] Update respsonse size from https://www.bing.com/ When curl with empty header to https://www.bing.com/, the response size used to be more than 10000, but it's not the case anymore, it's 5594 now, so update test to be more than 1000 to match the change Authored-by: Shaoqi Bai --- gpcontrib/gpcloud/test/s3restful_service_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpcontrib/gpcloud/test/s3restful_service_test.cpp b/gpcontrib/gpcloud/test/s3restful_service_test.cpp index f921c43a4359..39932b3323c1 100644 --- a/gpcontrib/gpcloud/test/s3restful_service_test.cpp +++ b/gpcontrib/gpcloud/test/s3restful_service_test.cpp @@ -25,7 +25,7 @@ TEST(S3RESTfulService, GetWithEmptyHeader) { EXPECT_EQ(RESPONSE_OK, resp.getStatus()); EXPECT_EQ("Success", resp.getMessage()); - EXPECT_EQ(true, resp.getRawData().size() > 10000); + EXPECT_EQ(true, resp.getRawData().size() > 1000); } TEST(S3RESTfulService, GetWithoutURL) { From 367edc6b4dfd909fe38fc288ade9e294d74e3f9a Mon Sep 17 00:00:00 2001 From: Nihal Jain Date: Wed, 13 Sep 2023 18:25:54 +0530 Subject: [PATCH 027/106] gpexpand: Fix error when database has tablespaces **Issue:** Currently `gpexpand` errors out whenever it is run using a user-created input file (not created using the `gpexpand` interview process) on a cluster that has `custom tablespaces` created with the following error - ``` $ cat gpexpand_inputfile_20230914_201220 jnihal3MD6M.vmware.com|jnihal3MD6M.vmware.com|7005|/tmp/demoDataDir3|5|3|p $ gpexpand -i gpexpand_inputfile_20230914_201220 20230914:20:13:04:066896 gpexpand:jnihal3MD6M:jnihal-[ERROR]:-gpexpand failed: [Errno 2] No such file or directory: 'gpexpand_inputfile_20230914_201220.ts' ``` **RCA:** This is happening due to the commit 9b70ba8698f656c40ee62e5519314f7db1e4655e. This commit introduced a change, where it requires `gpexpand` to have a separate tablespace input configuration file (`.ts`) whenever there are `custom tablespaces` in the database. However, this file only gets created whenever the user uses the `gpexpand` interview process to create the input file. In cases where the user manually creates the input file, the tablespace file is missing which causes the above error. **Fix:** Add a check in the `read_tablespace_file()` function to assert if the file is present or not. In cases where the file is not present, create the file automatically and exit from the process to give users a chance to review them (if they want to change the `tablespace` location) and prompt them to re-run `gpexpand`. The call to the `read_tablespace_file()` is also moved before we start the expansion process. This is because we want to exit from the process before we start the expansion so that the user does not have to `rollback` when they re-run `gpexpand`. ``` $ gpexpand -i gpexpand_inputfile_20230914_201220 20230914:20:24:00:014186 gpexpand:jnihal3MD6M:jnihal-[WARNING]:-Could not locate tablespace input configuration file 'gpexpand_inputfile_20230914_201220.ts'. A new tablespace input configuration file is written to 'gpexpand_inputfile_20230914_201220.ts'. Please review the file and re-run with: gpexpand -i gpexpand_inputfile_20230914_201220 20230914:20:24:00:014186 gpexpand:jnihal3MD6M:jnihal-[INFO]:-Exiting... 
$ gpexpand -i gpexpand_inputfile_20230914_201220 --> re-run with the same input file ``` --- gpMgmt/bin/gpexpand | 16 ++++++++++- .../test/behave/mgmt_utils/gpexpand.feature | 27 +++++++++++++++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 11 ++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/gpMgmt/bin/gpexpand b/gpMgmt/bin/gpexpand index 7e9e5be6dc52..0a1f5466a2e3 100755 --- a/gpMgmt/bin/gpexpand +++ b/gpMgmt/bin/gpexpand @@ -1258,6 +1258,20 @@ class gpexpand: tablespace_inputfile = self.options.filename + ".ts" + """ + Check if the tablespace input file exists or not + In cases where the user manually creates an input file, the file + will not be present. In such cases create the file and exit giving the + user a chance to review it and re-run gpexpand. + """ + if not os.path.exists(tablespace_inputfile): + self.generate_tablespace_inputfile(tablespace_inputfile) + self.logger.warning("Could not locate tablespace input configuration file '{0}'. A new tablespace input configuration file is written " \ + "to '{0}'. Please review the file and re-run with: gpexpand -i {1}".format(tablespace_inputfile, self.options.filename)) + + logger.info("Exiting...") + sys.exit(1) + new_tblspc_info = {} with open(tablespace_inputfile) as f: @@ -2573,10 +2587,10 @@ def main(options, args, parser): _gp_expand.validate_heap_checksums() newSegList = _gp_expand.read_input_files() _gp_expand.addNewSegments(newSegList) + newTableSpaceInfo = _gp_expand.read_tablespace_file() _gp_expand.sync_packages() _gp_expand.start_prepare() _gp_expand.lock_catalog() - newTableSpaceInfo = _gp_expand.read_tablespace_file() _gp_expand.add_segments(newTableSpaceInfo) _gp_expand.update_original_segments() _gp_expand.cleanup_new_segments() diff --git a/gpMgmt/test/behave/mgmt_utils/gpexpand.feature b/gpMgmt/test/behave/mgmt_utils/gpexpand.feature index 246d70d4a675..60515272ed80 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpexpand.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpexpand.feature @@ -229,6 +229,33 @@ Feature: expand the cluster by adding more segments When the user runs gpexpand to redistribute Then the tablespace is valid after gpexpand + @gpexpand_no_mirrors + Scenario: expand a cluster with tablespace when there is no tablespace configuration file + Given the database is not running + And a working directory of the test as '/data/gpdata/gpexpand' + And the user runs command "rm -rf /data/gpdata/gpexpand/*" + And a temporary directory under "/data/gpdata/gpexpand/expandedData" to expand into + And a cluster is created with no mirrors on "cdw" and "sdw1" + And database "gptest" exists + And a tablespace is created with data + And another tablespace is created with data + And there are no gpexpand_inputfiles + And the cluster is setup for an expansion on hosts "cdw" + And the user runs gpexpand interview to add 1 new segment and 0 new host "ignore.host" + And the number of segments have been saved + And there are no gpexpand tablespace input configuration files + When the user runs gpexpand with the latest gpexpand_inputfile without ret code check + Then gpexpand should return a return code of 1 + And gpexpand should print "[WARNING]:-Could not locate tablespace input configuration file" escaped to stdout + And gpexpand should print "A new tablespace input configuration file is written to" escaped to stdout + And gpexpand should print "Please review the file and re-run with: gpexpand -i" escaped to stdout + And verify if a gpexpand tablespace input configuration file is created + When the user 
runs gpexpand with the latest gpexpand_inputfile with additional parameters "--silent" + And verify that the cluster has 1 new segments + And all the segments are running + When the user runs gpexpand to redistribute + Then the tablespace is valid after gpexpand + @gpexpand_verify_redistribution Scenario: Verify data is correctly redistributed after expansion Given the database is not running diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index bf4cd5cd8bd1..799da36f65b4 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -3050,6 +3050,17 @@ def impl(context, num_of_segments, num_of_hosts, hostnames): def impl(context): map(os.remove, glob.glob("gpexpand_inputfile*")) +@given('there are no gpexpand tablespace input configuration files') +def impl(context): + list(map(os.remove, glob.glob("{}/*.ts".format(context.working_directory)))) + if len(glob.glob('{}/*.ts'.format(context.working_directory))) != 0: + raise Exception("expected no gpexpand tablespace input configuration files") + +@then('verify if a gpexpand tablespace input configuration file is created') +def impl(context): + if len(glob.glob('{}/*.ts'.format(context.working_directory))) != 1: + raise Exception("expected gpexpand tablespace input configuration file to be created") + @when('the user runs gpexpand with the latest gpexpand_inputfile with additional parameters {additional_params}') def impl(context, additional_params=''): gpexpand = Gpexpand(context, working_directory=context.working_directory) From e7a6fad7594a4211e04e1c45760ff47a494ac58b Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Wed, 4 Oct 2023 10:48:34 -0600 Subject: [PATCH 028/106] docs - correct greenplum_fdw resgroup create command (#16500) --- gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md b/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md index 0da8f73d906e..5ff18f371993 100644 --- a/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/greenplum_fdw.html.md @@ -179,7 +179,7 @@ Remote cluster (1) configuration: 1. 
Create a dedicated resource group to manage resources for these users: ``` - CREATE RESOURCE GROUP rg_gpcluster2_users with (concurrency=2, cpu_max_percent=20); + CREATE RESOURCE GROUP rg_gpcluster2_users with (concurrency=2, cpu_rate_limit=20, memory_limit=10); ALTER ROLE gpcluster2_users RESOURCE GROUP rg_gpcluster2_users; ``` From c0c3294ae2aa721b200732babca5b93e16aa62d6 Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Wed, 4 Oct 2023 10:56:35 -0600 Subject: [PATCH 029/106] docs - update query on pg_stat_all_indexes ref page to remove dup indexes (#16535) --- .../markdown/ref_guide/system_catalogs/pg_stat_indexes.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/ref_guide/system_catalogs/pg_stat_indexes.html.md b/gpdb-doc/markdown/ref_guide/system_catalogs/pg_stat_indexes.html.md index 742cdfee8f12..99ba30da5b6d 100644 --- a/gpdb-doc/markdown/ref_guide/system_catalogs/pg_stat_indexes.html.md +++ b/gpdb-doc/markdown/ref_guide/system_catalogs/pg_stat_indexes.html.md @@ -56,7 +56,7 @@ FROM SELECT * FROM pg_stat_all_indexes WHERE relid < 16384) m, pg_stat_all_indexes s -WHERE m.relid = s.relid; +WHERE m.relid = s.relid AND m.indexrelid = s.indexrelid; CREATE VIEW pg_stat_sys_indexes_gpdb6 AS From 3f89e626ed5401f767d7f7946688e7a4f169d102 Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Wed, 4 Oct 2023 15:27:28 -0600 Subject: [PATCH 030/106] docs - add filepath column to gp_check_orphaned_files (#16542) * docs - add filepath column to gp_check_orphaned_files * add a caution --- .../markdown/ref_guide/gp_toolkit.html.md | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/gpdb-doc/markdown/ref_guide/gp_toolkit.html.md b/gpdb-doc/markdown/ref_guide/gp_toolkit.html.md index 61104782fa91..970a691f62a3 100644 --- a/gpdb-doc/markdown/ref_guide/gp_toolkit.html.md +++ b/gpdb-doc/markdown/ref_guide/gp_toolkit.html.md @@ -874,6 +874,58 @@ This external table runs the `df` \(disk free\) command on the active segment ho |dfdevice|The device name| |dfspace|Free disk space in the segment file system in kilobytes| +## Checking for Missing and Orphaned Data Files + +Greenplum Database considers a relation data file that is present in the catalog, but not on disk, to be missing. Conversely, when Greenplum encounters an unexpected data file on disk that is not referenced in any relation, it considers that file to be orphaned. + +Greenplum Database provides the following views to help identify if missing or orphaned files exist in the current database: + +- [gp_check_orphaned_files](#mf_orphaned) +- [gp_check_missing_files](#mf_missing) +- [gp_check_missing_files_ext](#mf_missing_ext) + +Consider it a best practice to check for these conditions prior to expanding the cluster or before offline maintenance. + +By default, the views identified in this section are available to `PUBLIC`. + +### gp_check_orphaned_files + +The `gp_check_orphaned_files` view scans the default and user-defined tablespaces for orphaned data files. Greenplum Database considers normal data files, files with an underscore (`_`) in the name, and extended numbered files (files that contain a `.` in the name) in this check. `gp_check_orphaned_files` gathers results from the Greenplum Database MASTER and all segments. + +|Column|Description| +|------|-----------| +| gp_segment_id | The Greenplum Database segment identifier. | +| tablespace | The identifier of the tablespace in which the orphaned file resides. | +| filename | The file name of the orphaned data file. 
| +| filepath | The file system path of the orphaned data file, relative to `$MASTER_DATA_DIRECTORY`. | + +> **Caution** Use this view as one of many data points to identify orphaned data files. Do not delete files based solely on results from querying this view. + + +### gp_check_missing_files + +The `gp_check_missing_files` view scans heap and append-optimized, column-oriented tables for missing data files. Greenplum considers only normal data files (files that do not contain a `.` or an `_` in the name) in this check. `gp_check_missing_files` gathers results from the Greenplum Database master and all segments. + +|Column|Description| +|------|-----------| +| gp_segment_id | The Greenplum Database segment identifier. | +| tablespace | The identifier of the tablespace in which the table resides. | +| relname | The name of the table that has a missing data file(s). | +| filename | The file name of the missing data file. | + + +### gp_check_missing_files_ext + +The `gp_check_missing_files_ext` view scans only append-optimized, column-oriented tables for missing extended data files. Greenplum Database considers both normal data files and extended numbered files (files that contain a `.` in the name) in this check. Files that contain an `_` in the name are not considered. `gp_check_missing_files_ext` gathers results from the Greenplum Database segments only. + +|Column|Description| +|------|-----------| +| gp_segment_id | The Greenplum Database segment identifier. | +| tablespace | The identifier of the tablespace in which the table resides. | +| relname | The name of the table that has a missing extended data file(s). | +| filename | The file name of the missing extended data file. | + + ## Checking for Uneven Data Distribution All tables in Greenplum Database are distributed, meaning their data is divided across all of the segments in the system. If the data is not distributed evenly, then query processing performance may decrease. The following views can help diagnose if a table has uneven data distribution: From 35d6e73600e6e8958039c89a307c585b17c042cc Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Wed, 4 Oct 2023 15:32:37 -0600 Subject: [PATCH 031/106] docs - add gp_check_functions module docs (#16539) --- .../install_guide/install_modules.html.md | 3 +- .../modules/gp_check_functions.html.md | 85 +++++++++++++++++++ .../markdown/ref_guide/modules/intro.html.md | 1 + gpdb-doc/markdown/ref_guide/toc.md | 1 + 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md diff --git a/gpdb-doc/markdown/install_guide/install_modules.html.md b/gpdb-doc/markdown/install_guide/install_modules.html.md index 9a51da51f804..4f20b7702f6d 100644 --- a/gpdb-doc/markdown/install_guide/install_modules.html.md +++ b/gpdb-doc/markdown/install_guide/install_modules.html.md @@ -29,15 +29,16 @@ You can register the following modules in this manner:
  • diskquota
  • fuzzystrmatch
  • gp_array_agg
  • +
  • gp_check_functions
  • gp_parallel_retrieve_cursor
  • gp_percentile_agg
  • gp_sparse_vector
  • greenplum_fdw
  • +
  • hstore
    • -
    • hstore
    • ip4r
    • ltree
    • orafce (VMware Greenplum only)
    • diff --git a/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md b/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md new file mode 100644 index 000000000000..949a2886dc30 --- /dev/null +++ b/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md @@ -0,0 +1,85 @@ +# gp_check_functions + +The `gp_check_functions` module implements views that identify missing and orphaned relation files. + +The `gp_check_functions` module is a Greenplum Database extension. + +## Installing and Registering the Module + +The `gp_check_functions` module is installed when you install Greenplum Database. Before you can use the views defined in the module, you must register the `gp_check_functions` extension in each database in which you want to use the views: +o + +``` +CREATE EXTENSION gp_check_functions; +``` + +Refer to [Installing Additional Supplied Modules](../../install_guide/install_modules.html) for more information. + + +## Checking for Missing and Orphaned Data Files + +Greenplum Database considers a relation data file that is present in the catalog, but not on disk, to be missing. Conversely, when Greenplum encounters an unexpected data file on disk that is not referenced in any relation, it considers that file to be orphaned. + +Greenplum Database provides the following views to help identify if missing or orphaned files exist in the current database: + +- [gp_check_orphaned_files](#orphaned) +- [gp_check_missing_files](#missing) +- [gp_check_missing_files_ext](#missing_ext) + +Consider it a best practice to check for these conditions prior to expanding the cluster or before offline maintenance. + +By default, the views in this module are available to `PUBLIC`. + +### gp_check_orphaned_files + +The `gp_check_orphaned_files` view scans the default and user-defined tablespaces for orphaned data files. Greenplum Database considers normal data files, files with an underscore (`_`) in the name, and extended numbered files (files that contain a `.` in the name) in this check. `gp_check_orphaned_files` gathers results from the Greenplum Database master and all segments. + +|Column|Description| +|------|-----------| +| gp_segment_id | The Greenplum Database segment identifier. | +| tablespace | The identifier of the tablespace in which the orphaned file resides. | +| filename | The file name of the orphaned data file. | +| filepath | The file system path of the orphaned data file, relative to `$MASTER_DATA_DIRECTORY`. | + +> **Caution** Use this view as one of many data points to identify orphaned data files. Do not delete files based solely on results from querying this view. + + +### gp_check_missing_files + +The `gp_check_missing_files` view scans heap and append-optimized, column-oriented tables for missing data files. Greenplum considers only normal data files (files that do not contain a `.` or an `_` in the name) in this check. `gp_check_missing_files` gathers results from the Greenplum Database master and all segments. + +|Column|Description| +|------|-----------| +| gp_segment_id | The Greenplum Database segment identifier. | +| tablespace | The identifier of the tablespace in which the table resides. | +| relname | The name of the table that has a missing data file(s). | +| filename | The file name of the missing data file. | + + +### gp_check_missing_files_ext + +The `gp_check_missing_files_ext` view scans only append-optimized, column-oriented tables for missing extended data files. 
Greenplum Database considers both normal data files and extended numbered files (files that contain a `.` in the name) in this check. Files that contain an `_` in the name, and `.fsm`, `.vm`, and other supporting files, are not considered. `gp_check_missing_files_ext` gathers results from the Greenplum Database segments only. + +|Column|Description| +|------|-----------| +| gp_segment_id | The Greenplum Database segment identifier. | +| tablespace | The identifier of the tablespace in which the table resides. | +| relname | The name of the table that has a missing extended data file(s). | +| filename | The file name of the missing extended data file. | + + +## Examples + +Check for missing and orphaned non-extended files: + +``` sql +SELECT * FROM gp_check_missing_files; +SELECT * FROM gp_check_orphaned_files; +``` + +Check for missing extended data files for append-optimized, column-oriented tables: + +``` sql +SELECT * FROM gp_check_missing_files_ext; +``` + diff --git a/gpdb-doc/markdown/ref_guide/modules/intro.html.md b/gpdb-doc/markdown/ref_guide/modules/intro.html.md index a1893a14d54c..f18978b82e8e 100644 --- a/gpdb-doc/markdown/ref_guide/modules/intro.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/intro.html.md @@ -16,6 +16,7 @@ The following Greenplum Database and PostgreSQL `contrib` modules are installed; - [diskquota](diskquota.html) - Allows administrators to set disk usage quotas for Greenplum Database roles and schemas. - [fuzzystrmatch](fuzzystrmatch.html) - Determines similarities and differences between strings. - [gp\_array\_agg](gp_array_agg.html) - Implements a parallel `array_agg()` aggregate function for Greenplum Database. +- [gp\_check\_functions](gp_check_functions.html) - Provides views to check for orphaned and missing relation files. - [gp\_legacy\_string\_agg](gp_legacy_string_agg.html) - Implements a legacy, single-argument `string_agg()` aggregate function that was present in Greenplum Database 5. - [gp\_parallel\_retrieve\_cursor](gp_parallel_retrieve_cursor.html) - Provides extended cursor functionality to retrieve data, in parallel, directly from Greenplum Database segments. - [gp\_percentile\_agg](gp_percentile_agg.html) - Improves GPORCA performance for ordered-set aggregate functions. diff --git a/gpdb-doc/markdown/ref_guide/toc.md b/gpdb-doc/markdown/ref_guide/toc.md index 6f466d059533..41240f545026 100644 --- a/gpdb-doc/markdown/ref_guide/toc.md +++ b/gpdb-doc/markdown/ref_guide/toc.md @@ -177,6 +177,7 @@ Doc Index - [diskquota](./modules/diskquota.md) - [fuzzystrmatch](./modules/fuzzystrmatch.md) - [gp\_array\_agg](./modules/gp_array_agg.md) + - [gp\_check\_functions](./modules/gp_check_functions.md) - [gp\_legacy\_string\_agg](./modules/gp_legacy_string_agg.md) - [gp\_parallel\_retrieve\_cursor (Beta)](./modules/gp_parallel_retrieve_cursor.md) - [gp\_percentile\_agg](./modules/gp_percentile_agg.md) From e58a08f3aba64ef005624ff9d0a0963f8b6c056e Mon Sep 17 00:00:00 2001 From: Annpurna Shahani Date: Tue, 3 Oct 2023 17:11:04 +0530 Subject: [PATCH 032/106] [6X] Abort rebalance if replay lag is big on mirror Issue: If there is a big gap between flush_lsn and replay_lsn on mirror then trigerring rebalance will cause a panic. Fix: Added a check to WAL Replay Location of mirror. gprecoverseg rebalance should throw an exception if replay lag (flush_lsn - replay_lsn) on mirror is more than the default allowed limit (10 GB). ALLOWED_REPLAY_LAG (default value 10 GB) can be configured using flag --replay-lag with gprecoverseg -r. 
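For reference, the replay lag in question is the value returned by the query that the new `replay_lag()` helper issues against each primary segment in utility mode (see the diff below); it can also be checked by hand before rebalancing. A minimal sketch, run against a primary segment:

```sql
-- Bytes the mirror has flushed but not yet replayed; gprecoverseg -r compares
-- this against the allowed limit (ALLOWED_REPLAY_LAG, 10 GB by default, or the
-- value supplied with --replay-lag).
SELECT pg_xlog_location_diff(flush_location, replay_location) AS replay_lag_bytes
FROM pg_stat_replication;
```

If the value is near the limit, letting the mirror catch up on replay before rebalancing avoids the abort described above.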
The check for the replay lag can also be disabled completely using the new flag --disable-replay-lag. New flags: --disbale-replay-lag: will turn off the replay_lag check during rebalance Usage: gprecoverseg -r --disable-replay-lag --replay-lag: will set the provided value as the new allowed replay lag on mirror Usage: gprecoverseg -r --replay-lag 2 Backport of https://github.com/greenplum-db/gpdb/pull/16220 --- gpMgmt/bin/gppylib/commands/gp.py | 5 ++ .../gppylib/operations/rebalanceSegments.py | 46 ++++++++++++++-- .../bin/gppylib/programs/clsRecoverSegment.py | 10 +++- .../test/unit/test_unit_gprecoverseg.py | 2 + .../test/unit/test_unit_rebalance_segment.py | 54 ++++++++++++++++++- gpMgmt/doc/gprecoverseg_help | 12 ++++- .../behave/mgmt_utils/gprecoverseg.feature | 41 ++++++++++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 14 +++++ .../utility_guide/ref/gprecoverseg.html.md | 8 ++- 9 files changed, 183 insertions(+), 9 deletions(-) diff --git a/gpMgmt/bin/gppylib/commands/gp.py b/gpMgmt/bin/gppylib/commands/gp.py index 1b26ac04905d..43235c1e6773 100644 --- a/gpMgmt/bin/gppylib/commands/gp.py +++ b/gpMgmt/bin/gppylib/commands/gp.py @@ -52,6 +52,11 @@ #max batch size of thread pool on master MAX_MASTER_NUM_WORKERS=64 +# Maximum replay lag (in GBs) allowed on mirror when rebalancing the segments +# The default value for ALLOWED_REPLAY_LAG has been decided to be 10 GBs as mirror +# took 5 mins to replay 10 GB lag on a local demo cluster. +ALLOWED_REPLAY_LAG = 10 + # Application name used by the pg_rewind instance that gprecoverseg starts # during incremental recovery. gpstate uses this to figure out when incremental # recovery is active. diff --git a/gpMgmt/bin/gppylib/operations/rebalanceSegments.py b/gpMgmt/bin/gppylib/operations/rebalanceSegments.py index 5f2470a477c5..296997e234e7 100644 --- a/gpMgmt/bin/gppylib/operations/rebalanceSegments.py +++ b/gpMgmt/bin/gppylib/operations/rebalanceSegments.py @@ -1,5 +1,6 @@ import sys import signal +from contextlib import closing from gppylib.gparray import GpArray from gppylib.db import dbconn from gppylib.commands.gp import GpSegStopCmd @@ -8,7 +9,32 @@ from gppylib.operations.segment_reconfigurer import SegmentReconfigurer -MIRROR_PROMOTION_TIMEOUT=600 +MIRROR_PROMOTION_TIMEOUT = 600 + +logger = gplog.get_default_logger() + + +def replay_lag(primary_db): + """ + This function returns replay lag (diff of flush_lsn and replay_lsn) on mirror segment. Goal being if there is a + lot to catchup on mirror the user should be warned about that and rebalance opertion should be aborted. + params: primary segment info + return value: replay lag in bytes + replay lag in bytes: diff of flush_lsn and replay_lsn on mirror + """ + port = primary_db.getSegmentPort() + host = primary_db.getSegmentHostName() + logger.debug('Get replay lag on mirror of primary segment with host:{}, port:{}'.format(host, port)) + sql = "select pg_xlog_location_diff(flush_location, replay_location) from pg_stat_replication;" + + try: + dburl = dbconn.DbURL(hostname=host, port=port) + with closing(dbconn.connect(dburl, utility=True, encoding='UTF8')) as conn: + replay_lag = dbconn.execSQLForSingleton(conn, sql) + except Exception as ex: + raise Exception("Failed to query pg_stat_replication for host:{}, port:{}, error: {}". 
+ format(host, port, str(ex))) + return replay_lag class ReconfigDetectionSQLQueryCommand(base.SQLCommand): @@ -26,11 +52,13 @@ def run(self): class GpSegmentRebalanceOperation: - def __init__(self, gpEnv, gpArray, batch_size, segment_batch_size): + def __init__(self, gpEnv, gpArray, batch_size, segment_batch_size, disable_replay_lag, replay_lag): self.gpEnv = gpEnv self.gpArray = gpArray self.batch_size = batch_size self.segment_batch_size = segment_batch_size + self.disable_replay_lag = disable_replay_lag + self.replay_lag = replay_lag self.logger = gplog.get_default_logger() def rebalance(self): @@ -45,10 +73,20 @@ def rebalance(self): continue if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized(): + if not self.disable_replay_lag: + self.logger.info("Allowed replay lag during rebalance is {} GB".format(self.replay_lag)) + replay_lag_in_bytes = replay_lag(segmentPair.primaryDB) + if float(replay_lag_in_bytes) >= (self.replay_lag * 1024 * 1024 * 1024): + raise Exception("{} bytes of xlog is still to be replayed on mirror with dbid {}, let " + "mirror catchup on replay then trigger rebalance. Use --replay-lag to " + "configure the allowed replay lag limit or --disable-replay-lag to disable" + " the check completely if you wish to continue with rebalance anyway" + .format(replay_lag_in_bytes, segmentPair.primaryDB.getSegmentDbId())) unbalanced_primary_segs.append(segmentPair.primaryDB) else: self.logger.warning( - "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized" \ + "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, " + "unreachable, or not synchronized" \ % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid)) if not len(unbalanced_primary_segs): @@ -76,7 +114,7 @@ def rebalance(self): pool.addCommand(cmd) base.join_and_indicate_progress(pool) - + failed_count = 0 completed = pool.getCompletedItems() for res in completed: diff --git a/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py b/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py index 0ceef210a637..5e737bbc2132 100644 --- a/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py +++ b/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py @@ -98,7 +98,7 @@ def outputToFile(self, mirrorBuilder, gpArray, fileName): def getRecoveryActionsBasedOnOptions(self, gpEnv, gpArray): if self.__options.rebalanceSegments: - return GpSegmentRebalanceOperation(gpEnv, gpArray, self.__options.parallelDegree, self.__options.parallelPerHost) + return GpSegmentRebalanceOperation(gpEnv, gpArray, self.__options.parallelDegree, self.__options.parallelPerHost, self.__options.disableReplayLag, self.__options.replayLag) else: instance = RecoveryTripletsFactory.instance(gpArray, self.__options.recoveryConfigFile, self.__options.newRecoverHosts, self.__options.parallelDegree) segs = [GpMirrorToBuild(t.failed, t.live, t.failover, self.__options.forceFullResynchronization, self.__options.differentialResynchronization) @@ -253,6 +253,9 @@ def run(self): if self.__options.differentialResynchronization and self.__options.outputSampleConfigFile: raise ProgramArgumentValidationException("Invalid -o provided with --differential argument") + if self.__options.disableReplayLag and not self.__options.rebalanceSegments: + raise ProgramArgumentValidationException("--disable-replay-lag should be used only with -r") + faultProberInterface.getFaultProber().initializeProber(gpEnv.getMasterPort()) confProvider = 
configInterface.getConfigurationProvider().initializeProvider(gpEnv.getMasterPort()) @@ -461,6 +464,11 @@ def createParser(): addTo.add_option("-r", None, default=False, action='store_true', dest='rebalanceSegments', help='Rebalance synchronized segments.') + addTo.add_option("--replay-lag", None, type="float", default=gp.ALLOWED_REPLAY_LAG, + dest="replayLag", + metavar="", help='Allowed replay lag on mirror, lag should be provided in GBs') + addTo.add_option("--disable-replay-lag", None, default=False, action='store_true', + dest='disableReplayLag', help='Disable replay lag check when rebalancing segments') addTo.add_option('', '--hba-hostnames', action='store_true', dest='hba_hostnames', help='use hostnames instead of CIDR in pg_hba.conf') diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py index 17130b791b1a..b3d08c16d839 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py @@ -24,6 +24,8 @@ def __init__(self): self.recoveryConfigFile = None self.outputSpareDataDirectoryFile = None self.rebalanceSegments = None + self.disableReplayLag = None + self.replayLag = None self.outputSampleConfigFile = None self.parallelDegree = 1 diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py b/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py index 39dafa75d203..c789de0b4949 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py @@ -4,6 +4,7 @@ from gppylib.gparray import GpArray, Segment from gppylib.commands.base import CommandResult from gppylib.operations.rebalanceSegments import GpSegmentRebalanceOperation +from gppylib.operations.rebalanceSegments import replay_lag class RebalanceSegmentsTestCase(GpTestCase): @@ -11,10 +12,15 @@ def setUp(self): self.pool = Mock() self.pool.getCompletedItems.return_value = [] + mock_logger = Mock(spec=['log', 'warn', 'info', 'debug', 'error', 'warning', 'fatal']) + self.apply_patches([ patch("gppylib.commands.base.WorkerPool.__init__", return_value=None), patch("gppylib.commands.base.WorkerPool", return_value=self.pool), patch('gppylib.programs.clsRecoverSegment.GpRecoverSegmentProgram'), + patch('gppylib.operations.rebalanceSegments.logger', return_value=mock_logger), + patch('gppylib.db.dbconn.connect', autospec=True), + patch('gppylib.db.dbconn.execSQLForSingleton', return_value='5678') ]) self.mock_gp_recover_segment_prog_class = self.get_mock_from_apply_patch('GpRecoverSegmentProgram') @@ -32,8 +38,11 @@ def setUp(self): self.success_command_mock.get_results.return_value = CommandResult( 0, "stdout success text", "stderr text", True, False) - self.subject = GpSegmentRebalanceOperation(Mock(), self._create_gparray_with_2_primary_2_mirrors(), 1, 1) - self.subject.logger = Mock() + self.subject = GpSegmentRebalanceOperation(Mock(), self._create_gparray_with_2_primary_2_mirrors(), 1, 1, False, + 10) + self.subject.logger = Mock(spec=['log', 'warn', 'info', 'debug', 'error', 'warning', 'fatal']) + + self.mock_logger = self.get_mock_from_apply_patch('logger') def tearDown(self): super(RebalanceSegmentsTestCase, self).tearDown() @@ -58,6 +67,47 @@ def test_rebalance_returns_failure(self): result = self.subject.rebalance() self.assertFalse(result) + @patch('gppylib.db.dbconn.execSQLForSingleton', return_value='56780000000') + def test_rebalance_returns_warning(self, mock1): + with 
self.assertRaises(Exception) as ex: + self.subject.rebalance() + self.assertEqual('56780000000 bytes of xlog is still to be replayed on mirror with dbid 2, let mirror catchup ' + 'on replay then trigger rebalance. Use --replay-lag to configure the allowed replay lag limit ' + 'or --disable-replay-lag to disable the check completely if you wish to continue with ' + 'rebalance anyway', str(ex.exception)) + self.assertEqual([call("Get replay lag on mirror of primary segment with host:sdw1, port:40000")], + self.mock_logger.debug.call_args_list) + self.assertEqual([call("Determining primary and mirror segment pairs to rebalance"), + call('Allowed replay lag during rebalance is 10 GB')], + self.subject.logger.info.call_args_list) + + @patch('gppylib.db.dbconn.execSQLForSingleton', return_value='5678000000') + def test_rebalance_does_not_return_warning(self, mock1): + self.subject.rebalance() + self.assertEqual([call("Get replay lag on mirror of primary segment with host:sdw1, port:40000")], + self.mock_logger.debug.call_args_list) + + @patch('gppylib.db.dbconn.execSQLForSingleton', return_value='56780000000') + def test_rebalance_replay_lag_is_disabled(self, mock1): + self.subject.disable_replay_lag = True + self.subject.rebalance() + self.assertNotIn([call("Get replay lag on mirror of primary segment with host:sdw1, port:40000")], + self.mock_logger.debug.call_args_list) + self.assertIn([call("Determining primary and mirror segment pairs to rebalance")], + self.subject.logger.info.call_args_list) + + @patch('gppylib.db.dbconn.connect', side_effect=Exception()) + def test_replay_lag_connect_exception(self, mock1): + with self.assertRaises(Exception) as ex: + replay_lag(self.primary0) + self.assertEqual('Failed to query pg_stat_replication for host:sdw1, port:40000, error: ', str(ex.exception)) + + @patch('gppylib.db.dbconn.execSQLForSingleton', side_effect=Exception()) + def test_replay_lag_query_exception(self, mock1): + with self.assertRaises(Exception) as ex: + replay_lag(self.primary0) + self.assertEqual('Failed to query pg_stat_replication for host:sdw1, port:40000, error: ', str(ex.exception)) + def _create_gparray_with_2_primary_2_mirrors(self): master = Segment.initFromString( "1|-1|p|p|s|u|mdw|mdw|5432|/data/master") diff --git a/gpMgmt/doc/gprecoverseg_help b/gpMgmt/doc/gprecoverseg_help index 2e9fdf9ef5f4..43fb43b5206e 100755 --- a/gpMgmt/doc/gprecoverseg_help +++ b/gpMgmt/doc/gprecoverseg_help @@ -14,7 +14,7 @@ gprecoverseg [-p [,...]] [-F] [-a] [-q] [-s] [--no-progress] [-l ] -gprecoverseg -r +gprecoverseg -r [--replay-lag ] [--disable-replay-lag] gprecoverseg -o @@ -243,6 +243,16 @@ their preferred roles. All segments must be valid and synchronized before running gprecoverseg -r. If there are any in progress queries, they will be cancelled and rolled back. +--replay-lag +Replay lag(in GBs) allowed on mirror when rebalancing the segments. Default is 10 GB. If +the replay_lag (flush_lsn-replay_lsn) is more than the value provided with this option +then rebalance will be aborted. + + +--disable-replay-lag +Disable replay lag check when rebalancing segments + + -s Show pg_rewind/pg_basebackup progress sequentially instead of inplace. 
Useful diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index fca67a2ac6b4..0eb667244c8e 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -1904,6 +1904,47 @@ Feature: gprecoverseg tests Then gprecoverseg should return a return code of 0 And the cluster is rebalanced + @demo_cluster + @concourse_cluster + Scenario: gprecoverseg rebalance aborts and throws exception if replay lag on mirror is more than or equal to the allowed limit + Given the database is running + And all the segments are running + And the segments are synchronized + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And user immediately stops all primary processes for content 0 + And user can start transactions + When the user runs "gprecoverseg -av" + Then gprecoverseg should return a return code of 0 + When the user runs "gprecoverseg -ar --replay-lag 0" + Then gprecoverseg should return a return code of 2 + And gprecoverseg should print ".* bytes of xlog is still to be replayed on mirror with dbid.*, let mirror catchup on replay then trigger rebalance" regex to logfile + When the user runs "gprecoverseg -ar --disable-replay-lag" + Then gprecoverseg should return a return code of 0 + And all the segments are running + And user can start transactions + + @demo_cluster + @concourse_cluster + Scenario: gprecoverseg errors out if invalid options are used with --disable-replay-lag + Given the database is running + And all the segments are running + And the segments are synchronized + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And user immediately stops all primary processes for content 0,1,2 + And user can start transactions + When the user runs "gprecoverseg -av" + Then gprecoverseg should return a return code of 0 + And verify that mirror on content 0,1,2 is up + When the user runs "gprecoverseg -aF --disable-replay-lag" + Then gprecoverseg should return a return code of 2 + And gprecoverseg should print "--disable-replay-lag should be used only with -r" to stdout + When the user runs "gprecoverseg -ar" + Then gprecoverseg should return a return code of 0 + And gprecoverseg should print "Allowed replay lag during rebalance is 10 GB" to stdout + And all the segments are running + And user can start transactions + + @remove_rsync_bash @concourse_cluster Scenario: None of the accumulated wal (after running pg_start_backup and before copying the pg_control file) is lost during differential diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 799da36f65b4..82e9d30fbf54 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -2883,6 +2883,20 @@ def impl(context, command, target): if target not in contents: raise Exception("cannot find %s in %s" % (target, filename)) + +@then('{command} should print "{target}" regex to logfile') +def impl(context, command, target): + log_dir = _get_gpAdminLogs_directory() + filename = glob.glob('%s/%s_*.log' % (log_dir, command))[0] + contents = '' + with open(filename) as fr: + for line in fr: + contents += line + + pat = re.compile(target) + if not pat.search(contents): + raise Exception("cannot find %s in %s" % (target, filename)) + @given('verify that a role "{role_name}" exists in database "{dbname}"') @then('verify that a role "{role_name}" exists in 
database "{dbname}"') def impl(context, role_name, dbname): diff --git a/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md b/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md index 067321228c7c..414e8bcdb4c3 100644 --- a/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md +++ b/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md @@ -10,7 +10,7 @@ gprecoverseg [[-p [,...]] | -i ] [-d ] [--no-progress] [-l ] -gprecoverseg -r +gprecoverseg -r [--replay-lag ] [--disable-replay-lag] gprecoverseg -o               [-p [,...]] @@ -168,6 +168,12 @@ The recovery process marks the segment as up again in the Greenplum Database sys -r \(rebalance segments\) : After a segment recovery, segment instances may not be returned to the preferred role that they were given at system initialization time. This can leave the system in a potentially unbalanced state, as some segment hosts may have more active segments than is optimal for top system performance. This option rebalances primary and mirror segments by returning them to their preferred roles. All segments must be valid and resynchronized before running `gprecoverseg -r`. If there are any in progress queries, they will be cancelled and rolled back. +--replay-lag +: Replay lag(in GBs) allowed on mirror when rebalancing the segments. Default is 10 GB. If the replay_lag (flush_lsn-replay_lsn) is more than the value provided with this option then rebalance will be aborted. + +--disable-replay-lag +: Disable replay lag check when rebalancing segments + -s \(sequential progress\) : Show `pg_basebackup` or `pg_rewind` progress sequentially instead of in-place. Useful when writing to a file, or if a tty does not support escape sequences. The default is to show progress in-place. From 4c45cdc41b58bac14079a781e963e86af64e35d7 Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Thu, 5 Oct 2023 10:32:33 -0600 Subject: [PATCH 033/106] docs - gp6 supports madlib 2.1.0 (#16549) --- gpdb-doc/markdown/analytics/madlib.html.md | 28 +++++++++++++------ .../platform-requirements-overview.md.hbs | 2 +- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/gpdb-doc/markdown/analytics/madlib.html.md b/gpdb-doc/markdown/analytics/madlib.html.md index cdbbdd374df0..42d2c23c9c7c 100644 --- a/gpdb-doc/markdown/analytics/madlib.html.md +++ b/gpdb-doc/markdown/analytics/madlib.html.md @@ -83,10 +83,10 @@ Before you install the MADlib package, make sure that your Greenplum database is $ tar xzvf madlib-1.21.0+1-gp6-rhel7-x86_64.tar.gz ``` - To unpack version 2.0.0: + To unpack version 2.1.0: ``` - $ tar xzvf madlib-2.0.0-gp6-rhel8-x86_64.tar.gz + $ tar xzvf madlib-2.1.0-gp6-rhel8-x86_64.tar.gz ``` 5. Install the software package by running the `gppkg` command. For example: @@ -97,10 +97,10 @@ Before you install the MADlib package, make sure that your Greenplum database is $ gppkg -i ./madlib-1.21.0+1-gp6-rhel7-x86_64/madlib-1.21.0+1-gp6-rhel7-x86_64.gppkg ``` - To install version 2.0.0: + To install version 2.1.0: ``` - $ gppkg -i ./madlib-2.0.0-gp6-rhel8-x86_64/madlib-2.0.0-gp6-rhel8-x86_64.gppkg + $ gppkg -i ./madlib-2.1.0-gp6-rhel8-x86_64/madlib-2.1.0-gp6-rhel8-x86_64.gppkg ``` ### Adding MADlib Functions to a Database @@ -131,13 +131,13 @@ $ madpack -s madlib -p greenplum -c gpadmin@mdw:5432/testdb install-check > **Important** Greenplum Database does not support directly upgrading from MADlib 1.x to version 2.x. You must back up your MADlib models, uninstall version 1.x, install version 2.x, and reload the models. 
-You upgrade an installed MADlib version 1.x package with the Greenplum Database `gppkg` utility and the MADlib `madpack` command. +You upgrade an installed MADlib version 1.x or 2.x package with the Greenplum Database `gppkg` utility and the MADlib `madpack` command. For information about the upgrade paths that MADlib supports, see the MADlib support and upgrade matrix in the [MADlib FAQ page](https://cwiki.apache.org/confluence/display/MADLIB/FAQ#FAQ-Q1-2WhatdatabaseplatformsdoesMADlibsupportandwhatistheupgradematrix?). ### Upgrading a MADlib 1.x Package -> **Important** Greenplum Database does not support upgrading from MADlib version 1.x to version 2.x. +> **Important** Greenplum Database does not support upgrading from MADlib version 1.x to version 2.x. Use this procedure to upgrade from an older MADlib version 1.x release to a newer version 1.x release. To upgrade MADlib, run the `gppkg` utility with the `-u` option. This command upgrades an installed MADlib 1.x package to MADlib 1.21.0+1. @@ -145,11 +145,21 @@ To upgrade MADlib, run the `gppkg` utility with the `-u` option. This command up $ gppkg -u madlib-1.21.0+1-gp6-rhel7-x86_64.gppkg ``` +### Upgrading a MADlib 2.x Package + +> **Important** Greenplum Database does not support upgrading from MADlib version 1.x to version 2.x. Use this procedure to upgrade from an older MADlib version 2.x release to a newer version 2.x release. + +To upgrade MADlib, run the `gppkg` utility with the `-u` option. This command upgrades an installed MADlib 2.0.x package to MADlib 2.1.0: + +``` +$ gppkg -u madlib-2.1.0-gp6-rhel8-x86_64.gppkg +``` + ### Upgrading MADlib Functions After you upgrade the MADlib package from one minor version to another, run `madpack upgrade` to upgrade the MADlib functions in a database schema. -> **Note** Use `madpack upgrade` only if you upgraded a minor MADlib package version, for example from 1.19.0 to 1.21.0. You do not need to update the functions within a patch version upgrade, for example from 1.16+1 to 1.16+3. +> **Note** Use `madpack upgrade` only if you upgraded a minor MADlib package version, for example from 1.19.0 to 1.21.0, or from 2.0.0 to 2.1.0. You do not need to update the functions within a patch version upgrade, for example from 1.16+1 to 1.16+3. This example command upgrades the MADlib functions in the schema `madlib` of the Greenplum Database `test`. @@ -182,10 +192,10 @@ To uninstall MADlib package version 1.21.0: $ gppkg -r madlib-1.21.0+1-gp6-rhel7-x86_64 ``` -To uninstall MADlib package version 2.0.0: +To uninstall MADlib package version 2.1.0: ``` -$ gppkg -r madlib-2.0.0-gp6-rhel8-x86_64 +$ gppkg -r madlib-2.1.0-gp6-rhel8-x86_64 ``` You can run the `gppkg` utility with the options `-q --all` to list the installed extensions and their versions. diff --git a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs index 995d11f15964..5ec295c14e50 100644 --- a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs +++ b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs @@ -206,7 +206,7 @@ This table lists the versions of the Greenplum Extensions that are compatible wi MADlib Machine Learning -2.0, 1.21, 1.20, 1.19, 1.18, 1.17, 1.16 +2.1, 2.0, 1.21, 1.20, 1.19, 1.18, 1.17, 1.16 Support matrix at MADlib FAQ. 
From cf2ea12fb4517603d5180e4111e7328d3d4a571f Mon Sep 17 00:00:00 2001 From: David Yozie Date: Mon, 9 Oct 2023 07:55:32 -0700 Subject: [PATCH 034/106] Docs - fix broken link (rename cluster recovery to disaster recovery) --- gpdb-doc/markdown/utility_guide/utility-programs.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/utility_guide/utility-programs.html.md b/gpdb-doc/markdown/utility_guide/utility-programs.html.md index ca829dcfadd2..1d77b730bb6f 100644 --- a/gpdb-doc/markdown/utility_guide/utility-programs.html.md +++ b/gpdb-doc/markdown/utility_guide/utility-programs.html.md @@ -41,7 +41,7 @@ Greenplum Database provides the following utility programs. Superscripts identif - [gpmovemirrors](ref/gpmovemirrors.html) - [gpmt](ref/gpmt.html) - [gppkg](ref/gppkg.html) -- [gpcr](https://docs.vmware.com/en/VMware-Greenplum-Cluster-Recovery/1.0/greenplum-cluster-recovery/GUID-ref-gpcr.html) +- [gpdr](https://docs.vmware.com/en/VMware-Greenplum-Disaster-Recovery/1.0/greenplum-disaster-recovery/ref-gpdr.html) - [gprecoverseg](ref/gprecoverseg.html) - [gpreload](ref/gpreload.html) - [gprestore](https://docs.vmware.com/en/VMware-Greenplum-Backup-and-Restore/index.html)1 From eea26ac438606ebd69a78f1e3daf4f45b5a8bbf0 Mon Sep 17 00:00:00 2001 From: David Yozie Date: Mon, 9 Oct 2023 08:47:31 -0700 Subject: [PATCH 035/106] Docs - fixing broken external link to Dell ECS --- gpdb-doc/markdown/admin_guide/external/g-s3-protocol.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/admin_guide/external/g-s3-protocol.html.md b/gpdb-doc/markdown/admin_guide/external/g-s3-protocol.html.md index 6eb12429afc7..09049d33c0c0 100644 --- a/gpdb-doc/markdown/admin_guide/external/g-s3-protocol.html.md +++ b/gpdb-doc/markdown/admin_guide/external/g-s3-protocol.html.md @@ -8,7 +8,7 @@ Amazon Simple Storage Service \(Amazon S3\) provides secure, durable, highly-sca You can define read-only external tables that use existing data files in the S3 bucket for table data, or writable external tables that store the data from INSERT operations to files in the S3 bucket. Greenplum Database uses the S3 URL and prefix specified in the protocol URL either to select one or more files for a read-only table, or to define the location and filename format to use when uploading S3 files for `INSERT` operations to writable tables. -The `s3` protocol also supports [Dell EMC Elastic Cloud Storage](https://www.emc.com/en-us/storage/ecs/index.htm) \(ECS\), an Amazon S3 compatible service. +The `s3` protocol also supports [Dell Elastic Cloud Storage](https://www.dell.com/en-us/dt/learn/data-storage/ecs.htm) \(ECS\), an Amazon S3 compatible service. > **Note** The `pxf` protocol can access data in S3 and other object store systems such as Azure, Google Cloud Storage, and Minio. The `pxf` protocol can also access data in external Hadoop systems \(HDFS, Hive, HBase\), and SQL databases. See [pxf:// Protocol](g-pxf-protocol.html). From 720464e21327db1991d956d37b497960fc93fa47 Mon Sep 17 00:00:00 2001 From: bhari Date: Tue, 10 Oct 2023 18:47:04 +0530 Subject: [PATCH 036/106] [6X backport] Fix ORCA nested SubLink processing during query normalization (#16509) * Fix ORCA invalid processing of nested SubLinks with GROUP BY attributes The ORCA optimizer could produce an invalid query plan due to incorrect processing of nested SubLinks (SubLink contains one more SubLink or rtable subquery inside) during query normalization. 
The incorrect behaviour took place when a query had a nested SubLink in its targetList, and SubLink lower levels contained attributes that were referenced in the query's GROUP BY clause. ORCA could construct for such queries an invalid plan, according to which some wrong and unexpected attributes got into the nested part of SubLink's plan (the example is provided in tests section). And the presence of those attributes could lead to incorrect query execution or even segmentation fault. Inside the NormalizeGroupByProjList function the context.m_lower_table_tlist, which contains grouping attributes from target_list_copy, is created. And the entries of target_list_copy, that are not grouping attributes, are mutated inside the RunExtractAggregatesMutator in order to make their inner expressions reference the corresponding entries from context.m_lower_table_tlist. The invalid plan could be produced because of incorrect mapping between Var's varattno (in target_list_copy) and some TargetEntry's resno (in context.m_lower_table_tlist). This mapping is established by RunExtractAggregatesMutator, where for the Var code branch the following assignment takes place: var->varattno = found_tle->resno, where found_tle is the corresponding entry from context.m_lower_table_tlist. However, the behaviour of this function was erroneous for the case of nested SubLinks because it mutated only the upper level of nested SubLink structure, and other subqueries remained unmodified because RunExtractAggregatesMutator did not have a branch for the Query, which was required for futher diving into lower subqueries. And because inner Vars hadn't been mutated, they started reference wrong attributes. This patch modifies RunExtractAggregatesMutator function by adding the Query branch to make the mutator correctly step into nested SubLink queries. And the call of MutateQueryOrExpressionTree is replaced with the call of mutator itself in order to correctly modify m_current_query_level when stepping into lower subqueries. * Fix ORCA invalid processing of nested SubLinks referenced in GROUP BY clause. The ORCA optimizer could fallback to the legacy optimizer due to incorrect processing of nested SubLinks (SubLink contains one more SubLink or rtable subquery inside) during query normalization. The erroneous behaviour took place when a query had a nested SubLink in its targetList and this SubLink was in GROUP BY clause itself (see the test section). During query normalization for queries like in the tests the RunGroupingColMutator function is called. This function processed nested SubLinks incorrectly. It has Query code branch, which is supposed to be executed for various subqueries of the original query. However, for the nested SubLink case, when mutating the subquery of the SubLink (at second query level), the m_current_query_level field was not increased respectively, because MutateQueryOrExpressionTree, which was responsible for calling the RunGroupingColMutator on the lower subquery, did not know anything about the context. Therefore, for the query like in the tests, wrong value of the m_current_query_level led to an unnecessary modification of the Var's varlevelsup value. And when Var's varlevelsup value had been unnecessary modified, it started referencing different query level. This could lead to the fallback or invalid plan construction at further planning stages. 
This patch fixes erroneous behaviour of RunGroupingColMutator by replacing the call of MutateQueryOrExpressionTree with the call of RunGroupingColMutator in order to properly modify m_current_query_level when mutating lower level RTEs. * Fix ORCA invalid processing of nested SubLinks under aggregates. The ORCA optimizer could produce an invalid query plan and fallback to the legacy optimizer due to incorrect processing of nested SubLinks (SubLink contains one more SubLink or rtable subquery inside) during query normalization. The issue could arise when query had a AggRef inside a Sublink and that AggRef contained one more SubLink inside (see the example of the query in the tests section). While mutating the SubLink inside RunExtractAggregatesMutator, the Aggref branch is executed for SubLink's query targetList, which mutates args of the AggRef. And if inside this AggRef arg some Var had varlevelsup value greater than the m_agg_levels_up, i.e the Var referenced a relation that is higher than the AggRef, the Var was not mutated at all. The varlevelsup field was not modified and because of that, when AggRef was pulled up to the zero query level (by appending it to the context.m_lower_table_tlist), the Var, whose varlevelsup was higher than AggRef's, and which remained unchanged, started referencing the wrong relation. This could lead to the fallback or invalid plan construction when processing attributes at further planning stages. This patch makes the RunExtractAggregatesMutator modify the varlevelsup of the Var relatively the AggRef level in order to preserve proper query level. * Add testcase for fixed issue of panic in ORCA Test nested query with aggregate inside a sublink. ORCA was not correctly normalizing the aggregate expression inside the sublink's nested query. The column variable accessed in aggregate was not accessible to the aggregate after the normalization of query. This was causing segmentation fault when access of column variable is tried in rtable which doesn't have the RTE, leading to crash of psql. The behavior was fixed recently in commit 44157d7. This test case checks for regressions in that behavior. --------- Co-authored-by: Alexander Kondakov --- .../gpopt/translate/CQueryMutators.cpp | 87 ++++++++--- .../regress/expected/aggregates_optimizer.out | 2 - src/test/regress/expected/bfv_olap.out | 1 - .../regress/expected/bfv_olap_optimizer.out | 3 - src/test/regress/expected/subselect.out | 117 +++++++++++++++ src/test/regress/expected/subselect_gp.out | 34 +++++ .../expected/subselect_gp_optimizer.out | 34 +++++ .../regress/expected/subselect_optimizer.out | 138 ++++++++++++++++++ src/test/regress/sql/bfv_olap.sql | 1 - src/test/regress/sql/subselect.sql | 54 +++++++ src/test/regress/sql/subselect_gp.sql | 15 ++ 11 files changed, 456 insertions(+), 30 deletions(-) diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp b/src/backend/gpopt/translate/CQueryMutators.cpp index 665c3bfc78df..689ab6440712 100644 --- a/src/backend/gpopt/translate/CQueryMutators.cpp +++ b/src/backend/gpopt/translate/CQueryMutators.cpp @@ -436,11 +436,12 @@ CQueryMutators::RunGroupingColMutator(Node *node, GPOS_ASSERT(IsA(old_sublink->subselect, Query)); - new_sublink->subselect = gpdb::MutateQueryOrExpressionTree( - old_sublink->subselect, - (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, context, - 0 // flags -- mutate into cte-lists - ); + // One need to call the Query mutator for subselect and take into + // account that SubLink can be multi-level. 
Therefore, the + // context->m_current_query_level must be modified properly + // while diving into such nested SubLink. + new_sublink->subselect = + RunGroupingColMutator(old_sublink->subselect, context); context->m_current_query_level--; @@ -686,22 +687,34 @@ CQueryMutators::RunExtractAggregatesMutator(Node *node, // Handle other top-level outer references in the project element. if (var->varlevelsup == context->m_current_query_level) { - if (var->varlevelsup == context->m_agg_levels_up) + if (var->varlevelsup >= context->m_agg_levels_up) { - // If Var references the top level query inside an Aggref that also - // references top level query, the Aggref is moved to the derived query - // (see comments in Aggref if-case above). Thus, these Var references - // are brought up to the top-query level. + // If Var references the top level query (varlevelsup = m_current_query_level) + // inside an Aggref that also references top level query, the Aggref is moved + // to the derived query (see comments in Aggref if-case above). + // And, therefore, if we are mutating such Vars inside the Aggref, we must + // change their varlevelsup field in order to preserve correct reference level. + // i.e these Vars are pulled up as the part of the Aggref by the m_agg_levels_up. // e.g: - // explain select (select sum(foo.a) from jazz) from foo group by a, b; + // select (select max((select foo.a))) from foo; // is transformed into - // select (select fnew.sum_t from jazz) - // from (select foo.a, foo.b, sum(foo.a) sum_t - // from foo group by foo.a, foo.b) fnew; - // - // Note the foo.a var which is in sum() in a subquery must now become a - // var referencing the current query level. - var->varlevelsup = 0; + // select (select fnew.max_t) + // from (select max((select foo.a)) max_t from foo) fnew; + // Here the foo.a inside max referenced top level RTE foo at + // varlevelsup = 2 inside the Aggref at agglevelsup 1. Then the + // Aggref is brought up to the top-query-level of fnew and foo.a + // inside Aggref is bumped up by original Aggref's level. + // We may visualize that logic with the following diagram: + // Query <------┐ <--------------------┐ + // | | + // | m_agg_levels_up = 1 | + // | | + // Aggref --┘ | varlevelsup = 2 + // | + // | + // | + // Var -------------------------┘ + var->varlevelsup -= context->m_agg_levels_up; return (Node *) var; } @@ -798,17 +811,45 @@ CQueryMutators::RunExtractAggregatesMutator(Node *node, GPOS_ASSERT(IsA(old_sublink->subselect, Query)); - new_sublink->subselect = gpdb::MutateQueryOrExpressionTree( - old_sublink->subselect, - (MutatorWalkerFn) RunExtractAggregatesMutator, (void *) context, - 0 // mutate into cte-lists - ); + // One need to call the Query mutator for subselect and take into + // account that SubLink can be multi-level. Therefore, the + // context->m_current_query_level must be modified properly + // while diving into such nested SubLink. + new_sublink->subselect = + RunExtractAggregatesMutator(old_sublink->subselect, context); context->m_current_query_level--; return (Node *) new_sublink; } + if (IsA(node, Query)) + { + // Mutate Query tree and ignore rtable subqueries in order to modify + // m_current_query_level properly when mutating them below. 
+ Query *query = gpdb::MutateQueryTree( + (Query *) node, (MutatorWalkerFn) RunExtractAggregatesMutator, + context, QTW_IGNORE_RT_SUBQUERIES); + + ListCell *lc; + ForEach(lc, query->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (RTE_SUBQUERY == rte->rtekind) + { + Query *subquery = rte->subquery; + context->m_current_query_level++; + rte->subquery = (Query *) RunExtractAggregatesMutator( + (Node *) subquery, context); + context->m_current_query_level--; + gpdb::GPDBFree(subquery); + } + } + + return (Node *) query; + } + return gpdb::MutateExpressionTree( node, (MutatorWalkerFn) RunExtractAggregatesMutator, context); } diff --git a/src/test/regress/expected/aggregates_optimizer.out b/src/test/regress/expected/aggregates_optimizer.out index 20fcffbc19a5..3935f1874b92 100644 --- a/src/test/regress/expected/aggregates_optimizer.out +++ b/src/test/regress/expected/aggregates_optimizer.out @@ -373,8 +373,6 @@ LINE 4: where sum(distinct a.four + b.four) = b.four)... select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; -INFO: GPORCA failed to produce a plan, falling back to planner -DETAIL: Query-to-DXL Translation: No variable entry found due to incorrect normalization of query max ------ 9999 diff --git a/src/test/regress/expected/bfv_olap.out b/src/test/regress/expected/bfv_olap.out index cc7cb31136e7..4e4512653e8d 100644 --- a/src/test/regress/expected/bfv_olap.out +++ b/src/test/regress/expected/bfv_olap.out @@ -638,7 +638,6 @@ select * from (select sum(a.salary) over(), count(*) 2100 | 1 (2 rows) --- this query currently falls back, needs to be fixed select (select rn from (select row_number() over () as rn, name from t1_github_issue_10143 where code = a.code diff --git a/src/test/regress/expected/bfv_olap_optimizer.out b/src/test/regress/expected/bfv_olap_optimizer.out index 1a55c4fe8b4f..068ef921da6f 100644 --- a/src/test/regress/expected/bfv_olap_optimizer.out +++ b/src/test/regress/expected/bfv_olap_optimizer.out @@ -638,7 +638,6 @@ select * from (select sum(a.salary) over(), count(*) 2100 | 1 (2 rows) --- this query currently falls back, needs to be fixed select (select rn from (select row_number() over () as rn, name from t1_github_issue_10143 where code = a.code @@ -647,8 +646,6 @@ select (select rn from (select row_number() over () as rn, name ,sum(sum(a.salary)) over() from t2_github_issue_10143 a group by a.code; -INFO: GPORCA failed to produce a plan, falling back to planner -DETAIL: Query-to-DXL Translation: No variable entry found due to incorrect normalization of query dongnm | sum --------+------ | 2100 diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 295aad6d500b..22815988bb8d 100755 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -960,3 +960,120 @@ fetch backward all in c1; commit; --end_ignore +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, which contains attributes referenced in query's GROUP BY clause. +-- Due to presence of non-grouping columns in targetList, ORCA performs query +-- normalization, during which ORCA establishes a correspondence between vars +-- from targetlist entries to grouping attributes. And this process should +-- correctly handle nested structures. The inner part of SubPlan in the test +-- should contain only t.j. 
+-- start_ignore +drop table if exists t; +NOTICE: table "t" does not exist, skipping +-- end_ignore +create table t (i int, j int) distributed by (i); +insert into t values (1, 2); +explain (verbose, costs off) +select j, +(select j from (select j) q2) +from t +group by i, j; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: t.j, ((SubPlan 1)), t.i + -> HashAggregate + Output: t.j, (SubPlan 1), t.i + Group Key: t.i, t.j + -> Seq Scan on public.t + Output: t.j, t.i + SubPlan 1 (slice1; segments: 1) + -> Result + Output: t.j + Optimizer: Postgres query optimizer +(11 rows) + +select j, +(select j from (select j) q2) +from t +group by i, j; + j | j +---+--- + 2 | 2 +(1 row) + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. +explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + Output: t.j, 1, ((SubPlan 1)) + -> HashAggregate + Output: t.j, 1, ((SubPlan 1)) + Group Key: t.j, ((SubPlan 1)) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t.j, ((SubPlan 1)) + Hash Key: t.j, ((SubPlan 1)) + -> HashAggregate + Output: t.j, ((SubPlan 1)) + Group Key: t.j, (SubPlan 1) + -> Seq Scan on public.t + Output: t.j, (SubPlan 1) + SubPlan 1 (slice1; segments: 1) + -> Result + Output: t.j + Optimizer: Postgres query optimizer +(17 rows) + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + j | c | q1 +---+---+---- + 2 | 1 | 2 +(1 row) + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. 
+explain (verbose, costs off) +select (select max((select t.i))) from t; + QUERY PLAN +------------------------------------------------ + Aggregate + Output: (SubPlan 2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (max((SubPlan 1))) + -> Aggregate + Output: max((SubPlan 1)) + -> Seq Scan on public.t + Output: t.i + SubPlan 1 (slice1; segments: 1) + -> Result + Output: t.i + SubPlan 2 (slice0) + -> Result + Output: max((max((SubPlan 1)))) + Optimizer: Postgres query optimizer +(15 rows) + +select (select max((select t.i))) from t; + max +----- + 1 +(1 row) + +drop table t; diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 89f31abdb1cd..cd15afff0f1a 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3112,3 +3112,37 @@ select * from r where b in (select b from s where c=10 order by c limit 2); 1 | 2 | 3 (1 row) +-- Test nested query with aggregate inside a sublink, +-- ORCA should correctly normalize the aggregate expression inside the +-- sublink's nested query and the column variable accessed in aggregate should +-- be accessible to the aggregate after the normalization of query. +-- If the query is not supported, ORCA should gracefully fallback to postgres +explain (COSTS OFF) with t0 AS ( + SELECT + ROW_TO_JSON((SELECT x FROM (SELECT max(t.b)) x)) + AS c + FROM r + JOIN s ON true + JOIN s as t ON true + ) +SELECT c FROM t0; + QUERY PLAN +--------------------------------------------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice3; segments: 3) + -> Aggregate + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Nested Loop + -> Seq Scan on r + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on s + -> Materialize + -> Seq Scan on s t + SubPlan 1 (slice0) + -> Subquery Scan on x + -> Result + Optimizer: Postgres query optimizer +(16 rows) + diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 706dca15d9b4..a5493728802c 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3253,3 +3253,37 @@ select * from r where b in (select b from s where c=10 order by c limit 2); 1 | 2 | 3 (1 row) +-- Test nested query with aggregate inside a sublink, +-- ORCA should correctly normalize the aggregate expression inside the +-- sublink's nested query and the column variable accessed in aggregate should +-- be accessible to the aggregate after the normalization of query. 
+-- If the query is not supported, ORCA should gracefully fallback to postgres +explain (COSTS OFF) with t0 AS ( + SELECT + ROW_TO_JSON((SELECT x FROM (SELECT max(t.b)) x)) + AS c + FROM r + JOIN s ON true + JOIN s as t ON true + ) +SELECT c FROM t0; + QUERY PLAN +--------------------------------------------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice3; segments: 3) + -> Aggregate + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Nested Loop + -> Seq Scan on r + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on s + -> Materialize + -> Seq Scan on s t + SubPlan 1 (slice0) + -> Subquery Scan on x + -> Result + Optimizer: Postgres query optimizer +(16 rows) + diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index ce71ba62a90e..85e610f315b5 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1009,3 +1009,141 @@ fetch backward all in c1; ERROR: backward scan is not supported in this version of Greenplum Database commit; --end_ignore +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, which contains attributes referenced in query's GROUP BY clause. +-- Due to presence of non-grouping columns in targetList, ORCA performs query +-- normalization, during which ORCA establishes a correspondence between vars +-- from targetlist entries to grouping attributes. And this process should +-- correctly handle nested structures. The inner part of SubPlan in the test +-- should contain only t.j. +-- start_ignore +drop table if exists t; +NOTICE: table "t" does not exist, skipping +-- end_ignore +create table t (i int, j int) distributed by (i); +insert into t values (1, 2); +explain (verbose, costs off) +select j, +(select j from (select j) q2) +from t +group by i, j; + QUERY PLAN +---------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: j, ((SubPlan 1)) + -> Result + Output: j, (SubPlan 1) + -> GroupAggregate + Output: j, i + Group Key: t.i, t.j + -> Sort + Output: i, j + Sort Key: t.i, t.j + -> Seq Scan on public.t + Output: i, j + SubPlan 1 (slice1; segments: 3) + -> Result + Output: t.j + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(18 rows) + +select j, +(select j from (select j) q2) +from t +group by i, j; + j | j +---+--- + 2 | 2 +(1 row) + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. 
+explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + QUERY PLAN +------------------------------------------------------------------------ + Result + Output: j, 1, ((SubPlan 1)) + -> Gather Motion 3:1 (slice2; segments: 3) + Output: j, ((SubPlan 1)) + -> GroupAggregate + Output: j, ((SubPlan 1)) + Group Key: t.j, ((SubPlan 1)) + -> Sort + Output: j, ((SubPlan 1)) + Sort Key: t.j, ((SubPlan 1)) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: j, ((SubPlan 1)) + Hash Key: j, ((SubPlan 1)) + -> Result + Output: j, ((SubPlan 1)) + -> Result + Output: (SubPlan 1), j + -> Seq Scan on public.t + Output: j + SubPlan 1 (slice1; segments: 3) + -> Result + Output: t.j + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(25 rows) + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + j | c | q1 +---+---+---- + 2 | 1 | 2 +(1 row) + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. +explain (verbose, costs off) +select (select max((select t.i))) from t; + QUERY PLAN +------------------------------------------------------ + Result + Output: (SubPlan 2) + -> Aggregate + Output: max((max((SubPlan 1)))) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (max((SubPlan 1))) + -> Aggregate + Output: max((SubPlan 1)) + -> Seq Scan on public.t + Output: i + SubPlan 1 (slice1; segments: 3) + -> Result + Output: t.i + -> Result + Output: true + SubPlan 2 (slice0) + -> Result + Output: (max((max((SubPlan 1))))) + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(21 rows) + +select (select max((select t.i))) from t; + max +----- + 1 +(1 row) + +drop table t; diff --git a/src/test/regress/sql/bfv_olap.sql b/src/test/regress/sql/bfv_olap.sql index 01124ffbaa2b..8c2c290bc4bf 100644 --- a/src/test/regress/sql/bfv_olap.sql +++ b/src/test/regress/sql/bfv_olap.sql @@ -420,7 +420,6 @@ select * from (select sum(a.salary) over(), count(*) from t2_github_issue_10143 a group by a.salary) T; --- this query currently falls back, needs to be fixed select (select rn from (select row_number() over () as rn, name from t1_github_issue_10143 where code = a.code diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 0d84d0316bc5..e2ce210d0599 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -517,3 +517,57 @@ fetch backward all in c1; commit; --end_ignore + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, which contains attributes referenced in query's GROUP BY clause. +-- Due to presence of non-grouping columns in targetList, ORCA performs query +-- normalization, during which ORCA establishes a correspondence between vars +-- from targetlist entries to grouping attributes. And this process should +-- correctly handle nested structures. The inner part of SubPlan in the test +-- should contain only t.j. 
+-- start_ignore +drop table if exists t; +-- end_ignore +create table t (i int, j int) distributed by (i); +insert into t values (1, 2); + +explain (verbose, costs off) +select j, +(select j from (select j) q2) +from t +group by i, j; + +select j, +(select j from (select j) q2) +from t +group by i, j; + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. +explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. +explain (verbose, costs off) +select (select max((select t.i))) from t; + +select (select max((select t.i))) from t; + +drop table t; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 67d8a149a507..cc592c611762 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1215,3 +1215,18 @@ explain (costs off) select * from r where b in (select b from s where c=10 order select * from r where b in (select b from s where c=10 order by c); explain (costs off) select * from r where b in (select b from s where c=10 order by c limit 2); select * from r where b in (select b from s where c=10 order by c limit 2); + +-- Test nested query with aggregate inside a sublink, +-- ORCA should correctly normalize the aggregate expression inside the +-- sublink's nested query and the column variable accessed in aggregate should +-- be accessible to the aggregate after the normalization of query. +-- If the query is not supported, ORCA should gracefully fallback to postgres +explain (COSTS OFF) with t0 AS ( + SELECT + ROW_TO_JSON((SELECT x FROM (SELECT max(t.b)) x)) + AS c + FROM r + JOIN s ON true + JOIN s as t ON true + ) +SELECT c FROM t0; From f0dd1660927e6cfc46f1860092fc5e8d5b9c81dd Mon Sep 17 00:00:00 2001 From: Praveen Kumar Date: Wed, 11 Oct 2023 10:33:29 +0530 Subject: [PATCH 037/106] [6x]: Cleanup orphaned directory of dropped database after differential recovery (#16525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cleanup orphaned directory of the dropped database after differential recovery Issue - Even if the database is dropped before the recovery process, differential recovery still leaves behind an orphaned directory for the dropped database under the base directory. RCA - When a database is created, a file named pg_internal.init is also generated in the database directory. During the rsync process of pg_data, we intentionally exclude this pg_internal.init file (--exclude=pg_internal.init). 
However, because this file remains in the database directory and is excluded from rsync, it results in an error message stating "cannot delete non-empty directory: base/db_oid". As a consequence, it becomes problematic to delete the directory of a dropped database. Fix - Removed the ‘pg_internal.init’ file from the excluded list of rsync, so that it won't be problematic to delete the directory of a dropped database. pg_internal.init file is rebuilt on startup so there is no problem if it is copied during sync_pg_data. --- gpMgmt/sbin/gpsegrecovery.py | 1 - .../behave/mgmt_utils/gprecoverseg.feature | 21 +++++++++++++++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 11 ++++++++++ .../mgmt_utils/steps/recoverseg_mgmt_utils.py | 12 +++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/gpMgmt/sbin/gpsegrecovery.py b/gpMgmt/sbin/gpsegrecovery.py index 4ed2efa11369..181d378fa212 100644 --- a/gpMgmt/sbin/gpsegrecovery.py +++ b/gpMgmt/sbin/gpsegrecovery.py @@ -172,7 +172,6 @@ def sync_pg_data(self): "current_logfiles.tmp", "postmaster.pid", "postmaster.opts", - "pg_internal.init", "internal.auto.conf", "pg_dynshmem", "pg_notify/*", diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index 0eb667244c8e..1a58cb631783 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -1965,3 +1965,24 @@ Feature: gprecoverseg tests And user can start transactions Then the row count of table test_recoverseg in "postgres" should be 2000 And the cluster is recovered in full and rebalanced + + + @demo_cluster + @concourse_cluster + Scenario: Cleanup orphaned directory of dropped database after differential recovery + Given the database is running + And all the segments are running + And the segments are synchronized + And the user runs psql with "-c 'CREATE DATABASE test_orphan_dir'" against database "template1" + And save the information of the database "test_orphan_dir" + And the "primary" segment information is saved + And the primary on content 0 is stopped + And user can start transactions + And the user runs psql with "-c 'DROP DATABASE test_orphan_dir'" against database "template1" + When the user runs "gprecoverseg -a --differential" + Then gprecoverseg should return a return code of 0 + And the user runs psql with "-c 'SELECT gp_request_fts_probe_scan()'" against database "template1" + And the status of the primary on content 0 should be "u" + Then verify deletion of orphaned directory of the dropped database + And the cluster is rebalanced + diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 82e9d30fbf54..de545ca93a7e 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -4327,3 +4327,14 @@ def impl(context, command, num): raise Exception( "Expected %s to occur %s times but Found %d times" .format(expected_pattern, num, match_count)) + + + +@given('save the information of the database "{dbname}"') +def impl(context, dbname): + with dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False) as conn: + query = """SELECT datname,oid FROM pg_database WHERE datname='{0}';""" .format(dbname) + datname, oid = dbconn.execSQLForSingletonRow(conn, query) + context.db_name = datname + context.db_oid = oid + diff --git a/gpMgmt/test/behave/mgmt_utils/steps/recoverseg_mgmt_utils.py 
b/gpMgmt/test/behave/mgmt_utils/steps/recoverseg_mgmt_utils.py index 213ec374907d..dbaec8c3bd96 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/recoverseg_mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/recoverseg_mgmt_utils.py @@ -761,3 +761,15 @@ def get_host_address(hostname): return host_address[0] +@then('verify deletion of orphaned directory of the dropped database') +def impl(context): + hostname = context.pseg_hostname + db_data_dir = "{0}/base/{1}".format(context.pseg_data_dir, context.db_oid) + cmd = Command("list directory", cmdStr="test -d {}".format(db_data_dir), ctxt=REMOTE, remoteHost=hostname) + cmd.run() + rc = cmd.get_return_code() + if rc == 0: + raise Exception('Orphaned directory:"{0}" of dropped database:"{1}" exists on host:"{2}"' .format(db_data_dir, + context.db_name, hostname)) + + From ff13851affd4fbeeed76a3447b1fa970c06e8ab5 Mon Sep 17 00:00:00 2001 From: Ning Wu Date: Tue, 17 Oct 2023 10:50:32 +0800 Subject: [PATCH 038/106] Remove yum install from verify_gpdb_versions task we have found several times the concourse task failed with `no URLs in mirrorlist` on this task, in order to reduce error, install the package in the docker image. [GPR-1562] Authored-by: Ning Wu --- concourse/scripts/verify_gpdb_versions.bash | 2 -- 1 file changed, 2 deletions(-) diff --git a/concourse/scripts/verify_gpdb_versions.bash b/concourse/scripts/verify_gpdb_versions.bash index a79b579f94b4..5955f2b8adf5 100755 --- a/concourse/scripts/verify_gpdb_versions.bash +++ b/concourse/scripts/verify_gpdb_versions.bash @@ -20,8 +20,6 @@ assert_postgres_version_matches() { fi } -yum -d0 -y install git - GREENPLUM_INSTALL_DIR=/usr/local/greenplum-db-devel GPDB_SRC_SHA=$(cd gpdb_src && git rev-parse HEAD) From 3e7110530cac6dba77981c330eb3c4b5673d9bae Mon Sep 17 00:00:00 2001 From: Hongxu Ma Date: Tue, 17 Oct 2023 13:46:23 +0800 Subject: [PATCH 039/106] Improve prune behavior for the read-only transaction (#16494) This PR backported #16458 to 6x. ---- Previous PR #13355 fixed #10314. But it introduced a new bug: IC-UDP may hang forever in some scenarios (lots of IC instances in single one UDF). Improve it in this PR: * for the non-read-only transaction, keep the previous logic (before PR-13355) to prevent the new bug * for the read-only transaction, introduce gp_interconnect_cursor_ic_table_size to config the size of Cursor History Table as a workaround --- src/backend/cdb/cdbvars.c | 1 + src/backend/cdb/motion/ic_udpifc.c | 104 +++++++++++++++++++---------- src/backend/utils/misc/guc_gp.c | 11 +++ src/include/cdb/cdbvars.h | 10 +++ src/include/utils/sync_guc_name.h | 1 + 5 files changed, 90 insertions(+), 37 deletions(-) diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 9b6557401b58..dd3b57608292 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -199,6 +199,7 @@ int Gp_interconnect_queue_depth = 4; /* max number of messages * waiting in rx-queue before * we drop. 
*/ int Gp_interconnect_snd_queue_depth = 2; +int Gp_interconnect_cursor_ic_table_size = 128; int Gp_interconnect_timer_period = 5; int Gp_interconnect_timer_checking_period = 20; int Gp_interconnect_default_rtt = 20; diff --git a/src/backend/cdb/motion/ic_udpifc.c b/src/backend/cdb/motion/ic_udpifc.c index f007bb8c94c7..9cce2e356a6b 100644 --- a/src/backend/cdb/motion/ic_udpifc.c +++ b/src/backend/cdb/motion/ic_udpifc.c @@ -186,17 +186,6 @@ struct ConnHashTable (a)->srcPid == (b)->srcPid && \ (a)->dstPid == (b)->dstPid && (a)->icId == (b)->icId)) - -/* - * Cursor IC table definition. - * - * For cursor case, there may be several concurrent interconnect - * instances on QD. The table is used to track the status of the - * instances, which is quite useful for "ACK the past and NAK the future" paradigm. - * - */ -#define CURSOR_IC_TABLE_SIZE (128) - /* * CursorICHistoryEntry * @@ -229,8 +218,9 @@ struct CursorICHistoryEntry typedef struct CursorICHistoryTable CursorICHistoryTable; struct CursorICHistoryTable { + uint32 size; uint32 count; - CursorICHistoryEntry *table[CURSOR_IC_TABLE_SIZE]; + CursorICHistoryEntry **table; }; /* @@ -280,6 +270,13 @@ struct ReceiveControlInfo /* Cursor history table. */ CursorICHistoryTable cursorHistoryTable; + + /* + * Last distributed transaction id when SetupUDPInterconnect is called. + * Coupled with cursorHistoryTable, it is used to handle multiple + * concurrent cursor cases. + */ + DistributedTransactionId lastDXatId; }; /* @@ -914,8 +911,13 @@ dumpTransProtoStats() static void initCursorICHistoryTable(CursorICHistoryTable *t) { + MemoryContext old; t->count = 0; - memset(t->table, 0, sizeof(t->table)); + t->size = Gp_interconnect_cursor_ic_table_size; + + old = MemoryContextSwitchTo(ic_control_info.memContext); + t->table = palloc0(sizeof(struct CursorICHistoryEntry *) * t->size); + MemoryContextSwitchTo(old); } /* @@ -927,7 +929,7 @@ addCursorIcEntry(CursorICHistoryTable *t, uint32 icId, uint32 cid) { MemoryContext old; CursorICHistoryEntry *p; - uint32 index = icId % CURSOR_IC_TABLE_SIZE; + uint32 index = icId % t->size; old = MemoryContextSwitchTo(ic_control_info.memContext); p = palloc0(sizeof(struct CursorICHistoryEntry)); @@ -957,7 +959,7 @@ static void updateCursorIcEntry(CursorICHistoryTable *t, uint32 icId, uint8 status) { struct CursorICHistoryEntry *p; - uint8 index = icId % CURSOR_IC_TABLE_SIZE; + uint8 index = icId % t->size; for (p = t->table[index]; p; p = p->next) { @@ -978,7 +980,7 @@ static CursorICHistoryEntry * getCursorIcEntry(CursorICHistoryTable *t, uint32 icId) { struct CursorICHistoryEntry *p; - uint8 index = icId % CURSOR_IC_TABLE_SIZE; + uint8 index = icId % t->size; for (p = t->table[index]; p; p = p->next) { @@ -1000,7 +1002,7 @@ pruneCursorIcEntry(CursorICHistoryTable *t, uint32 icId) { uint8 index; - for (index = 0; index < CURSOR_IC_TABLE_SIZE; index++) + for (index = 0; index < t->size; index++) { struct CursorICHistoryEntry *p, *q; @@ -1049,7 +1051,7 @@ purgeCursorIcEntry(CursorICHistoryTable *t) { uint8 index; - for (index = 0; index < CURSOR_IC_TABLE_SIZE; index++) + for (index = 0; index < t->size; index++) { struct CursorICHistoryEntry *trash; @@ -1446,6 +1448,7 @@ InitMotionUDPIFC(int *listenerSocketFd, uint16 *listenerPort) /* allocate a buffer for sending disorder messages */ rx_control_info.disorderBuffer = palloc0(MIN_PACKET_SIZE); + rx_control_info.lastDXatId = InvalidTransactionId; rx_control_info.lastTornIcId = 0; initCursorICHistoryTable(&rx_control_info.cursorHistoryTable); @@ -3077,34 +3080,61 @@ 
SetupUDPIFCInterconnect_Internal(SliceTable *sliceTable) set_test_mode(); #endif + /* Prune the QD's history table if it is too large */ if (Gp_role == GP_ROLE_DISPATCH) { - /* - * Prune the history table if it is too large - * - * We only keep history of constant length so that - * - The history table takes only constant amount of memory. - * - It is long enough so that it is almost impossible to receive - * packets from an IC instance that is older than the first one - * in the history. - */ - if (rx_control_info.cursorHistoryTable.count > (2 * CURSOR_IC_TABLE_SIZE)) - { - uint32 prune_id = sliceTable->ic_instance_id - CURSOR_IC_TABLE_SIZE; + CursorICHistoryTable *ich_table = &rx_control_info.cursorHistoryTable; + DistributedTransactionId distTransId = getDistributedTransactionId(); - /* - * Only prune if we didn't underflow -- also we want the prune id - * to be newer than the limit (hysteresis) + if (ich_table->count > (2 * ich_table->size)) + { + /* + * distTransId != lastDXatId + * Means the last transaction is finished, it's ok to make a prune. */ - if (prune_id < sliceTable->ic_instance_id) + if (distTransId != rx_control_info.lastDXatId) { if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) - elog(DEBUG1, "prune cursor history table (count %d), icid %d", rx_control_info.cursorHistoryTable.count, sliceTable->ic_instance_id); - pruneCursorIcEntry(&rx_control_info.cursorHistoryTable, prune_id); + elog(DEBUG1, "prune cursor history table (count %d), icid %d, prune_id %d", + ich_table->count, sliceTable->ic_instance_id, sliceTable->ic_instance_id); + pruneCursorIcEntry(ich_table, sliceTable->ic_instance_id); + } + } + /* + * distTransId == lastDXatId and they are not InvalidTransactionId(0) + * Means current Non Read-Only transaction isn't finished, MUST not prune. + */ + else if (rx_control_info.lastDXatId != InvalidTransactionId) + { + ; + } + /* + * distTransId == lastDXatId and they are InvalidTransactionId(0) + * Means both are Read-Only transactions or the same transaction. + */ + else + { + if (ich_table->count > (2 * ich_table->size)) + { + uint32 prune_id = sliceTable->ic_instance_id - ich_table->size; + + /* + * Only prune if we didn't underflow -- also we want the prune id + * to be newer than the limit (hysteresis) + */ + if (prune_id < sliceTable->ic_instance_id) + { + if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) + elog(DEBUG1, "prune cursor history table (count %d), icid %d, prune_id %d", + ich_table->count, sliceTable->ic_instance_id, prune_id); + pruneCursorIcEntry(ich_table, prune_id); + } } } - addCursorIcEntry(&rx_control_info.cursorHistoryTable, sliceTable->ic_instance_id, gp_command_count); + addCursorIcEntry(ich_table, sliceTable->ic_instance_id, gp_command_count); + /* save the latest transaction id. */ + rx_control_info.lastDXatId = distTransId; } /* now we'll do some setup for each of our Receiving Motion Nodes. */ diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 20a63f02bca1..91aaf41b594c 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3681,6 +3681,17 @@ struct config_int ConfigureNamesInt_gp[] = NULL, NULL, NULL }, + { + {"gp_interconnect_cursor_ic_table_size", PGC_USERSET, GP_ARRAY_TUNING, + gettext_noop("Sets the size of Cursor Table in the UDP interconnect"), + gettext_noop("You can try to increase it when a UDF which contains many concurrent " + "cursor queries hangs. 
The default value is 128.") + }, + &Gp_interconnect_cursor_ic_table_size, + 128, 128, 102400, + NULL, NULL, NULL + }, + { {"gp_interconnect_timer_period", PGC_USERSET, GP_ARRAY_TUNING, gettext_noop("Sets the timer period (in ms) for UDP interconnect"), diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index a72d6a4202b2..cb5cce1f8ade 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -420,6 +420,16 @@ extern int Gp_interconnect_queue_depth; * */ extern int Gp_interconnect_snd_queue_depth; + +/* + * Cursor IC table size. + * + * For cursor case, there may be several concurrent interconnect + * instances on QD. The table is used to track the status of the + * instances, which is quite useful for "ACK the past and NAK the future" paradigm. + * + */ +extern int Gp_interconnect_cursor_ic_table_size; extern int Gp_interconnect_timer_period; extern int Gp_interconnect_timer_checking_period; extern int Gp_interconnect_default_rtt; diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 5621a9de1baa..259986f3d678 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -32,6 +32,7 @@ "gp_indexcheck_insert", "gp_indexcheck_vacuum", "gp_initial_bad_row_limit", + "gp_interconnect_cursor_ic_table_size", "gp_interconnect_debug_retry_interval", "gp_interconnect_default_rtt", "gp_interconnect_fc_method", From fe384c35f0fd27629276547d8d7cefc5fc61b8a0 Mon Sep 17 00:00:00 2001 From: chaotian <108248800+charliettxx@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:33:10 +0800 Subject: [PATCH 040/106] Fix Insert Error with unknown type (#16498) unknown-type constants and params in the SELECT's targetlist are copied up as-is rather than being referenced as subquery outputs. This is to ensure that when we try to coerce them to the target column's datatype. --- src/backend/parser/analyze.c | 31 ++++++++++++++++++++++----- src/test/regress/expected/strings.out | 9 ++++++++ src/test/regress/sql/strings.sql | 7 ++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 21f7345fc2f4..a5666f901484 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -657,13 +657,34 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) * separate from the subquery's tlist because we may add columns, * insert datatype coercions, etc.) * - * Const and Param nodes of type UNKNOWN in the SELECT's targetlist - * no longer need special treatment here. They'll be assigned proper - * types later by coerce_type() upon assignment to the target columns. - * Otherwise this fails: INSERT INTO foo SELECT 'bar', ... FROM baz + * HACK: unknown-type constants and params in the SELECT's targetlist + * are copied up as-is rather than being referenced as subquery + * outputs. This is to ensure that when we try to coerce them to + * the target column's datatype, the right things happen (see + * special cases in coerce_type). Otherwise, this fails: + * INSERT INTO foo SELECT 'bar', ... 
FROM baz *---------- */ - expandRTE(rte, rtr->rtindex, 0, -1, false, NULL, &exprList); + exprList = NIL; + foreach(lc, selectQuery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Expr *expr; + + if (tle->resjunk) + continue; + if (tle->expr && + (IsA(tle->expr, Const) ||IsA(tle->expr, Param)) && + exprType((Node *) tle->expr) == UNKNOWNOID) + expr = tle->expr; + else + { + Var *var = makeVarFromTargetEntry(rtr->rtindex, tle); + + expr = (Expr *) var; + } + exprList = lappend(exprList, expr); + } /* Prepare row for assignment to target table */ exprList = transformInsertRow(pstate, exprList, diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 9187a83c7328..a77c73e9e3f1 100755 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -1933,6 +1933,15 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 Th\000o\x02\x03 (1 row) +-- copy unknown-type column from targetlist rather than reference to subquery outputs +CREATE DOMAIN public.date_timestamp AS timestamp without time zone; +create table dt1(a int, b int, c public.date_timestamp, d public.date_timestamp); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into dt1 values(1, 1, now(), now()); +insert into dt1 select a, b, 'Thu Sep 14 03:19:54 EDT 2023' as c, 'Thu Sep 14 03:19:54 EDT 2023' as d from dt1; +DROP TABLE dt1; +DROP DOMAIN public.date_timestamp; -- Clean up GPDB-added tables DROP TABLE char_strings_tbl; DROP TABLE varchar_strings_tbl; diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 19265c9e78e0..4b6ca3c5d90c 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -644,6 +644,13 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape'); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape'); +-- copy unknown-type column from targetlist rather than reference to subquery outputs +CREATE DOMAIN public.date_timestamp AS timestamp without time zone; +create table dt1(a int, b int, c public.date_timestamp, d public.date_timestamp); +insert into dt1 values(1, 1, now(), now()); +insert into dt1 select a, b, 'Thu Sep 14 03:19:54 EDT 2023' as c, 'Thu Sep 14 03:19:54 EDT 2023' as d from dt1; +DROP TABLE dt1; +DROP DOMAIN public.date_timestamp; -- Clean up GPDB-added tables DROP TABLE char_strings_tbl; From 9b261a59dba28dc9e85d0e17e52c4d9b412950a5 Mon Sep 17 00:00:00 2001 From: Annpurna Shahani Date: Tue, 17 Oct 2023 11:43:54 +0530 Subject: [PATCH 041/106] [6X] gprecoverseg rebalance: revert 10 GB default value for replay lag Check for replay lag during rebalance has been introduced through commit d5f26a703a91735b4798630d0894326e9961513d and the default value of replay lag is set to 10 GB which can be configured using --replay-lag flag. Reverting the default value of replay lag as the user can configure to any required value. --disable-replay-lag flag is not required now as there will be no replay lag check by default. 
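A minimal sketch of how an operator might inspect mirror replay lag by hand before running `gprecoverseg -r`, now that the utility only enforces a limit when `--replay-lag` is passed. This is not the query the utility runs internally; it assumes the `gp_stat_replication` view and the 9.4-era `pg_xlog_location_diff()` function available in Greenplum 6.

```sql
-- Hedged sketch: report how far each mirror's replay position trails its
-- flush position (the flush_lsn - replay_lsn definition used by gprecoverseg).
SELECT gp_segment_id,
       application_name,
       flush_location,
       replay_location,
       pg_xlog_location_diff(flush_location, replay_location) AS replay_lag_bytes
FROM   gp_stat_replication
ORDER  BY replay_lag_bytes DESC;
```

If the reported lag is large, either wait for the mirrors to catch up or pass an explicit cap, for example `gprecoverseg -r --replay-lag 10`, so the rebalance aborts rather than proceeding against a badly lagging mirror.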
--- gpMgmt/bin/gppylib/commands/gp.py | 5 ---- .../gppylib/operations/rebalanceSegments.py | 8 +++--- .../bin/gppylib/programs/clsRecoverSegment.py | 10 +++---- .../test/unit/test_unit_gprecoverseg.py | 1 - .../test/unit/test_unit_rebalance_segment.py | 17 +++--------- gpMgmt/doc/gprecoverseg_help | 12 +++------ .../behave/mgmt_utils/gprecoverseg.feature | 27 ++++--------------- .../utility_guide/ref/gprecoverseg.html.md | 7 ++--- 8 files changed, 21 insertions(+), 66 deletions(-) diff --git a/gpMgmt/bin/gppylib/commands/gp.py b/gpMgmt/bin/gppylib/commands/gp.py index 43235c1e6773..1b26ac04905d 100644 --- a/gpMgmt/bin/gppylib/commands/gp.py +++ b/gpMgmt/bin/gppylib/commands/gp.py @@ -52,11 +52,6 @@ #max batch size of thread pool on master MAX_MASTER_NUM_WORKERS=64 -# Maximum replay lag (in GBs) allowed on mirror when rebalancing the segments -# The default value for ALLOWED_REPLAY_LAG has been decided to be 10 GBs as mirror -# took 5 mins to replay 10 GB lag on a local demo cluster. -ALLOWED_REPLAY_LAG = 10 - # Application name used by the pg_rewind instance that gprecoverseg starts # during incremental recovery. gpstate uses this to figure out when incremental # recovery is active. diff --git a/gpMgmt/bin/gppylib/operations/rebalanceSegments.py b/gpMgmt/bin/gppylib/operations/rebalanceSegments.py index 296997e234e7..0ab305c57438 100644 --- a/gpMgmt/bin/gppylib/operations/rebalanceSegments.py +++ b/gpMgmt/bin/gppylib/operations/rebalanceSegments.py @@ -52,12 +52,11 @@ def run(self): class GpSegmentRebalanceOperation: - def __init__(self, gpEnv, gpArray, batch_size, segment_batch_size, disable_replay_lag, replay_lag): + def __init__(self, gpEnv, gpArray, batch_size, segment_batch_size, replay_lag): self.gpEnv = gpEnv self.gpArray = gpArray self.batch_size = batch_size self.segment_batch_size = segment_batch_size - self.disable_replay_lag = disable_replay_lag self.replay_lag = replay_lag self.logger = gplog.get_default_logger() @@ -73,14 +72,13 @@ def rebalance(self): continue if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized(): - if not self.disable_replay_lag: + if self.replay_lag is not None: self.logger.info("Allowed replay lag during rebalance is {} GB".format(self.replay_lag)) replay_lag_in_bytes = replay_lag(segmentPair.primaryDB) if float(replay_lag_in_bytes) >= (self.replay_lag * 1024 * 1024 * 1024): raise Exception("{} bytes of xlog is still to be replayed on mirror with dbid {}, let " "mirror catchup on replay then trigger rebalance. Use --replay-lag to " - "configure the allowed replay lag limit or --disable-replay-lag to disable" - " the check completely if you wish to continue with rebalance anyway" + "configure the allowed replay lag limit." 
.format(replay_lag_in_bytes, segmentPair.primaryDB.getSegmentDbId())) unbalanced_primary_segs.append(segmentPair.primaryDB) else: diff --git a/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py b/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py index 5e737bbc2132..80f7cee8d37b 100644 --- a/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py +++ b/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py @@ -98,7 +98,7 @@ def outputToFile(self, mirrorBuilder, gpArray, fileName): def getRecoveryActionsBasedOnOptions(self, gpEnv, gpArray): if self.__options.rebalanceSegments: - return GpSegmentRebalanceOperation(gpEnv, gpArray, self.__options.parallelDegree, self.__options.parallelPerHost, self.__options.disableReplayLag, self.__options.replayLag) + return GpSegmentRebalanceOperation(gpEnv, gpArray, self.__options.parallelDegree, self.__options.parallelPerHost, self.__options.replayLag) else: instance = RecoveryTripletsFactory.instance(gpArray, self.__options.recoveryConfigFile, self.__options.newRecoverHosts, self.__options.parallelDegree) segs = [GpMirrorToBuild(t.failed, t.live, t.failover, self.__options.forceFullResynchronization, self.__options.differentialResynchronization) @@ -253,8 +253,8 @@ def run(self): if self.__options.differentialResynchronization and self.__options.outputSampleConfigFile: raise ProgramArgumentValidationException("Invalid -o provided with --differential argument") - if self.__options.disableReplayLag and not self.__options.rebalanceSegments: - raise ProgramArgumentValidationException("--disable-replay-lag should be used only with -r") + if self.__options.replayLag and not self.__options.rebalanceSegments: + raise ProgramArgumentValidationException("--replay-lag should be used only with -r") faultProberInterface.getFaultProber().initializeProber(gpEnv.getMasterPort()) @@ -464,11 +464,9 @@ def createParser(): addTo.add_option("-r", None, default=False, action='store_true', dest='rebalanceSegments', help='Rebalance synchronized segments.') - addTo.add_option("--replay-lag", None, type="float", default=gp.ALLOWED_REPLAY_LAG, + addTo.add_option("--replay-lag", None, type="float", dest="replayLag", metavar="", help='Allowed replay lag on mirror, lag should be provided in GBs') - addTo.add_option("--disable-replay-lag", None, default=False, action='store_true', - dest='disableReplayLag', help='Disable replay lag check when rebalancing segments') addTo.add_option('', '--hba-hostnames', action='store_true', dest='hba_hostnames', help='use hostnames instead of CIDR in pg_hba.conf') diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py index b3d08c16d839..ebe8f73848b3 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gprecoverseg.py @@ -24,7 +24,6 @@ def __init__(self): self.recoveryConfigFile = None self.outputSpareDataDirectoryFile = None self.rebalanceSegments = None - self.disableReplayLag = None self.replayLag = None self.outputSampleConfigFile = None diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py b/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py index c789de0b4949..2cda90f970ce 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_rebalance_segment.py @@ -38,8 +38,7 @@ def setUp(self): self.success_command_mock.get_results.return_value = CommandResult( 0, "stdout success text", "stderr text", True, False) - self.subject = 
GpSegmentRebalanceOperation(Mock(), self._create_gparray_with_2_primary_2_mirrors(), 1, 1, False, - 10) + self.subject = GpSegmentRebalanceOperation(Mock(), self._create_gparray_with_2_primary_2_mirrors(), 1, 1, 10) self.subject.logger = Mock(spec=['log', 'warn', 'info', 'debug', 'error', 'warning', 'fatal']) self.mock_logger = self.get_mock_from_apply_patch('logger') @@ -72,9 +71,8 @@ def test_rebalance_returns_warning(self, mock1): with self.assertRaises(Exception) as ex: self.subject.rebalance() self.assertEqual('56780000000 bytes of xlog is still to be replayed on mirror with dbid 2, let mirror catchup ' - 'on replay then trigger rebalance. Use --replay-lag to configure the allowed replay lag limit ' - 'or --disable-replay-lag to disable the check completely if you wish to continue with ' - 'rebalance anyway', str(ex.exception)) + 'on replay then trigger rebalance. Use --replay-lag to configure the allowed replay lag limit.' + , str(ex.exception)) self.assertEqual([call("Get replay lag on mirror of primary segment with host:sdw1, port:40000")], self.mock_logger.debug.call_args_list) self.assertEqual([call("Determining primary and mirror segment pairs to rebalance"), @@ -87,15 +85,6 @@ def test_rebalance_does_not_return_warning(self, mock1): self.assertEqual([call("Get replay lag on mirror of primary segment with host:sdw1, port:40000")], self.mock_logger.debug.call_args_list) - @patch('gppylib.db.dbconn.execSQLForSingleton', return_value='56780000000') - def test_rebalance_replay_lag_is_disabled(self, mock1): - self.subject.disable_replay_lag = True - self.subject.rebalance() - self.assertNotIn([call("Get replay lag on mirror of primary segment with host:sdw1, port:40000")], - self.mock_logger.debug.call_args_list) - self.assertIn([call("Determining primary and mirror segment pairs to rebalance")], - self.subject.logger.info.call_args_list) - @patch('gppylib.db.dbconn.connect', side_effect=Exception()) def test_replay_lag_connect_exception(self, mock1): with self.assertRaises(Exception) as ex: diff --git a/gpMgmt/doc/gprecoverseg_help b/gpMgmt/doc/gprecoverseg_help index 43fb43b5206e..7ac114c0e2f1 100755 --- a/gpMgmt/doc/gprecoverseg_help +++ b/gpMgmt/doc/gprecoverseg_help @@ -14,7 +14,7 @@ gprecoverseg [-p [,...]] [-F] [-a] [-q] [-s] [--no-progress] [-l ] -gprecoverseg -r [--replay-lag ] [--disable-replay-lag] +gprecoverseg -r [--replay-lag ] gprecoverseg -o @@ -244,13 +244,9 @@ running gprecoverseg -r. If there are any in progress queries, they will be cancelled and rolled back. --replay-lag -Replay lag(in GBs) allowed on mirror when rebalancing the segments. Default is 10 GB. If -the replay_lag (flush_lsn-replay_lsn) is more than the value provided with this option -then rebalance will be aborted. - - ---disable-replay-lag -Disable replay lag check when rebalancing segments +Replay lag(in GBs) allowed on mirror when rebalancing the segments. If the replay_lag +(flush_lsn-replay_lsn) is more than the value provided with this option then rebalance +will be aborted. 
-s diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index 1a58cb631783..a1bf16e5630c 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -1913,37 +1913,20 @@ Feature: gprecoverseg tests And all files in gpAdminLogs directory are deleted on all hosts in the cluster And user immediately stops all primary processes for content 0 And user can start transactions + When the user runs "gprecoverseg -av --replay-lag 10" + Then gprecoverseg should return a return code of 2 + And gprecoverseg should print "--replay-lag should be used only with -r" to stdout When the user runs "gprecoverseg -av" Then gprecoverseg should return a return code of 0 When the user runs "gprecoverseg -ar --replay-lag 0" Then gprecoverseg should return a return code of 2 + And gprecoverseg should print "Allowed replay lag during rebalance is 0.0 GB" to stdout And gprecoverseg should print ".* bytes of xlog is still to be replayed on mirror with dbid.*, let mirror catchup on replay then trigger rebalance" regex to logfile - When the user runs "gprecoverseg -ar --disable-replay-lag" + When the user runs "gprecoverseg -ar" Then gprecoverseg should return a return code of 0 And all the segments are running And user can start transactions - @demo_cluster - @concourse_cluster - Scenario: gprecoverseg errors out if invalid options are used with --disable-replay-lag - Given the database is running - And all the segments are running - And the segments are synchronized - And all files in gpAdminLogs directory are deleted on all hosts in the cluster - And user immediately stops all primary processes for content 0,1,2 - And user can start transactions - When the user runs "gprecoverseg -av" - Then gprecoverseg should return a return code of 0 - And verify that mirror on content 0,1,2 is up - When the user runs "gprecoverseg -aF --disable-replay-lag" - Then gprecoverseg should return a return code of 2 - And gprecoverseg should print "--disable-replay-lag should be used only with -r" to stdout - When the user runs "gprecoverseg -ar" - Then gprecoverseg should return a return code of 0 - And gprecoverseg should print "Allowed replay lag during rebalance is 10 GB" to stdout - And all the segments are running - And user can start transactions - @remove_rsync_bash @concourse_cluster diff --git a/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md b/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md index 414e8bcdb4c3..32c3f574aa5b 100644 --- a/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md +++ b/gpdb-doc/markdown/utility_guide/ref/gprecoverseg.html.md @@ -10,7 +10,7 @@ gprecoverseg [[-p [,...]] | -i ] [-d ] [--no-progress] [-l ] -gprecoverseg -r [--replay-lag ] [--disable-replay-lag] +gprecoverseg -r [--replay-lag ] gprecoverseg -o               [-p [,...]] @@ -169,10 +169,7 @@ The recovery process marks the segment as up again in the Greenplum Database sys : After a segment recovery, segment instances may not be returned to the preferred role that they were given at system initialization time. This can leave the system in a potentially unbalanced state, as some segment hosts may have more active segments than is optimal for top system performance. This option rebalances primary and mirror segments by returning them to their preferred roles. All segments must be valid and resynchronized before running `gprecoverseg -r`. 
If there are any in progress queries, they will be cancelled and rolled back. --replay-lag -: Replay lag(in GBs) allowed on mirror when rebalancing the segments. Default is 10 GB. If the replay_lag (flush_lsn-replay_lsn) is more than the value provided with this option then rebalance will be aborted. - ---disable-replay-lag -: Disable replay lag check when rebalancing segments +: Replay lag(in GBs) allowed on mirror when rebalancing the segments. If the replay_lag (flush_lsn-replay_lsn) is more than the value provided with this option then rebalance will be aborted. -s \(sequential progress\) : Show `pg_basebackup` or `pg_rewind` progress sequentially instead of in-place. Useful when writing to a file, or if a tty does not support escape sequences. The default is to show progress in-place. From d51eac64d60b0b25668e0078cfb06488d84e41bf Mon Sep 17 00:00:00 2001 From: Ning Wu Date: Tue, 17 Oct 2023 17:55:17 +0800 Subject: [PATCH 042/106] Update the verify_gpdb_versions task image change to gcr.io/data-gpdb-public-images/gpdb6-rocky8-build image which includes git. [GPR-1562] Authored-by: Ning Wu --- concourse/tasks/verify_gpdb_versions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/concourse/tasks/verify_gpdb_versions.yml b/concourse/tasks/verify_gpdb_versions.yml index e5b013e8e1f1..11016bea0053 100644 --- a/concourse/tasks/verify_gpdb_versions.yml +++ b/concourse/tasks/verify_gpdb_versions.yml @@ -4,8 +4,8 @@ platform: linux image_resource: type: registry-image source: - repository: centos - tag: 7 + repository: gcr.io/data-gpdb-public-images/gpdb6-rocky8-build + tag: latest inputs: - name: gpdb_src From 332d96eb91f8eb1ce433444a5fb5d87628a9d897 Mon Sep 17 00:00:00 2001 From: Tao Tang Date: Thu, 12 Oct 2023 16:09:06 +0800 Subject: [PATCH 043/106] docs - updated workload_mgmt_resgroups to support cgroup v1 on rhel9 (6X) (#16580) libcgroup/libcgroup-utils packages are official removed since RHEL9. A new service is provided in the document to support gpdb6 with cgroup v1 on RHEL 9.x Co-authored-by: mperezfuster --- .../workload_mgmt_resgroups.html.md | 129 ++++++++++-------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/gpdb-doc/markdown/admin_guide/workload_mgmt_resgroups.html.md b/gpdb-doc/markdown/admin_guide/workload_mgmt_resgroups.html.md index 04c093b7362f..86c889eaa36d 100644 --- a/gpdb-doc/markdown/admin_guide/workload_mgmt_resgroups.html.md +++ b/gpdb-doc/markdown/admin_guide/workload_mgmt_resgroups.html.md @@ -8,24 +8,6 @@ When you assign a resource group to a role \(a role-based resource group\), the Similarly, when you assign a resource group to an external component, the group limits apply to all running instances of the component. For example, if you create a resource group for a PL/Container external component, the memory limit that you define for the group specifies the maximum memory usage for all running instances of each PL/Container runtime to which you assign the group. 
-This topic includes the following subtopics: - -- [Understanding Role and Component Resource Groups](#topic8339intro) -- [Resource Group Attributes and Limits](#topic8339introattrlim) - - [Memory Auditor](#topic8339777) - - [Transaction Concurrency Limit](#topic8339717179) - - [CPU Limits](#topic833971717) - - [Memory Limits](#topic8339717) -- [Using VMware Greenplum Command Center to Manage Resource Groups](#topic999) -- [Configuring and Using Resource Groups](#topic71717999) - - [Enabling Resource Groups](#topic8) - - [Creating Resource Groups](#topic10) - - [Configuring Automatic Query Termination Based on Memory Usage](#topic_jlz_hzg_pkb) - - [Assigning a Resource Group to a Role](#topic17) -- [Monitoring Resource Group Status](#topic22) -- [Moving a Query to a Different Resource Group](#moverg) -- [Resource Group Frequently Asked Questions](#topic777999) - **Parent topic:** [Managing Resources](wlmgmt.html) ## Understanding Role and Component Resource Groups @@ -282,29 +264,17 @@ Refer to the [Greenplum Command Center documentation](http://docs.vmware.com/en/ If you use RedHat 6 and the performance with resource groups is acceptable for your use case, upgrade your kernel to version 2.6.32-696 or higher to benefit from other fixes to the cgroups implementation. -### Prerequisite +### Prerequisites Greenplum Database resource groups use Linux Control Groups \(cgroups\) to manage CPU resources. Greenplum Database also uses cgroups to manage memory for resource groups for external components. With cgroups, Greenplum isolates the CPU and external component memory usage of your Greenplum processes from other processes on the node. This allows Greenplum to support CPU and external component memory usage restrictions on a per-resource-group basis. -> **Note** Redhat 8.x supports two versions of cgroups: cgroup v1 and cgroup v2. Greenplum Database only supports cgroup v1. Follow the steps below to make sure that your system is mounting the `cgroups-v1` filesystem at startup. +> **Note** Redhat 8.x/9.x supports two versions of cgroups: cgroup v1 and cgroup v2. Greenplum Database only supports cgroup v1. Follow the steps below to make sure that your system is mounting the `cgroups-v1` filesystem at startup. For detailed information about cgroups, refer to the Control Groups documentation for your Linux distribution. Complete the following tasks on each node in your Greenplum Database cluster to set up cgroups for use with resource groups: -1. If not already installed, install the Control Groups operating system package on each Greenplum Database node. The command that you run to perform this task will differ based on the operating system installed on the node. You must be the superuser or have `sudo` access to run the command: - - Redhat/CentOS 7.x/8.x systems: - - ``` - sudo yum install libcgroup-tools - ``` - - Redhat/CentOS 6.x systems: - - ``` - sudo yum install libcgroup - ``` - -1. If you are using Redhat 8.x, make sure that you configured the system to mount the `cgroups-v1` filesystem by default during system boot by running the following command: +1. If you are using Redhat 8.x/9.x, make sure that you configured the system to mount the `cgroups-v1` filesystem by default during system boot by running the following command: ``` stat -fc %T /sys/fs/cgroup/ @@ -325,14 +295,19 @@ Complete the following tasks on each node in your Greenplum Database cluster to Reboot the system for the changes to take effect. +1. Create the required cgroup hierarchies on each Greenplum Database node. 
Since the hierarchies are cleaned when the operating system rebooted, a service is applied to recreate them automatically on boot. Follow the below steps based on your operating system version. + +#### Redhat/CentOS 6.x/7.x/8.x + +These operating systems include the `libcgroup-tools` package (for Redhat/CentOS 7.x/8.x) or `libcgroup` (for Redhat/CentOS 6.x) 1. Locate the cgroups configuration file `/etc/cgconfig.conf`. You must be the superuser or have `sudo` access to edit this file: ``` - sudo vi /etc/cgconfig.conf + vi /etc/cgconfig.conf ``` -2. Add the following configuration information to the file: +1. Add the following configuration information to the file: ``` group gpdb { @@ -359,19 +334,38 @@ Complete the following tasks on each node in your Greenplum Database cluster to This content configures CPU, CPU accounting, CPU core set, and memory control groups managed by the `gpadmin` user. Greenplum Database uses the memory control group only for those resource groups created with the `cgroup` `MEMORY_AUDITOR`. -3. Start the cgroups service on each Greenplum Database node. The command that you run to perform this task will differ based on the operating system installed on the node. You must be the superuser or have `sudo` access to run the command: +1. Start the cgroups service on each Greenplum Database node. You must be the superuser or have `sudo` access to run the command: + - Redhat/CentOS 7.x/8.x systems: + + ``` + cgconfigparser -l /etc/cgconfig.conf + ``` + - Redhat/CentOS 6.x systems: + + ``` + service cgconfig start + ``` + +1. To automatically recreate Greenplum Database required cgroup hierarchies and parameters when your system is restarted, configure your system to enable the Linux cgroup service daemon `cgconfig.service` \(Redhat/CentOS 7.x/8.x\) or `cgconfig` \(Redhat/CentOS 6.x\) at node start-up. To ensure the configuration is persistent after reboot, run the following commands as user root: + - Redhat/CentOS 7.x/8.x systems: ``` - sudo cgconfigparser -l /etc/cgconfig.conf + systemctl enable cgconfig.service + ``` + + To start the service immediately \(without having to reboot\) enter: + + ``` + systemctl start cgconfig.service ``` - Redhat/CentOS 6.x systems: ``` - sudo service cgconfig start + chkconfig cgconfig on ``` -4. Identify the `cgroup` directory mount point for the node: +1. Identify the `cgroup` directory mount point for the node: ``` grep cgroup /proc/mounts @@ -379,7 +373,7 @@ Complete the following tasks on each node in your Greenplum Database cluster to The first line of output identifies the `cgroup` mount point. -5. Verify that you set up the Greenplum Database cgroups configuration correctly by running the following commands. Replace \ with the mount point that you identified in the previous step: +1. Verify that you set up the Greenplum Database cgroups configuration correctly by running the following commands. Replace \ with the mount point that you identified in the previous step: ``` ls -l /cpu/gpdb @@ -390,26 +384,41 @@ Complete the following tasks on each node in your Greenplum Database cluster to If these directories exist and are owned by `gpadmin:gpadmin`, you have successfully configured cgroups for Greenplum Database CPU resource management. -6. To automatically recreate Greenplum Database required cgroup hierarchies and parameters when your system is restarted, configure your system to enable the Linux cgroup service daemon `cgconfig.service` \(Redhat/CentOS 7.x/8.x\) or `cgconfig` \(Redhat/CentOS 6.x\) at node start-up. 
For example, configure one of the following cgroup service commands in your preferred service auto-start tool: - - Redhat/CentOS 7.x/8.x systems: - - ``` - sudo systemctl enable cgconfig.service - ``` - - To start the service immediately \(without having to reboot\) enter: - - ``` - sudo systemctl start cgconfig.service - ``` - - Redhat/CentOS 6.x systems: - - ``` - sudo chkconfig cgconfig on - ``` - - You may choose a different method to recreate the Greenplum Database resource group cgroup hierarchies. - +#### Redhat 9.x + +If you are using Redhat 9.x, the `libcgroup` and `libcgroup-tools` packages are not available with the operating system. In this scenario, you must manually create a service that automatically recreates the cgroup hierarchies after a system boot. Add the following bash script for systemd so it runs automatically during system startup. Perform the following steps as user root: + +1. Create `greenplum-cgroup-v1-config.service` + ``` + vim /etc/systemd/system/greenplum-cgroup-v1-config.service + ``` + +2. Write the following content into `greenplum-cgroup-v1-config.service`. If the user is not `gpadmin`, replace it with the appropriate user. + ``` + [Unit] + Description=Greenplum Cgroup v1 Configuration + + [Service] + Type=oneshot + RemainAfterExit=yes + WorkingDirectory=/sys/fs/cgroup + # set up hierarchies only if cgroup v1 mounted + ExecCondition=bash -c '[ xcgroupfs = x$(stat -fc "%%T" /sys/fs/cgroup/memory) ] || exit 1' + ExecStart=bash -ec '\ + for controller in cpu cpuacct cpuset memory;do \ + [ -e $controller/gpdb ] || mkdir $controller/gpdb; \ + chown -R gpadmin:gpadmin $controller/gpdb; \ + done' + + [Install] + WantedBy=basic.target + ``` + +3. Reload systemd daemon and enable the service: + ``` + systemctl daemon-reload + systemctl enable greenplum-cgroup-v1-config.service + ``` ### Procedure From 54f03e9216d1435d8df5c3526afc2c959ce776de Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Wed, 18 Oct 2023 14:45:06 -0600 Subject: [PATCH 044/106] docs - clarify fillfactor valid for heap tables only (6x) (#16617) * docs - clarify fillfactor valid for heap tables only (6x) * unrelated edits requested by david --- .../markdown/ref_guide/sql_commands/ALTER_TABLE.html.md | 8 +++++++- .../markdown/ref_guide/sql_commands/CREATE_TABLE.html.md | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/ref_guide/sql_commands/ALTER_TABLE.html.md b/gpdb-doc/markdown/ref_guide/sql_commands/ALTER_TABLE.html.md index c86c0e209166..c070049b2c26 100644 --- a/gpdb-doc/markdown/ref_guide/sql_commands/ALTER_TABLE.html.md +++ b/gpdb-doc/markdown/ref_guide/sql_commands/ALTER_TABLE.html.md @@ -210,9 +210,12 @@ where storage\_parameter is: - **SET WITHOUT OIDS** — Removes the OID system column from the table. - > **Caution** VMware does not support using `SET WITH OIDS` or `oids=TRUE` to assign an OID system column.On large tables, such as those in a typical Greenplum Database system, using OIDs for table rows can cause wrap-around of the 32-bit OID counter. Once the counter wraps around, OIDs can no longer be assumed to be unique, which not only makes them useless to user applications, but can also cause problems in the Greenplum Database system catalog tables. In addition, excluding OIDs from a table reduces the space required to store the table on disk by 4 bytes per row, slightly improving performance. You cannot create OIDS on a partitioned or column-oriented table \(an error is displayed\). 
This syntax is deprecated and will be removed in a future Greenplum release. + You cannot create OIDS on a partitioned or column-oriented table \(an error is displayed\). This syntax is deprecated and will be removed in a future Greenplum release. + + > **Caution** VMware does not support using `SET WITH OIDS` or `oids=TRUE` to assign an OID system column. On large tables, such as those in a typical Greenplum Database system, using OIDs for table rows can cause the 32-bit counter to wrap-around. After the counter wraps around, OIDs can no longer be assumed to be unique, which not only makes them useless to user applications, but can also cause problems in the Greenplum Database system catalog tables. In addition, excluding OIDs from a table reduces the space required to store the table on disk by 4 bytes per row, slightly improving performance. - **SET \( FILLFACTOR = value\) / RESET \(FILLFACTOR\)** — Changes the fillfactor for the table. The fillfactor for a table is a percentage between 10 and 100. 100 \(complete packing\) is the default. When a smaller fillfactor is specified, `INSERT` operations pack table pages only to the indicated percentage; the remaining space on each page is reserved for updating rows on that page. This gives `UPDATE` a chance to place the updated copy of a row on the same page as the original, which is more efficient than placing it on a different page. For a table whose entries are never updated, complete packing is the best choice, but in heavily updated tables smaller fillfactors are appropriate. Note that the table contents will not be modified immediately by this command. You will need to rewrite the table to get the desired effects. That can be done with [VACUUM](VACUUM.html) or one of the forms of `ALTER TABLE` that forces a table rewrite. For information about the forms of `ALTER TABLE` that perform a table rewrite, see [Notes](#section5). + - **SET DISTRIBUTED** — Changes the distribution policy of a table. Changing a hash distribution policy, or changing to or from a replicated policy, will cause the table data to be physically redistributed on disk, which can be resource intensive. *Greenplum Database does not permit changing the distribution policy of a writable external table.* - **INHERIT parent\_table / NO INHERIT parent\_table** — Adds or removes the target table as a child of the specified parent table. Queries against the parent will include records of its child table. To be added as a child, the target table must already contain all the same columns as the parent \(it could have additional columns, too\). The columns must have matching data types, and if they have `NOT NULL` constraints in the parent then they must also have `NOT NULL` constraints in the child. There must also be matching child-table constraints for all `CHECK` constraints of the parent, except those marked non-inheritable \(that is, created with `ALTER TABLE ... ADD CONSTRAINT ... NO INHERIT`\) in the parent, which are ignored; all child-table constraints matched must not be marked non-inheritable. Currently `UNIQUE`, `PRIMARY KEY`, and `FOREIGN KEY` constraints are not considered, but this may change in the future. - OF type\_name — This form links the table to a composite type as though `CREATE TABLE OF` had formed it. The table's list of column names and types must precisely match that of the composite type; the presence of an `oid` system column is permitted to differ. The table must not inherit from any other table. 
These restrictions ensure that `CREATE TABLE OF` would permit an equivalent table definition. @@ -282,6 +285,9 @@ index\_name FILLFACTOR : Set the fillfactor percentage for a table. +: The fillfactor option is valid only for heap tables (`appendoptimized=false`). + + value : The new value for the `FILLFACTOR` parameter, which is a percentage between 10 and 100. 100 is the default. diff --git a/gpdb-doc/markdown/ref_guide/sql_commands/CREATE_TABLE.html.md b/gpdb-doc/markdown/ref_guide/sql_commands/CREATE_TABLE.html.md index 4d707b9b3d37..af12821d6953 100644 --- a/gpdb-doc/markdown/ref_guide/sql_commands/CREATE_TABLE.html.md +++ b/gpdb-doc/markdown/ref_guide/sql_commands/CREATE_TABLE.html.md @@ -387,6 +387,8 @@ WITH \( storage\_parameter=value \) : **fillfactor** — The fillfactor for a table is a percentage between 10 and 100. 100 \(complete packing\) is the default. When a smaller fillfactor is specified, `INSERT` operations pack table pages only to the indicated percentage; the remaining space on each page is reserved for updating rows on that page. This gives `UPDATE` a chance to place the updated copy of a row on the same page as the original, which is more efficient than placing it on a different page. For a table whose entries are never updated, complete packing is the best choice, but in heavily updated tables smaller fillfactors are appropriate. This parameter cannot be set for TOAST tables. +: The fillfactor option is valid only for heap tables (`appendoptimized=FALSE`). + : **analyze_hll_non_part_table** — Set this storage parameter to `true` to force collection of HLL statistics even if the table is not part of a partitioned table. This is useful if the table will be exchanged or added to a partitioned table, so that the table does not need to be re-analyzed. The default is `false`. : **oids=FALSE** — This setting is the default, and it ensures that rows do not have object identifiers assigned to them. VMware does not support using `WITH OIDS` or `oids=TRUE` to assign an OID system column.On large tables, such as those in a typical Greenplum Database system, using OIDs for table rows can cause wrap-around of the 32-bit OID counter. Once the counter wraps around, OIDs can no longer be assumed to be unique, which not only makes them useless to user applications, but can also cause problems in the Greenplum Database system catalog tables. In addition, excluding OIDs from a table reduces the space required to store the table on disk by 4 bytes per row, slightly improving performance. You cannot create OIDS on a partitioned or column-oriented table \(an error is displayed\). This syntax is deprecated and will be removed in a future Greenplum release. From 11d915a908117278a0c2edcc92a63f8826b5bb07 Mon Sep 17 00:00:00 2001 From: Yao Wang Date: Tue, 24 Oct 2023 11:28:13 +0800 Subject: [PATCH 045/106] [6X] Add GUC gp_workfile_compression_overhead_limit for ZSTD buffer (#16510) Basically it's a back port of #16243, plus some other essential changes: 1. Removed the statistic info of max/min file size, and used average file size instead; 2. Included the work file number of not only hash table (for inner table), but also outer table; 3. Updated the test case zlib to compare work file number and compressed work file number to avoid checking accurate file number on different envs (planner vs. orca, 6X vs. 7X). 
--- src/backend/cdb/cdbvars.c | 6 + src/backend/executor/nodeHash.c | 22 +++ src/backend/executor/nodeHashjoin.c | 24 ++++ src/backend/storage/file/buffile.c | 62 ++++++++- src/backend/utils/misc/guc_gp.c | 11 ++ .../utils/workfile_manager/workfile_mgr.c | 2 + src/include/cdb/cdbvars.h | 1 + src/include/executor/hashjoin.h | 6 + src/include/utils/sync_guc_name.h | 1 + src/include/utils/workfile_mgr.h | 6 + src/test/regress/expected/zlib.out | 131 ++++++++++++++++++ src/test/regress/sql/zlib.sql | 115 +++++++++++++++ 12 files changed, 386 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index dd3b57608292..5ec110bc510b 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -315,6 +315,12 @@ int gp_workfile_limit_per_query = 0; /* Maximum number of workfiles to be created by a query */ int gp_workfile_limit_files_per_query = 0; +/* + * The overhead memory (kB) used by all compressed workfiles of a single + * workfile_set + */ +int gp_workfile_compression_overhead_limit = 0; + /* Gpmon */ bool gp_enable_gpperfmon = false; int gp_gpperfmon_send_interval = 1; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index ee2e32f9fa60..325af303ad0c 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -62,6 +62,8 @@ ExecHashTableExplainBatches(HashJoinTable hashtable, int ibatch_end, const char *title); +static inline void ResetWorkFileSetStatsInfo(HashJoinTable hashtable); + /* ---------------------------------------------------------------- * ExecHash * @@ -340,6 +342,8 @@ ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOper hashtable->hjstate = hjstate; hashtable->first_pass = true; + ResetWorkFileSetStatsInfo(hashtable); + /* * Create temporary memory contexts in which to keep the hashtable working * storage. See notes in executor/hashjoin.h. @@ -1502,6 +1506,16 @@ ExecHashTableExplainEnd(PlanState *planstate, struct StringInfoData *buf) hashtable->nbatch_outstart, hashtable->nbatch, "Secondary Overflow"); + + appendStringInfo(buf, + "Work file set: %u files (%u compressed), " + "avg file size %lu, " + "compression buffer size %lu bytes \n", + hashtable->workset_num_files, + hashtable->workset_num_files_compressed, + hashtable->workset_avg_file_size, + hashtable->workset_compression_buf_total); + ResetWorkFileSetStatsInfo(hashtable); } /* Report hash chain statistics. 
*/ @@ -2099,3 +2113,11 @@ ExecHashRemoveNextSkewBucket(HashState *hashState, HashJoinTable hashtable) hashtable->spaceUsedSkew = 0; } } + +static inline void ResetWorkFileSetStatsInfo(HashJoinTable hashtable) +{ + hashtable->workset_num_files = 0; + hashtable->workset_num_files_compressed = 0; + hashtable->workset_avg_file_size = 0; + hashtable->workset_compression_buf_total = 0; +} diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index d65daee42ba0..bb0451d2805d 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -63,6 +63,8 @@ static void SpillCurrentBatch(HashJoinState *node); static bool ExecHashJoinReloadHashTable(HashJoinState *hjstate); static void ExecEagerFreeHashJoin(HashJoinState *node); +static inline void SaveWorkFileSetStatsInfo(HashJoinTable hashtable); + /* ---------------------------------------------------------------- * ExecHashJoin * @@ -287,6 +289,16 @@ ExecHashJoin_guts(HashJoinState *node) } else node->hj_JoinState = HJ_NEED_NEW_BATCH; + + /* + * When all the tuples of outer table have been read, + * and we are ready to process the first batch, it means + * a good time to collect statistic info of all temp + * files. + */ + if (hashtable->curbatch == 0) + SaveWorkFileSetStatsInfo(hashtable); + continue; } @@ -1480,4 +1492,16 @@ ExecHashJoinReloadHashTable(HashJoinState *hjstate) return true; } +static inline void SaveWorkFileSetStatsInfo(HashJoinTable hashtable) +{ + workfile_set *work_set = hashtable->work_set; + if (work_set) + { + hashtable->workset_num_files = work_set->num_files; + hashtable->workset_num_files_compressed = work_set->num_files_compressed; + hashtable->workset_avg_file_size = work_set->total_bytes / work_set->num_files; + hashtable->workset_compression_buf_total = work_set->compression_buf_total; + } +} + /* EOF */ diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 02ae0c4f3528..e52e43ea1cfb 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -132,7 +132,19 @@ struct BufFile /* This holds holds compressed input, during decompression. */ ZSTD_inBuffer compressed_buffer; bool decompression_finished; + + /* Memory usage by ZSTD compression buffer */ + size_t compressed_buffer_size; #endif + + /* + * workfile_set for the files in current buffile. The workfile_set creator + * should take care of the workfile_set's lifecycle. So, no need to call + * workfile_mgr_close_set under the buffile logic. + * If the workfile_set is created in BufFileCreateTemp. The workfile_set + * should get freed once all the files in it are closed in BufFileClose. 
+ */ + workfile_set *work_set; }; /* @@ -175,6 +187,10 @@ makeBufFile(File firstfile) file->maxoffset = 0L; file->buffer = palloc(BLCKSZ); +#ifdef USE_ZSTD + file->compressed_buffer_size = 0; +#endif + return file; } @@ -225,6 +241,7 @@ BufFileCreateTempInSet(workfile_set *work_set, bool interXact) file = makeBufFile(pfile); file->isTemp = true; + file->work_set = work_set; FileSetIsWorkfile(file->file); RegisterFileWithSet(file->file, work_set); @@ -286,6 +303,7 @@ BufFileCreateNamedTemp(const char *fileName, bool interXact, workfile_set *work_ if (work_set) { + file->work_set = work_set; FileSetIsWorkfile(file->file); RegisterFileWithSet(file->file, work_set); } @@ -984,11 +1002,26 @@ bool gp_workfile_compression; /* GUC */ void BufFilePledgeSequential(BufFile *buffile) { + workfile_set *work_set = buffile->work_set; + if (buffile->maxoffset != 0) elog(ERROR, "cannot pledge sequential access to a temporary file after writing it"); - if (gp_workfile_compression) + AssertImply(work_set->compression_buf_total > 0, gp_workfile_compression); + + /* + * If gp_workfile_compression_overhead_limit is 0, it means no limit for + * memory used by compressed work files. Othersize, compress the work file + * only when the used memory size is under the limit. + */ + if (gp_workfile_compression && + (gp_workfile_compression_overhead_limit == 0 || + work_set->compression_buf_total < + gp_workfile_compression_overhead_limit * 1024UL)) + { BufFileStartCompression(buffile); + work_set->num_files_compressed++; + } } /* @@ -1054,6 +1087,7 @@ static void BufFileDumpCompressedBuffer(BufFile *file, const void *buffer, Size nbytes) { ZSTD_inBuffer input; + size_t compressed_buffer_size = 0; file->uncompressed_bytes += nbytes; @@ -1086,6 +1120,32 @@ BufFileDumpCompressedBuffer(BufFile *file, const void *buffer, Size nbytes) file->maxoffset += wrote; } } + + /* + * Calculate the delta of buffer used by ZSTD stream and take it into + * account to work_set->comp_buf_total. + * On GPDB 7X, we call ZSTD API ZSTD_sizeof_CStream() to get the buffer + * size. However, the API is unavaliable on 6X (marked as + * ZSTD_STATIC_LINKING_ONLY) due to different version of ZSTD lib. + * After some experiments, it's proved that the compression buffer size + * per file is pretty stable (about 1.3MB) regard of the temp file size, + * so we simply use the hard-coded value here. + * We may use the API ZSTD_sizeof_CStream() in future if the ZSTD lib + * version is updated on 6X. + */ + + compressed_buffer_size = 1.3 * 1024 * 1024; + + /* + * As ZSTD comments said, the memory usage can evolve (increase or + * decrease) over time. We update work_set->compressed_buffer_size only + * when compressed_buffer_size increases. It means we apply the comp buff + * limit to max ever memory usage and ignore the case of memory decreasing. + */ + if (compressed_buffer_size > file->compressed_buffer_size) + file->work_set->compression_buf_total + += compressed_buffer_size - file->compressed_buffer_size; + file->compressed_buffer_size = compressed_buffer_size; } /* diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 91aaf41b594c..5382348498cd 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3517,6 +3517,17 @@ struct config_int ConfigureNamesInt_gp[] = NULL, NULL, NULL }, + { + {"gp_workfile_compression_overhead_limit", PGC_USERSET, RESOURCES, + gettext_noop("The overhead memory (kB) limit for all compressed workfiles of a single workfile_set."), + gettext_noop("0 for no limit. 
Once the limit is hit, the following files will not be compressed."), + GUC_UNIT_KB + }, + &gp_workfile_compression_overhead_limit, + 2048 * 1024, 0, INT_MAX, + NULL, NULL, NULL + }, + { {"gp_workfile_limit_per_segment", PGC_POSTMASTER, RESOURCES, gettext_noop("Maximum disk space (in KB) used for workfiles per segment."), diff --git a/src/backend/utils/workfile_manager/workfile_mgr.c b/src/backend/utils/workfile_manager/workfile_mgr.c index fae74545d7f7..2704c7952a2b 100644 --- a/src/backend/utils/workfile_manager/workfile_mgr.c +++ b/src/backend/utils/workfile_manager/workfile_mgr.c @@ -634,6 +634,8 @@ workfile_mgr_create_set_internal(const char *operator_name, const char *prefix) work_set->total_bytes = 0; work_set->active = true; work_set->pinned = false; + work_set->compression_buf_total = 0; + work_set->num_files_compressed = 0; /* Track all workfile_sets created in current process */ if (!localCtl.initialized) diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index cb5cce1f8ade..b75b0c4cabf0 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -849,6 +849,7 @@ extern int gpperfmon_log_alert_level; extern int gp_workfile_limit_per_segment; extern int gp_workfile_limit_per_query; extern int gp_workfile_limit_files_per_query; +extern int gp_workfile_compression_overhead_limit; extern int gp_workfile_caching_loglevel; extern int gp_sessionstate_loglevel; extern int gp_workfile_bytes_to_checksum; diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index d4b4d86641ac..ffe5f28eb16b 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -208,6 +208,12 @@ typedef struct HashJoinTableData HashJoinState * hjstate; /* reference to the enclosing HashJoinState */ bool first_pass; /* Is this the first pass (pre-rescan) */ + + /* Statistic info of work file set, copied from work_set */ + uint32 workset_num_files; + uint32 workset_num_files_compressed; + uint64 workset_avg_file_size; + uint64 workset_compression_buf_total; } HashJoinTableData; #endif /* HASHJOIN_H */ diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 259986f3d678..a5bd03e67f67 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -81,6 +81,7 @@ "gp_vmem_idle_resource_timeout", "gp_workfile_caching_loglevel", "gp_workfile_compression", + "gp_workfile_compression_overhead_limit", "gp_workfile_limit_files_per_query", "gp_workfile_limit_per_query", "IntervalStyle", diff --git a/src/include/utils/workfile_mgr.h b/src/include/utils/workfile_mgr.h index c31d7332c49e..2c50d52185e8 100644 --- a/src/include/utils/workfile_mgr.h +++ b/src/include/utils/workfile_mgr.h @@ -87,6 +87,12 @@ typedef struct workfile_set /* Used to track workfile_set created in current process */ dlist_node local_node; + + /* Total memory usage by compression buffer */ + uint64 compression_buf_total; + + /* Number of compressed work files */ + uint32 num_files_compressed; } workfile_set; /* Workfile Set operations */ diff --git a/src/test/regress/expected/zlib.out b/src/test/regress/expected/zlib.out index 18f8e3deae08..e74822bcf218 100644 --- a/src/test/regress/expected/zlib.out +++ b/src/test/regress/expected/zlib.out @@ -20,6 +20,36 @@ CREATE TABLE test_zlib_hashjoin (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, INSERT INTO test_zlib_hashjoin SELECT i,i,i,i,i,i,i,i FROM (select generate_series(1, nsegments * 333333) as i from (select count(*) as nsegments from gp_segment_configuration where 
role='p' and content >= 0) foo) bar; +-- start_ignore +create language plpythonu; +-- end_ignore +-- Check if compressed work file count is limited to file_count_limit +-- If the parameter is_comp_buff_limit is true, it means the comp_workfile_created +-- must be smaller than file_count_limit because some work files are not compressed; +-- If the parameter is_comp_buff_limit is false, it means the comp_workfile_created +-- must be equal to file_count_limit because all work files are compressed. +create or replace function check_workfile_compressed(explain_query text, + is_comp_buff_limit bool) +returns setof int as +$$ +import re +rv = plpy.execute(explain_query) +search_text = 'Work file set' +result = [] +for i in range(len(rv)): + cur_line = rv[i]['QUERY PLAN'] + if search_text.lower() in cur_line.lower(): + p = re.compile('(\d+) files \((\d+) compressed\)') + m = p.search(cur_line) + workfile_created = int(m.group(1)) + comp_workfile_created = int(m.group(2)) + if is_comp_buff_limit: + result.append(int(comp_workfile_created < workfile_created)) + else: + result.append(int(comp_workfile_created == workfile_created)) +return result +$$ +language plpythonu; SET statement_mem=5000; --Fail after workfile creation and before add it to workfile set select gp_inject_fault('workfile_creation_failure', 'reset', 2); @@ -147,3 +177,104 @@ select gp_inject_fault('workfile_creation_failure', 'reset', 2); Success: (1 row) +-- Test gp_workfile_compression_overhead_limit to control the memory limit used by +-- compressed temp file +DROP TABLE IF EXISTS test_zlib_memlimit; +NOTICE: table "test_zlib_memlimit" does not exist, skipping +create table test_zlib_memlimit(a int, b text, c timestamp) distributed by (a); +insert into test_zlib_memlimit select id, 'test ' || id, clock_timestamp() from + (select generate_series(1, nsegments * 30000) as id from + (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +insert into test_zlib_memlimit select 1,'test', now() from + (select generate_series(1, nsegments * 2000) as id from + (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +insert into test_zlib_memlimit select id, 'test ' || id, clock_timestamp() from + (select generate_series(1, nsegments * 3000) as id from + (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +analyze test_zlib_memlimit; +set statement_mem='4500kB'; +set gp_workfile_compression=on; +set gp_workfile_limit_files_per_query=0; +-- Run the query with a large value of gp_workfile_compression_overhead_limit +-- The compressed file number should be equal to total work file number +set gp_workfile_compression_overhead_limit=2048000; +select * from check_workfile_compressed(' +explain (analyze) +with B as (select distinct a+1 as a,b,c from test_zlib_memlimit) +,C as (select distinct a+2 as a,b,c from test_zlib_memlimit) +,D as (select a+3 as a,b,c from test_zlib_memlimit) +,E as (select a+4 as a,b,c from test_zlib_memlimit) +,F as (select (a+5)::text as a,b,c from test_zlib_memlimit) +select count(*) from test_zlib_memlimit A +inner join B on A.a = B.a +inner join C on A.a = C.a +inner join D on A.a = D.a +inner join E on A.a = E.a +inner join F on A.a::text = F.a ;', +false) limit 6; + check_workfile_compressed +--------------------------- + 1 + 1 + 1 + 1 + 1 + 1 +(6 rows) + +-- Run the query with a smaller value of gp_workfile_compression_overhead_limit +-- The compressed file number should 
be less than total work file number +set gp_workfile_compression_overhead_limit=1000; +select * from check_workfile_compressed(' +explain (analyze) +with B as (select distinct a+1 as a,b,c from test_zlib_memlimit) +,C as (select distinct a+2 as a,b,c from test_zlib_memlimit) +,D as (select a+3 as a,b,c from test_zlib_memlimit) +,E as (select a+4 as a,b,c from test_zlib_memlimit) +,F as (select (a+5)::text as a,b,c from test_zlib_memlimit) +select count(*) from test_zlib_memlimit A +inner join B on A.a = B.a +inner join C on A.a = C.a +inner join D on A.a = D.a +inner join E on A.a = E.a +inner join F on A.a::text = F.a ;', +true) limit 6; + check_workfile_compressed +--------------------------- + 1 + 1 + 1 + 1 + 1 + 1 +(6 rows) + +-- Run the query with gp_workfile_compression_overhead_limit=0, which means +-- no limit +-- The compressed file number should be equal to total work file number +set gp_workfile_compression_overhead_limit=0; +select * from check_workfile_compressed(' +explain (analyze) +with B as (select distinct a+1 as a,b,c from test_zlib_memlimit) +,C as (select distinct a+2 as a,b,c from test_zlib_memlimit) +,D as (select a+3 as a,b,c from test_zlib_memlimit) +,E as (select a+4 as a,b,c from test_zlib_memlimit) +,F as (select (a+5)::text as a,b,c from test_zlib_memlimit) +select count(*) from test_zlib_memlimit A +inner join B on A.a = B.a +inner join C on A.a = C.a +inner join D on A.a = D.a +inner join E on A.a = E.a +inner join F on A.a::text = F.a ;', +false) limit 6; + check_workfile_compressed +--------------------------- + 1 + 1 + 1 + 1 + 1 + 1 +(6 rows) + +DROP TABLE test_zlib_memlimit; diff --git a/src/test/regress/sql/zlib.sql b/src/test/regress/sql/zlib.sql index 97720cf7ed6d..431cd6311244 100644 --- a/src/test/regress/sql/zlib.sql +++ b/src/test/regress/sql/zlib.sql @@ -23,6 +23,38 @@ INSERT INTO test_zlib_hashjoin SELECT i,i,i,i,i,i,i,i FROM (select generate_series(1, nsegments * 333333) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +-- start_ignore +create language plpythonu; +-- end_ignore + +-- Check if compressed work file count is limited to file_count_limit +-- If the parameter is_comp_buff_limit is true, it means the comp_workfile_created +-- must be smaller than file_count_limit because some work files are not compressed; +-- If the parameter is_comp_buff_limit is false, it means the comp_workfile_created +-- must be equal to file_count_limit because all work files are compressed. 
+create or replace function check_workfile_compressed(explain_query text, + is_comp_buff_limit bool) +returns setof int as +$$ +import re +rv = plpy.execute(explain_query) +search_text = 'Work file set' +result = [] +for i in range(len(rv)): + cur_line = rv[i]['QUERY PLAN'] + if search_text.lower() in cur_line.lower(): + p = re.compile('(\d+) files \((\d+) compressed\)') + m = p.search(cur_line) + workfile_created = int(m.group(1)) + comp_workfile_created = int(m.group(2)) + if is_comp_buff_limit: + result.append(int(comp_workfile_created < workfile_created)) + else: + result.append(int(comp_workfile_created == workfile_created)) +return result +$$ +language plpythonu; + SET statement_mem=5000; --Fail after workfile creation and before add it to workfile set @@ -86,3 +118,86 @@ drop table test_zlib; drop table test_zlib_t1; select gp_inject_fault('workfile_creation_failure', 'reset', 2); + +-- Test gp_workfile_compression_overhead_limit to control the memory limit used by +-- compressed temp file + +DROP TABLE IF EXISTS test_zlib_memlimit; +create table test_zlib_memlimit(a int, b text, c timestamp) distributed by (a); +insert into test_zlib_memlimit select id, 'test ' || id, clock_timestamp() from + (select generate_series(1, nsegments * 30000) as id from + (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +insert into test_zlib_memlimit select 1,'test', now() from + (select generate_series(1, nsegments * 2000) as id from + (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +insert into test_zlib_memlimit select id, 'test ' || id, clock_timestamp() from + (select generate_series(1, nsegments * 3000) as id from + (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; +analyze test_zlib_memlimit; + +set statement_mem='4500kB'; +set gp_workfile_compression=on; +set gp_workfile_limit_files_per_query=0; + +-- Run the query with a large value of gp_workfile_compression_overhead_limit +-- The compressed file number should be equal to total work file number + +set gp_workfile_compression_overhead_limit=2048000; + +select * from check_workfile_compressed(' +explain (analyze) +with B as (select distinct a+1 as a,b,c from test_zlib_memlimit) +,C as (select distinct a+2 as a,b,c from test_zlib_memlimit) +,D as (select a+3 as a,b,c from test_zlib_memlimit) +,E as (select a+4 as a,b,c from test_zlib_memlimit) +,F as (select (a+5)::text as a,b,c from test_zlib_memlimit) +select count(*) from test_zlib_memlimit A +inner join B on A.a = B.a +inner join C on A.a = C.a +inner join D on A.a = D.a +inner join E on A.a = E.a +inner join F on A.a::text = F.a ;', +false) limit 6; + +-- Run the query with a smaller value of gp_workfile_compression_overhead_limit +-- The compressed file number should be less than total work file number + +set gp_workfile_compression_overhead_limit=1000; + +select * from check_workfile_compressed(' +explain (analyze) +with B as (select distinct a+1 as a,b,c from test_zlib_memlimit) +,C as (select distinct a+2 as a,b,c from test_zlib_memlimit) +,D as (select a+3 as a,b,c from test_zlib_memlimit) +,E as (select a+4 as a,b,c from test_zlib_memlimit) +,F as (select (a+5)::text as a,b,c from test_zlib_memlimit) +select count(*) from test_zlib_memlimit A +inner join B on A.a = B.a +inner join C on A.a = C.a +inner join D on A.a = D.a +inner join E on A.a = E.a +inner join F on A.a::text = F.a ;', +true) limit 6; + +-- Run the query with 
gp_workfile_compression_overhead_limit=0, which means +-- no limit +-- The compressed file number should be equal to total work file number + +set gp_workfile_compression_overhead_limit=0; + +select * from check_workfile_compressed(' +explain (analyze) +with B as (select distinct a+1 as a,b,c from test_zlib_memlimit) +,C as (select distinct a+2 as a,b,c from test_zlib_memlimit) +,D as (select a+3 as a,b,c from test_zlib_memlimit) +,E as (select a+4 as a,b,c from test_zlib_memlimit) +,F as (select (a+5)::text as a,b,c from test_zlib_memlimit) +select count(*) from test_zlib_memlimit A +inner join B on A.a = B.a +inner join C on A.a = C.a +inner join D on A.a = D.a +inner join E on A.a = E.a +inner join F on A.a::text = F.a ;', +false) limit 6; + +DROP TABLE test_zlib_memlimit; From cb69040a5a71876d40e3b9821bc73c8668201225 Mon Sep 17 00:00:00 2001 From: xuejing zhao <80750564+zxuejing@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:04:08 +0800 Subject: [PATCH 046/106] Handle parallel retrieve cursor errors via timeout mechanism (#16496) Parallel retrieve cursors depended on the RETRIEVE command for detecting errors of the peer QEs, it doesn't work if the error happens on non-root slices like https://github.com/greenplum-db/gpdb/issues/15143. This commit uses SIGALRM (called timeout in GPDB) to fix it. The signal handler intentionally detects if the executor already errors out. The alarm is enabled when a parallel cursor is declared, and disabled when the amount of parallel retrieve cursor is 0. backport from main branch: efedbc21fca3f3b171e9f7aa4180ab55566d7956 --- src/backend/cdb/endpoint/cdbendpointutils.c | 50 ++++++++++++++++++- src/backend/commands/portalcmds.c | 5 ++ src/backend/utils/init/postinit.c | 32 ++++++++++++ src/backend/utils/misc/timeout.c | 16 ++++-- src/backend/utils/mmgr/portalmem.c | 37 ++++++++++++++ src/include/cdb/cdbendpoint.h | 2 + src/include/utils/portal.h | 2 + src/include/utils/timeout.h | 13 ++++- .../status_check.source | 12 +++++ .../status_check.source | 26 ++++++++++ 10 files changed, 189 insertions(+), 6 deletions(-) diff --git a/src/backend/cdb/endpoint/cdbendpointutils.c b/src/backend/cdb/endpoint/cdbendpointutils.c index c484cc81be67..9ed3bcbbc4c5 100644 --- a/src/backend/cdb/endpoint/cdbendpointutils.c +++ b/src/backend/cdb/endpoint/cdbendpointutils.c @@ -24,6 +24,7 @@ #include "cdbendpoint_private.h" #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" +#include "utils/timeout.h" /* @@ -167,4 +168,51 @@ generate_endpoint_name(char *name, const char *cursorName) len += ENDPOINT_NAME_COMMANDID_LEN; name[len] = '\0'; -} \ No newline at end of file +} + +/* + * Check every parallel retrieve cursor status and cancel QEs if it has error. + * + * Also return true if it has error. 
+ */ +bool +gp_check_parallel_retrieve_cursor_error(void) +{ + List *portals; + ListCell *lc; + bool has_error = false; + EState *estate = NULL; + + portals = GetAllParallelRetrieveCursorPortals(); + + foreach(lc, portals) + { + Portal portal = (Portal)lfirst(lc); + + estate = portal->queryDesc->estate; + + if (estate->dispatcherState->primaryResults->errcode) + has_error = true; + else + has_error = cdbdisp_checkForCancel(estate->dispatcherState); + } + + /* free the list to avoid memory leak */ + list_free(portals); + + return has_error; +} + +/* + * Enable the timeout of parallel retrieve cursor check if not yet + */ +void +enable_parallel_retrieve_cursor_check_timeout(void) +{ + if (Gp_role == GP_ROLE_DISPATCH && + !get_timeout_active(GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT)) + { + enable_timeout_after(GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT, + GP_PARALLEL_RETRIEVE_CURSOR_CHECK_PERIOD_MS); + } +} diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 565a803e1475..1dc8ea246e5f 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -191,8 +191,13 @@ PerformCursorOpen(PlannedStmt *stmt, ParamListInfo params, Assert(portal->strategy == PORTAL_ONE_SELECT); if (PortalIsParallelRetrieveCursor(portal)) + { WaitEndpointsReady(portal->queryDesc->estate); + /* Enable the check error timer if the alarm is not active */ + enable_parallel_retrieve_cursor_check_timeout(); + } + /* * We're done; the query won't actually be run until PerformPortalFetch is * called. diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 923b7fc1fa8b..5df37c57a02c 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -90,6 +90,7 @@ static void CheckMyDatabase(const char *name, bool am_superuser); static void InitCommunication(void); static void ShutdownPostgres(int code, Datum arg); static void StatementTimeoutHandler(void); +static void GpParallelRetrieveCursorCheckTimeoutHandler(void); static void LockTimeoutHandler(void); static void ClientCheckTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); @@ -686,6 +687,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, { RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLock); RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler); + RegisterTimeout(GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT, GpParallelRetrieveCursorCheckTimeoutHandler); RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler); RegisterTimeout(GANG_TIMEOUT, IdleGangTimeoutHandler); RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); @@ -1438,6 +1440,36 @@ StatementTimeoutHandler(void) #endif kill(MyProcPid, SIGINT); } +extern bool DoingCommandRead; +static void +GpParallelRetrieveCursorCheckTimeoutHandler(void) +{ + /* + * issue: https://github.com/greenplum-db/gpdb/issues/15143 + * + * handle errors of parallel retrieve cursor's non-root slices + */ + if (DoingCommandRead) + { + Assert(Gp_role == GP_ROLE_DISPATCH); + + /* It calls cdbdisp_checkForCancel(), which doesn't raise error */ + gp_check_parallel_retrieve_cursor_error(); + int num = GetNumOfParallelRetrieveCursors(); + + /* Reset the alarm to check after a timeout */ + if (num > 0) + { + elog(DEBUG1, "There are still %d parallel retrieve cursors alive", num); + enable_parallel_retrieve_cursor_check_timeout(); + } + } + else + { + elog(DEBUG1, "DoingCommandRead is false, check parallel cursor timeout delay"); + enable_parallel_retrieve_cursor_check_timeout(); + } 
+} /* * LOCK_TIMEOUT handler: trigger a query-cancel interrupt. diff --git a/src/backend/utils/misc/timeout.c b/src/backend/utils/misc/timeout.c index 78faeb6a3545..20d82410126d 100644 --- a/src/backend/utils/misc/timeout.c +++ b/src/backend/utils/misc/timeout.c @@ -417,13 +417,23 @@ RegisterTimeout(TimeoutId id, timeout_handler_proc handler) /* There's no need to disable the signal handler here. */ - if (id >= USER_TIMEOUT) + /* + * GP_ABI_BUMP_FIXME + * + * all the GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT here were MAX_TIMEOUTS, + * we did the change to avoid ABI break via putting the + * GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT after the reserved + * USER_TIMEOUTs and before MAX_TIMEOUTS. + * + * restore to the original shape once we are fine to bump the ABI version. + */ + if (id >= USER_TIMEOUT && id < GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT) { /* Allocate a user-defined timeout reason */ - for (id = USER_TIMEOUT; id < MAX_TIMEOUTS; id++) + for (id = USER_TIMEOUT; id < GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT; id++) if (all_timeouts[id].timeout_handler == NULL) break; - if (id >= MAX_TIMEOUTS) + if (id >= GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT) ereport(FATAL, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), errmsg("cannot add more timeout reasons"))); diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 620c9bfafb92..08a41f3d0bec 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -1282,3 +1282,40 @@ ThereAreNoReadyPortals(void) return true; } + +/* Find all Parallel Retrieve cursors and return a list of Portals */ +List * +GetAllParallelRetrieveCursorPortals(void) +{ + List *portals; + PortalHashEnt *hentry; + HASH_SEQ_STATUS status; + + if (PortalHashTable == NULL) + return NULL; + + portals = NULL; + hash_seq_init(&status, PortalHashTable); + while ((hentry = hash_seq_search(&status)) != NULL) + { + if (PortalIsParallelRetrieveCursor(hentry->portal) && + hentry->portal->queryDesc != NULL) + portals = lappend(portals, hentry->portal); + } + + return portals; +} + +/* Return the amount of parallel retrieve cursors */ +int +GetNumOfParallelRetrieveCursors(void) +{ + List *portals; + int sum; + + portals = GetAllParallelRetrieveCursorPortals(); + sum = list_length(portals); + + list_free(portals); + return sum; +} diff --git a/src/include/cdb/cdbendpoint.h b/src/include/cdb/cdbendpoint.h index b9fd6d74353d..8be13ff746af 100644 --- a/src/include/cdb/cdbendpoint.h +++ b/src/include/cdb/cdbendpoint.h @@ -140,6 +140,8 @@ extern enum EndPointExecPosition GetParallelCursorEndpointPosition(PlannedStmt * extern void WaitEndpointsReady(EState *estate); extern void AtAbort_EndpointExecState(void); extern void allocEndpointExecState(void); +extern bool gp_check_parallel_retrieve_cursor_error(void); +extern void enable_parallel_retrieve_cursor_check_timeout(void); /* * Below functions should run on Endpoints(QE/Entry DB). 
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index 64b503996889..ebe7c61d735d 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -253,5 +253,7 @@ extern bool ThereAreNoReadyPortals(void); extern void AtExitCleanup_ResPortals(void); extern void TotalResPortalIncrements(int pid, Oid queueid, Cost *totalIncrements, int *num); +extern List *GetAllParallelRetrieveCursorPortals(void); +extern int GetNumOfParallelRetrieveCursors(void); #endif /* PORTAL_H */ diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index fef38e5539de..cf987128399c 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -16,6 +16,9 @@ #include "datatype/timestamp.h" +/* GPDB: the period of parallel retrieve cursor check */ +#define GP_PARALLEL_RETRIEVE_CURSOR_CHECK_PERIOD_MS (10000) + /* * Identifiers for timeout reasons. Note that in case multiple timeouts * trigger at the same time, they are serviced in the order of this enum. @@ -33,8 +36,14 @@ typedef enum TimeoutId CLIENT_CONNECTION_CHECK_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, - /* Maximum number of timeout reasons */ - MAX_TIMEOUTS = 16 + /* + * GP_ABI_BUMP_FIXME + * To not break ABI, we have to reserve the timeouts from the **original** + * USER_TIMEOUT (included) and the **original** MAX_TIMEOUTS, [9, 16) in + * this case. + */ + GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT = 16, + MAX_TIMEOUTS } TimeoutId; /* callback function signature */ diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source b/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source index 92719a0c9ac9..c9d55e859c25 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source @@ -256,3 +256,15 @@ insert into t1 select generate_series(1,100); 2: CLOSE c8; 2: END; +---------- Test9: Test parallel retrieve cursor auto-check +1: drop table if exists t1; +1: create table t1(a int, b int); +1: insert into t1 values (generate_series(1,100000), 1); +1: insert into t1 values (-1, 1); +1: BEGIN; +1: DECLARE c9 PARALLEL RETRIEVE CURSOR FOR select count(*) from t1 group by sqrt(a); select count() from gp_get_endpoints(); +-- GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT is 10s, we sleep 12 to check all QEs are already finished. +1: ! sleep 12; +1: SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c9'; +1: rollback; +1q: diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source b/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source index 393822f7cf30..49788ae5f841 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source @@ -1392,3 +1392,29 @@ CLOSE 2: END; END +---------- Test9: Test parallel retrieve cursor auto-check +1: drop table if exists t1; +DROP +1: create table t1(a int, b int); +CREATE +1: insert into t1 values (generate_series(1,100000), 1); +INSERT 100000 +1: insert into t1 values (-1, 1); +INSERT 1 +1: BEGIN; +BEGIN +1: DECLARE c9 PARALLEL RETRIEVE CURSOR FOR select count(*) from t1 group by sqrt(a); select count() from gp_get_endpoints(); + count +------- + 3 +(1 row) +-- GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT is 10s, we sleep 12 to check all QEs are already finished. +1: ! 
sleep 12; + +1: SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c9'; + endpointname | auth_token | hostname | port | state +--------------+------------+----------+------+------- +(0 rows) +1: rollback; +ERROR: cannot take square root of a negative number (seg2 slice1 127.0.1.1:6004 pid=83657) +1q: ... From 924f616bd8e8ca598926083a9b592aab50826b07 Mon Sep 17 00:00:00 2001 From: Marbin Tan Date: Wed, 4 Oct 2023 10:13:54 -0700 Subject: [PATCH 047/106] Fix long running execution for bitmap index When concurrently inserting to a table and then creating a bitmap index, there is a possibility that a select on that table will take a very long time to finish. The symptom shows 100% CPU utilization and the explain query may take an hour to complete. For more information see: https://github.com/greenplum-db/gpdb/issues/15389 Refactor the code such that it doesn't do anymore busy work. The previous implementation does a while loop of each BM_HRL_WORD_SIZE per fill. If the startTid and nextTid are extremely far apart with a large fillLength, then the while loop will take forever to finish. Since all we're doing in the while loop is advancing values, then this could be computed in a single pass. The change should cut down the explain query time from hours into milliseconds. This is a cherry-pick from 6cb6d03585e. Modified test to not use optimizer = on; it looks like 6X can trigger BitmapAnd without ORCA. Reviewed-by: Huansong Fu --- src/backend/access/bitmap/bitmaputil.c | 44 ++++++++-------- .../expected/bitmap_index_concurrent.out | 51 +++++++++++++++++++ .../sql/bitmap_index_concurrent.sql | 43 ++++++++++++++++ 3 files changed, 118 insertions(+), 20 deletions(-) diff --git a/src/backend/access/bitmap/bitmaputil.c b/src/backend/access/bitmap/bitmaputil.c index a0e73bf424d4..7ee30fdcc25f 100644 --- a/src/backend/access/bitmap/bitmaputil.c +++ b/src/backend/access/bitmap/bitmaputil.c @@ -405,32 +405,36 @@ _bitmap_catchup_to_next_tid(BMBatchWords *words, BMIterateResult *result) /* reset next tid to skip all empty words */ if (words->firstTid > result->nextTid) result->nextTid = words->firstTid; + continue; } - else + + if (fillLength > 0) { - while (fillLength > 0 && words->firstTid < result->nextTid) - { - /* update fill word to reflect expansion */ - words->cwords[result->lastScanWordNo]--; - words->firstTid += BM_HRL_WORD_SIZE; - fillLength--; - } + /* update fill word to reflect expansion */ - /* comsume all the fill words, try to fetch next words */ - if (fillLength == 0) - { - words->nwords--; - continue; - } + uint64 fillToUse = (result->nextTid - words->firstTid) / BM_HRL_WORD_SIZE + 1; + if (fillToUse > fillLength) + fillToUse = fillLength; - /* - * Catch up the next tid to search, but there still fill words. - * Return current state. - */ - if (words->firstTid >= result->nextTid) - return; + words->cwords[result->lastScanWordNo] -= fillToUse; + words->firstTid += fillToUse * BM_HRL_WORD_SIZE; + fillLength -= fillToUse; } + + /* comsume all the fill words, try to fetch next words */ + if (fillLength == 0) + { + words->nwords--; + continue; + } + + /* + * Catch up the next tid to search, but there still fill words. + * Return current state. 
+ */ + if (words->firstTid >= result->nextTid) + return; } else { diff --git a/src/test/isolation2/expected/bitmap_index_concurrent.out b/src/test/isolation2/expected/bitmap_index_concurrent.out index 4cccce527a45..281ad02fe627 100644 --- a/src/test/isolation2/expected/bitmap_index_concurrent.out +++ b/src/test/isolation2/expected/bitmap_index_concurrent.out @@ -345,3 +345,54 @@ SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; 6401 (1 row) +-- Regression test, when large amount of inserts concurrent inserts happen, +-- querying the table shouldn't take along time. +-- This test is from https://github.com/greenplum-db/gpdb/issues/15389 +DROP TABLE IF EXISTS bug.let_me_out; +DROP +DROP SCHEMA IF EXISTS bug; +DROP +CREATE SCHEMA bug; +CREATE +CREATE TABLE bug.let_me_out ( date_column date NULL, int_column int4 NULL ) WITH (appendonly = true, orientation = column) distributed randomly; +CREATE + +1&: INSERT INTO bug.let_me_out(date_column, int_column) SELECT ('2017-01-01'::timestamp + random() * ('2023-08-10'::timestamp - '2017-01-01'::timestamp))::date AS date_column, id / 50000 AS int_column -- id % 700 as int_column FROM generate_series(1, 30000000) s(id); + +2&: INSERT INTO bug.let_me_out(date_column, int_column) SELECT ('2017-01-01'::timestamp + random() * ('2023-08-10'::timestamp - '2017-01-01'::timestamp))::date AS date_column, id / 50000 AS int_column -- id % 700 as int_column FROM generate_series(30000000, 50000000) s(id); + +1<: <... completed> +INSERT 30000000 +2<: <... completed> +INSERT 20000001 + +CREATE INDEX idx_let_me_out__date_column ON bug.let_me_out USING bitmap (date_column); +CREATE +CREATE INDEX idx_let_me_out__int_column ON bug.let_me_out USING bitmap (int_column); +CREATE +VACUUM FULL ANALYZE bug.let_me_out; +VACUUM + +SET random_page_cost = 1; +SET +-- expected to finish under 250ms, but if we go over 60000, then something really bad happened +SET statement_timeout=60000; +SET +EXPLAIN ANALYZE SELECT date_column, int_column FROM bug.let_me_out WHERE date_column in ('2023-03-19', '2023-03-08', '2023-03-13', '2023-03-29', '2023-03-20', '2023-03-28', '2023-03-23', '2023-03-04', '2023-03-05', '2023-03-18', '2023-03-14', '2023-03-06', '2023-03-15', '2023-03-31', '2023-03-11', '2023-03-21', '2023-03-24', '2023-03-30', '2023-03-26', '2023-03-03', '2023-03-22', '2023-03-01', '2023-03-12', '2023-03-17', '2023-03-27', '2023-03-07', '2023-03-16', '2023-03-10', '2023-03-25', '2023-03-09', '2023-03-02') AND int_column IN (1003,1025,1026,1033,1034,1216,1221,160,161,1780,3049,305,3051,3052,3069,3077,3083,3084,3092,3121,3122,3123,3124,3180,3182,3183,3184,3193,3225,3226,3227,3228,3234,3267,3269,3270,3271,3272,3277,3301,3302,3303,3305,3307,3308,3310,3314,3317,3318,3319,3320,3321,3343,3344,3345,3347,3348,3388,339,341,345,346,347,349,3522,3565,3606,3607,3610,3612,3613,3637,3695,3738,3739,3740,3741,3742,3764,3829,3859,3861,3864,3865,3866,3867,3870,3871,3948,3967,3969,3971,3974,3975,3976,4043,4059,4061,4062,4064,4065,4069,4070,4145,42,423,4269,43,4300,4303,4308,4311,4312,4313,4361,4449,445,446,4475,4476,4479,4480,4483,4485,4486,450,4581,4609,4610,4611,4613,4614,4685,4707,4708,4709,4710,4799,4800,4825,4831,4832,4905,4940,4941,4942,4945,4947,4948,4953,4954,4957,540,572,627,743,762,763,77,787,80,81,84,871,899,901,902,905,906); + QUERY PLAN 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=67739.00..108455.94 rows=270079 width=8) (actual time=32.410..123.035 rows=20163 loops=1) + -> Bitmap Heap Scan on let_me_out (cost=67739.00..108455.94 rows=90027 width=8) (actual time=33.937..119.569 rows=6800 loops=1) + Recheck Cond: ((date_column = ANY ('{03-19-2023,03-08-2023,03-13-2023,03-29-2023,03-20-2023,03-28-2023,03-23-2023,03-04-2023,03-05-2023,03-18-2023,03-14-2023,03-06-2023,03-15-2023,03-31-2023,03-11-2023,03-21-2023,03-24-2023,03-30-2023,03-26-2023,03-03-2023,03-22-2023,03-01-2023,03-12-2023,03-17-2023,03-27-2023,03-07-2023,03-16-2023,03-10-2023,03-25-2023,03-09-2023,03-02-2023}'::date[])) AND (int_column = ANY ('{1003,1025,1026,1033,1034,1216,1221,160,161,1780,3049,305,3051,3052,3069,3077,3083,3084,3092,3121,3122,3123,3124,3180,3182,3183,3184,3193,3225,3226,3227,3228,3234,3267,3269,3270,3271,3272,3277,3301,3302,3303,3305,3307,3308,3310,3314,3317,3318,3319,3320,3321,3343,3344,3345,3347,3348,3388,339,341,345,346,347,349,3522,3565,3606,3607,3610,3612,3613,3637,3695,3738,3739,3740,3741,3742,3764,3829,3859,3861,3864,3865,3866,3867,3870,3871,3948,3967,3969,3971,3974,3975,3976,4043,4059,4061,4062,4064,4065,4069,4070,4145,42,423,4269,43,4300,4303,4308,4311,4312,4313,4361,4449,445,446,4475,4476,4479,4480,4483,4485,4486,450,4581,4609,4610,4611,4613,4614,4685,4707,4708,4709,4710,4799,4800,4825,4831,4832,4905,4940,4941,4942,4945,4947,4948,4953,4954,4957,540,572,627,743,762,763,77,787,80,81,84,871,899,901,902,905,906}'::integer[]))) + -> BitmapAnd (cost=67739.00..67739.00 rows=36530 width=0) (actual time=17.288..17.288 rows=1 loops=1) + -> Bitmap Index Scan on idx_let_me_out__date_column (cost=0.00..5393.04 rows=221868 width=0) (actual time=7.834..7.834 rows=31 loops=1) + Index Cond: (date_column = ANY ('{03-19-2023,03-08-2023,03-13-2023,03-29-2023,03-20-2023,03-28-2023,03-23-2023,03-04-2023,03-05-2023,03-18-2023,03-14-2023,03-06-2023,03-15-2023,03-31-2023,03-11-2023,03-21-2023,03-24-2023,03-30-2023,03-26-2023,03-03-2023,03-22-2023,03-01-2023,03-12-2023,03-17-2023,03-27-2023,03-07-2023,03-16-2023,03-10-2023,03-25-2023,03-09-2023,03-02-2023}'::date[])) + -> Bitmap Index Scan on idx_let_me_out__int_column (cost=0.00..62210.67 rows=2744086 width=0) (actual time=9.449..9.449 rows=169 loops=1) + Index Cond: (int_column 
= ANY ('{1003,1025,1026,1033,1034,1216,1221,160,161,1780,3049,305,3051,3052,3069,3077,3083,3084,3092,3121,3122,3123,3124,3180,3182,3183,3184,3193,3225,3226,3227,3228,3234,3267,3269,3270,3271,3272,3277,3301,3302,3303,3305,3307,3308,3310,3314,3317,3318,3319,3320,3321,3343,3344,3345,3347,3348,3388,339,341,345,346,347,349,3522,3565,3606,3607,3610,3612,3613,3637,3695,3738,3739,3740,3741,3742,3764,3829,3859,3861,3864,3865,3866,3867,3870,3871,3948,3967,3969,3971,3974,3975,3976,4043,4059,4061,4062,4064,4065,4069,4070,4145,42,423,4269,43,4300,4303,4308,4311,4312,4313,4361,4449,445,446,4475,4476,4479,4480,4483,4485,4486,450,4581,4609,4610,4611,4613,4614,4685,4707,4708,4709,4710,4799,4800,4825,4831,4832,4905,4940,4941,4942,4945,4947,4948,4953,4954,4957,540,572,627,743,762,763,77,787,80,81,84,871,899,901,902,905,906}'::integer[])) + Planning time: 11.073 ms + (slice0) Executor memory: 119K bytes. + (slice1) Executor memory: 49521K bytes avg x 3 workers, 49521K bytes max (seg0). + Memory used: 128000kB + Optimizer: Postgres query optimizer + Execution time: 126.450 ms +(14 rows) diff --git a/src/test/isolation2/sql/bitmap_index_concurrent.sql b/src/test/isolation2/sql/bitmap_index_concurrent.sql index 0d236da5076a..9db6ded7f095 100644 --- a/src/test/isolation2/sql/bitmap_index_concurrent.sql +++ b/src/test/isolation2/sql/bitmap_index_concurrent.sql @@ -184,3 +184,46 @@ SELECT gp_inject_fault('after_read_one_bitmap_idx_page', 'reset', dbid) FROM gp_ -- Let's check the total tuple count after the test. SELECT count(*) FROM bmupdate WHERE id >= 97 and id <= 99 and gp_segment_id = 0; +-- Regression test, when large amount of inserts concurrent inserts happen, +-- querying the table shouldn't take along time. +-- This test is from https://github.com/greenplum-db/gpdb/issues/15389 +DROP TABLE IF EXISTS bug.let_me_out; +DROP SCHEMA IF EXISTS bug; +CREATE SCHEMA bug; +CREATE TABLE bug.let_me_out +( + date_column date NULL, + int_column int4 NULL +) +WITH (appendonly = true, orientation = column) +distributed randomly; + +1&: INSERT INTO bug.let_me_out(date_column, int_column) + SELECT ('2017-01-01'::timestamp + random() * ('2023-08-10'::timestamp - '2017-01-01'::timestamp))::date AS date_column, + id / 50000 AS int_column + -- id % 700 as int_column + FROM generate_series(1, 30000000) s(id); + +2&: INSERT INTO bug.let_me_out(date_column, int_column) + SELECT ('2017-01-01'::timestamp + random() * ('2023-08-10'::timestamp - '2017-01-01'::timestamp))::date AS date_column, + id / 50000 AS int_column + -- id % 700 as int_column + FROM generate_series(30000000, 50000000) s(id); + +1<: +2<: + +CREATE INDEX idx_let_me_out__date_column ON bug.let_me_out USING bitmap (date_column); +CREATE INDEX idx_let_me_out__int_column ON bug.let_me_out USING bitmap (int_column); +VACUUM FULL ANALYZE bug.let_me_out; + +SET random_page_cost = 1; +-- expected to finish under 250ms, but if we go over 60000, then something really bad happened +SET statement_timeout=60000; +EXPLAIN ANALYZE +SELECT date_column, + int_column +FROM bug.let_me_out +WHERE date_column in ('2023-03-19', '2023-03-08', '2023-03-13', '2023-03-29', '2023-03-20', '2023-03-28', '2023-03-23', '2023-03-04', '2023-03-05', '2023-03-18', '2023-03-14', '2023-03-06', '2023-03-15', '2023-03-31', '2023-03-11', '2023-03-21', '2023-03-24', '2023-03-30', '2023-03-26', '2023-03-03', '2023-03-22', '2023-03-01', '2023-03-12', '2023-03-17', '2023-03-27', '2023-03-07', '2023-03-16', '2023-03-10', '2023-03-25', '2023-03-09', '2023-03-02') +AND +int_column IN 
(1003,1025,1026,1033,1034,1216,1221,160,161,1780,3049,305,3051,3052,3069,3077,3083,3084,3092,3121,3122,3123,3124,3180,3182,3183,3184,3193,3225,3226,3227,3228,3234,3267,3269,3270,3271,3272,3277,3301,3302,3303,3305,3307,3308,3310,3314,3317,3318,3319,3320,3321,3343,3344,3345,3347,3348,3388,339,341,345,346,347,349,3522,3565,3606,3607,3610,3612,3613,3637,3695,3738,3739,3740,3741,3742,3764,3829,3859,3861,3864,3865,3866,3867,3870,3871,3948,3967,3969,3971,3974,3975,3976,4043,4059,4061,4062,4064,4065,4069,4070,4145,42,423,4269,43,4300,4303,4308,4311,4312,4313,4361,4449,445,446,4475,4476,4479,4480,4483,4485,4486,450,4581,4609,4610,4611,4613,4614,4685,4707,4708,4709,4710,4799,4800,4825,4831,4832,4905,4940,4941,4942,4945,4947,4948,4953,4954,4957,540,572,627,743,762,763,77,787,80,81,84,871,899,901,902,905,906); From 7f3c91f7bc6b4fc417b6a7775d5b67b85e2fec4e Mon Sep 17 00:00:00 2001 From: Marbin Tan Date: Fri, 8 Sep 2023 14:49:18 -0700 Subject: [PATCH 048/106] Remove `getaddrinfo` in `SendDummyPacket()` to address malloc deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `SendDummyPacket` eventually calls `getaddrinfo` (which is a reentrant), however, `getaddrinfo` is not an async-signal-safe function. `getaddrinfo` internally calls `malloc`, which is strongly advised to not do within a signal handler as it may cause deadlocks. Cache the accepted socket information for the listener, so that it can be reused in `SendDummyPacket()`. The purpose of `SendDummyPacket` is to exit more quickly; it circumvents the polling that happens, which eventually times out after 250ms. Without `SendDummyPacket()`, there will be multiple test failures since some tests expects the backend connection to terminate almost immediately. To view all the async-signal-safe functions, please view the signal-safety(7) — Linux manual page. ==================== Accommodate for AF_INET6 and AF_INET when doing a motion layer IPC teardown Previously on commit 70306db18e2, we removed pg_getaddrinfo_all for signal handlers. However, in doing so, the capability of supporting both AF_INET6 and AF_INET was lost; this responsibility must now be handled by us. The commit mentioned above fixed the issue for AF_INET (IPv4), but not for AF_INET6 (IPv6). This commit also addresses the situation for both AF_INET and AF_INET6. ==================== This commit backports: 70306db18e2bfac4969bf327f5ada45bf33f57a1 and 1ee34a424548e759e82c93cf9b4fe1411465e0e9. Conflicts resolved: - setupUDPListeningSocket is not one to one with 7X, so resolve conflict on the location where `listenerSockaddr` is `memcpy`ed. - resolve conflicts due to difference of from the modernized version and refactor from 50748b7 Reviewed-by: Soumyadeep Chakraborty --- src/backend/cdb/motion/ic_udpifc.c | 201 ++++++----- src/backend/cdb/motion/test/Makefile | 9 + .../cdb/motion/test/cdbsenddummypacket_test.c | 331 ++++++++++++++++++ 3 files changed, 445 insertions(+), 96 deletions(-) create mode 100644 src/backend/cdb/motion/test/Makefile create mode 100644 src/backend/cdb/motion/test/cdbsenddummypacket_test.c diff --git a/src/backend/cdb/motion/ic_udpifc.c b/src/backend/cdb/motion/ic_udpifc.c index 9cce2e356a6b..07892938875f 100644 --- a/src/backend/cdb/motion/ic_udpifc.c +++ b/src/backend/cdb/motion/ic_udpifc.c @@ -632,6 +632,9 @@ typedef struct ICStatistics /* Statistics for UDP interconnect. 
*/ static ICStatistics ic_statistics; +/* Cached sockaddr of the listening udp socket */ +static struct sockaddr_storage udp_dummy_packet_sockaddr; + /*========================================================================= * STATIC FUNCTIONS declarations */ @@ -653,10 +656,16 @@ static void setRxThreadError(int eno); static void resetRxThreadError(void); static void SendDummyPacket(void); +static void ConvertToIPv4MappedAddr(struct sockaddr_storage *sockaddr, socklen_t *o_len); +#if defined(__darwin__) +#define s6_addr32 __u6_addr.__u6_addr32 +static void ConvertIPv6WildcardToLoopback(struct sockaddr_storage* dest); +#endif static void getSockAddr(struct sockaddr_storage *peer, socklen_t *peer_len, const char *listenerAddr, int listenerPort); static void setXmitSocketOptions(int txfd); static uint32 setSocketBufferSize(int fd, int type, int expectedSize, int leastSize); -static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily); +static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, + int *txFamily, struct sockaddr_storage *listenerSockaddr); static ChunkTransportStateEntry *startOutgoingUDPConnections(ChunkTransportState *transportStates, Slice *sendSlice, int *pOutgoingCount); @@ -1157,7 +1166,7 @@ resetRxThreadError() * Setup udp listening socket. */ static void -setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily) +setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily, struct sockaddr_storage *listenerSockaddr) { int errnoSave; int fd = -1; @@ -1324,6 +1333,13 @@ setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFami else *listenerPort = ntohs(((struct sockaddr_in *) &our_addr)->sin_port); + /* + * cache the successful sockaddr of the listening socket, so + * we can use this information to connect to the listening socket. + */ + if (listenerSockaddr != NULL) + memcpy(listenerSockaddr, &our_addr, sizeof(struct sockaddr_storage)); + setXmitSocketOptions(fd); return; @@ -1440,8 +1456,8 @@ InitMotionUDPIFC(int *listenerSocketFd, uint16 *listenerPort) /* * setup listening socket and sending socket for Interconnect. */ - setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily); - setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily); + setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily, &udp_dummy_packet_sockaddr); + setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily, NULL); /* Initialize receive control data. */ resetMainThreadWaiting(&rx_control_info.mainWaitingState); @@ -1543,6 +1559,8 @@ CleanupMotionUDPIFC(void) ICSenderPort = 0; ICSenderFamily = 0; + memset(&udp_dummy_packet_sockaddr, 0, sizeof(udp_dummy_packet_sockaddr)); + #ifdef USE_ASSERT_CHECKING /* @@ -2852,30 +2870,8 @@ setupOutgoingUDPConnection(ChunkTransportState *transportStates, ChunkTransportS */ if (pEntry->txfd_family == AF_INET6) { - struct sockaddr_storage temp; - const struct sockaddr_in *in = (const struct sockaddr_in *) &conn->peer; - struct sockaddr_in6 *in6_new = (struct sockaddr_in6 *) &temp; - - memset(&temp, 0, sizeof(temp)); - elog(DEBUG1, "We are inet6, remote is inet. Converting to v4 mapped address."); - - /* Construct a V4-to-6 mapped address. 
*/ - temp.ss_family = AF_INET6; - in6_new->sin6_family = AF_INET6; - in6_new->sin6_port = in->sin_port; - in6_new->sin6_flowinfo = 0; - - memset(&in6_new->sin6_addr, '\0', sizeof(in6_new->sin6_addr)); - /* in6_new->sin6_addr.s6_addr16[5] = 0xffff; */ - ((uint16 *) &in6_new->sin6_addr)[5] = 0xffff; - /* in6_new->sin6_addr.s6_addr32[3] = in->sin_addr.s_addr; */ - memcpy(((char *) &in6_new->sin6_addr) + 12, &(in->sin_addr), 4); - in6_new->sin6_scope_id = 0; - - /* copy it back */ - memcpy(&conn->peer, &temp, sizeof(struct sockaddr_in6)); - conn->peer_len = sizeof(struct sockaddr_in6); + ConvertToIPv4MappedAddr(&conn->peer, &conn->peer_len); } else { @@ -6900,109 +6896,122 @@ WaitInterconnectQuitUDPIFC(void) } /* - * Send a dummy packet to interconnect thread to exit poll() immediately + * If the socket was created AF_INET6, but the address we want to + * send to is IPv4 (AF_INET), we need to change the address + * format. On Linux, this is not necessary: glibc automatically + * handles this. But on MAC OSX and Solaris, we need to convert + * the IPv4 address to IPv4-mapped IPv6 address in AF_INET6 format. + * + * The comment above relies on getaddrinfo() via function getSockAddr to get + * the correct V4-mapped address. We need to be careful here as we need to + * ensure that the platform we are using is POSIX 1003-2001 compliant. + * Just to be on the safeside, we'll be keeping this function for + * now to be used for all platforms and not rely on POSIX. + * + * Since this can be called in a signal handler, we avoid the use of + * async-signal unsafe functions such as memset/memcpy */ static void -SendDummyPacket(void) +ConvertToIPv4MappedAddr(struct sockaddr_storage *sockaddr, socklen_t *o_len) { - int sockfd = -1; - int ret; - struct addrinfo *addrs = NULL; - struct addrinfo *rp; - struct addrinfo hint; - uint16 udp_listener; - char port_str[32] = {0}; - char *dummy_pkt = "stop it"; - int counter; + const struct sockaddr_in *in = (const struct sockaddr_in *) sockaddr; + struct sockaddr_storage temp = {0}; + struct sockaddr_in6 *in6_new = (struct sockaddr_in6 *) &temp; - /* - * Get address info from interconnect udp listener port - */ - udp_listener = (Gp_listener_port >> 16) & 0x0ffff; - snprintf(port_str, sizeof(port_str), "%d", udp_listener); + /* Construct a IPv4-to-IPv6 mapped address. 
*/ + temp.ss_family = AF_INET6; + in6_new->sin6_family = AF_INET6; + in6_new->sin6_port = in->sin_port; + in6_new->sin6_flowinfo = 0; - MemSet(&hint, 0, sizeof(hint)); - hint.ai_socktype = SOCK_DGRAM; - hint.ai_family = AF_UNSPEC; /* Allow for IPv4 or IPv6 */ + ((uint16 *) &in6_new->sin6_addr)[5] = 0xffff; - /* Never do name resolution */ -#ifdef AI_NUMERICSERV - hint.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV; -#else - hint.ai_flags = AI_NUMERICHOST; + in6_new->sin6_addr.s6_addr32[3] = in->sin_addr.s_addr; + in6_new->sin6_scope_id = 0; + + /* copy it back */ + *sockaddr = temp; + *o_len = sizeof(struct sockaddr_in6); +} + +#if defined(__darwin__) +/* macos does not accept :: as the destination, we will need to covert this to the IPv6 loopback */ +static void +ConvertIPv6WildcardToLoopback(struct sockaddr_storage* dest) +{ + char address[INET6_ADDRSTRLEN]; + /* we want to terminate our own process, so this should be local */ + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; + inet_ntop(AF_INET6, &in6->sin6_addr, address, sizeof(address)); + if (strcmp("::", address) == 0) + ((struct sockaddr_in6 *)dest)->sin6_addr = in6addr_loopback; +} #endif - ret = pg_getaddrinfo_all(interconnect_address, port_str, &hint, &addrs); - if (ret || !addrs) - { - elog(LOG, "send dummy packet failed, pg_getaddrinfo_all(): %m"); - goto send_error; - } +/* + * Send a dummy packet to interconnect thread to exit poll() immediately + */ +static void +SendDummyPacket(void) +{ + int ret; + char *dummy_pkt = "stop it"; + int counter; + struct sockaddr_storage dest; + socklen_t dest_len; - for (rp = addrs; rp != NULL; rp = rp->ai_next) - { - /* Create socket according to pg_getaddrinfo_all() */ - sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); - if (sockfd < 0) - continue; + Assert(udp_dummy_packet_sockaddr.ss_family == AF_INET || udp_dummy_packet_sockaddr.ss_family == AF_INET6); + Assert(ICSenderFamily == AF_INET || ICSenderFamily == AF_INET6); - if (!pg_set_noblock(sockfd)) - { - if (sockfd >= 0) - { - closesocket(sockfd); - sockfd = -1; - } - continue; - } - break; + dest = udp_dummy_packet_sockaddr; + dest_len = (ICSenderFamily == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); + +#if defined(__darwin__) + if (ICSenderFamily == AF_INET6) + { +#if defined(__darwin__) + if (udp_dummy_packet_sockaddr.ss_family == AF_INET6) + ConvertIPv6WildcardToLoopback(&dest); +#endif + if (udp_dummy_packet_sockaddr.ss_family == AF_INET) + ConvertToIPv4MappedAddr(&dest, &dest_len); } +#endif - if (rp == NULL) + if (ICSenderFamily == AF_INET && udp_dummy_packet_sockaddr.ss_family == AF_INET6) { - elog(LOG, "send dummy packet failed, create socket failed: %m"); - goto send_error; + /* the size of AF_INET6 is bigger than the side of IPv4, so + * converting from IPv6 to IPv4 may potentially not work. */ + ereport(LOG, (errmsg("sending dummy packet failed: cannot send from AF_INET to receiving on AF_INET6"))); + return; } /* - * Send a dummy package to the interconnect listener, try 10 times + * Send a dummy package to the interconnect listener, try 10 times. + * We don't want to close the socket at the end of this function, since + * the socket will eventually close during the motion layer cleanup. 
*/ - counter = 0; while (counter < 10) { counter++; - ret = sendto(sockfd, dummy_pkt, strlen(dummy_pkt), 0, rp->ai_addr, rp->ai_addrlen); + ret = sendto(ICSenderSocket, dummy_pkt, strlen(dummy_pkt), 0, (struct sockaddr *) &dest, dest_len); if (ret < 0) { if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) continue; else { - elog(LOG, "send dummy packet failed, sendto failed: %m"); - goto send_error; + ereport(LOG, (errmsg("send dummy packet failed, sendto failed: %m"))); + return; } } break; } if (counter >= 10) - { - elog(LOG, "send dummy packet failed, sendto failed: %m"); - goto send_error; - } + ereport(LOG, (errmsg("send dummy packet failed, sendto failed with 10 times: %m"))); - pg_freeaddrinfo_all(hint.ai_family, addrs); - closesocket(sockfd); - return; - -send_error: - - if (addrs) - pg_freeaddrinfo_all(hint.ai_family, addrs); - if (sockfd != -1) - closesocket(sockfd); - return; } uint32 diff --git a/src/backend/cdb/motion/test/Makefile b/src/backend/cdb/motion/test/Makefile new file mode 100644 index 000000000000..878fde1fdd1e --- /dev/null +++ b/src/backend/cdb/motion/test/Makefile @@ -0,0 +1,9 @@ +subdir=src/backend/cdb/motion +top_builddir=../../../../.. +include $(top_builddir)/src/Makefile.global + +TARGETS=cdbsenddummypacket + +include $(top_builddir)/src/backend/mock.mk + +cdbsenddummypacket.t: EXCL_OBJS += src/backend/cdb/motion/ic_udpifc.o diff --git a/src/backend/cdb/motion/test/cdbsenddummypacket_test.c b/src/backend/cdb/motion/test/cdbsenddummypacket_test.c new file mode 100644 index 000000000000..9cde98fc5efd --- /dev/null +++ b/src/backend/cdb/motion/test/cdbsenddummypacket_test.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include +#include "cmockery.h" + +#include "../../motion/ic_udpifc.c" + +bool break_loop = false; + +/* + * PROTOTYPES + */ + +extern ssize_t __real_sendto(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *dest_addr, socklen_t addrlen); +int __wrap_errcode(int sqlerrcode); +int __wrap_errdetail(const char *fmt,...); +int __wrap_errmsg(const char *fmt,...); +ssize_t __wrap_sendto(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *dest_addr, socklen_t addrlen); +void __wrap_elog_finish(int elevel, const char *fmt,...); +void __wrap_elog_start(const char *filename, int lineno, const char *funcname); +void __wrap_errfinish(int dummy __attribute__((unused)),...); +void __wrap_errstart(int elevel, const char *filename, int lineno, const char *funcname, const char *domain); +void __wrap_write_log(const char *fmt,...); + +/* + * WRAPPERS + */ + +int __wrap_errcode(int sqlerrcode) {return 0;} +int __wrap_errdetail(const char *fmt,...) { return 0; } +int __wrap_errmsg(const char *fmt,...) { return 0; } +void __wrap_elog_start(const char *filename, int lineno, const char *funcname) {} +void __wrap_errfinish(int dummy __attribute__((unused)),...) {} +void __wrap_errstart(int elevel, const char *filename, int lineno, const char *funcname, const char *domain){} + +void __wrap_write_log(const char *fmt,...) +{ + /* check if we actually receive the message that sends the error */ + if (strcmp("Interconnect error: short conn receive (\%d)", fmt) == 0) + break_loop = true; +} + +void __wrap_elog_finish(int elevel, const char *fmt,...) 
+{ + assert_true(elevel <= LOG); +} + +ssize_t __wrap_sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen) +{ + assert_true(sockfd != PGINVALID_SOCKET); +#if defined(__darwin__) + /* check to see if we converted the wildcard value to something routeable */ + if (udp_dummy_packet_sockaddr.ss_family == AF_INET6) + { + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) dest_addr; + char address[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, &in6->sin6_addr, address, sizeof(address)); + /* '::' and '::1' should always be '::1' */ + assert_true(strcmp("::1", address) == 0); + } +#endif + + return __real_sendto(sockfd, buf, len, flags, dest_addr, addrlen); +} + +/* + * HELPER FUNCTIONS + */ + +static void wait_for_receiver(bool should_fail) +{ + int counter = 0; + /* break_loop should be reset at the beginning of each test + * The while loop will end early once __wrap_write_log is called; + * this should happen when the receiver polls the message that + * SendDummyPacket sends. + */ + while(!break_loop) + { + /* we are sleeping for a generous amount of time; we should never + * need this much time. There is something wrong if it takes this long. + * + * expect to fail if the communication is invalid, i.e,. IPv4 to IPv6 + */ + if (counter > 2) + break; + sleep(1); + counter++; + } + + if (should_fail) + assert_true(counter > 2); + else + assert_true(counter < 2); +} + +static void +start_receiver() +{ + pthread_attr_t t_atts; + sigset_t pthread_sigs; + int pthread_err; + + pthread_attr_init(&t_atts); + pthread_attr_setstacksize(&t_atts, Max(PTHREAD_STACK_MIN, (128 * 1024))); + ic_set_pthread_sigmasks(&pthread_sigs); + pthread_err = pthread_create(&ic_control_info.threadHandle, &t_atts, rxThreadFunc, NULL); + ic_reset_pthread_sigmasks(&pthread_sigs); + + pthread_attr_destroy(&t_atts); + if (pthread_err != 0) + { + ic_control_info.threadCreated = false; + printf("failed to create thread"); + fail(); + } + + ic_control_info.threadCreated = true; +} + +static sa_family_t +create_sender_socket(sa_family_t af) +{ + int sockfd = socket(af, + SOCK_DGRAM, + 0); + if (sockfd < 0) + { + printf("send dummy packet failed, create socket failed: %m\n"); + fail(); + return PGINVALID_SOCKET; + } + + if (!pg_set_noblock(sockfd)) + { + if (sockfd >= 0) + { + closesocket(sockfd); + } + printf("send dummy packet failed, setting socket with noblock failed: %m\n"); + fail(); + return PGINVALID_SOCKET; + } + + return sockfd; +} + +/* + * START UNIT TEST + */ + +static void +test_send_dummy_packet_ipv4_to_ipv4(void **state) +{ + break_loop = false; + int listenerSocketFd; + uint16 listenerPort; + int txFamily; + + interconnect_address = "0.0.0.0"; + setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); + + Gp_listener_port = (listenerPort << 16); + UDP_listenerFd = listenerSocketFd; + + ICSenderSocket = create_sender_socket(AF_INET); + ICSenderFamily = AF_INET; + + SendDummyPacket(); + + const struct sockaddr_in *in = (const struct sockaddr_in *) &udp_dummy_packet_sockaddr; + assert_true(txFamily == AF_INET); + assert_true(in->sin_family == AF_INET); + assert_true(listenerPort == ntohs(in->sin_port)); + assert_true(strcmp("0.0.0.0", inet_ntoa(in->sin_addr)) == 0); + + wait_for_receiver(false); +} + +/* Sending from IPv4 to receiving on IPv6 is currently not supported. + * The size of AF_INET6 is bigger than the side of IPv4, so converting from + * IPv6 to IPv4 may potentially not work. 
+ */ +static void +test_send_dummy_packet_ipv4_to_ipv6_should_fail(void **state) +{ + break_loop = false; + int listenerSocketFd; + uint16 listenerPort; + int txFamily; + + interconnect_address = "::"; + setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); + + Gp_listener_port = (listenerPort << 16); + UDP_listenerFd = listenerSocketFd; + + ICSenderSocket = create_sender_socket(AF_INET); + ICSenderFamily = AF_INET; + + SendDummyPacket(); + + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; + assert_true(txFamily == AF_INET6); + assert_true(in6->sin6_family == AF_INET6); + assert_true(listenerPort == ntohs(in6->sin6_port)); + + wait_for_receiver(true); +} + +static void +test_send_dummy_packet_ipv6_to_ipv6(void **state) +{ + break_loop = false; + int listenerSocketFd; + uint16 listenerPort; + int txFamily; + + interconnect_address = "::1"; + setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); + + Gp_listener_port = (listenerPort << 16); + UDP_listenerFd = listenerSocketFd; + + ICSenderSocket = create_sender_socket(AF_INET6); + ICSenderFamily = AF_INET6; + + SendDummyPacket(); + + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; + assert_true(txFamily == AF_INET6); + assert_true(in6->sin6_family == AF_INET6); + assert_true(listenerPort == ntohs(in6->sin6_port)); + + wait_for_receiver(false); +} + +static void +test_send_dummy_packet_ipv6_to_ipv4(void **state) +{ + break_loop = false; + int listenerSocketFd; + uint16 listenerPort; + int txFamily; + + interconnect_address = "0.0.0.0"; + setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); + + Gp_listener_port = (listenerPort << 16); + UDP_listenerFd = listenerSocketFd; + + ICSenderSocket = create_sender_socket(AF_INET6); + ICSenderFamily = AF_INET6; + + SendDummyPacket(); + + const struct sockaddr_in *in = (const struct sockaddr_in *) &udp_dummy_packet_sockaddr; + assert_true(txFamily == AF_INET); + assert_true(in->sin_family == AF_INET); + assert_true(listenerPort == ntohs(in->sin_port)); + assert_true(strcmp("0.0.0.0", inet_ntoa(in->sin_addr)) == 0); + + wait_for_receiver(false); +} + + +static void +test_send_dummy_packet_ipv6_to_ipv6_wildcard(void **state) +{ + break_loop = false; + int listenerSocketFd; + uint16 listenerPort; + int txFamily; + + interconnect_address = "::"; + setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); + + Gp_listener_port = (listenerPort << 16); + UDP_listenerFd = listenerSocketFd; + + ICSenderSocket = create_sender_socket(AF_INET6); + ICSenderFamily = AF_INET6; + + SendDummyPacket(); + + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; + assert_true(txFamily == AF_INET6); + assert_true(in6->sin6_family == AF_INET6); + assert_true(listenerPort == ntohs(in6->sin6_port)); + + wait_for_receiver(false); +} + +int +main(int argc, char* argv[]) +{ + cmockery_parse_arguments(argc, argv); + + int is_ipv6_supported = true; + int sockfd = socket(AF_INET6, SOCK_DGRAM, 0); + if (sockfd < 0 && errno == EAFNOSUPPORT) + is_ipv6_supported = false; + + log_min_messages = DEBUG1; + + start_receiver(); + + if (is_ipv6_supported) + { + const UnitTest tests[] = { + unit_test(test_send_dummy_packet_ipv4_to_ipv4), + unit_test(test_send_dummy_packet_ipv4_to_ipv6_should_fail), + unit_test(test_send_dummy_packet_ipv6_to_ipv6), + 
unit_test(test_send_dummy_packet_ipv6_to_ipv4), + unit_test(test_send_dummy_packet_ipv6_to_ipv6_wildcard), + }; + return run_tests(tests); + } + else + { + printf("WARNING: IPv6 is not supported, skipping unittest\n"); + const UnitTest tests[] = { + unit_test(test_send_dummy_packet_ipv4_to_ipv4), + }; + return run_tests(tests); + } + return 0; +} From 70270654da95e02927c4817040458bc5695e385d Mon Sep 17 00:00:00 2001 From: Hongxu Ma Date: Wed, 25 Oct 2023 17:04:16 +0800 Subject: [PATCH 049/106] [6X] Fix segmentation fault during dispatch interrupt (#16602) This commit ported #16141 to 6x. ---- QD sends ready to execute plan to QE as an array of bytes named as queryText (see buildGpQueryString function for details). This message may be large enough in case of complex multi-node plan. This message is common for any process of any slice on QEs. To avoid duplicating this message for each of hundreds and thousands connection for the query, gpdb temporary replace malloc'ed output connections buffers with queryText, allocated with Postgres allocator (see PQsendGpQuery_shared). Normally, no messages are written to connection buffer until dispatching was done and buffer will be replaced back (see cdbdisp_waitDispatchFinish_async->pqFlushNonBlocking->pqSendSome). But this process may be interrupted by client. In case of receiving SIGTERM, gpdb aborts transaction, destroy dispatcher state and closes connection to QEs. libpq tries to append termination X-message by default (see closePGconn). But the shared buffer mustn't be modified by the separate connection. Moreover, all others buffer attributes (e.g. size) left untouched and may lead to decision to reallocate buffer allocated by different allocator and consequent segmentation fault. This patch excludes sending termination message in case of shared buffer still not sent. All other message types aren't sent by QD during dispatch. --- src/backend/cdb/dispatcher/test/Makefile | 10 +- .../cdb/dispatcher/test/cdbdisp_query_test.c | 316 ++++++++++++++++++ .../cdb/dispatcher/test/gpsegconfig_dump | 4 + src/interfaces/libpq/fe-connect.c | 6 +- src/interfaces/libpq/fe-misc.c | 3 + 5 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 src/backend/cdb/dispatcher/test/cdbdisp_query_test.c create mode 100644 src/backend/cdb/dispatcher/test/gpsegconfig_dump diff --git a/src/backend/cdb/dispatcher/test/Makefile b/src/backend/cdb/dispatcher/test/Makefile index 8ff64dcedf2d..77fc66e34433 100644 --- a/src/backend/cdb/dispatcher/test/Makefile +++ b/src/backend/cdb/dispatcher/test/Makefile @@ -3,7 +3,8 @@ top_builddir = ../../../../.. 
include $(top_builddir)/src/Makefile.global TARGETS=cdbdispatchresult \ - cdbgang + cdbgang \ + cdbdisp_query include $(top_builddir)/src/backend/mock.mk @@ -23,4 +24,9 @@ cdbgang.t: \ $(MOCK_DIR)/backend/utils/mmgr/redzone_handler_mock.o \ $(MOCK_DIR)/backend/utils/misc/faultinjector_mock.o -include $(top_builddir)/src/backend/mock.mk +cdbdisp_query.t: \ + $(MOCK_DIR)/backend/access/transam/xlog_mock.o \ + $(MOCK_DIR)/backend/libpq/fe-exec_mock.o \ + $(MOCK_DIR)/backend/libpq/fe-misc_mock.o \ + $(MOCK_DIR)/backend/cdb/cdbfts_mock.o \ + $(MOCK_DIR)/backend/utils/misc/gpexpand_mock.o diff --git a/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c b/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c new file mode 100644 index 000000000000..8355f71b5f59 --- /dev/null +++ b/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c @@ -0,0 +1,316 @@ +#include +#include +#include +#include "cmockery.h" +#include "postgres.h" + +#include "storage/ipc.h" +#include "storage/proc.h" + +#include "../cdbdisp_query.c" + + +#undef PG_RE_THROW +#define PG_RE_THROW() siglongjmp(*PG_exception_stack, 1) + + +int __wrap_errmsg(const char *fmt,...); +int __wrap_errcode(int sqlerrcode); +bool __wrap_errstart(int elevel, const char *filename, int lineno, + const char *funcname, const char *domain); +void __wrap_errfinish(int dummy __attribute__((unused)),...); +Gang *__wrap_cdbgang_createGang_async(List *segments, SegmentType segmentType); +int __wrap_pqPutMsgStart(char msg_type, bool force_len, PGconn *conn); +int __wrap_PQcancel(PGcancel *cancel, char *errbuf, int errbufsize); +char *__wrap_serializeNode(Node *node, int *size, int *uncompressed_size_out); +char *__wrap_qdSerializeDtxContextInfo(int *size, bool wantSnapshot, bool inCursor, int txnOptions, char *debugCaller); +void __wrap_VirtualXactLockTableInsert(VirtualTransactionId vxid); +void __wrap_AcceptInvalidationMessages(void); +static void terminate_process(); + + +int +__wrap_errmsg(const char *fmt,...) +{ + check_expected(fmt); + optional_assignment(fmt); + return (int) mock(); +} + + +int +__wrap_errcode(int sqlerrcode) +{ + check_expected(sqlerrcode); + return (int) mock(); +} + + +bool +__wrap_errstart(int elevel, const char *filename, int lineno, + const char *funcname, const char *domain) +{ + check_expected(elevel); + check_expected(filename); + check_expected(lineno); + check_expected(funcname); + check_expected(domain); + optional_assignment(filename); + optional_assignment(funcname); + optional_assignment(domain); + return (bool) mock(); +} + + +void __wrap_errfinish(int dummy __attribute__((unused)),...) 
+{ + PG_RE_THROW(); +} + + +static void +expect_ereport(int expect_elevel) +{ + expect_any(__wrap_errmsg, fmt); + will_be_called(__wrap_errmsg); + + expect_any(__wrap_errcode, sqlerrcode); + will_be_called(__wrap_errcode); + + expect_value(__wrap_errstart, elevel, expect_elevel); + expect_any(__wrap_errstart, filename); + expect_any(__wrap_errstart, lineno); + expect_any(__wrap_errstart, funcname); + expect_any(__wrap_errstart, domain); + if (expect_elevel < ERROR) + { + will_return(__wrap_errstart, false); + } + else + { + will_return(__wrap_errstart, true); + } +} + + +Gang * +__wrap_cdbgang_createGang_async(List *segments, SegmentType segmentType) +{ + MemoryContext oldContext = MemoryContextSwitchTo(DispatcherContext); + Gang *gang = buildGangDefinition(segments, segmentType); + + MemoryContextSwitchTo(oldContext); + + PGconn *conn = (PGconn *) malloc(sizeof(PGconn)); + + MemSet(conn, 0, sizeof(PGconn)); + initPQExpBuffer(&conn->errorMessage); + initPQExpBuffer(&conn->workBuffer); + gang->db_descriptors[0]->conn = conn; + + return gang; +} + + +int +__wrap_pqPutMsgStart(char msg_type, bool force_len, PGconn *conn) +{ + if (conn->outBuffer_shared) + fail_msg("Mustn't send something else during dispatch!"); + check_expected(msg_type); + check_expected(force_len); + check_expected(conn); + optional_assignment(conn); + return (int) mock(); +} + + +int +__wrap_PQcancel(PGcancel *cancel, char *errbuf, int errbufsize) +{ + return (int) mock(); +} + + +char * +__wrap_serializeNode(Node *node, int *size, int *uncompressed_size_out) +{ + const int alloc_size = 1024; + + if (size != NULL) + *size = alloc_size; + if (uncompressed_size_out != NULL) + *uncompressed_size_out = alloc_size; + + return (char *) palloc(alloc_size); +} + + +char * +__wrap_qdSerializeDtxContextInfo(int *size, bool wantSnapshot, bool inCursor, int txnOptions, char *debugCaller) +{ + const int alloc_size = 1024; + + assert_int_not_equal(size, NULL); + *size = alloc_size; + + return (char *) palloc(alloc_size); +} + + +void +__wrap_VirtualXactLockTableInsert(VirtualTransactionId vxid) +{ + mock(); +} + +void +__wrap_AcceptInvalidationMessages(void) +{ + mock(); +} + + +static void +terminate_process() +{ + die(SIGTERM); +} + +/* + * Test query may be interrupted during plan dispatching + */ +static void +test__CdbDispatchPlan_may_be_interrupted(void **state) +{ + PlannedStmt *plannedstmt = (PlannedStmt *) palloc(sizeof(PlannedStmt)); + QueryDesc *queryDesc = (QueryDesc *) palloc(sizeof(QueryDesc)); + + queryDesc->plannedstmt = plannedstmt; + /* ddesc->secContext is filled in cdbdisp_buildPlanQueryParms() */ + queryDesc->ddesc = (QueryDispatchDesc *) palloc(sizeof(QueryDispatchDesc)); + /* source text is required for buildGpQueryString() */ + queryDesc->sourceText = "select a from t1;"; + + /* slice table is needed to allocate gang */ + SliceTable *table = (SliceTable *) palloc(sizeof(SliceTable)); + Slice *slice = makeNode(Slice); + + slice->sliceIndex = 1; + slice->gangType = GANGTYPE_PRIMARY_READER; + slice->segments = list_make1_int(0); + table->slices = lappend(table->slices, slice); + + queryDesc->estate = CreateExecutorState(); + queryDesc->estate->es_sliceTable = table; + + /* cdbcomponent_getCdbComponents() mocks */ + will_be_called(FtsNotifyProber); + will_return(getFtsVersion, 1); + will_return(GetGpExpandVersion, 1); + + /* StartTransactionCommand() mocks */ + will_return(RecoveryInProgress, false); + will_be_called(__wrap_VirtualXactLockTableInsert); + will_be_called(__wrap_AcceptInvalidationMessages); + 
will_be_called(initialize_wal_bytes_written); + + /* + * cdbdisp_dispatchToGang() + * + * start sending MPP query to QE inside PQsendGpQuery_shared() replace + * connection buffer with the shared one + */ + expect_any(PQsendQueryStart, conn); + will_return(PQsendQueryStart, true); + + /* first try to flush MPP query inside PQsendGpQuery_shared() */ + expect_any(pqFlushNonBlocking, conn); + will_return(pqFlushNonBlocking, 1); + + /* + * cdbdisp_waitDispatchFinish() + * + * query will be interrupted before poll() + */ + expect_any(pqFlushNonBlocking, conn); + will_return_with_sideeffect(pqFlushNonBlocking, 1, &terminate_process, NULL); + + /* process was terminated by administrative command */ + expect_ereport(FATAL); + + /* QD will trying to cancel queries on QEs */ + will_return(__wrap_PQcancel, TRUE); + + /* during close and free connection */ + expect_any_count(pqClearAsyncResult, conn, 2); + will_be_called_count(pqClearAsyncResult, 2); + + /* + * BUT! pqPutMsgStart mustn't be called + * + * we can't send termination message (X) until shared message isn't sent + * out the buffer completely + */ + + /* + * dirty hack. cluster topology needed to allocate gangs is loaded from + * gpsegconfig_dump outside of transaction + */ + cdbcomponent_getCdbComponents(); + + StartTransactionCommand(); + + PG_TRY(); + { + CdbDispatchPlan(queryDesc, false, false); + fail(); + } + PG_CATCH(); + { + /* + * SIGTERM handling emulation gpdb bail out from CheckDispatchResult + * without flushing unsent messages in case of process exit in + * progress AtAbort_DispatcherState will be called during transaction + * abort + */ + proc_exit_inprogress = true; + + AtAbort_DispatcherState(); + } + PG_END_TRY(); +} + +int +main(int argc, char *argv[]) +{ + pqsignal(SIGUSR1, SIG_IGN); + pqsignal(SIGUSR2, SIG_IGN); + + cmockery_parse_arguments(argc, argv); + + const UnitTest tests[] = + { + unit_test(test__CdbDispatchPlan_may_be_interrupted) + }; + + Gp_role = GP_ROLE_DISPATCH; + /* to start transaction */ + PGPROC proc; + + MyBackendId = 7; + proc.backendId = MyBackendId; + MyProc = &proc; + /* to build cdb components info */ + GpIdentity.dbid = 1; + GpIdentity.segindex = -1; + + MemoryContextInit(); + + /* to avoid mocking cdbtm.c functions */ + MyTmGxactLocal = (TMGXACTLOCAL *) MemoryContextAllocZero(TopMemoryContext, sizeof(TMGXACTLOCAL)); + + SetSessionUserId(1000, true); + + return run_tests(tests); +} \ No newline at end of file diff --git a/src/backend/cdb/dispatcher/test/gpsegconfig_dump b/src/backend/cdb/dispatcher/test/gpsegconfig_dump new file mode 100644 index 000000000000..c033071faae6 --- /dev/null +++ b/src/backend/cdb/dispatcher/test/gpsegconfig_dump @@ -0,0 +1,4 @@ +1 -1 p p n u 6000 localhost localhost +2 0 p p n u 6002 localhost localhost +3 1 p p n u 6003 localhost localhost +4 2 p p n u 6004 localhost localhost diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index c418da5615f0..c42c8d684f07 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -3220,8 +3220,12 @@ sendTerminateConn(PGconn *conn) /* * Note that the protocol doesn't allow us to send Terminate messages * during the startup phase. + * + * GPDB: we won't manage to send any pq messages until dispatch isn't + * finished. But we can be here during dispatch interruption. 
*/ - if (conn->sock != PGINVALID_SOCKET && conn->status == CONNECTION_OK) + if (conn->sock != PGINVALID_SOCKET && conn->status == CONNECTION_OK && + !conn->outBuffer_shared) { /* * Try to send "close connection" message to backend. Ignore any diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index 98a06c287272..99ba3954f302 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -563,6 +563,9 @@ pqCheckInBufferSpace(size_t bytes_needed, PGconn *conn) int pqPutMsgStart(char msg_type, bool force_len, PGconn *conn) { + /* GPDB: we won't manage to send new message during dispatch */ + Assert(!conn->outBuffer_shared); + int lenPos; int endPos; From 5a90b924b2240c5990752278b8139fd8535fda38 Mon Sep 17 00:00:00 2001 From: Milica Trivich <124007332+milatrivich@users.noreply.github.com> Date: Wed, 25 Oct 2023 14:52:03 +0300 Subject: [PATCH 050/106] Remove return at the end of void functions. --- src/backend/access/appendonly/appendonlywriter.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/backend/access/appendonly/appendonlywriter.c b/src/backend/access/appendonly/appendonlywriter.c index 786ea874c300..2517f74171eb 100644 --- a/src/backend/access/appendonly/appendonlywriter.c +++ b/src/backend/access/appendonly/appendonlywriter.c @@ -139,8 +139,6 @@ InitAppendOnlyWriter(void) errmsg("not enough shared memory for append only writer"))); ereport(DEBUG1, (errmsg("initialized append only writer"))); - - return; } /* @@ -730,7 +728,6 @@ DeregisterSegnoForCompactionDrop(Oid relid, List *compactedSegmentFileList) } release_lightweight_lock(); - return; } void @@ -775,7 +772,6 @@ RegisterSegnoForCompactionDrop(Oid relid, List *compactedSegmentFileList) } release_lightweight_lock(); - return; } /* From 06ad6911c55af8d6eb7551f81632b5167e06294e Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Wed, 25 Oct 2023 22:12:14 +0800 Subject: [PATCH 051/106] [6X] Add support for checking ABI compatibility. (#16298) Currently, we're suffering from ABI issues in Greenplum 6. This patch adds support for checking such issues before release by utilizing GitHub action. Currently, the ABI checking strategy is comparing postgres binary from the last released tag with the current 6X_STABLE branch. Issues need to address in follow-up patches. - ABI checking targets: In this PR, only the postgres binary is checked, we can check more targets in future, e.g., libpq.so. 
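The check itself is driven by the workflow file added below
(.github/workflows/greenplum-abi-tests.yml). As a rough, hypothetical
sketch of the comparison strategy described above (this is not the
actual workflow; the tool choice, build paths, and version labels are
assumptions for illustration only), the postgres binaries built from
the last released tag and from the current branch could be dumped and
compared while skipping the ignore lists added under .abi-check/:

    # Hypothetical illustration only: assumes debug-info-enabled builds of
    # 'postgres' from the 6.25.3 tag (old-build/) and 6X_STABLE (new-build/).
    abi-dumper old-build/bin/postgres -o postgres-6.25.3.dump -lver 6.25.3
    abi-dumper new-build/bin/postgres -o postgres-6X_STABLE.dump -lver 6X_STABLE

    # Report ABI breaks, skipping symbols/types listed in the ignore files.
    abi-compliance-checker -l postgres \
        -old postgres-6.25.3.dump \
        -new postgres-6X_STABLE.dump \
        -skip-symbols .abi-check/6.25.3/postgres.symbols.ignore \
        -skip-types .abi-check/6.25.3/postgres.types.ignore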
---------

Co-authored-by: Brent Doil
---
 .../download-report-from-gh-action.png        | Bin 0 -> 128291 bytes
 .abi-check/6.25.3/postgres.symbols.ignore     |   1 +
 .abi-check/6.25.3/postgres.types.ignore       |   1 +
 .abi-check/README.md                          |  74 ++++++++
 .github/workflows/greenplum-abi-tests.yml     | 168 ++++++++++++++++++
 5 files changed, 244 insertions(+)
 create mode 100644 .abi-check/.images/download-report-from-gh-action.png
 create mode 100644 .abi-check/6.25.3/postgres.symbols.ignore
 create mode 100644 .abi-check/6.25.3/postgres.types.ignore
 create mode 100644 .abi-check/README.md
 create mode 100644 .github/workflows/greenplum-abi-tests.yml

diff --git a/.abi-check/.images/download-report-from-gh-action.png b/.abi-check/.images/download-report-from-gh-action.png
new file mode 100644
index 0000000000000000000000000000000000000000..7c89c8af1729f9f0328ab9e05c3368daebb380e6
GIT binary patch
literal 128291
z(bFi2nT^g?a zYiAAfUnS%{eZv2pH;>R}jw)0z2lMrHNJcKLBBANXj(1C>_}ss8f=@KkteIqwb50+Jh=fa*ni}09 zT}@)NDERyy&`}i-1`_&@1U)@hwtMRlHSWD2n}wW5oA+JBWkq9EhWrt1Ks@~Sp_X8#{IvP8EvC;jS_6%t#gPxKt&OvJvT|vUPn`>Q z{u_I59TZpB#0?T9fdCma_yYt89^9EExVu|$clRU&w-ACmgrI}N;1Vpjy9`cn9ekL* zlRWRc^2b)K)b3YXdy1-DGQ+*+p3~i@<=5SsemA8LthR}x&uc{tC`hbOgoPSKtb=k3 zPyCSVjC9dGYeN|V3c{4x-x4y6jLZdW`m|U>@CZThJCiyI5_K;i)uwHE1;7X$7_`Xi zR9~5ls#Iu|2mulK8UT0jXu@IdU}HYV?FOiHSDblyJvg7?_qfhmqfEK#U;&1d0{k`} z=XGAYwMA{?7Q;Rlr0d)JTf3C6fu7M56I_E5*uZpXW0~-QV+^Hy39A%F{&MOOL@OZS ziu?LR#4Y1FN4W=Y3wGJBZ2DotA^kez={uVHEegQ-@`S2A|87(&SEiSN0rB`5D5C3h zb3pI5=hj@Fq^FC%T#I>Q6ey?%RJ?Y|l`Pe&?VOl!?cVbOa0Y4=;bBqxlOFGZoZt1Z zHTp7hUb}@=) zk!!NX$x-zUx^?MJMOAika*6~z<3J2JQBspxw`9(K?0alkKpUJC3M5z1eZH;t()IVh z+g_6-BPV}C_23%5&Fg3h6!a;&&7tkr*H61Rn0uS2@{~$M#6aKYjGxfg_&u_PBn;I| zROW5&=n00{V{czTGAMPeZxW5f0;Lfc-#y5ESy@^6l^au(93M7y%~+_b5AiUgEk_Jsn*);MeX80U}}$^>M@G zn14Pi6;3FS&lO_O)6$1D`A!1mFl=xzzGn@Ud#)M_96@gw&K?Ok*HiO6 zt)jcv^3GNzMT~KJe8n2D5u2w%6Ob$xz-%yAr@b4DwSz^T&GBL?cy4%{4QJHS+xBuj0#re$8J55)HnLSh+9JvaCl2$a$Yo6e9gT<;wAPfQYVezVKgc#K8#oPSqAJ()7@SS)jYju~NaBFoJfhSZN350!7DR=%G zPf_K%7#~E*j81nsqs|pDJyr=V)$^l4d3k_8XmY{2gYAox{N>T$-PVd{pTCp?^~OKH zl*QzvDAOCCx1KJ_%g=B0+I7!XDwrcCAa?M z^X0h?;NbvdivN=cTcxt-lRJDvpb7$DT(P}+4&Ptuy*&-0m-+dAL|$GVn_Ps@cU4VK z&-ZtuM~`A!3bMyoAh5-3-21@;zjwdDM69fOP0ZXZEVSHkAZMpoqb#m}!^L9oeUFI>qvoN+B9sGWt=bU;y9|XhI%2Ee6X2Xbd23 zmM&Net*W8@Tvy*fCg99`dD#o(Gn$&NPufgRm&_NGm2LfgkK8YN0i;E(S2K(H4V396D|6wK>B-OLe811$5uH%K(%qn$!1w)hQ$d;0PX1N z_UQAM`YQ8Z-^YMR?enlsAhVb$FyYqa&KPI`c_Jn$86tXf0hrOo0JD){Hv)m&W*_(q z9^*)BwICq#G_E~l+Tr#TD1!xbQU%bGH8xYOE^b#-L_k*XpYbHSK+*S-kh;S2I2_JU zxc&Ir+t0_<-O2eq0N{IGQFpZ1b0pfMU}kC#SoFi`>~H-TdvG>ZTN5Rt(x3aTe*nl8 zaKX04(68%%k7$kujCs_!ob(HgJauC73I5o9PzNNTZ4RgSuI_ae^B&kP)J8lvwztkv zdi7OBCH_$gcPXr1U-ofeb|E1*Dsyn#wIQOa3h;IQxf9yul@&AX+b2e^UkEo=m#wJo zudHo7mF^AeWx#(2INx}AxeHZ`gX03zAJ1RAPdhAJyY$yShVLe)q$oY9_eONLCYCF+9}q`S%EKO$dojja+G$98d)lT4M)`V|A_<()Svy|qtE@H&GUFO zZR_?ltk(zUk)&9cuNUAu8L9Ux{-EQZVh1Fd{24{mSTSE`noJZcfRF$n6NUiy<*$!C3ko5AmpkW=QVRf7f3WsH z1gIVnEGZ2*1RkfCW&j7=^LJFAwX~B4GXyjgH5~!h?*mCiMtqDx9(^OA1j_zYm4mco zoT^H9WUAU?Ycx=B1%MVp=F5ABdWG4*ttz5lWq^db5{!cBnW0{w3Mm~Gn<1*01(3{J z^g+;VU;D3YYD3O6p4Kt;-y+v2hkmZ0ZT)h7<8%*SO-X5D;a4YZ;)H~a?I^LGZJ(3% zK#Lha!a1e8&??!PvEQlBxsm}=@H$bhCyI>7kx6+}>=6O}mkqKNARc&#gAdY<##jn3 z+X|F)Ds@?$M$|iaKDkKUZLvptqiVCRjqBzZ5jww*jm0i}vwjHk+%g2hgf>Q8k;GmW z!t=LxIGjB8GK-CZ`nmO5>-*B8ikU;ODwE>Y)&TXjb%vOO`GEI5wUkXwO(e}{`Q}4& zr)x_V8UACMWigSFGnH<-xreLHlmRE(pDw^@*S|UzSJ!;~{AzUUyy_>nUG+lr0mT3f z&C!qRg!yT1JIwO6-jwC@*xcD3oIBrjIptnqkC?NQ6CLPr030WNrZscY`yjuWSe->% zTT&roz>SX#Y`df~`P)^%<6v&F)gkl8FB)5(`P%*MwL|iY)ZW$>WQ*bXwjVH!91T() z`<;ctHRR12r-=lN+tg7daVDv`as2TT`||Z3kpBB;fezr7aBnW`s!a;Ijrn(71v-BIOff$4UQZWTEVY~8VUY&`i4nla%96;S1&!nc zm6hF9N~|%l-5uZgbT9Z#r^i!zH!B8;ic5+IeNL*)KP<4GO}Z8x&JYIja2uUettVCR zt+oZlz6Z3P#)=s%6#|>EGrM&@q>m>NO$=Q>Wh>_K;7gAktsv$3VLzY2V?4o@W3n38 zdcApqXyM!1h^|`K%CGxb<~?*ETO?$)hA@#`UR!5@ zbnBWq^QyeO!=&So9%B>1fV$snk$yL8YYpMQ&a25GI<>Yi5fw2E42&Mc$-Tx!5wEeb zccOB-wRQmqS47d|{r>h-U6$|rr|b;<(m7mzFSF+!?o6??bWw$Io^P8_))3n+6|H zrC#0D?DsEDJ-@`8A9t7YTVFXkss(magxh6`zu0Z{;Z*A%Yj|OVgz78brOT8cCLupr zbj~VnU^~pbiX5Dr>h-(4W1GxtE6awMzapR&fZ7AKg|c*g7i#ydwuaJ4`5fPXR zu89b>Ra8{mgF(B=iS6d9?Zy-HXy{QD!l0E=pJEF$H&Lp<2SW%Ql9W%?V{ksZ{IXnURD5$wgiP24bU(m z%umV|?JrVlc8heb;rlLL6o6GCO-7spIiz-4CxSfWG5>qH*`{>)0-QW26T3&R>Irjlva3f{d>zQzeM9 z%uGY$))upkjd)=7;leJPa_;U!+|j7U5@>JY8h%Lq+;?wn{SC$rF_}1Oe|oH6G$$%5 z7bC*;R_v>)M#d0Z)6Ovs&Cd>7);R47z1k9XtM(x|gR1~$;pn5wz4({F?q z-FhOT0z##nq6Jp}fu%pRoUXN-ueD>0P%fS<#q2l8*m4IByp1C35wB_A 
ze?miVt9f%B4wt-}@3t*6U8Y}T+sAuI)afUldDJfeg;g6-lC@=fTTt8-(Je3fm_AHu ztnFUo#jTc(^-@Mlu6*j$%Kn^JU0Flj;ur_lS$`g~@OZRdwa13+Px;OdbZ!t{`^Xn5 zu7`^r>S{Bvl-BYITM!^a;W1a3id#h89%0Gn^U{oy-22o3`2!2TUe0(ehGbv)zks-*ldGcPjvdU z|8#I@HBhWaPFWdxS%)I%yOqPkfc?LnG)7!WR;aRQLa-Vxh)}N~D>F#F>c}T6BZIfv zD0KdN-Nw+U;^ar9Lt@$99Bu81K&y(%82S9TM;f0K$MMfKu(;`ZsF9%i!Cb&URYhi_ zJxEZjU(r;W! zxf=aJHynfrp9+P1(l1G`sI#-FD%jJf?>rVqO=h7Nty7IWVx|B;VTY_Cf++QGlu+z| zrl%Nn`g|nT1G~t(E*J_uk4i*Nyy^&-O67q7YwL3~G8`kHs?+SJG)9iRas?V^41Rqc z+Y^9nREI&5`L=m+sR{E*aoj$uA|cx0S&UN(v*@i^I8!7 zVYR3I)Bw>|)9q32crZ2v&|df?Q!(-SdIx#AHy%&U=)id8e${t}xZZ(_5HV6-jao#ldH-RrJQ3g`ZJuKvR={Qwf)_Qet`v@JIf`r>yEI0D6 z5E?%|@#U6OBpVbFxmdigs||nbL+84CJwp>m>NJgZWU@J`us_eJkMgxT77MzbiCrX} zW4YU;k@6NjeEdRC+@k>e_2}g5uOoMlm*-Xse-*_KsZunOxc69ey`dx9uG1VTgXUG5YTpu4hINKgJ_Xh+>4^6g ztjtvGVUtsnI=ul^q*u){G7fAqiHu2z{av3r?OCQhuYAR;o2$o<9|KTi{hJR!WwZS8 ztE(F#BBG6S5rV18c>3v`D1+{e1y+wW?B9^n^QrA;kC8xa!Y0-HpE_Qy-=iY_E%nM< z{FBq-*`5E}x9w4+4?VrSP$i^pt_U;ubKetB){_FQY=9Z>CuC*44{R?hAIYV$(-Zp> zbNG<@bz@IEv7g}i^BJ)<^v93+(-rz>tNIc~Cl>5I-oO9SzjzeOBS4J`YNEBt7w*~C zkG{8T2zv^M42i&O;MYnsJxYndiUAe(bPt(--;P0j#%@XAkZv((NK(@X%3#p#mUVh; zek$d{=>P0nwG7t3p8~n(s@8xs+ZF&AQ zf1H-uI_mm-N%+&J`$bE_s%n+Ae$eq1d{B)J!K6r&?3QJhb#oBt?;fSZpDp@~dmz-B zH`GiUcVauYKmA;zcP=&nY8rVAneWa*cHuy?OaAmyfNkh10+2j*I!yYqexZLO7K43- z*}?J)A3?mG>XT9 zxIS#K{9&07_y042aEaW~Ylk&=iM7@4KXIftRb#ClG11Dg-jtMz0=s^`!F04KpYEqN zFzQ81{q;SSJVbx=P}8SHB2YjQ>X0J?Y@YuLaY)DPdqq{HEQKvUHy@{ZJ>r}5Sz7{i z7IYj6+}KZ?;@g7m8i%v>s*<^*o^C!?7=LL3jVtT{I*!AwVG6ylZcy-o&&5&keE&3z zIrVf5#_9m8Gz01)>J~#^x*pbE!YB%=EHudL)*j4^PpGdr|4a4ec_&Ya+#5Sihp>kQa-IIA9JELOvUv(Ylm-no z0fD=sfTnx#!uMHAN@CE97cbZ>j{gQIkNcwprxZy6hcp)z-dJfoR}R3T63; z%@<)yRwdXpX-A%SvymaL3vER$MnXHwE=sLCQ80 zEYI#H1*>0QLahjP92>;MAR0B;m{DN|S#l64&wu_XXWZ*zhhn9;|HKSnLfP8Gubasn?cIKZ%KcGbNx)zTL_ZwJ^yrx)tAk-J$ZQht8$ zX|LA*%8+{Z=ztG|bJvoSE7pPX=9-b}+Fh5dEdGBV+d_Rvhj2ap#*0JX+bc`L=!qdM^!b1}i6Llaeg%i`rgeaMRxoBa; zRp79jd}N=HjSWyIfnmK6TlVB%PT{TIZSCuTj|#>7jVLZG|GPN`6pU2sua*Ys^W z|En9wx-_I&*GO7Dh<*5BQ*;Ybqr>#=kH@<%Pa8rY-~gc)1_*|WlI0ejmry06iIQdu zV=^ojx|h4c+tM%ZWnDetB1vNJ5pMzQOX zPHtw*=&)w-q&J`VJ7Dd(WPxxpkZVaM^c)qctyAD4snLOG6qdgK_~bA9p=F^ZKv54w zL&Nyz>z{xB$GaGLrH=*(@EjG{u=OwnSks1?;AF z#T;X$^U2S5_X8h9%1om~&w|d-x1>{y@Z6srLUOc#IC@j%ysAd!^=HFFm(fhLq?W@} zgDB4FzoL!!;>YVuFcSRz@Y^z4u1+2!W6i(RpseT3!?)Z=IDwkNNZqFg&${v~{-vVw zIYwZD|DnCBt7|Pgf{e45s9W--vu2c`{^XTxu7+SbtD^V6GVZ@3{r{5}tVZ+a=bd*? 
z(|ux+Ynqg{bx@0z21vUP=#_*E^ZYy<0;nJ7Hbdylx6-XoYc_;UH&dS`LrU#;f3idn zHQgL&HV!0KQ!G^*%YGcxNQUK}3 zEg~tW8g<`o4LxlRQAj#I(Yy&SUaHxX%@AHeiU_h&8|w;lW}p1{8Wu5JcK3d%@jQ@U zQI5>b|I{AQBbRn}UMZS@3D3|%%zd*$4&Z{E4!n$%Y)xHlO}E$&DGD@9UH8)24v*_7 zKo&cg%_cOtCWnZB`V(r-qqgx5_R>e{UEUpnR1LJ6@~>w(D7B8e&z^dmEB;>KuXTZn zcq|B;P3aoi@jIEw2?12z=GMvl6wR`&)M1)}5ac$ooQ-%4-LcV&v*yYFQ38!5vnUuH zuNCy_7S$#_&uBe?rVEsNAMde**fJqGEA-+1UGpNId%S|YhEJ$$>D~RH@8_aR9>H2T zIe-#~Y1)~J$U#gSjAGVt%*qR9-D(TCN9v~U_0R(l^(ME25OR_=f1 zOyf(#}YVAfq%lfEREV3D~hdw{xX6IM`c(>3T24$1KvYb!&gehEA%aTzgm}}WP zt=JtNN~U#`dhBpmL{hNeeA`w#j6;@SZ-P7TZSWk}_LK+a(r9m*Z3`J->CZt{f=8ta z#&RS}vh>uoSpC!ZdP}oS799rL-WS#UY^`Uv51z&!f8zHW-SV+HV_K->?@v1FPY&?f zuA%R7g%f~cKh;ENrnO?NU9Qe`ZJ(9)5I$vL2;KaziIR9Oe2)I9b`_==5!JRLuRayWAbb3x~ zy_Q%up@pk99%$a3OOf8XzPnCq7;ty~fZQ64jy0`{_)I=i+qK}no;%!feo_^KT~N|f zyF0GPJ0m7`?subZMgbB&$Q%Be=*xpVbfF+mNIEO-l@5oY1KRF~-6(BbG$qP;WI}-E zKh(IwrkV=!avj;EfLR< zi>s=}^N86kpZtMH-s%X&Q0z0pim>02vBKVTdIJ6v-yZd<78$su^NW~`fAUjcw@Cj* zWMyGHUn6PMc&hti&Q1Ea_VBYuF+2695gFq|%t?f_)Eq+Vo@1QF{o(`KJ(yxe`+3!M zZ*-!h<)oy@M9iHR1<`@CnX?<9?1B)pjuZJu)YNw9F;dS#pmF_ocd-Ky(vu-PvF| ztdvj4Oy`W&uHnn`@h(m4^iXl@aq5@rt0loC!%eSvQ^~2RS8ccq_P*PIJpHRxWD7&3 zcbtA#S>Yhv;Dy%>dLNle>0bih$IK*=Bm%F&4tMXkEZTQ$Q~6jLTXf5JfdZ2Yi2O%o zcN{6Yyud{4*@14&9|V%8Szsy`)L&{5Od6C0qnrC{+GE9yd!wmD#8KUSZ~F!x5#@8u z$V3e1lZpfr!lUi9&%1Zyy~L!(B8R8)$c22DXBsSgrmA|b^__&5xJ^Q3gI>z(mT!h2 zhXULIMoAA1-9I{+9cJxon7alZ>sUn%JhlPfA8OpFE!H{WvQQ0{YrJ&wpOmoB=!0@| zhDj`w&KJi4jl*jpFju3m;#cQb+wop+=1cY2Wat$Bm_syG-aH?r+-+TzA> zX`TVn{nhJfUm?lKw1=sXA$kuzBjF5H(wq>*XPxhi%5)OUJdra1G1Qe%M=4t5%pcak~5j7?}g zj=RMo%L{fALzsyR8tp=!e8jYo4o-Q;WT27*%K8jJ;Ig$Eji%gS&WTpJ{=1a=UTi%>Zmd&yi! zW*|tsrm1k6ZoRKR6Eo1;cA<*|?3QI3T22Q&^S!HK7$w5}Qc4r-rvI&Gn66foY3cG5 z0~D6=^(>?jSK^L(1@XXVz zbV-bP$8Kg?(px|{N<8Pb!&{dxEtW%>4t+nBlwpS@07^HU&*;c!k-D>Pcd!wnQ=5|q zD$pf&c-M0Op2*=$X*icHx>3%`m|Xi9-9eEd^pZzFDPJ|Bz>Nx$^Ef88cDesioS^>b zYhc2b*i8%e&@pbbH*1&drN9cAi4UnX$@#fyYM%w@57fN8+%^h*hOke+Y z`;f7C)MN(3CAROQEkD-yntURCV(w%4L1SA6`2P>)A%H!SBbQU6=h7Q37aoPpDh%?N!Gn*xU7B(w) zNmqZheF=YxW;t5E)?L6nU;(K};u zTTMZI{>*7|vA1cf<3a_)Co152TG@YU?+~niJ=8Nd8w2MJ3za&+$@C?A!Fios2zeI# zIvzj#8>K$)p_(O3IZgtdM2=2!s3>L=kwwX4v_rG<*vH=cWK7Y38+@C1lUrS5heFkH z(ccDRqD_Byy!p}GuoTzR9YhT8PGDb?=re>Y;1|;S6 ztV`#5i@Ak9;L*(xTm$Qme*mH{h2MgfuvU@MYyuO0EDmsdlkIs)_pb)aKJW|S;N)~|qKBB~*^oZQN&I<0M3pMhO3 zmDVy=QFQCJh-iVutD3Sh^a*~gpyD3iTj`!lT9JmtAU&*PBwFeRz9uGLubgzSTx0Ls z*p7V#w8Iai_j&En+G^P8ZrvMzdPHZlK85C~9n&)cMHit46b|Gf&O1Q0OhaGJfPLcP z7rH$csb>?vUfEBf9u8^i-*;yMeEnR}l$^c$4dT56#iVQBXett8l}Iv*d;oLX0lf$y*wsnD_#A;MD!rBQ*Ro5%8!w4nS;; zl!pWE?2I(^yYZHtsLd#F67StV)Gfk|A;W%?jq+Nm`*&y8i3V39J+v#i85zPH)wLAj zsYL$e>mRsn+rDdGh*_l;p;#e{{n$bn&GOCYSQ&OyOPK_Pvi?;mB~H|}nX+>qykrNr z^mZUKV$a=XD0}Z+S2*fjOP`^19uW%wl(QZYU{ju2+;rRN!q%{VzWnq;KcvW7MLfpe zF%&o=ksM>t5>PlZHw^ZI^K#+u0Vd;Gvvywz_Xrw(=rw zPxJwKY3uhy3FgKYY3Zc|{of}SI^8MNjdROt&OiL?!d z0`kIIv-XDGru#0Uthq_RA5n;K^*_fOZeU^*>+GKwTwUZnt4QYziFBIZ--6E7=ilZ( zgB=`e)^hHyz<0-D(kMmyWSuX5&{=Hon=2kV6iw|;;K&kw(*i7?*u><9W#Qd3?X@*{ zVw47Ef2rao+l;pBN?wPr0MnZ5s;Ig{o}p*IeeVsw@Wxi;zzwC3^_!YJE{JyTX(a|` zNqj2Nrs;Rzq*kV^S6kIhpY+8p8pnaoT5zKlxlDFe z6&mVuyvAwUUgp-3NnZh*Q{KZZEy_&cBDcx&^S#JnLch#f7)pyiB4AiJ>6Mk`>=zfy z8!dta@e|vChx!=3_F_bB^x>%RBrg|={PeF5DA~OB-35meh(oZgk?l>nF$zSn=a?|~ zA#7_8S*yMQBI3E-4yOsr>b&XEs=UAIEmp(69nKSQa<`hvyrFGnm46dst!-yoY;+_V z_qfK{v_$X3ELc2zOw*ne7tB}CzOtVT3#hG<602T8P?F5&$Iqp?yy~aG%O$9=D7|5B zmJ;(7e3mT4x}=uTyjlFM`o7I(WqfL*Jn{Pyk`5hDx`JUuTre~-(k%14sZLiyrYgR` z`_;?0`z4JndJQ|F>I1}$Roy)vg&K|@YdXrM6&DEAQYu#0Hnh!s?z)lY&jMbyYj#3= zMvm#{JbQY&s-KP}nz4Ev#tlA-POGk_q3Lle(DABg=Co_Q4DRX_4@#MX9@m&N8)Rs+ 
z=0uU5oE=KKj%d642?V_L9*yOO<Zh=}>@^>8 zOYxrvlnovMGCyRaEjDJ*(I@Nd+Uq4us51*@E zogYU=_CX&Dzs|8p3a$44<<4iKEK$Gjs1xk=X0;2`&u-vL*n=Ej@t=>oAiZ*IM92F) z#_w96K%{2A6hujK^mXZ2yX&3A(Y{)_8+BS?j4gK(b#G2~R_Ti&A9AAYED%$Jphq^p zrg}#-ou6yu@==^Q0e6cQCy?Vj>FDRT&wcMZ@5X-&aDYZTaVX~AvQYol+2#^5C)zGgDFv&e6q24!hvlkjhUsUob>7o*>htYy`WtK}7J9ohL(Bouf>k&1hpO)M^IoVu%Q`=v!itks`hO^8Mxb)AJ`aPG)*LihSy^rjTc;Pt*HQS8hU-y2P*+x@Zd0YBmg(&wnP9pcSS zrG*Df>Z*w}oKp^7>;sJgn`m2qxIZZJch0P2C&?re13+pZ+k8}484VzW(v)7?oof(;NY{K(u z0g@&hkHYZP1Nt+3Do1FkNmLz|(*gez>$brldeZEURzh>6`|NkG=k_692=LR*GA z=_$6eiDH{G^QH-<^>W0&yk#;bvOD|LlMi3s0?zL?dBsUbBD6Z%t4`OVQQZ0?;Z}pE z7EoL8c_Xo8zKD~%O$y9Tm>LOJh_b&f)vPc2sF6Pz9?BA6;_VJ0-y?kaKKJ}ts{P1SQdIE!_w+PHSc0xzJ1DYOpzKAZsWv8aS4E_ggE(P0NP5+1*9PeMEVIsA@q&-x_nVqSn^e^JGun-0kivVp%g z96=_E0z-+p?H$|V$t77_=b8q4U3fK{ZH0Y3)5<~}B^vIKCemU*U`1wg#cJn}6Ws*W9=WyhOi7+WZYsBM*F8Ba=JCEpF0 z&e0(KffW4c@~9|(s|aNt?71*S-@ z?lmv99=eTle(?M|QDfdw30)KNvvyTC{A^h(vYGDoxYwzpK|JY}i#mR?8%s8vox7R} z*f#q&2KG?hZ4$IBr`sg2Y9F~939_Sw><(~Ds*6VeDiwO z6{(Dvad#_R%fT?O|Bn}XbH!ovkL&5mH5}W=Ja`MC3%=m`6WCd%ngYzM>!csIIva+h%|X3=5K1@X6-2ptAACA!%s=%oyn@RV6bA;t<^}vR-%+4G z*8>5!8S&-1*1_dYyWuZpwzJT_9;&QP&zZc0@2yPd=Nldxh>Vh_*yjV^ZjKEj9N1qD=|`a*3!LzXFf*-ufzKd;d7}B z^uw(q*N}<*gGFnoM;yvem1F>6*D| z6_}`vd-WQrHv-UO5@)(zkidL8J4e~OPZ@&YQr3M$sn`@?69VK&k^v1}WEf|9s}9t> zR95vM`kmur9q-ZVpzUNOd$S_FgKI6gHbs_BB5l zC^LPdcDK2B_2WBw$h=D2xHGNwnQF`#PngN{PcWNCOtR%ncv~+Xdhjj@ zYi_CN(oJu~=@^8Xt8HC>yHO-=r%_wH%oNZkubMaogas4k!W;<97&Zl*=L$x?H*<_Yn`O4mr z5(=(o8)EHcwa!%;4eN6sHpAQTHihuFfW?`;y-V$_rK+N!eDMRf{u*PKc`dw^_xl~E z;V`c|xt?|e@_LljPi$C_PVU#WWv_fn5#UtiqyhMbRQJuUMv7Zwi$Q}dC9l`!cgKK` zj?%Iw$9~<5-X?g;Xp9KL6+ICC>pF9F(VMj|wr7^(R$W>{XIM1{<>@wG`Y|a#IW6x&&!{z&Y^k z4Zfz~aXW3cmG>227emU2I7(N2a+iQ@m@ZZ*D(H1F$SA=D^_1pfO#=}}5^|SM_m;bj z*?vhGq#HSq!gBL~piUIaEjd3e2#=EF1zyg@Bx$5XRA?xZ$7#YIp`S$L&ij$@7lO}- zr+bp(GBVb1oi^<6sQRI>-%d{1EdtH#&dZ`WiZ3Rm9l))V(&OYN=#o3)oltq?v9#TZ z`+>+R5HHkXc8~AYECYe;-VA)+GAO|y-|;F|!1|^vc}z{6oo@1ME`jBN7_8Lf%hbf! 
z%LJ7OU{YfDSA#E};DU}L8ic8;9Rnm35OMfB!luerT#S%Vjp9kmn#t0<-PykQMkX5A zgw67Y1QO-#4)kaHCf1kza{|m#7z$hUoh<ZVr{Hre19t z;pUrgT^UHak7dl0D6rdHUUKoJ%$$jxD5VSEd1dIKy5;V2Lr;C%3&J9{UN%<&8IJ=( zzmWsske(W_N0AsM;gnLPDw~~Se^Ie>ltSm!$YZQ@ERz_gnA_W-g-0PBA@b=Xc`K=J zO2^dw9VYxX%MOYGs`@4Y_zQU-!YD2VNeD^_AZ4S=x-L!+qsm&?*|1Fx0*(vs)nNT5 za)AxZ(l}!ytHGQ!E{LX@U7y?Q3Q6x5B%Z~`lQ&g`a|Ay+)%}vD1}4z+@ZjmM z&#efoo%coJ1L|HgRZlw@pZkAJcGJ;)mF>h*R$U;gKM8klw&>8&GM(J{zV2E9|2i*c z{}3zfs`!QxuK6B}CnAA4{ z1h~3y;LtjeLEUo}=2wI!N)gvk>*X&@`tFUQ!n%clIxq6dvwr(FGJ(t50xP5Jbaq+l z8#y>07<+&2>AEr7zpE&icuul%*j`(s;ps+l{rH!5BUk-DMsovC+4&hxdDo|T#7xvY z-C(5fG_RPJKFuzjZ;6S9mUfZZfHq(TAQ+Z6zgr9c!=A&nf1}=0LapL&>nD>^FKT?~ zs6MI7q{M}PJ8;- zJDiwPlni(v&}lDCKG}6b_Vwc>>++v-eFgcKj|t35X1_WRRb(m{R#m-XX1yDOaOuS7 zHr3TF9F%9a3PHBMh>KO34%mB$ykl?}ZO5}bD6vm$7ub&=mW zsG`Oy(|$fTWIDKzY_>;Yq4KE9ZFFxj-3|vRXItsn*kSK6!^GsS;qUJ>F2L9%08hud zi9H10mch@ZO;&Zg<$YfdHCapdRbbv&FJP1%_~f>5!rCH_PYuuoGwW-HQi} zp!`J+X2V+NTbfIHU!7RCfXW3EVWhlh%jN_;u4Aq~GAEK{5aucyEW6bMU(eO$EKCP^ zFCCQ@t?QvV+~P}U^tPbY_3zWTBl!pP!YguDtGgZnH3Nogfo?(a+xE{ghzcgEK{p665-wRV(w zpB}4I8@~|pJO3OFSNC+=5h|@5pp6)>3CxdDeV|{aJjo zRIoKLjZ`m9XX4#}Lp9S&z0-wGlTAT>nT2d3xE{1yKx|Tvg$+6nam7L#;qxC5niKxf zJf@zt&FAiSyM<$S(ag=g@>q8b&`i-1kB)d zKqTWvjH#oZ1+C_!CCqJIipu~L)RdSvx$P^i84T=9`cHrl9U}x(jotauKS~7Br~iQ< z0x(T8$B3p+#wsYh`tr4)s__uhD|_MvplXQE7z_w)@Vd#PIW7WlFYmFxd}%|KCoUE;%-H#yTm?b@TR3521`Al%|3EKj5rwQnLj__pKG8&np|vcRb2gmsNhUc;wH)F`;21uKy%YCAvB zd5BAk5&YK{$*4maK0u?w|Isoi{^Uzo;)>@b;JN@CmO8$z(tz^XXYs2RtG9?Wpye7W zVRjJM{oVio!v%C`N}ne+oL1Gcb@DL&U1RG!CKM(tg)?MaTf_fAF05qu;X5E71`_S4zCOI0b0V;gO_|6$p-=){jMPh6+sAtY9YTu9G z*{me~G20z8=$6bZ#u^o*XJcv{FU!E##s4i=Ve6uYzhT??onK>922PA{%a)4>?C`>^ zrQd0&arol2P=I5d&3bzpU*Hi47|JyovaRO9w?TBm+F$?4DH6^8wRsKHQUk`%vB^>8 zSC!n`yVMF|*7By=yUs^5*H0ZNt?jE~iXL=7I{Np_ zF&3vakY1aV^u9aNS%4hWtj;VAAZ|b#*~JkbAFwDo{plToNH#IY%f%PjJkrZp z+Y?jaNM&HjB?l$BIuKlFpmlwMrPmVq{pCXase5!n?8jeHpF?OkPxr~nJu51tq}%gk zuSl5sjZ`#^FM$D_HqBtGRjQ-b;SK&|R0A|eIZ zqwmfp-}wstPzZ{`O5w&6kdgMkBVJkgJqO<>gSgv~2|bB@LOnXq|Nht;{&NcA=ssju zD5jZFFlQqDDHKqTEsfblkYsjqSvHyjN%uTFw6ve57rvW=D*k>*5YrmKAYfw^uZ@kx z$Jf@rVnlrvQAS21V?Rf-+xi>NpbnC9&zhRv zTVqtb7)&wh#WgjHe_omNnm;F(-Q=+jR05(?ppD0(^L}5)bSv}Y-jxfp-V;*Ni4q`Y zJdpI!VmSQ9?INDZa#2UPy0h(#FkVl4U_H4QH5nLNQ}e}(EDaXdz0k!*O#Y#RMMtNP zV)so<>KfN>uT~98cHZqTgq^v{C4cgU|3gUgN(!-uxVgM_(b9RRh*MjuRr%1_t7Xw< zy1PL6iwwIJE(wpC;ksErM*Z>n80|BPXR4~eEJ#T&FaP1^H$r_ zSy%>A)YsWNI@paL4h$++(Up|Ec6=Ypsevz$8MN65_WGm&`3HC9$jD9hV?Cpq=Jt{> z88|vam@4*-Atkw>@!`^^5bxvr5k$oSx6CZS;$?_*9j$c9n|<}Lj_-+)_1g%OQX->$ z3sBhik1HJ&PBi!LUyi(<`VSW1bwEo4EhENa+O}2>rCU{A>}>6P6GRr`CR<$dX$k78qv`kcXaaU-A53O^9?`&lg0lN zQC}UF)$_c400EUwk#3ak?gk0z2I&UrE|HXw?v@simhSHElJ4&M9pLkQUoQTD>zwDA z-Pzrl`<|KIm22XipC^o3S(Cu!^5=ULPUG!JVPil!YzmK}7^&yjnGkC82acA!wifZ; zyxE$?0DJgPq(3dZ=|S)-{Yi1J{XVwqMSFwW*8F=wP>|kK_GVr^j7>1j9=NB%;e5aO|9| zEfCe#tHVSVvx=JjQ*-wnaPZjQ&8Ln|hqjiQ{^}R2Lh$i{12ceNW5aF-BLtq5=p7eV z>nk7B;%jq67!rE?aNFJ89L`6~j4qpIh7I0nH4d)S@7bip1(d{A?QijX0x4_ME2ABD z&>s|CYf(Q>7A4J%7N?+owpvV{vwf&9Q>IzxI{Xk76(gB#duBqG&h4prZ()>ECWDt< zScv+0Fe$E1rO8v0|2gqZAuqr;}&M3$J^+TL+RhaUxV!x`DH zvuroRPBsSS!2`KInvZ`@Ox&otyUIfSTxFW&ej>aO8JlBouL#-ej{w)iNNG7o36U)o zTVgXodER$?Ib?aa&vyRNHkm!+RrLPsdSmnHW(;w}=)C6jKarJO1uY-f^Jrz|-lHRT zS%>Y+SJ6!m1)lK<`MAvDZ$7YD_U_mOHVJ_eip7dj0Y4`e@`*{JWFv z4h7574C?AfmjU^vHl{z=K}n%p3DIS;NwHz!ehSzy27D8gRS*YzmMK24#nqL-U+5s z8E1n29w0>7;dZ#rCB8dJ&(JfaXZi+^O-09-ilT?+_6idV{~4M2aBz9Y0ep!0YYR;* z*XYIr*zYP|xv)fErlQA)hNgce> z#uep4Me5KdvVicFL0gw+5H~ ztGNS*)WS-bkFFqpM=>R&L7>y-C-y0(1xU#uyYDL<5tTj(LIUv#ErW=gLxr%)AijWu zo1V!~q5BZS?u!1wg80ou7$0%uId7Xg(#H!s&b_&a;%ep;9?anyr-n+Aod|reKD7$>UvB 
z75i5}{v#kPva+Lewx`w|9vob-oLykh03+?#!zY*Ii6< zDZrc8BR$(MoiGG9A73cK$(j4}&fz1r|4cQKkbkQQ|M3h^SE8!*J{BFX7oN zz*N=DYF6^lz~}xjorxV7!Q`qp{^9CI@6yWIt!;^*(rm>SDVRIaY%$<3yQ*4)43@Ox z;WSE13%fHifoC8J2{;Dr|E4%p!e%Qoa#gy+bsSlL$z$O(jkE;iW&R7rd0Nk8;4m~j zf^XP?mtWUT=si9Nxgf|jtQCqG7?V&^US`B%;^Gm-t*-UBch!8&05{{Rq})rw?S)H?WT7B@z%Bjo2a zyoRSfA&;wnTi>5f_#wt*O6LBvgVN(w*0Od%Y;vJ`h`d%5q%JDz(GXvr7MBy}sOvch z5~C<=t{IDUy3z%%cRoVr8Ra;iiwO!P?pYZJB(kh*2Zi%>R+`7N?6M5SfV@RU=4T(= zpy|yJU-m%}*UrVp0BCoKiH}ypYf|VmnR>AYIRU?^$e{Jc02j$VL|l!8ZEXSmzr>9yXue0wrST}(q&U~64w779PU$1QsXmQy7&9YRO2gsyIA}SP^pENxZ zk&8B@q#)?%fDS1u{A7WV$(6aRjNXm)^PUzCAR59ykT_lc-gsz(mDfGKu0rxyvhvm+ z;ZH^T<Z0=L6@$CGq(?H_wJB-lNyjr&l*1oNZE1tT$KRTJSESxD zj{YeYgw1Xjd2nbT%ks%4>kJpWy})95+u*jk;B@>Cy2_%*i9|FUAB6Ny2WuKl=|Zse zr-(9X{$Ybj#l{&f@v?j(j;yYiojKB~P54kSU)FoSjj4v<1kh@I-GEPI?FdAKfU|=7bGwhrcidL?!J19lLx48 zs;mQIF+n8%@gp`~(}ygvEH|g`{;h+EX`aDIyxv7Lsl0v>a@%29*D(~t8KtC8fF)W? z850q=*$0Z<1yw=;FFUUC%$?{GE6 zDmP{$R8~%m$yZqZ;B;6Nw2*EEt)5ch&j_DrQ;Q^YP==KRC@ANG`6>NLAJ4A-z&~C*77jX)s3JSESKZ*KM{BwfBmkvgS2wQ10~!jZ=^rv_k=nM3&*tZ$yIC2 zvC!6ly;HKQG;eF>Z|FB(KY6%)4KFPGO1&DBQ_`iU_gf?H@84f{V~6De^U#^^3X&|C%dnR^40azQ**(eg1;|* z@9A%dKhG2L=@rN?U?4>N!?7EAO|{v|^g2ls)d}|il8kly&i=-H%jD!@l~TYZZ+!i| z{}+^?r&vcX;uA>34m`#&Ft;|;fBKzp=}~j(aa)VREH!ScMVM=gJY|&}3P=u#=v}vY z=R36MF_ZVVRJVU^!bf*1%Fq^iZM$r%j^6T0SsQAN4bke#YC*Q;*n0EpQ;USHMOC%{ za1ik|JfVT_stIfVcSZz`!CtdODf>C_Lj|^1ihC|(+wws0cJL zMy&(3^V1c|s-3x?G@8lQ{uBw4w52%`hXfpGSLZ!0ytaP>gxhPu0RA3+ltkVd0p;mQ$T(tCQ3* z4;F0Te#3q2Sus?p1R4Vn1(6nx@bh8HmOuPL+Cs{#6Hkk6h>rSw*-vS%66ZntQHW95 zlGLJNG3Fg+cLP3i8}+;Mqd&g}4`}SAq*M;fY@D+%{azg_Emx)VWT2lIKFi-k?atL9zOT!oEA#%V9DnO>Ap82QPU(gJn4 zzWJ6BhUzsU$K(1-FCc$v(j!GiAinNG&LfoVU3bEU?-& z`9+yx@>|5~5>w-@nChpy*}{>PMzU)-7N=SJuw$n+7vGa5l@oXZc^EYNir%gQ--fvDZ&v37@0=0;p*$@bs>d`GseJCQ2MB8;~SVKK8ogJ zoGrpj^?11)fdf5j7A+F;7msR6ClUfV=~BJsc6uU-?O=YMWrR^Djq`>@2}c$`#x+|u z%4+rg+avEoTYtoUNC!X|j+o44XO6;TiK@f~!oE8f_iM6Rr+1uh!G0;VJ-Hqci8q|i z+Ley}QC+X&xW`1hN$$PgO8XF#FAw3M?eBl${#mR3lgz8ZfhEhjGtbe2OK)S?aRVTf zm67S2%8hV9WB9kuf7Y|S8?~oVkGo7L87iR#7To80hULY{rrcX?|W_er17du-l?LzC7X&hjn7AWyMG06pGk zYfh(Ll2+jH+fi%;EIH=$W{2lUdmvY^{bTiEBt|P-0jZ3hg+Z6++iN&X&~E~J`wX7- z!=|8jD!_hPH*ep)Lw8m~1pdh^Xp&y1nJ>;wSeV;$BNJeKwFy?p)k7lP9xn-Ki6DIc z)7Eh}vK+9lExxTsCkw(nyfiD--*tfg2L>JZxS`(icl{M%^4+aIna|Q?oStI4I=LI` zdD{+N$8A7)R)de+*Gp8~LO=!how-sMJWH>?up-Tjz0cU{E^!uk0 z2UsQQ_1Lj7u(Qu%p57-b{p9H0Z$8;p#eN??5=D$RaaId&6-m?e4{wUiq=Tc=y-&#& z!gRU6&TMp~hOs(y=;4V=^=r90nJFfsyw>YV@J#BwtYnMb;{R>_Wo&TaGVb2%_gO^` zA7|FcFaeTv5?f9{gS1vD7V<96v_1A7(?mnWj_JKjV^Un$wuNOT(pI2!|Grk;%oIf8 z#f|+hvv+v>VixjCvlIIDONX8$NioDkauduRoQzBm>DD@DyA<}=Pjf^3Q@-Nc6raGY z{VR$cNAtQ)!Cb;jN)RjE-$Hqde-C`kY)xwqGVIJ=;WpWgmq>fGjxuJBtcrCwUY{$N zu#$VP`<#CHz{kfo59Tr*cJ)U7TXJgu<(491{r%u3=1wjGM;!wUMbGEXR*h)>KT;0yi0)@Zj5P_KI-wWn(?dgK0(*~N|4t1M> zm!)m|9fV^SXgk8%H48zy|Z6qr>jS7@}#bv7r5lby#%Lu4y=${z8wBX%y z3*^o*@j?-B#^(x^Gei*tp3M%)e@|HTVb*K`FkVmKPWU3``_sBSiS=~ceOK9Kz3v?yOK*e%FLcW-7bwPC)eKB$L86JN(csz=wx{8&Q+th zZjR5MCAK)VG$?}Mi-q3ouVQASiJGIiRGNK8K`&L;7Tw^0e_K_{dFVE`+owE>mKx(>8z$s2bwO|?Rm z!U@teO4o#a8SKPnA^45-g!z~APZ=aesUzCQZLWloa@F}e7k)wogx%Zmt^0LP_hvj2 zh7TerS#PLa3PL&JhlhB8$>;Ljk+zD!>MP ze%JOF=Pt8aZ$Mm4T0BMys=t&nm>+J@7baLug1twdrUV*4n23=cYc0nP zZyL2_Tz_gWR!;J@SpB@T6(jj247109Rrpi2$yO1e4ugEZHB2iqM@!6OaoP?f;fIF|IBgN6eKWM*} zcuOCAOuHTk>Lys|Sz++eaVWU9OSE#i+*}9o^}eqiY0V+tBTs)lM>fw5H~vu;cVqas zsJF%@t69wof1b>dfTd@FefJ<^zs~c4wzq$@!!Efl+e&`xK*wvzX4X-L0M61t6#Dl) zOQ+RQu1vQ;efmGSTB2)yUgf~ddbwP}z-`E$nJF~`z12VN`f4p*oo<#r<@mjjGZ-EUB-5qQ z0w2F1Z6dsW&H5WIRjl{o{d@C^%AE>_Q!{f@3zIMN6@^Jz5oe3{=%Ly{(sZ($5Svscqax*NYCLe3 
zJaQMcYfxFt-zGkJm&AV{7thpN;HzEk%j$-ya0pGRala5DsEb3B*kY?H9ZXOpW?2#N zk8se>O1?j%p&mdtUd4xu{1x3_FS`xitw>zoj%G~8D<$ZQIC;HUBrf|mEz*VU%X+KO zj(`7I|8H{nk7b5Pk>&lG*lX@{Q*72IA>Y4=4SLei<@(s`9IQ)Z`jTXbgsqiqh4U zIAS`zaiy>{>L0#=gkjTQpt~A&PcJ{hOqBeutYFIxDQ+)BAcbqS4e7-LN8S0%rFfii082Z?NJps-(y~=f(zoE$;3FI#fLRe- zQXHcczLYgHb&~4bVtm+l$_QSM;B__tzeWidIMx|?#ex};xH42J5{I3=u07(J;L*BF% zaYV6r#5Hm5x1R}TTQ;R7>S(i~=hF5HsHe#cAM<~H@pD<=t{wKwMwC~d6%F0l&1t8Y z=(bBCLv0BD`|o2+tb9lIPJ88pkK9zkZ*-zJ_EeTKeLVkF4nHyi{H~Y75#rl@ zhoeR*ntP0GPW!y?V&W4claU=bc2aRoMl0hSM7P4Ux`@%_@%!Wtx-{8s7+9ga1?HSA zkzMRJXX-u<8GO7Xz6+n1q9ZjaUFa{Ownr5dLSuw5=q-j$ttF>hL(VoBp6o!35OYeE z{%K?&ekgK95EdOJ(!Q?K9Qv!~Hq^4OaAA~pF>1Zb!08$H7@O^7_~VQeVfNx+jA;G) zO`z!DDqn!tvRYFm#CC!!>gh!q$25d!aat$_G(Jz2+v=fgxv(BddCq=-6AU16s~eRD zv&UU_UDz;OVmx&VRgE|&)0kNP*UEpt!GB@N(d0C)Ww=mjs?3s&EnsP=={P2$;yn3+ z`pa!ZD6bn)zVTFz#x40J3CrxM!)d(3>6Ta5V{<|FLnWT*!1doY^NvaD1CD*BEbry4 zC9kN&;^CNh)vA5-TXnI_;I(aqtDAyq<3V9xm=~maP!V0=^8<`}Exm*;F*%`QNUzsM zG2U6`_TLN(ZjmY?3?v0)e}VuLmU42#R2KVKVBE=7AKR00^C`=tSj@P3nEL@9o=k8v z-E3=-=eJtcqIT)G^Vi^OEl=7erqp*D+`o}63H23$3fk&u6Muw?FxeRC{Ihl+xLBrE z{u8|Hd2kSmRcebF#B=RW34<6GhEy%Cn^T*7W|B{eeR&ZnbI!8H`WZrUm39}@7kd!e zw0HU0WKVdX_$eS0*qfMeYCM+ghbSXM-^mI3v|-f7Mqs^p*5en4#tw!e>%iPEfXYLGicw$8#t= ze^ znw_VmG5n-#MbejVGdn)-7|R8g*S^d0qOik*D#HkU>84<@W=oYN047;0%`*nV!F{E| z81Eia$e$<~sqxB?fbVco(lZ6lV}Fsav~n0emJ7j8-O0A}Oq2p8n_sm2#+JiqK;Bw5 z;TyU1iDe;}N(Q{Bred;0%$^wl3ltZ* zUmCsr65x(euw{&;RkEhjd@Up-@Wid;@?Sl@iM)7fYQS(4pX((iCjvMu4TvvK$~c^nTUr1@RNZs4XW1j>&+8zN zGNxbZ>MF0A?-AG0@p+ABPIbNLg_7^i8?01l?gpb5F9V}Z@G>!6e&y&L>z!YGgQjOh zWqy`}@*?L14D#pa17C~--k^Pd;;F*cPh=3}N~Biv-f3x1taoeYMK6o!FW)8s*5omv z0XJtpNiQ>F>wj8qi6CKj#{%pfN{vFFwY)|1lm`GiT>qO|Dy?`Cp9_;l{dJ=MD=^uN zq%){mLq+*%ix3Q&=QyL0qsT=t(plf@X`okxYIez9mi68GCmZ)#5%e@;!9YtUm z6iCU{7>dF!OEf{V@=s^XRz;8PqRvq zJ3?y+|6>4B2Mobm%iV>UKSwg7({^HyjgF_<^8ZNQpM_x0R9;@C2c*0Le%jdV<33XG zP=v6BpYfsMpMUnBMNqH(i*&fOY_mo5s1U)>^PX4&D!>ZA%}sr06IU+Pex-Y4xsM@e zR4M|zpvLJIe%q90MVeRu{V>{>+b|F1vk1krU~vHXzRYXK3`k7T(E{us856J92`y zaIP`KyN4S#>8bu&JL64O@`Td3@Zu#$S~A&y_P)BQLwGOwsak3CKF;@VRKo6V?vXU6 z4#FkbN-bBvVoC&}%3o8q7r7bC}_~vW$ zD-^loGRE>RBV$>o>jZ+WNY_pkIHoGyT)AqOrG>S-+lAyw-3MRpz}d$zm6^)nRD510 zX6E0~HP%(l1`s05ae5h#H{^{E>gR^r4?__mEN~lJORe!aF=@;M0o#5;5T)-swYq3e zDlj3s9XaY9rJHA>j5YUN6QYL-$qTT+n#C`Kz}6O6c?-^xYZQ(%ToWfdmGNziA6Z=X zyOVW4vP6oAYlCs@59atQ6I?cjN52IAc6D`Ngk?; z>?Ac+0y!BT$b{h9PI7?pm+YziaQXhSN&0Lwb9r$I0v3n8RdH&Pw4SBu@6a>jGAW=t zuogQd-*SAk*s4=M;jC1Oa>CNU@cZ5^!{y(9cuEv+p>t#dz%316CR&Lq14@o#l+^)` z$;F+Ht9TCIJRg|cv@;L2&rTx(0Ad(3w@^+ti3f&_D!6eBMf3z#Z zu~7&s(XNB50IZDZPb*n`*rAHIZAjXYv@n>Ouyed+ty8u}4C3^lo}4L^AZlRL54VDo zKvg?`A7gAv+8Bq5qU8SMQBC{XXU5K3dr25z5rrLwis%ePw8El|b~z9(F3hrS;*)Hu zyUl#b;X`(Jh#8ar%72FU(0(w9~DX0#;rZdcbzUzr?O;H=bnnBBKAFt|%f7A{eu z!a*@m2}>SuTeZ(oRKBwu7ghr6#~UBD8}0|#kbgi+3qgtph1OoO4}~BN=?y+o_*Thz zjH Q!5R)59b-T9Oc@KMZkFO8|3fzpuP3)9^Lo7*ksd*HUf4skZYy)u!D&YQy<%w z{w7JMgxqcl(Jf+Z3uxc@1e0v9ZMa+?6h9);Xra!O<>&J;?`A^~zLZel(;r`HdMvEM^`--T- zIsV%#5I%GoI2C6mbKXzmkMZmkv$GC|jl~PQ(@KoiDzKa$oZS)Or{a5&Gg zxR#E_?`R}SybvuFrjDaRA30t31f>+)7wr9t+uO^et65|3;H4*8bi8yQJ-y+)l#OeR zEK|6|v-Hp+4?4&Neo-(~+BqETOwOpENKW-%-3HC@JC4SnQP^qV?bK9S68?KKoPN34 zibX{t!iv}TXbyFiTKX9Z$Rjx2gQ9hguaRKAW%)YjnqDxDc7T6czZ%&SFng=NtL(tj z+6;V(dk)V8_F}$xW`=HqcrKL&9yiwh{^0$mTtE!|e^1=+S@5|5fQm{giTnjL+P%Oz z=&zfIjrYn84SLXKljzUjAO`jY96-jizzwBAHP;>LZGC=0KrS!-{5vLg@RAoSV7WhE z`c&JXIPrpizR6QEI8Z7UMtYs0%vQg9;-n}sXwTm!{&}t%EO7_&l;HZ}UI1M$(sScT zlZ}U#cMBI&InWSrUZ6%V#(z&@sB9j20cNvT1OzV)sCj`wuTb9wu$Yg}>3AV{c)UO` zmlvS*iqVaAbPOV0Uksjr_NAxR&pnMmZf*Tz{thrPU754a_>WvrzC4qw=>vn#e{{b( 
zVm`O|jrTJMqI9ZrB%+1~&dy8q{GI`gIM$2Su6w(i_hxqXx5spKiJl_z#HjEunr>7s z64b;6m@NX;%3l!Gx0kF$?+Cd6VFTyC-bjC;bne<9eK9r1C&{JMPol{(6xk z{vyYv#u)`HSo*pm3yOdQv@)QOQ5vM#te4(#LZV>w{5z#PD=UZSzNsN&wt#=|BLjik zA_#0VDLblcgfd8wM_3_#u}Y_Cl#%$Yo$}bG-{#$_=Pjf%QOd zp2-F!@$;q z_=O3PRv_SjRj>dSj{bxD%k1}i;>@*mLWyymX=YtQBL;hRq|mZVY`P1nTQ9N41> zlr%nmR4o&3X!uIBGl}uMo|WvMMtaL3nFD9CaLHTbQ|zbgvQ(bh%e@gT$`~5;vU-&! z2;)KjHz{G_gfE%TM7FN~5t!hxERgm}Za&<97x@$iOTY!uJX_oI5BhY`Vf)o`|IDB3 zAF=K2?O|Bk3w0_(&~{B`(|>%1b%T0G&UdHl)fyU%5bUqtKg~4B^lU{+zd?TCGsnAU zYqwg)fjbZHdWkwJdeSs;iL;j ztQKkJ+h5i?xIAxx`x|h(&a+-3em+%MfvQo$GgOa!&5nF^-aCO56=`p8?~jDT=&^$? z)W+#|;eL13lW0A)Ki{B|@p+tyC55T4OMls$QBp)v4wo06jSWB!0;@;={Aaiz_+3Ri zOX?m2|GH{rdtaH%6X-ws&g!2cBu{RY47}!^FT6#lDVg_FS3c4%H%tH) zDwaA?N0A58J3}{x?8gQy9{ksX@nc>2(YP^VS9pBa(@C%OgoHdzL&KIXu?8M1++CEX z)fM2?ywbO#qICWmc6sG-qoJXJr%H>r^Qqu4)1S^=-4`!CO#RLe zCFf!_H!I(*e!RB4YZQPg5A-SqMcCdBE==_yGfs^3b}kl7gdlh9;sN4ZtW9398Wfk; z0w1yQPY1Z7F+|$S4#3)j_@Cq<8;D${vDoQEpr`9w%5 zv;^M2AA00;xY^$v`|N%-gO#9(jb=-x{RiGq#PMF!S+kb=Ku`6T;uiIUf*Bu* z+zNOybR?hkmT7}MQril3 zj45?GSh%CR*zhVH0$oQt1@~v35LstnS_&`*;MY2xAh_>58HYKxDt#u zc~ycv;@SHyE>hPYThjg5}tLjkG4eNAy=9T0|A+EixIg`2SX-+wnO%8il5AR4p;1?$S z*)*heWqC>9qTzY7G+2+cdPq^M_LyaMyQ=G3_t*YI)7R@K8>Yxj1EhuAQHE{OkL-Co;dWoWRbZ$sa|&% zt1ct~+`p@320-&UzsrQYVYWGYy$J~fQ@Ss(&QS8>yv-ssIr z1X^q~yS$pk;si#QAf&9sZOx~XiauFr7zw;v#~*yCsa~*Jc2gN`w*?9=vbY;;Ce5|p z9=eSwZZZ4qHek_=xt`(3e6gCcvT_iQ2=PLva%eA+yc}h-u>@FykgleX{e*q?SSjy>l9l0 zO79NlT*pZSf+Ee=E2NtC>}b@SONDL@`zwt$ZRr7*nNgFv9{&YQKCm4MHRtr|9s|XL zkCzq$wVd42Ei|6l^ef!?SlO;yqsF7g(abcuY3y|S{9<;OcScq~-MClHVUG0Nx7JKF zo#yU(%kY+{#xqCCe{o4f3V01r>%&e<*p0nPu%kSyv$v_Do7ovZNavKTIFYfFe&y+o zc{Wd%7$I83-tti$j8CRj9l+IKr;(P7*QBy@z1Xhx*i$ zU^>785njTPAl{93Cc;}`^g`z%uWpYEuSqb8Iy4^z_M? z&lTVROJ8qK$2ccw!n@c?b*Ea_`wt*UP<6mX>Kvb9Gq?yHh5oe%H}%)y*V^^w zeiGD{VMVDno=+cHvOo$Aa)V(o7?}#QjNe&rIN#SAG9zZk3P6JLE~cF<1|g7=<;mYG zQgQ4=Ny!5j$GNcG$YK(new`|;zcR_AQ}Xbk{+>NQ|U+z;9n9_?!(twoB9Gic1G9Wn+@w|xw+|M{V6`MNJW>9 zqp*a0buOD~13NHe5orr$ABErY&+BcCraaVD-CUqjm_Ms_!`H(oROhaZz)L6Ouig1_ zuV*LlYZJ^fK@&FFl54(Ax1wwM!FHf|eS7s7dvoQ^JB|uuR(qX}A#m9D2Y9;e?Mws~ z`Y>so5jE9WpNEY`!;h|JVWhZwUadMWD=%5W07r!E%OSYHuP)^h-V}K8S?TLlz4nPh zFY9w=SOep!60X)aj4ZjIzueg86*#1Mz`(+SF_D;9cZ;Eq2?Kyr9BLOM8Nob$ydKlC z!c^pscOu=9ZSMR_7wkA#xN&yl%f3XlT0E{e%M&)3Y9|yuAA6pDk4I3tJQEvee<@D| z>2oz4YzGA{hFyJ!b-N^hhv(E*3}{YYt-HKWI9k?q$#H#VMkQG``h`nXmdA5f9&P>z z#oxs=--MfhxNE8N&}*(L8+^*hksY?$t+q&eF-dbXki2V(2=r{0v^R#5Q0bLuNFpWx*=@JTl8V#e?XMpA4M#*1DS1u6mc7d z9HHurrPU3g9=YJYX=X@3&9)5Qo12=Q6v5kPH+NA6mA`A+A=(WNk00CfVriK`|Fp7TcW<-aW&ob>KpNmR`I^yQ|y?Z|(%-ImLUw(w;fGwS*6)HS$ z$|?+Lnt*nDWt;NjUS9B6d99P_;e2+3UCAXkMM-$+(iiYRwV#dJ(tv3=!Mh0} z78(Tazy8gaMYgnHI(`bg-J5ReKx?kT4(2Q4)z?KAvPIrfm#nz9pOzK_9f5IFk&%(W zEgPS`tvn+TXqm*7r{m)jTFG%_tJoS!v&vH~+SEVdbne0z9#>KkuZYy@souo0CFN|N zDV1m}o^JNquWy>}HZ{?0&F9W_ne&jJFUhKnVjz+6PXbo(t<5n&*i?GJljPu{(5n!Y zX2Ph^nCRo%)@R^Aii*b%@gWsTyq16t7MSsQ6Lx+0CXPoU3M%=tAXGSkehcLJ#e5C@ z3bF$w*GDn;hRpyC*;&@1%627(HQhOF-{PN@pf{+4ue znES>()vG$+u+{eSw7NhD=!c=j`z{fs^+zbLm3S2ze`>-{sx0D(N3fCD^1<;~n502C z57XJ*bpD?F6|Wi5Rgk?1^FJg-S;_nfD)rGzN6!2HUhbm|wluc>dxe^QbOwtpGO!Wb4%Vf8~q+R;?+f=*3iN~CW8*$gW3G99%shMm7V3qmqBw{ zob*Fdv9l5Rj{<^tKi-17*8VScFJ=xEkII&e#uMN0wY}L!eJ5=jKUMYg*Y_s|>y11Q zZ97~sv+R5(Gfg%+Z5FTejWVREzt^c36HbTyoI$jx&uuE=I7P8ULxU>91j|BEQ1y09 zwpeq(0~YnR1neHG9bbi;-A^eb<#+%6vW>Pou_A$^f?#`U!-T{WuT=H{{kBZ%b4=F;dc=%)89ICdTPpe1tYpV- z*`Ceb4<0x1&Ks{3K+aJe*taP;1VsV_E_oKO3U?GDQB`G*Obbf zDjhOYUCYe7{FS$=azRZZ@7K3{ir9;dd5SgHX274zrBb$037}m{+V-IGhk&?Y>ZfW~ z+R-&@HyXv_aTO}2lko=uAq!Y23}pYS&TQzJ>dh__l3gNJ$Y|mlj)3|kdS8yfi`xS( 
zQu0Yl>-j*>dns&$h0cPv`tSS?d>^auB4p#MwHMN5q&X?WUKhAs7#kvG2)Zr-9&) zDl_uX4pP@jMyaN$w(Qej#Zkgrkr4n03lx}wu{%gHDZ0pvsro2|U$Yh~bk0uRXP^UI zL%9Z!gawWwmK^Fiu<)4=^^9T>e{?3o((<9d_1Rwv00p-HRDPvS26^XAfe|)YMO@1% zb&Q57^OB4l7+`*3WmO-X9*#;%xQ~LLB}0c|ok$j_t*L2x#AizCH}oa%q#_DXH8+Qx zEX)tmF}@{Tj=M46s&pBe?763!pW<@(ay}}s+#r^++&l21oI5&ch(p|6xUGkZ#KheA zzs>1*D(bEdY;l}i>h|5}-qlH}qkBt_h~elyJLg%QIi5a8un~aVgr$!68*+;`3scup}f+k%<@bLkxWvi@J zHm_=S{#%-lXm>XAcQZdT#(J-pqY&-DDe+idaqX^l9mmfmBbX*|AvU38Qow9=hn3Db zLTSgYWZ(Ms^P4)C69Wdvf#SU}PSqYG>fliO{^a$kKferbv#{AYEN&%j&M)@HYHg*n z@6iNK*xdWHFM}xy;AU)UON_5a(2QAT&0fpnblF7RW6~9AL|CO$J6~#x`Eg(Kw@hu? z&-Q&A-w~LiXfWm-U9_8|raBCDv($2|kBYU^RLa3%7#_Vqc{g+&RaEyoU~W8y%6UDe z7S;>q6}YcU0am!WXjCr-zxnvAeRXQD`oxX;YXJiFzNq?}8@cdlR`@{nq*z75*(TOQoW*oEH_67RfqpweWmO#aS5 zrc1=)A=&zS(peW=e!I6jbBA5r-1X*jcNhILxeaM`w(GqLW4R)GZ1 zZr0whVcx4MW7g3gWX%hCkVQfz+~SIIk5{gsoad!*foJ#t;@RwOn3oI2JKkViAx|xU zm-iyAA%Rz0mWRh<$3aAmBY>AXp3V58T!DgbQM#e}xHtJJ4yAtbr!A1ix|1e6DON?_MM&Ev<8|jL3x{Iq6SX)-f7`c`_@4g}6w1 z`TRz$M!lGmLIuE}EMTfgZJ*hQxma4_LlbAEdk1CAjs_s3FIu8eu1>z&Ipms&Z+n1? zi)XWq3b-FZ7!9#HJQYpbE10jwa1cOB3Xdx@&a}PX&VQ4{ir9F_3x1qHr-`hu)wAYn zOFz7k(G_@H2KcYL`@Ld;HSCg;PHRpm_g$@uZNiu9X{U{fS)*Ge0+S~rVCF1?s4f~x*rtG|B?3AQB`%(+vuiC zQlvvcq$C6ZX;2Vp>F$*7E=3TeyQRCkIe>I`cT0E2-3Q(qhkLVxQxcf z#t5DvdbfO)lRFtZ{YFmS<7_{Tvq8CaZoQ5g&5-YUm<{27YLCxl8>sZcAYxhjUcEST z{!M3iz!4XK#fRtF-`3qu@vBpn#rYiovs6@H;!v2I4n>t`QK5Th(s4~b8Y{Nl?DgUU z7NJTtDfai91CDP{xr|-?GFT}Eg-OTt+2uMuK){7t++?zC46e{Ehrb|ajE&8+q~yCyXDMV;h55l~Vp=v4DxpP#;vmrif#2(q8unb27hi1-Y}@~b%? zTadG`Q=DE$1iX=GzP-u{hN65HwoqRS8yi?U87wuz1QS!}oEXN&#c@F~`?~(8J^@|^ zy;kd}z~xoT`1sf``1|3#5YXU)G{yODi%~L6vlomqWa04}nc`+>!)1yck0>-f8!c|4 zl$h;uo?5=}16n9e^Pr#OXf*^lY=%>^eGWEHXg=`qi-o<-g|7 zC5I#+G!`}B_7IW#>1AE}GYrr670(Aat8Z&yQpoJ@5NHi!a%__O-&`>o6CR>gN$X{Q)erjS(_fnfc<(*%Lbjt z3C!VmoZFm>s{_wc3ypaPiDwzI|8)uX=|O|PUkvTQfOh?@vi4J%n5Qx^GISsa?G;DJ zZvK&6^BMS{uy2QSKSA3WJ6JLA?nx3s1wm&Hk8^%Tq*k3)feREAl-OiGt)PYAr+E;}Z;xlQ zpUqeL+uHirQS7IOhzkj)mf--OgWFntp4N}wQ-P(^Z?`*I0(aj0R5;)+1^zt^jOF2W zevKO;@W&aEjO?idGH@Nw1%R8iTKfUSGtw2g88imybvG7rzdnsc_DQU*!!(mRzT|sa zPT1o*U@vmKpCNuacBM^tUiI%x0nCvTqJ~Gb@=)!JgW;c@(39-BQaQO@h0o;c?ftmY zKrsb$in&%sTPZ>a7xMKcw$Bu#rIL=<#%NvK$G2lE$ygis8Q!wk+X)5To+N`=pn&~G z{Cvfy`A*A+8o(TPkL!P%EQZMi<(rc~K$tb--ms35|(;psKq?lV`JC2(?n7 zlU`RSk_747GZ)DH@2#!6J%xN`2b0BGOFz7p+6AGOwJFq36&vSAn-CD%ZMBVi zeVh*!WNxFFH~gP_Nql(MRC2Awr(4h7UMigK2F^i9-P_asdWQIdtDA!;OADzl&JX;E z-phQZJL}jwe5{T2v~+3FIr0Roh@ST-020R1zidfTs3AD3*bcydxVzU}&&7E%^*me- zV~e+jXZSxY_F};4-nOr!rLx}Gb;Ryf0(U9|?X+IKe5FEyqfqyJk)*pKyYX$d5@L|aAH2Hc^E_I~L?Wx7Q8V;=ytiU|QGD*#K#kMUNGE~_H{PdvmTaB#GM#j-UJkngm=p@8 z?SCj31DZ0^b?jBsSE%}8F-(+i3oLHdV~)Bg-vmKzs*Ak2`Ccu#2hYiTE$d>^4QO(` zHa5{ZV&apTfXno{nakrjO#XO>4R{gI_EriNPr?zfjU4TSTjC)xB{6%Euc<$^)4Gat}I%l8tcYm~xGS2B?n$Y&m>~2jVql{TiumrS+ zUKVJYgYQ4r$KsxuQc$h?P9cs=mUw!MGgXJFR$0^{$EXX9v|iuS{y9TU@x9PH$nx{z zJRxkniu;`LvVulIhkkoilAU+rsVy&=gj+seeCLS#?Aj+b(QKmGQyNti{`oBE^^dV0 z!?mqW9xc^1{@~;aA$_rAw{tLs(}8(|(Plkukd}$Z^?JX#C2{owU#DaE&d)z+`>Ifz zQ`z^dIpE%tn9W50awkm7&H479ksTxF#~1X5&->FUB?CuKktDW$-uu!7KjC@so~(-xxy~eXevHZBtR5 z3kj2_51__H=DX)@x`pey*qm!It@o>KtRv;V0tDJ92QogihQ0pwGU?F>4tTe_**}fg zQj^6UZ^EvA`h?&X^WMr|SM7?a5uaR7Zr_Ea(%Z5zr<{eL31ZQqvY_V88O3UqYw^T~ z6=EZzuGpVZu|T?}2XOolWx&vI$~an?Um?*=2wpiG4UEY81kGuX-GHNx=ne=n- zZ*jZDnq&SPBZ2P7Gy33?23@cl{yx=u4?WfECT}ELJOl*BTTdpGAUt|JVe3wagL%wB zL6g%F`0jFOdYW&PYXVY9T=NLnb{lYNy8nm>_Gg+tUBgUjIXU+H>kBLwm}J;fk4|z( z6NhVX7ZO>W)Pm7&Lxx96;37&3#(Fal5P0D+HY;l4#=AJ{&{E+yHt!>zG*|pUP-=aF zX$3J1VYC;7-G#mVg9W6h#%)2YW}Q1j%}<&%Nk+HVzO@*ZaUUPEqMfofWCWcDAjD0( 
zP)l=24s@GhZkev`a}1U<=6Nk~9;+;%5dISmlXkK@P(2qC>MwU*y3Y*2kLCK!zN`xR zFAVtzL(J_KU~jH|*=k~_e?8euwzOE_GKk;&6^9fkOm0XP%hG?+->+tRe|xgti~#;D z@XC*o61y#e7JE>VXm9DgOV6C2er8jVz#L@iTWq7*R9kH8wE=0*S~ zG*9E-&wPvZ9^LZSJZZ&|jZ$FVLR}Y!t;!V=c>9~sl8M-mDPfY?p)Q8o70U5PR9aAlv7+4>^d1X>*?hh)#Zq%FHuD;$=*J*j0 z7X8?tTnxr_GjU_dL9qsX7MYu8B+S~*LQ{#GM#Z-xr=`;TYGrSC=hrR(++=xJrrU{4lZ2728eZWL>tEy-BmsIWEHyI_)+w39io}f*f;b=W8(&WKyofosmFjV0T<6Vo(KRVX$SdCLCaS6TRDsN z*I2jej7xuO#yr(Xw+(s1HSflLODd3JIXlDzXJ+KYU&ru@Qi-EnhZfb`;=GjTBIn=8 zs+ub2EvDqOWr@eFpcD-y#puYb+cN)6S&zpx)sp2PhsrC~n;|2uM5iu~n!QjYmk(7P zkA489FDCi9<_O~ze>Dc-vA~h2!I9{C=qt>whuX8o42{3PFn6LE^X;urq`UjLN*J1* zUjn68B4Rp|ed<_fj^=}ta9AXF6B!wMGD0rMGkJ=b^}a$IE06*c`^pW=z@FEY@5S~mPg$(3q6f6bq z)zL};6&b-?d#Fm3cG<|&x zrp9a>^pFGn5@yjuNMlwV$0J?S@ zO3T&1QGt*A&!t%&&ws%uZash9d)7)<-@twdcP%-p!_kmlStG|@;=&E}DZXT}R4hZB z84MU~QJDA_jlTICG6vJEz1T&gr37PD_B`O^N7>GEvb3Vpf~r?HuSn_2&9_yxw(*8a zEWW2#cXnN-ee_l?VnFKq{y95~=_{ncpnEyp0C=r;hc8(b4tT3;UhYgK4GXX2rad1& zQD-QG7x?+cwHNUw^d*0F_kB7M(Koxidi~PAS+5o^CC%^F;ln==2-xx};ruMH5hb^G zhOZp2dp@bF-i0zq&L=VTv~Dbxpc>YnRwgdrDVxa~4qQ2Z{{&;qa-I|^HNVaH0yP2o zz_=|^6umgZfh>$$_s-WL%atOHQCsH~!dI2)-57}uie6WwGj$2cM;})XUNH1@&vb5U zOWFP}=IZKlm~H$&U-wYs3uy{N{<7ZN z)()cc7vyv=#+96C4^M8|Uw7znsK(baMBil~0H0*qgjCwts1(-0I9DcAYueuoc_VbA z>3A8Pv(9hb*m$h!(3JvBX9ki!xDx?y-(rzMU5L8hjMOYIw=*=pjEG$*G9|R9tz9Vm zm?eTp4Sp++-`^v_79`m zs>anVW4yA6(AcYw5RV^2#j=ZLVk;M7cU~}eH>dB8`cR;IAvnm`F*~2({m?J2`k!G9 zY32RfjSL2rNDe$U>2&-y76QH)UVAi{$=2*6MC14K!Ulz8Mz?~mH-gi5-xmG56Euav z(*G`sL@qg7si&kGTqjmM5TK=gYJV$O8)6UD2rCgb3!Oem*F#$SiIr&@1N{s|A zLvs&>q%G;HgP%=UUl8KIb=Pg=;`z3kCuqTEv5|^YE{5l$tEJd|H>xCI_QtD8k#hdc zmPZ0@MimE~+jagi2{uI(+M=f^r> zbv`8*G6EQZ+x-MtY_GN%r&Se4s<+$ib&0|XadjFY$t7h~uyil;#xoH=hy#8Gneq8o z$k~UVGM5?Hv_Hhz3Lyy)yvX~4Ru%&g2_M}E8%P!jIM=?4pqQFbh9Nq&KV2+3G(EOV zYa5wnQs2i{jVHvvb`kSej!KqExj5SUaT7G#7tclml1kj%KYimte}GX&fuNq*j{LTI z&(E#3;cOex-eg3^TIlP1IX`#r-2AL0QFxU5@w<0w=G=PewrZPxD#4M{OH7t*tcoWkW;|uFr}=&pP4fr& zsiSX}H}k(`y)8UrMWo<`;NWv>z4F+zso*DL@M;Y^a1{gEe^U+80I{>Pai8Wl65P5e zi&p#OLgf90-$_f0mNpg87BW{yrxgrq33U30Ow+wL7}^U-tS84xY{lf&G2*uPR`IY2 z)4g3rXraKY9zP8gBl3LGBIIXl@^>;Vi%q0sZ93< zd}432+@=q!k&c^-`$FQ*zAuWJOk0loI&|j4l`s`mU$NT0ECGJr6wVT7t86GBORSV^ z)s7_d2X1z>Zl1^4rY1QeQy%u8cHH82MkBEys9sL^+#6S__Sdkh0N&V>b`tX(S$u~2 z0%G`$L&rqu{occSy(^BAXaawu?y3fMc19zGj);B3B;egjTe3uax z#F_1MyeYhY{>0LRSe{yT>YxM4GeH$NW+J@0vxo2O*%@*LDw28sZ;&5zd3dwIOfARY zD^;;7Hg+uq1aw$6-XNh`?TR>$z69wo=qpwz8iyfx-?h?_MrNpJ z>ZVG7JzRK@ZwVnkIXq;vJpbDV*GrjxpHs9?AJ%|-*g7(_^c}8<@POH57aqu#_84|D zI%^M5!@+T+4QC{N=7>R3!^nQEa$r=)!@j)-h8d=Ze9VRSLO?q#_WTb@RJm0=x;geN zu-uPc?CUdIjej9~QeT&Q!LAv{3IeCqAr@kyz-3_?g~e*tL&jdC&HC@m^+s%+;=XhD z0WdkVvWv$528sW5D9dp<3Be*j1~xW8vcVfX?K~ow{%&WL+vQABw+Eh{>eY*Lw~Ui0 zA;C6oHN^FXAjad8urDr3kSo8)s8uRQ-MfddgM(PY-^NUbDQ5T6%2V*s+> zrGxl9LxlaR5$~Caf&#%iSbRpHd8SgiX>JS##7(frzkx9PBxvVN=)uE#;OW>;Nn#LW zPZm2X3@Kg=H_(Bo*Jv9eva;UVEVqLH@-=zTjaPdg)HCUfb_AZvZ&2nC7iy@*gb=Vl z>y7>oh;o2>PIc~f7c~BeAZFwu-t?(yRm=1K$_V0d_Y4sZ@E4Mmle6t4b%bZQ^$hUF z!yp)fAW@_)?A#oHctnjI3Lqssfi#+ufzAFPD4mYgKx#3IyMBV+*Xr+1Y+&-|5q@4j^pLp^NdUw zfn>g^hUrHz7~3A}{{h2~rJIU(^;}MY+((rzKN`jg_ zTQpNK6uK`ggo1&h^aS8#BQF6_AwhW%zhil-V2<5FNQDppQ*a+zJps{U29I!ao&VkG zb9Re@SpSa3ACmB zPY)`HX80RL4;)THR*pI)Nufxv4F1yyXGO;UmgYzBf`Kd1@&0c>T%Jm~F=~7S0eUc! 
zfq=a)vIPne+G9SA6`y63|H&rI()~@TTkU_P@Z2Ry!L&e1Tqk$ZAP}EZejOcysa3sj znc3o#S*&P6pIZ3dS^^3X&qE!Ey2207$aS}|{J0nlsqQbyfk0C34`$O#d?X{#42#vV z#Z7lj4C78wAV6=&O3V0J7U%23lJo{8*srbgb~I9Ol_ZG%_2y4@#jvn=g8g!cAHiz7 z(J#`S!|A|FiY8E zsO~+DD$4BzM7h4og8L1kZ0>)BP@aYeUa{z>CMFm+2MTlkEA;%a5KCooQ)`h1pl#?Y zneZ|!_R%Dz{dpotq_6H>3sk%V@QW88dgz%6faY0ZX};+bYjA|I;~u3SLcHrBD`P`N zLQ-OcX=1wLu)i2WDFUQu@was*Fmpj)5wea)0z#UW5z(uRr7rSk28oSui--eq9xhecHy}99X_sOkkL=!4?$-uiQX_gO+4@_c*jJZ9$)2 z6Wv@HHC$4VxjyR*K9h%XltOQwni*o;5GrIFOx~Fx>U8hy_TYZ;$Q9`8J`UuWUscBI zz>(3Tqt1B(6mS%V`@c)iLEG$6S!pgQ`i#J9Oy0@W(e|;*tj@=tU)(Gs$;(%0?)`BM zjq>q-Ue6D;`xK*3$5Gv7NjNMLtoWuKqe`M0PnD`4Fv5Q?Y=a zD>Nit3Lj3+p zExOO8wY!FzU5;UbJQC6eMZMhCWE$zHbk8$}F{IM7`@np8>=meuCY~$5Q>Hb~k_J$J zn{Ubg4^S4{`-RxjbZ69{obCXvEw%+2c5`nW;91?u{5!#ghz{-^l0<6Wa^^Q@cH@=& zVRB!6i{7$!Tt1gP)(m%&aP0qQa+dhPsV&*J=?mF5@3)G-Km928kn1UXdcb=*B=Nz7 zVPL{#h^LZ0c$c`49oq77G|0!w9^D*}uam(Zf{d?s-DZG=3>9W<@>gX~IR-`J-^K}<7iq3u`W#i$f$O zx_pkmd6&Y-3JAo}G2I)WFDyRi$?M;2OQDre4NFeaq<}36TX45{_~aWnG;ZUW|I5KBkgk?cOTVW9aXPA=N6wtf&Z&IT)xH8r#+Bt| zLQgW01-N+MyEFv=A9@rc@toT8zs4u1k!-BZ4RyUN4&`V7Q}*jOnU=AL!21qhpq*Gn zHowzv+Sz|`g&4V0;k9c5!J&Te113|w-tz4BHmIPYS0v*nd*?s%TG}O7U93qOJS~-( zc1{yiyGLWj^|CsXYGQf`1NhaAl+XA&V~wP;%;tN4=$ltMQ0S|E*F+P{tBNSVGT}L_ zPmjXfweAB6o+s3J{fCZAhIg<$^M?k-uNX5~n% z*Z<5>7CCvfB1ldf041J64tINp2$Hp>UCS-kv){NykYMPr@!uOzB<@_GI)ntiw?LEK z`R;|>GBB{P3_fNz?gf0A0r!j_>QNW2^9oV5Ixzx=?! zl)QDU?4bC$Z##$h1RH(7X_ms!Ufw~mLnx;6UQM&#0?;Dj*rCNaSfDLpny^PUs_amiyW{Ksj3Cu!E%=;X zPMv|+ArTd(&^*_?i6-Uk3n9~Q&2lI&`$7y<#v=FX!@HZ`e zfy;k=tN7u$9}JoZ14hKr;}b8bIFA6p|Lt3Nul2qzzui%Kn~Lo*1*Yr?@|RQn0h+sG z_rXO}u>o2=Pts6VMpUb4;D45Rc7*)3O1z9M{3wSwf8SMm1Jgo#JM`S4%6!*hZp8Ru z5H5V&KxD;`)ZcWHuY(v z{MO^eTByD>SxWo$vcWz4%*95*CL8cZ5QNQ*O3Dw=DE}lDuf0!Hx!Jt&k{_R2Ef)~r z>gsR?v$?Z6Ope*8(1o1ZRN=t;YjgL|+e`*GW1k6=@=_X#QDynLuS}m_`Vse3;OST@6p{_>^%fe6%2i zAzjop2|KjwCdJ;@w4dy~`twK{^wi|nOzR9KUVsOv+`>ZMi@za8P27$f^|qe!N^nEB zZY`IxCnpF1_#d&>pPS{*nHkmRz9xn<`~H4)LrRMt{2R;58nYXqH1lG*pxvz6TSxW`>PoVa(s>rVe`7`q6d>n!FuT;Q3V5KDNOcOl|i)Hj}@ zT7lU+L##ik&j7TSl9kKqAFmYJhmf%6$l(tJePCn1zN^051B@N`HnH48e?Gvn64kp z$8y4!i$y$+){nB2lh(O=sc**(CMG1cJfiwnKZw-?<-B~-JpQmg^C*F=-QMu`3%--d+ggNf|QUQy-B1s036BoQz38Q!OhCL z6n=LImfN6qLUP9FK{XL6Q+PXH`})*4oNK7u z806Q&P!IL|&1;5|h;|h&Y zj&a{Lt|$O6pM_ru@A;iZpqMl4MfRlC#AprHy?0ONDamDL+Uc$20+`joYvA8In=7u@ zq=f!s2+9BbU|?MN_KBMJjtrW9~Ur$}GHV^qHa zh?c^74Gp#>YeBst`$SMAwJ-N3gp>ojIc&~5Xfg_%lmTqBq&LJd>L-b$OXarvtiySSjO7C-$H1Q@3Px;bSqXnmn3+{#Ms)8iiMu0dD0(->3|^dz@wkd zGv)6I3yP_AAE`@~adiZ}u=U0yPdm44D+X|Oq_E)yN2-g#V0|buQx|e~`Y#w~-Z7hv zl*XLArJcX6-9g=Wjv}r}`;&ax^);$2w^DU?kwwIOwoBhBrsNdq&Sd&uq21NzTb>{a0d4qt@E zD)*~GVK4VCdWZw)=+Dq!NCcRI69D2Bl9_beyWMSK_!mbZQ<&vALgxQU3&to-)RGI) zlE5d(*SAuxQC*mtVgBNFWRcWfC4R2JY#PVome9LUQddbKwAURdFNg}HHSH$c2A+N# z?yyX)HVb3BHF4Qc9n1pqO52KIPnf{e z*5ola7p@9V0$F{t>4-6Oa{@P*p#Hrv&fWEyi^bIFL7PzY@|gbcMP{U?pcjCEQf!U> z1Qc#yLE01@d@CPWagsE2RsD5vm_9-aGnRd!yG4-QC2g|dG7YZ@`v5hGx4O}*cIP%` z?qGN=yIYS^NyvOEs+e@8+GX&&6K4K|IE;9GnBtGU9FaQxsUJ~}W~XNP;YsvGO-utu z?VpSM{M50r37;p+e*L#Zd^hk1eUza-g%#w-6{=eeJK63Y>6%k4w8Cd#Ba#ksg$pHncR!wQ+7lSFj0=Ao&{*s_<6oWYW9~7hFdCLv&O)X0_{@zQmyGFg?-I{*E>4Gdb=~nx< zI=|>`MQ)g&TJUxd(`g-x^0XKizHKF!XEnvu-$4jp_OK>ih9WzqMg#08*tuV*g)v0I zp94KP6+F$oDSu$-`*Jz7@ER$2*nN@ztkEMemSZp}`@z-UwN^*%$LAzHH3w!?kKA|0l z*F!YN(XTZ1d^k{KD=gh<2M09_|DS_JNWmDDRIEHyrGE6O!lD}&sWDu`UNp4dtqu@k zp1g5a@=_tPQ(ZTiRem8|_fPFdOnp)L+mN$-I^HBDmuq94>3VeU2(>g`A_&dfqiy-U zJTU)8TS)}fH7}|sH}Dnzg$dAXvfWcnN%p*g?FE9kY=UW_)`KFqP-c_AbLeM8Nh$rW zPVsfi!3twE7u1Zjik5Tr{k@lkx%bL*0Rb=C$Af}#b_Ra3+jbX}G+f*Nv&$4#DHTTW z$-ki6m<1!?3S#S-8!PU`&GwwvJ!)CBa_19pwHL4ZtrV7{sZ#xgIW&>&il#f>=jOAo 
zZK)McO0D?oCGWs|(f>ZnalNROxkvpeTNf{2)f+aP*hV(({zn}%WopS%!q2`!R-|fm zRKM$` z#qT%#I#)h&;D3Wri}6|#uv9|Izt1UXFhd7?BM5WputS5<+#Ov*PYeuny8@0n+rv*Q zp7$oI6Yn!8+|a)w)g31Hs$C7dx|a^A9KwIynT@L`ziuf_n+O?EfR*17h64iKZ0gFB zsCHj2K;E^l$E$6QRCMK-k-}3;S5F}E1h_Pkd9@AjbImncSh2$sivXd@Dwx~Uu!S=+5rUVF)6*7y9T~ z0Yyd~%Z6R9t?WP!y>MxzbqHa(=k+XWVcxM^xfr_F_}W&6(?hE6vab(JH{D&#KSCq8 zR(*!g>V;B4$iI6g-rIA`-WyWgdA(7k6A$udGWI}+C?13+6n>RcVSsJE(=)1>o)*&% z$h)(2cw^YYsh|1Ky_Ys?Hb_lYx}VP*hWk7r7n?rV5Ldqcl-05p*QN2E1^9quh90L( zUVhr*kYyP!N2v(sWw(18$HKj|{RK~+ewETPR^c7j%jKkhL5_aeFH+ThpE5goj%1>9 z`V$Wn3KG_*_;Bb1fOldc%0-bAC%L57hN?1xK9brvulK$t$z>}j#JgO4xUVVddLnJb z`VUw{wS%DC&RCr*BDyMseZvPbsPz;;RoqBcOaE{2iMze7iwD)=3?QYOd5z~a8aoVr z9NkT}FEM!?K`s8wtF6OQ;j&K#NHnt-9@ULn`PWBcFOWhC_-!{N&B~SWGtk}@iiOTh zg(Tm$jvU5OR^%R^%ED!=w!Tn;bXFBW(sSQsJuB#}~d)o0w@+i1RX4L7kP`|8R< zwtP@!OP=5Lp%6RM;SbZ#!5TTh;tQ|wh#m*YxGHC@Sd@<>1L`n&-7WgN(r?Kz3K!*$ zailme_BkuiU*sv|mJa)O25MyV*?jpfgclvBKieIPBpJLg$?Pb)LF1H378ygZO+GVa zldGwQ0&HONE-&vptYXqdlZs1A^>bTSRY;_dioZ1%slP<{49_lR&vr3^Gp=&<4u#MK z7&2lcj7E9(r@C4@6B;Ys;4ECGYUL0Xg-2VQHHtW0TgN7*mO@8ODS@edX!k&P79u;E ze{Q+>CsITHS{!}9)Y&f2vTFn|$~PR?n_XaNLg`m?IjN>15J)B_jY^FOwk_pVv0JTs z!4SoMa^7_G07g7ofH8bM9f4+7YPio=p)80wRRnKh*q`{4Z>vi;`*OKWXGdpH@ofwm zeqSHR6>2f6E0?|P>ztI%ymmNrzo0Uki4FTR)9nTi$P~Z;v1((3Q`KZ9lnB}%l=2V( zkV3iK3BL?X!!eIC_Gzy0dtBU^y8bJspfd zXxc{&7ex(4Yx5f%;z>d!rIws}AMdD2Ho$&BrxICN67!JcnV-V7X_n|%X`XlQlr=*I*U65%^o4V%+I3AA2V(3_A4YyhG z)tY&eJLq0*A5kT9#WWn2uXiO>{^%Mx$dsyPv)wb{8niPe601Q7bkHmr++|#jOW%l; zjY&>>%{MTU=SMH}$M>zx(E-7HC$gYh z!rUq(e&&D~Gz)*!)t{PR|F1}>ubyD~fp8=d)LQI?=byQytqAts@3q$?)}h>N{K>CI z8kW=fuN{7ii`^k02k&88ADkIR_*`9U`w6cKO|Q$MOyf`)k~7nEY;H3_9tg z>)QxrO!}0!u786mu)Uhf#Nbl~QQb#OXGS$6I{4Y4O*%3DSZO0-6Napv(-|n^m2 zi1T2ogz%}IIFR;+_uZ#S7awdvq^2u>-UN6t;&TG|O5+;VfWjhjU z4q2xd6^&ocdXi*mEGk1=3zfcf=&~}h)4YEfpS1Eunf?A{Zj^pze6-jEihfJD@bBTO zf+|^tW!-t%BDKVTUMKU|EZ@?~0W<$jo83D-Lv1XA&pZ3Om5YN~JvRfc3#^+c%kBd= zpOl6s4r-W(6yvRkub)+BbM@m|N_d=hZ?Z!i*Gq3vZ`8Ouv~GUU+|Ae`cx~r->P*Yk za!P01&8ACR9`XwUSQ}}mvmK=U2Of5YDF>$q0dnvub*ie;v;td0BSlr1rw~JbSb+1Q z@;gkYPf?6qMsqm&R)JbCIT~kr1thmt4MwQUAnJj1OnWu|37TUij*~_4IW_ccO+iy+#%SZnZ0c*S1N{%*e_i9UyN|(yMvjn%~&;>R3IU zli}Mfx<}+dVyTel-+w5wW%jnI)B0a}il)Mg;edU=61ij#%FaIaaFoz@S* zz>pRAH;}&vrWRM_mWU#M9@>8PQgcbs1EyGn_gD1fbxaQ)Ijt%d=2lu3~*@%DZJ)=z>6i@?2gCp-A?jK)aed}S^Af+c6HEw zM9bN}_qXY!WW#S70D_K=s04qB8Iwu%e>pC`w{q}Uk_Xz>-~Y1T zYCrAI+jNz_8KH8HQgGf_TkC73n?3e-Jm6BUJjvju8?UN-+2%npvrcSY+x=NC&)!rt z3rKNuxR#?zL|t{2@Jpfk!(TAS@s-itX2nW;2iMh8ic!JB?fURT$asO4k|*P#=X}}7 z2#@W@z_%!SE9>I;morw*Sf7pii7eGF^~S69;W`v%&2x2oG$N}jD?gfarguD-h@D}wyKbK9xD1s7zW)04%bU*W`W$b1I|(&SBGXbnWN!>B zygEJA{wuCRAy zArMh(zBu3(0bUL3C&lOj!dIxEN}0hcH5#2>%WR(%yJL5yCr!R_ZLiAG$uOu<3ucVT zmkHL3nbGaT9Hq+LM>e$Tg965*uevEJLBm)LE{o&G2}wz0Itvx;b=o$wtEWdNcj$95 z#({`H``+;!I0=(RfLEl6>%}VT!`ZNiC8U;N5~yAs+F!A$x9yNEb(=&YuV`F6J8JgQ zOUW;Es1yahk_1_v!2k{3d&SYkJfBLd<+nzB655?eb*LE?4l z#V(dNF6IDTMP=C00I;#@AoXIBe5^4LtA6_jR^V#J-C+CbEi1f^QwzU>ky`-E>rcby zD;eF6ICZB;KRG~6++|H874|UZaSi90we9xeDNbvs+|^B@So@BQVQ<-CMNzRVOqlU| zK(F(S)U`e11}Y5AkbX?05hIY&s~(|eR=B6s_?MCC`h2pY9@n@mZ?UL11O}Dm;{LdH z-u>>zarY*Kautat2lexE!bVD<`PK*a9i29rhn2c`J|=Jp6mv}B&6m(NzlZIeKdVKE zTip!3L{cgnIJN5g2@_gyQfP(m65(lBn^@9URk}HJ(*NnO4q^q@lv-Tvx*5OnIU6=; zdg7}Rfzu8#m0L1L(Ufup4WidO$V=0~((3>Mn_Aphp~&uIv4l+3Bd3=lXl@o-{}5B0 znFW#7*B0Ue{Vs*VWXk69Si@_Ig zw3l+T|C--f&0|tPZ^Elc=!78%H!JN-EjkM)X%YHA% z!un~d{c54ZuI0Yc_W&d`IgH>3W&6E5X*u_f;892~nm#!RgCsARKwvR{yj5c)lI|Oc znBSBB|ALWGuzv~=wD8*-&F$UHGQnEy_e(Dj+f^tAFS47QA zL1FYe{#WnI09+BhQCb8nG}h?PaKG8{2;dQKNXZC(P>cEf6iT-%P-Jd*PeUQ40(6 
zJHK~G$=dW4_lXBZfEuRZOIEu&=3N^X?QodDYsVVgkAX@NKS|J=XXmX-ln~a&`sBX{ z3_zR!zsSy*%O*8vYmcnd+K0sBnDR{+^8&n@X9kD6Mq+m+*2Sw1z+i<074pNr?4Rs}iFviL**K$W_AEcM zbWrNW&xm4Iy+i5Qxcpa^YctQPo%0_zY*wfY?^(rcxo=$VMz)w1X6Lz( z1zeU&eTc5aHQq93c+A|_N5mzJoLSkkS9oN>4sEY`Ba-Kp>!=l4>-oB+R?bwjra6fA zzzWz`nq3==`-^*ZG*Da!Uib(9y_fC^VPs{B1?{TxJ|iB!@(<~Z5kq|jyd;gDuRHnU z0gh1&Y+Z+SgPtz8Aw@`&FEad^8mTOw@yF{(xxG~Ec(Iw=!7IW8w6*rhqoXjHEqMJ5B9qTBbvX6Qyy{9 zHUH|nvfISAv%~-EOTN=)lzJmx4c28@$fP10iZ~A6bU8!~;U$^42EnbFA@p5;W^{+9 z#wXs@@4+ME3Hj~B)BFqj zb;SAZ_N=q$I?WffnMe!j=AoMuC)5ALjZ8P@Rh^!L2H+VKNWLFN-)GXC-a@_p{$})C z)gaz-&A7y($CQclLrxu+3A^1G0KASUeCE}1ov<>%RyiNzdTy~zm=J5OuAuV{I6{~Z z{+0i*wpUr~*~+(2;o*E$x0WK_PN+LL@uE177-A=yssePH=7(Gla34BwSR7c8Jp8OZ zj;*yCc>@dceSQD`*sL{IcM6|B*0XU>%*Bs?zWHrmyiaF&-J9bNS_RZpzZB+U> z(P?&mO6jg|JH`8bZX_oDJYAZq#xP;}+KscUUdg47&S@0fw`Ahure{l5 zMEy@&wE5|c9S^2*9@^oM`Ty*%X$!t}7d5j_HV)fSTgx`(;XY^eUwl{nGeT8FLiQyv z%~AAT81w7Pg@^N;h2{6xy-d4Z#LG||waMb*@`}Ex|C45Z+_&1`Sd zsusOyc<`X+>CKD|O>3@Z9-k&PH#_%b(1YlDlftCV!raoE?b%ts_S~($Zg03x$tn4! zd99a{i-1zfhxT_qQ{1FQMYI-gs#iRDM`CRlvqH&B-8Nu-fAQ>tZ@@vB{~ucNYxhss z%2r(V@Z7cXw~n_TpY|@7$yl*rnPg3S>WL`lX{Dvh_!y2TAN{tyY-*ai^y1bDEq01m zqe>U#X0PILFcNud9}qq(+3d&(p_U6Td|zHW`)!-8<-z8SOGJ0`nS|J6cL@rm#BN%; zYGK*eEic<=U)H(<=@KpN=L6<2#cLDbekIP-se1TWu&uXY;vorrQ0-x<1=-lrEirzlU{ziEx{`AWh(EtTZcjx z^C%`wnkIJLbdSx$9W9dDch1=zPg|#1V|I2)zZg?vd-Sf?bGOHtxu1{dY7ZBQHugD| zp~xV()X;8H-r7yu_dcxmsd~L`*P<^=G8R_cIlTCo=vDT6cc+>^RG1{Nu(W&1yJgGb zL?`%6yE<7c+-0WM>|1ZP?ePij31iqdAv*R`&Z94HTR9HRY^rZN_eWs61gJG{{!smX z{r}n}za3mks{efd|L6Vwf2Zx;cZmb#TwLtS691*1iv-TlgMkPmsHO*lKo(H<5CR0j z3-iIkt_+~*d@vXSitk#gyj&L;+FPOzS|r&~cf~CE9yTFU!Ce>B_-=lwb`b^-G10eYI`}Zs9^V8p* zw%=QSZ%t&7#n<{zf4|@VzxFrq3as+QtFG_8-^Wv5n>tP9?tlMTI|GZ;JQM$#serUD ziZ))g>V99$-&fM_SFAdA&)nbGT7CovhEkSn=y_`Tf|RuTR9guZ!FJZSJbL z{*r!>vpU|TOe(oP=VtkRY18z3djrew*Ot!+RsqH6-HGes_pe*^>V*bB3-Hnmp!2L} zntD$H2HEbsq-G9ang7DC?61{w4{(zBd-iUKS`rUZ^xJ^1&H&k=1lqC!0bLA`)d)aQ q4JPo$2$1+9js(!QBru?g;(z?jWqJ}dOh)?|fWXt$&t;ucLK6U>p4SZk literal 0 HcmV?d00001 diff --git a/.abi-check/6.25.3/postgres.symbols.ignore b/.abi-check/6.25.3/postgres.symbols.ignore new file mode 100644 index 000000000000..f824cd0a7de6 --- /dev/null +++ b/.abi-check/6.25.3/postgres.symbols.ignore @@ -0,0 +1 @@ +DummySymbol diff --git a/.abi-check/6.25.3/postgres.types.ignore b/.abi-check/6.25.3/postgres.types.ignore new file mode 100644 index 000000000000..7dd4f899ba78 --- /dev/null +++ b/.abi-check/6.25.3/postgres.types.ignore @@ -0,0 +1 @@ +DummyType diff --git a/.abi-check/README.md b/.abi-check/README.md new file mode 100644 index 000000000000..eb82783c7e1d --- /dev/null +++ b/.abi-check/README.md @@ -0,0 +1,74 @@ +# Check the compatibility of Greenplum ABI. + +## Introduction + +We use the [`abi-dumper`](https://github.com/lvc/abi-dumper) and [`abi-compliance-checker`](https://github.com/lvc/abi-compliance-checker/) to check the Greenplum's ABI. We also use the [GitHub action](../.github/workflows/greenplum-abi-tests.yml) to automate this job. + +## Requirements + +`abi-dumper` requires the binary being compiled with `-Og -g3`, hence the `CFLAGS` for configuration looks like: + +```bash +## GCC's maybe-uninitialized checker may produce false positives with different +## levels of optimizations. To prevent building failures, we append the '-Wno-maybe-uninitialized' +## to the $CFLAGS as well. 
+CFLAGS='-Og -g3 -Wno-maybe-uninitialized' ./configure --with-xxx --with-yyy --with-zzz
+```
+
+## Check the ABI's compatibility
+
+Several binaries are shipped in Greenplum, e.g., `$GPHOME/bin/postgres`, `$GPHOME/lib/libpq.so`, etc. Since the `postgres` binary is referenced by many extensions, its ABI compatibility is the most important. The following steps illustrate how to check the ABI compatibility of the `postgres` binary.
+
+1. Dump the ABI information of one `postgres` binary.
+   ```
+   abi-dumper $GPHOME/bin/postgres -lver <version> -o <path to ABI dump>
+   ```
+   - `<version>`: The version of the binary. You can give it some reasonable name, e.g., `6.25.3` to indicate the binary is built from the '6.25.3' tag.
+   - `<path to ABI dump>`: The file path for dumping the ABI information, e.g., `greenplum-6.25.3.dump`
+
+2. Dump the ABI information of another `postgres` binary (same as step 1).
+
+3. Compare the ABI between these two binaries with `abi-compliance-checker`.
+   ```
+   abi-compliance-checker \
+     -lib <library name> \
+     -old <old ABI dump> \
+     -new <new ABI dump>
+   ```
+   - `<library name>`: The name of the library, e.g., `postgres`.
+
+4. By default, the `abi-compliance-checker` will produce an HTML report with detailed information about ABI changes.
+
+## Ignore the "Safe ABI breaking change"
+
+There might be "safe ABI breaking changes", e.g., some symbol being removed and not referenced by any extensions or programs. Here are steps on how to suppress such errors.
+
+1. Add ignored symbols to `gpdb_src/.abi-check/<baseline version>/postgres.symbols.ignore` (one symbol per line).
+   - `<baseline version>`: The baseline version of Greenplum. If we want to ensure the ABI isn't broken between the `6.25.3` release and the latest `6X_STABLE`, the baseline version of Greenplum is `6.25.3`. See: [./6.25.3/postgres.symbols.ignore](./6.25.3/postgres.symbols.ignore)
+
+2. Add ignored types to `gpdb_src/.abi-check/<baseline version>/postgres.types.ignore` (one type per line).
+   - `<baseline version>`: The baseline version of Greenplum. If we want to ensure the ABI isn't broken between the `6.25.3` release and the latest `6X_STABLE`, the baseline version of Greenplum is `6.25.3`. See: [./6.25.3/postgres.types.ignore](./6.25.3/postgres.types.ignore)
+
+3. Pass these two files to `abi-compliance-checker` and it will produce a report in HTML format.
+   ```
+   abi-compliance-checker -skip-symbols gpdb_src/.abi-check/<baseline version>/postgres.symbols.ignore \
+     -skip-types gpdb_src/.abi-check/<baseline version>/postgres.types.ignore \
+     -lib postgres \
+     -old greenplum-<baseline version>.dump \
+     -new greenplum-new.dump
+   ```
+   It will produce an ABI report in `./compat_reports/postgres/X_to_Y/compat_report.html`.
+
+## View the ABI compatibility report
+
+### View the report locally
+
+You can either open the HTML report in your browser or dump it to stdout using `lynx -dump compat_reports/postgres/X_to_Y/compat_report.html`.
+
+## View the report from the GitHub Action
+
+1. Navigate to the "Summary" page of the test.
+2. Click the report and download it.
+3. View the report as above.
+ +![./.images/download-report-from-gh-action.png](./.images/download-report-from-gh-action.png) diff --git a/.github/workflows/greenplum-abi-tests.yml b/.github/workflows/greenplum-abi-tests.yml new file mode 100644 index 000000000000..29e3adc2f787 --- /dev/null +++ b/.github/workflows/greenplum-abi-tests.yml @@ -0,0 +1,168 @@ +name: Greenplum ABI Tests + +on: + workflow_dispatch: + push: + branches: + - 6X_STABLE + paths: + - 'concourse/scripts/**' + - 'src/**' + - '.github/workflows/**' + - '.github/scripts/**' + - '.abi-check/**' + +jobs: + abi-dump-setup: + runs-on: ubuntu-latest + outputs: + BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} + BASELINE_VERSION: ${{ steps.vars.outputs.BASELINE_VERSION }} + ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }} + ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + steps: + - name: Fetch source + uses: actions/checkout@v3 + + - name: Get Greenplum version variables + id: vars + run: | + remote_repo='https://github.com/greenplum-db/gpdb.git' + git ls-remote --tags --refs --sort='v:refname' $remote_repo '6.*' | tail -n 1 > baseline_version_ref + baseline_ref=$(cat baseline_version_ref | awk '{print $1}') + baseline_version=$(cat baseline_version_ref | awk '{print $2}') + echo "BASELINE_REF=${baseline_ref}" | tee -a $GITHUB_OUTPUT + echo "BASELINE_VERSION=${baseline_version#'refs/tags/'}" | tee -a $GITHUB_OUTPUT + echo "ABI_LIBS=postgres" | tee -a $GITHUB_OUTPUT + echo "ABI_HEADERS=." | tee -a $GITHUB_OUTPUT + + - name: Upload symbol/type checking exception list + uses: actions/upload-artifact@v3 + with: + name: exception_lists + path: '.abi-check/${{ steps.vars.outputs.BASELINE_VERSION }}/' + + abi-dump: + needs: abi-dump-setup + runs-on: ubuntu-latest + container: gcr.io/data-gpdb-public-images/gpdb6-rocky8-build + strategy: + matrix: + name: + - build-baseline + - build-latest + include: + - name: build-baseline + repo: greenplum-db/gpdb + ref: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION }} + - name: build-latest + repo: ${{ github.repository }} + ref: ${{ github.sha }} + + steps: + ## FIXME: abi-dumper requires 'Universal Ctags' but the package manager only provides + ## 'Exuberant Ctags'. + - name: Install universal-ctags. + run: | + wget 'https://github.com/universal-ctags/ctags-nightly-build/releases/download/2023.07.05%2Bafdae39c0c2e508d113cbc570f4635b96159840c/uctags-2023.07.05-linux-x86_64.tar.xz' + tar -xf uctags-2023.07.05-linux-x86_64.tar.xz + cp uctags-2023.07.05-linux-x86_64/bin/* /usr/bin/ + which ctags + + - name: Download Greenplum source code + uses: actions/checkout@v3 + with: + repository: ${{ matrix.repo }} + ref: ${{ matrix.ref }} + submodules: recursive + fetch-depth: 0 # Specify '0' to fetch all history for all branches and tags. + path: gpdb_src + + - name: Install abi-dumper + run: | + yum install -y epel-release + yum install -y abi-dumper + + - name: Build Greenplum + run: | + ## TODO: Since abi-dumper requires debug info and it's hard to inject CFLAGS via the script for + ## releasing Greenplum, we have to manually configure it here. Probably we can improve it in future. 
+ export PATH=/opt/python-3.9.13/bin:/opt/python-2.7.18/bin:$PATH + pushd gpdb_src + CC='gcc -m64' \ + CFLAGS='-Og -g3 -Wno-maybe-uninitialized' LDFLAGS='-Wl,--enable-new-dtags -Wl,--export-dynamic' \ + ./configure --with-quicklz --disable-gpperfmon --with-gssapi --enable-mapreduce --enable-orafce --enable-ic-proxy \ + --enable-orca --with-libxml --with-pythonsrc-ext --with-uuid=e2fs --with-pgport=5432 --enable-tap-tests \ + --enable-debug-extensions --with-perl --with-python --with-openssl --with-pam --with-ldap --with-includes="" \ + --with-libraries="" --disable-rpath \ + --prefix=/usr/local/greenplum-db-devel \ + --mandir=/usr/local/greenplum-db-devel/man + make -j`nproc` && make install + + - name: Dump ABI + run: | + abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers /usr/local/greenplum-db-devel/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o postgres-${{ matrix.ref }}.abi /usr/local/greenplum-db-devel/bin/postgres + + - name: Upload ABI files + uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.name }} + path: '*${{ matrix.ref }}.abi' + + abi-compare: + needs: + - abi-dump-setup + - abi-dump + runs-on: ubuntu-latest + container: gcr.io/data-gpdb-public-images/gpdb6-rocky8-build + steps: + - name: Download baseline + uses: actions/download-artifact@v3 + with: + name: build-baseline + path: build-baseline/ + - name: Download latest + uses: actions/download-artifact@v3 + with: + name: build-latest + path: build-latest/ + + - name: Download exception lists + uses: actions/download-artifact@v3 + with: + name: exception_lists + path: exception_lists/ + + - name: Install abi-compliance-checker and report viewer (lynx) + run: | + yum install -y epel-release + yum install -y abi-compliance-checker + yum install -y --enablerepo=powertools lynx + + - name: Compare ABI + run: | + SKIP_POSTGRES_SYMBOLS_LIST="exception_lists/postgres.symbols.ignore" + SKIP_POSTGRES_SYMBOLS_OPTION="" + if [[ -f "$SKIP_POSTGRES_SYMBOLS_LIST" ]]; then + SKIP_POSTGRES_SYMBOLS_OPTION="-skip-symbols ${SKIP_POSTGRES_SYMBOLS_LIST}" + fi + SKIP_POSTGRES_TYPES_LIST="exception_lists/postgres.types.ignore" + SKIP_POSTGRES_TYPES_OPTION="" + if [[ -f "$SKIP_POSTGRES_TYPES_LIST" ]]; then + SKIP_POSTGRES_TYPES_OPTION="-skip-types ${SKIP_POSTGRES_TYPES_LIST}" + fi + abi-compliance-checker ${SKIP_POSTGRES_SYMBOLS_OPTION} \ + ${SKIP_POSTGRES_TYPES_OPTION} \ + -lib postgres \ + -old build-baseline/postgres*.abi \ + -new build-latest/postgres*.abi + + ## Dump the reports to stdout. + lynx -dump $(find compat_reports/ | grep html) + + - name: Upload ABI Comparison + if: always() + uses: actions/upload-artifact@v3 + with: + name: compat-report-${{ github.sha }} + path: compat_reports/ From 9d576dca97436813c0407de0886903fd48378fc0 Mon Sep 17 00:00:00 2001 From: Marbin Tan Date: Tue, 24 Oct 2023 20:18:43 -0700 Subject: [PATCH 052/106] ci: Request higher memory for pg_upgrade job Simply bump the instance type to n1-highmem-4, which offers 26G of memory. 
This should be enough to prevent the frequent OOMs we have been seeing in CI for the pg_upgrade job: gawk: /home/gpadmin/dumpsort.gawk:29: (FILENAME=- FNR=56391536) fatal: node.c:1030:more_blocks: freep: can't allocate 10400 bytes of memory (Cannot allocate memory) This commit backports: ed44a2875a7d35df9a56f94ecdb0fd923ac7309c Resolved conflict: - resolved conflict where git thinks that pg_upgrade is a new job --- concourse/pipelines/gpdb_6X_STABLE-generated.yml | 4 ++-- concourse/pipelines/templates/gpdb-tpl.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/concourse/pipelines/gpdb_6X_STABLE-generated.yml b/concourse/pipelines/gpdb_6X_STABLE-generated.yml index 7244feb51fa5..41007e8ba926 100644 --- a/concourse/pipelines/gpdb_6X_STABLE-generated.yml +++ b/concourse/pipelines/gpdb_6X_STABLE-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpdb-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -t prod). ## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2023-09-12 12:58:30.539301 +## Generated by gen_pipeline.py at: 2023-10-24 20:14:15.500156 ## Template file: gpdb-tpl.yml ## OS Types: ## Test Sections: ['icw', 'cli', 'aa', 'release'] @@ -1348,7 +1348,7 @@ jobs: <<: *ccp_default_params vars: <<: *ccp_default_vars - instance_type: n1-standard-4 + instance_type: n1-highmem-4 number_of_nodes: 2 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml diff --git a/concourse/pipelines/templates/gpdb-tpl.yml b/concourse/pipelines/templates/gpdb-tpl.yml index 0680d627f14b..5fd76592552e 100644 --- a/concourse/pipelines/templates/gpdb-tpl.yml +++ b/concourse/pipelines/templates/gpdb-tpl.yml @@ -1718,7 +1718,7 @@ jobs: <<: *ccp_default_params vars: <<: *ccp_default_vars - instance_type: n1-standard-4 + instance_type: n1-highmem-4 number_of_nodes: 2 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml From 9cee0e0bba2ca16e85a09ff448b2415a8ef74712 Mon Sep 17 00:00:00 2001 From: Nikhil Kak Date: Wed, 13 Sep 2023 17:36:35 -0700 Subject: [PATCH 053/106] Fix flaky basebackup tap test that relied on pg_current_xlog_location Backported from GPDB main: https://github.com/greenplum-db/gpdb/commit/5f7452103edd20ed93f948416f0d34d544f374f1 and https://github.com/greenplum-db/gpdb/commit/bd1a975dcd962c42efb15e9f7b444f787fb16587 Note that we haven't observed this test being flaky on gpdb6 but it was flaky on gpdb7. So we fixed it for gpdb7 and are backporting this since this is a better way to compare wal files. More details on the flaky test can be found on the 7X PR https://github.com/greenplum-db/gpdb/pull/16445 **CONTEXT: A previous commit https://github.com/greenplum-db/gpdb/commit/a62bba858bad3395d1ecea14fe1ac3828dbb66ff had added a basebackup tap test to ensure that the primary wal file containing the wal switch record (created by pg_basebackup) exactly matched the wal file on the standby. **PROBLEM: In order to get the name of the WAL file to compare, we used to rely on `pg_current_xlog_location()` to return the "last WAL file" copied over by pg_basebackup to the standby server. This wasn't always reliable because because it's possible for newer WAL files to get created after pg_basebackup is run. Because of this, the pg_basebackup test could be flaky and get stuck **FIX: Instead of relying on `pg_current_xlog_location()`, we can rely on the backup history file created by pg_basebackup which contains information about the "STOP WAL LOCATION". 
This will always give us accurate information about the last wal file that was shipped by pg_basebackup to the standby server. In order for pg_basebackup to retain this backup history file, we need to enable archiving because otherwise, the file gets deleted. Sample backup history file cat 000000010000000000000014.00000028.backup ``` START WAL LOCATION: 0/50000028 (file 000000010000000000000014) STOP WAL LOCATION: 0/50000110 (file 000000010000000000000014) CHECKPOINT LOCATION: 0/50000060 BACKUP METHOD: streamed BACKUP FROM: master START TIME: 2023-09-13 12:36:01 PDT LABEL: pg_basebackup base backup STOP TIME: 2023-09-13 12:36:02 PDT ``` --- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 59 ++++++++++++++------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index 679c1bcfc5b5..d277caaf2671 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -4,6 +4,7 @@ use TestLib; use File::Compare; use File::Path qw(rmtree); +use PostgresNode; use Test::More tests => 48 + 4; program_help_ok('pg_basebackup'); @@ -74,9 +75,19 @@ ok(-f "$tempdir/tarbackup/base.tar", 'backup tar was created'); ########################## Test that the headers are zeroed out in both the primary and mirror WAL files -my $compare_tempdir = "$tempdir/checksum_test"; - -# Ensure that when pg_basebackup is run that the last WAL segment file +my $node_wal_compare_primary = get_new_node('wal_compare_primary'); +# We need to enable archiving for this test because we depend on the backup history +# file created by pg_basebackup to retrieve the "STOP WAL LOCATION". This file only +# gets persisted if archiving is turned on. +$node_wal_compare_primary->init( + has_archiving => 1, + allows_streaming => 1); +$node_wal_compare_primary->start; + +my $node_wal_compare_primary_datadir = $node_wal_compare_primary->data_dir; +my $node_wal_compare_standby_datadir = "$tempdir/wal_compare_standby"; + +# Ensure that when pg_basebackup is run, the last WAL segment file # containing the XLOG_BACKUP_END and XLOG_SWITCH records match on both # the primary and mirror segment. We want to ensure that all pages after # the XLOG_SWITCH record are all zeroed out. Previously, the primary @@ -86,31 +97,41 @@ # and would lead to checksum mismatches for external tools that checked # for that. 
-#Insert data and then run pg_basebackup -psql 'postgres', 'CREATE TABLE zero_header_test as SELECT generate_series(1,1000);'; -command_ok([ 'pg_basebackup', '-D', $compare_tempdir, '--target-gp-dbid', '123' , '-X', 'stream'], +# Insert data and then run pg_basebackup +$node_wal_compare_primary->psql('postgres', 'CREATE TABLE zero_header_test as SELECT generate_series(1,1000);'); +$node_wal_compare_primary->command_ok([ 'pg_basebackup', '-D', $node_wal_compare_standby_datadir, '--target-gp-dbid', '123' , '-X', 'stream'], 'pg_basebackup wal file comparison test'); -ok( -f "$compare_tempdir/PG_VERSION", 'pg_basebackup ran successfully'); - -my $current_wal_file = psql 'postgres', "SELECT pg_xlogfile_name(pg_current_xlog_location());"; -my $primary_wal_file_path = "$tempdir/pgdata/pg_xlog/$current_wal_file"; -my $mirror_wal_file_path = "$compare_tempdir/pg_xlog/$current_wal_file"; - -## Test that primary and mirror WAL file is the same +ok( -f "$node_wal_compare_standby_datadir/PG_VERSION", 'pg_basebackup ran successfully'); + +# We can't rely on `pg_current_xlog_location()` to get the last WAL filename that was +# copied over to the standby. This is because it's possible for newer WAL files +# to get created after pg_basebackup is run. +# So instead, we rely on the backup history file created by pg_basebackup to get +# this information. We can safely assume that there's only one backup history +# file in the primary's xlog dir +my $backup_history_file = "$node_wal_compare_primary_datadir/pg_xlog/*.backup"; +my $stop_wal_file_cmd = 'sed -n "s/STOP WAL LOCATION.*(file //p" ' . $backup_history_file . ' | sed "s/)//g"'; +my $stop_wal_file = `$stop_wal_file_cmd`; +chomp($stop_wal_file); +my $primary_wal_file_path = "$node_wal_compare_primary_datadir/pg_xlog/$stop_wal_file"; +my $mirror_wal_file_path = "$node_wal_compare_standby_datadir/pg_xlog/$stop_wal_file"; + +# Test that primary and mirror WAL file is the same ok(compare($primary_wal_file_path, $mirror_wal_file_path) eq 0, "wal file comparison"); -## Test that all the bytes after the last written record in the WAL file are zeroed out -my $total_bytes_cmd = 'pg_controldata ' . $compare_tempdir . ' | grep "Bytes per WAL segment:" | awk \'{print $5}\''; +# Test that all the bytes after the last written record in the WAL file are zeroed out +my $total_bytes_cmd = 'pg_controldata ' . $node_wal_compare_standby_datadir . ' | grep "Bytes per WAL segment:" | awk \'{print $5}\''; my $total_allocated_bytes = `$total_bytes_cmd`; my $current_lsn_cmd = 'pg_xlogdump -f ' . $primary_wal_file_path . 
' | grep "xlog switch" | awk \'{print $10}\' | sed "s/,//"'; my $current_lsn = `$current_lsn_cmd`; chomp($current_lsn); -my $current_byte_offset = psql 'postgres', "SELECT file_offset FROM pg_xlogfile_name_offset('$current_lsn');"; -#Get offset of last written record +my $current_byte_offset = $node_wal_compare_primary->safe_psql('postgres', "SELECT file_offset FROM pg_xlogfile_name_offset('$current_lsn');"); + +# Get offset of last written record open my $fh, '<:raw', $primary_wal_file_path; -#Since pg_xlogfile_name_offset does not account for the xlog switch record, we need to add it ourselves +# Since pg_xlogfile_name_offset does not account for the xlog switch record, we need to add it ourselves my $xlog_switch_record_len = 32; seek $fh, $current_byte_offset + $xlog_switch_record_len, 0; my $bytes_read = ""; @@ -119,6 +140,8 @@ close $fh; ok($bytes_read =~ /\A\x00*+\z/, 'make sure wal segment is zeroed'); +############################## End header test ##################################### + # The following tests test symlinks. Windows doesn't have symlinks, so # skip on Windows. SKIP: { From ea5f4a16f02e35a4464536b9b5d398d5e2d4a845 Mon Sep 17 00:00:00 2001 From: Nikhil Kak Date: Thu, 5 Oct 2023 14:17:51 -0700 Subject: [PATCH 054/106] Remove pg_xlogdump -f flag from pg_basebackup test For one of the pg_basebackup tap test scenarios, we would run pg_xlogdump with the follow(-f) flag. In some scenarios as mentioned by the previous commit, it's possible that the latest WAL file may not contain the SWITCH record and hence might be ready to accept new WAL records. With the follow flag, we would wait forever for these new WAL records until the SWITCH wal record showed up which wouldn't happen so the test would get stuck indefinitely. We don't really need this flag since the previous pg_basebackup call would guarantee a SWITCH record in the WAL file. So it's best to remove this flag. --- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index d277caaf2671..091f28c68c6a 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -123,7 +123,7 @@ my $total_bytes_cmd = 'pg_controldata ' . $node_wal_compare_standby_datadir . ' | grep "Bytes per WAL segment:" | awk \'{print $5}\''; my $total_allocated_bytes = `$total_bytes_cmd`; -my $current_lsn_cmd = 'pg_xlogdump -f ' . $primary_wal_file_path . ' | grep "xlog switch" | awk \'{print $10}\' | sed "s/,//"'; +my $current_lsn_cmd = 'pg_xlogdump ' . $primary_wal_file_path . ' | grep "xlog switch" | awk \'{print $10}\' | sed "s/,//"'; my $current_lsn = `$current_lsn_cmd`; chomp($current_lsn); From 881875322e3571b4eb5a332c45db92a4d8a9007b Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Thu, 26 Oct 2023 13:41:39 +0800 Subject: [PATCH 055/106] [6X] Post fix for ABI checking pipeline. (#16651) - The symbol 'ConfigureNamesInt_gp' is changed due to 11d915a908117278a0c2edcc92a63f8826b5bb07[^1]. It's Greenplum specific GUC and it will not be referenced by other programs. So, it's safe to ignore it. - We should always print the HTML report to stdout, even if the job is failed. - Enable running ABI tests for pull requests. 
[^1]: https://github.com/greenplum-db/gpdb/commit/11d915a908117278a0c2edcc92a63f8826b5bb07 --- .abi-check/6.25.3/postgres.symbols.ignore | 1 + .github/workflows/greenplum-abi-tests.yml | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.abi-check/6.25.3/postgres.symbols.ignore b/.abi-check/6.25.3/postgres.symbols.ignore index f824cd0a7de6..de1b4294eed9 100644 --- a/.abi-check/6.25.3/postgres.symbols.ignore +++ b/.abi-check/6.25.3/postgres.symbols.ignore @@ -1 +1,2 @@ DummySymbol +ConfigureNamesInt_gp diff --git a/.github/workflows/greenplum-abi-tests.yml b/.github/workflows/greenplum-abi-tests.yml index 29e3adc2f787..7bde532b21a5 100644 --- a/.github/workflows/greenplum-abi-tests.yml +++ b/.github/workflows/greenplum-abi-tests.yml @@ -2,6 +2,14 @@ name: Greenplum ABI Tests on: workflow_dispatch: + pull_request: + paths: + - 'concourse/scripts/**' + - 'src/**' + - '.github/workflows/**' + - '.github/scripts/**' + - '.abi-check/**' + push: branches: - 6X_STABLE @@ -157,7 +165,9 @@ jobs: -old build-baseline/postgres*.abi \ -new build-latest/postgres*.abi - ## Dump the reports to stdout. + - name: Print out ABI report + if: always() + run: | lynx -dump $(find compat_reports/ | grep html) - name: Upload ABI Comparison From cf9cbf844c61d01f345c42885a42104c1981e9bd Mon Sep 17 00:00:00 2001 From: David Yozie Date: Thu, 26 Oct 2023 15:00:55 -0700 Subject: [PATCH 056/106] Docs: Remove ssl_renegotiation_limit info since it's not available in v6 --- gpdb-doc/markdown/security-guide/topics/Authenticate.html.md | 1 - 1 file changed, 1 deletion(-) diff --git a/gpdb-doc/markdown/security-guide/topics/Authenticate.html.md b/gpdb-doc/markdown/security-guide/topics/Authenticate.html.md index 2f00b575ef48..ba2e7e1d5b57 100644 --- a/gpdb-doc/markdown/security-guide/topics/Authenticate.html.md +++ b/gpdb-doc/markdown/security-guide/topics/Authenticate.html.md @@ -369,7 +369,6 @@ For more details on how to create your server private key and certificate, refer The following Server settings need to be specified in the `postgresql.conf` configuration file: - `ssl` *boolean*. Enables SSL connections. -- `ssl_renegotiation_limit` *integer*. Specifies the data limit before key renegotiation. - `ssl_ciphers` *string*. Configures the list SSL ciphers that are allowed. `ssl_ciphers` *overrides* any ciphers string specified in `/etc/openssl.cnf`. The default value `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH` enables all ciphers except for ADH, LOW, EXP, and MD5 ciphers, and prioritizes ciphers by their strength.
      > **Note** With TLS 1.2 some ciphers in MEDIUM and HIGH strength still use NULL encryption \(no encryption for transport\), which the default `ssl_ciphers` string allows. To bypass NULL ciphers with TLS 1.2 use a string such as `TLSv1.2:!eNULL:!aNULL`. From a599c448b7aa09c61486bf8d2b222cddbfe51a26 Mon Sep 17 00:00:00 2001 From: Ning Wu Date: Mon, 30 Oct 2023 11:17:32 +0800 Subject: [PATCH 057/106] Only create the ext/python3.9 folder when vendoring python3.9 Authored-by: Ning Wu --- gpAux/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpAux/Makefile b/gpAux/Makefile index 0397e9020608..c74303f2533e 100644 --- a/gpAux/Makefile +++ b/gpAux/Makefile @@ -602,8 +602,8 @@ copylibs : echo "INFO: Python not found on this platform, $(BLD_ARCH), not copying it into the GPDB package."; \ fi # Create the python3.9 directory to flag to build scripts that python has been handled - mkdir -p $(INSTLOC)/ext/python3.9 @if [ ! -z "$(PYTHONHOME39)" ]; then \ + mkdir -p $(INSTLOC)/ext/python3.9; \ echo "Copying python3.9, ., from $(PYTHONHOME39) into $(INSTLOC)/ext/python3.9..."; \ (cd $(PYTHONHOME39) && tar cf - .) | (cd $(INSTLOC)/ext/python3.9/ && tar xpf -); \ echo "...DONE"; \ From cfde5ffdd4e8ffb84e08f936bb81d43c308157b6 Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Mon, 30 Oct 2023 23:59:40 +0000 Subject: [PATCH 058/106] Docs: new guc gp_interconnect_cursor_ic_table_size (#16637) * Docs: new guc gp_interconnect_cursor_ic_table_size * Mod after review --------- Co-authored-by: Mireia Perez Fuster --- .../markdown/ref_guide/config_params/guc-list.html.md | 8 ++++++++ .../ref_guide/config_params/guc_category-list.html.md | 1 + 2 files changed, 9 insertions(+) diff --git a/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md b/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md index 7fd95f055555..5c5e287fea10 100644 --- a/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md +++ b/gpdb-doc/markdown/ref_guide/config_params/guc-list.html.md @@ -1085,6 +1085,14 @@ communication. In these cases, you must configure this parameter to use a wildca |-----------|-------|-------------------| |wildcard,unicast|wildcard|local, system, reload| +##
      gp_interconnect_cursor_ic_table_size + +Specifies the size of the Cursor History Table for UDP interconnect. Although it is not usually necessary, you may increase it if running a user-defined function which contains many concurrent cursor queries hangs. The default value is 128. + +|Value Range|Default|Set Classifications| +|-----------|-------|-------------------| +|128-102400|128|master, session, reload| + ## gp_interconnect_debug_retry_interval Specifies the interval, in seconds, to log Greenplum Database interconnect debugging messages when the server configuration parameter [gp\_log\_interconnect](#gp_log_interconnect) is set to `DEBUG`. The default is 10 seconds. diff --git a/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md b/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md index 42e15321a9d5..6f9e64816d65 100644 --- a/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md +++ b/gpdb-doc/markdown/ref_guide/config_params/guc_category-list.html.md @@ -475,6 +475,7 @@ The parameters in this topic control the configuration of the Greenplum Database ### Interconnect Configuration Parameters - [gp_interconnect_address_type](guc-list.html#gp_interconnect_address_type) +- [gp_interconnect_cursor_ic_table_size](guc-list.html#gp_interconnect_cursor_ic_table_size) - [gp_interconnect_fc_method](guc-list.html#gp_interconnect_fc_method) - [gp_interconnect_proxy_addresses](guc-list.html#gp_interconnect_proxy_addresses) - [gp_interconnect_queue_depth](guc-list.html#gp_interconnect_queue_depth) From 4b30d8e91afba2ec0cefa9982b1c34b713b3e85e Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Tue, 31 Oct 2023 10:49:11 +0800 Subject: [PATCH 059/106] add duration if query is canceled (#16572) This is the backport of #16557 Add the duration time into "canceling statement due to user request" to help the user debug when canceling the query. The log message will be 2023-10-10 03:32:55.325528 UTC,"smart","postgres",p463714,th1746153216,"[local]",,2023-10-10 03:32:01 UTC, 0,con32,cmd9,seg-1,,,,sx1,"ERROR","57014","canceling statement due to user request, duration:4256.617",,,,,, "select * from t, t as t1, t as t2;",0,,"postgres.c",4053, Not change the log schema. 
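A quick way to observe the new message (an illustrative sketch, not part of the patch; it assumes a superuser session with duration reporting enabled):

```sql
-- Illustrative only: enable per-statement duration reporting (superuser-settable),
-- start a long-running query, then cancel it from another session.
SET log_min_duration_statement = 0;
SELECT pg_sleep(600);   -- cancel via pg_cancel_backend(<pid>) or Ctrl-C
-- The server log then carries the elapsed time in the cancel message, e.g.:
-- ERROR:  canceling statement due to user request, duration:4256.617
```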
--- src/backend/tcop/postgres.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index ac91327b84dd..83307eba0f93 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -4006,9 +4006,25 @@ ProcessInterrupts(const char* filename, int lineno) (errcode(ERRCODE_GP_OPERATION_CANCELED), errmsg("canceling MPP operation%s", cancel_msg_str.data))); else - ereport(ERROR, - (errcode(ERRCODE_QUERY_CANCELED), - errmsg("canceling statement due to user request%s", cancel_msg_str.data))); + { + char msec_str[32]; + + switch (check_log_duration(msec_str, false)) + { + case 0: + ereport(ERROR, + (errcode(ERRCODE_QUERY_CANCELED), + errmsg("canceling statement due to user request%s", cancel_msg_str.data))); + break; + case 1: + case 2: + ereport(ERROR, + (errcode(ERRCODE_QUERY_CANCELED), + errmsg("canceling statement due to user request%s, duration:%s", + cancel_msg_str.data, msec_str))); + break; + } + } } } /* If we get here, do nothing (probably, QueryCancelPending was reset) */ From c5cc5bf016fe255bdb0142abeba5ef5cd1c927b0 Mon Sep 17 00:00:00 2001 From: Ning Wu Date: Tue, 31 Oct 2023 10:43:16 +0800 Subject: [PATCH 060/106] Remove the pass condition for icw_extensions_gpcloud job The job failed for a long time due to the AWS credential issue and not resolved yet. In order to unblock the release candidate to be generated, remove the pass condition for this job. Authored-by: Ning Wu --- concourse/pipelines/gpdb_6X_STABLE-generated.yml | 4 +--- concourse/pipelines/templates/gpdb-tpl.yml | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/concourse/pipelines/gpdb_6X_STABLE-generated.yml b/concourse/pipelines/gpdb_6X_STABLE-generated.yml index 41007e8ba926..125c53216214 100644 --- a/concourse/pipelines/gpdb_6X_STABLE-generated.yml +++ b/concourse/pipelines/gpdb_6X_STABLE-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpdb-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -t prod). 
## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2023-10-24 20:14:15.500156 +## Generated by gen_pipeline.py at: 2023-10-31 10:46:55.243925 ## Template file: gpdb-tpl.yml ## OS Types: ## Test Sections: ['icw', 'cli', 'aa', 'release'] @@ -1436,7 +1436,6 @@ jobs: - unit_tests_gporca_rocky8 - gpdb_pitr_rocky8 - interconnect_rocky8 - - icw_extensions_gpcloud_rocky8 - gpexpand_rocky8 - pg_upgrade_rocky8 - get: gpdb_src @@ -1454,7 +1453,6 @@ jobs: - unit_tests_gporca_rocky8 - gpdb_pitr_rocky8 - interconnect_rocky8 - - icw_extensions_gpcloud_rocky8 - gpexpand_rocky8 - pg_upgrade_rocky8 trigger: true diff --git a/concourse/pipelines/templates/gpdb-tpl.yml b/concourse/pipelines/templates/gpdb-tpl.yml index 5fd76592552e..447732c31752 100644 --- a/concourse/pipelines/templates/gpdb-tpl.yml +++ b/concourse/pipelines/templates/gpdb-tpl.yml @@ -1811,7 +1811,6 @@ jobs: - unit_tests_gporca_[[ os_type ]] - gpdb_pitr_[[ os_type ]] - interconnect_[[ os_type ]] - - icw_extensions_gpcloud_[[ os_type ]] - gpexpand_[[ os_type ]] - pg_upgrade_[[ os_type ]] - get: gpdb_src @@ -1835,7 +1834,6 @@ jobs: - unit_tests_gporca_[[ os_type ]] - gpdb_pitr_[[ os_type ]] - interconnect_[[ os_type ]] - - icw_extensions_gpcloud_[[ os_type ]] - gpexpand_[[ os_type ]] - pg_upgrade_[[ os_type ]] trigger: true From 2189e1f49c1dcb7d404a226a45b7f76a17ac5a65 Mon Sep 17 00:00:00 2001 From: "Kevin.wyh" Date: Tue, 31 Oct 2023 15:38:53 +0800 Subject: [PATCH 061/106] Fix ORCA producing incorrect plan when handling SEMI join with RANDOM distributed table (greenplum-db#16611) (#16654) When ORCA transforms a SEMI join to an INNER join, a LogicalGbAgg is introduced in the inner side of the INNER join to remove duplicate inner keys. Subsequently, this join is attempted to be transformed into a LogicalIndexApply. ORCA determines if this transformation can be done by checking if the inner side's grouping columns contain all distribution columns. However, when the inner side's distribution columns come from a RANDOM table, the checking always evaluates to true and allows the transform to proceed. But RANDOM distribution cannot satisfy this transformation because the same 'distribution' key may appear in different segments. This commit resolves the issue by adding a check for the size of RANDOM distribution columns and forbidding the illegal transform. 
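A condensed reproducer, distilled from the regression test added below (sketch only; the full version with replicated and partitioned variants follows in subselect_gp.sql):

```sql
-- Semi-join whose inner side is a RANDOM-distributed table with an index on
-- the join key (the shape that previously allowed the bad index-apply plan).
create table table_left  (l1 int, l2 int) distributed by (l1);
create table table_right (r1 int, r2 int) distributed randomly;
create index table_right_idx on table_right(r1);
insert into table_left values (1, 1);
insert into table_right select 1, 1 from generate_series(1, 100);
-- Duplicate r1 = 1 rows can land on different segments under RANDOM
-- distribution, so the per-segment de-duplication assumed by the
-- join-to-index-apply transform does not hold and it must be rejected.
explain select * from table_left where exists
    (select 1 from table_right where l1 = r1);
```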
Co-authored-by: wuyuhao28 --- .../xforms/CXformJoin2IndexApplyGeneric.cpp | 17 +- src/test/regress/expected/subselect_gp.out | 170 ++++++++++++++++++ .../expected/subselect_gp_optimizer.out | 170 ++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 46 +++++ 4 files changed, 401 insertions(+), 2 deletions(-) diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformJoin2IndexApplyGeneric.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformJoin2IndexApplyGeneric.cpp index 4221f74e0907..83d5094b1d12 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXformJoin2IndexApplyGeneric.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXformJoin2IndexApplyGeneric.cpp @@ -266,9 +266,12 @@ CXformJoin2IndexApplyGeneric::Transform(CXformContext *pxfctxt, pexprGet = pexprCurrInnerChild; if (NULL != groupingColsToCheck.Value() && - !groupingColsToCheck->ContainsAll(distributionCols)) + (!groupingColsToCheck->ContainsAll(distributionCols) || + ptabdescInner->GetRelDistribution() == + IMDRelation::EreldistrRandom)) { - // the grouping columns are not a superset of the distribution columns + // the grouping columns are not a superset of the distribution columns, + // or distribution columns are empty when the table is randomly distributed return; } } @@ -281,6 +284,16 @@ CXformJoin2IndexApplyGeneric::Transform(CXformContext *pxfctxt, ptabdescInner = popDynamicGet->Ptabdesc(); distributionCols = popDynamicGet->PcrsDist(); pexprGet = pexprCurrInnerChild; + + if (NULL != groupingColsToCheck.Value() && + (!groupingColsToCheck->ContainsAll(distributionCols) || + ptabdescInner->GetRelDistribution() == + IMDRelation::EreldistrRandom)) + { + // the grouping columns are not a superset of the distribution columns, + // or distribution columns are empty when the table is randomly distributed + return; + } } break; diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index cd15afff0f1a..79b34b08e782 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3146,3 +3146,173 @@ SELECT c FROM t0; Optimizer: Postgres query optimizer (16 rows) +-- +-- Test case for ORCA semi join with random table +-- See https://github.com/greenplum-db/gpdb/issues/16611 +-- +--- case for random distribute +create table table_left (l1 int, l2 int) distributed by (l1); +create table table_right (r1 int, r2 int) distributed randomly; +create index table_right_idx on table_right(r1); +insert into table_left values (1,1); +insert into table_right select i, i from generate_series(1, 300) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +--- make sure the same value (1,1) rows are inserted into different segments +select count(distinct gp_segment_id) > 1 from table_right where r1 = 1; + ?column? 
+---------- + t +(1 row) + +analyze table_left; +analyze table_right; +-- two types of semi join tests +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for replicate distribute +alter table table_right set distributed replicated; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(9 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(9 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for partition table with random distribute +drop table table_right; +create table table_right (r1 int, r2 int) distributed randomly partition by range (r1) ( start (0) end (300) every (100)); +NOTICE: CREATE TABLE will create partition "table_right_1_prt_1" for table "table_right" +NOTICE: CREATE TABLE will create partition "table_right_1_prt_2" for table "table_right" +NOTICE: CREATE TABLE will create partition "table_right_1_prt_3" for table "table_right" +create index table_right_idx on table_right(r1); +insert into table_right select i, i from generate_series(1, 299) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +analyze table_right; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right_1_prt_1.r1 = 
table_left.l1) + -> HashAggregate + Group Key: table_right_1_prt_1.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right_1_prt_1.r1 + -> Append + -> Seq Scan on table_right_1_prt_1 + -> Seq Scan on table_right_1_prt_2 + -> Seq Scan on table_right_1_prt_3 + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(14 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right_1_prt_1.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right_1_prt_1.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right_1_prt_1.r1 + -> Append + -> Seq Scan on table_right_1_prt_1 + -> Seq Scan on table_right_1_prt_2 + -> Seq Scan on table_right_1_prt_3 + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(14 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +-- clean up +drop table table_left; +drop table table_right; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index a5493728802c..9357f32f8c9c 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3287,3 +3287,173 @@ SELECT c FROM t0; Optimizer: Postgres query optimizer (16 rows) +-- +-- Test case for ORCA semi join with random table +-- See https://github.com/greenplum-db/gpdb/issues/16611 +-- +--- case for random distribute +create table table_left (l1 int, l2 int) distributed by (l1); +create table table_right (r1 int, r2 int) distributed randomly; +create index table_right_idx on table_right(r1); +insert into table_left values (1,1); +insert into table_right select i, i from generate_series(1, 300) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +--- make sure the same value (1,1) rows are inserted into different segments +select count(distinct gp_segment_id) > 1 from table_right where r1 = 1; + ?column? 
+---------- + t +(1 row) + +analyze table_left; +analyze table_right; +-- two types of semi join tests +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + Filter: (NOT (l1 IS NULL)) + -> Hash + -> Result + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for replicate distribute +alter table table_right set distributed replicated; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 1:1 (slice2; segments: 1) + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice1; segments: 3) + -> Seq Scan on table_left + Filter: (NOT (l1 IS NULL)) + -> GroupAggregate + Group Key: table_right.r1 + -> Result + -> Index Scan using table_right_idx on table_right + Index Cond: (r1 = table_left.l1) + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------- + Gather Motion 1:1 (slice2; segments: 1) + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice1; segments: 3) + -> Seq Scan on table_left + -> GroupAggregate + Group Key: table_right.r1 + -> Index Scan using table_right_idx on table_right + Index Cond: (r1 = table_left.l1) + Optimizer: Pivotal Optimizer (GPORCA) +(10 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for partition table with random distribute +drop table table_right; +create table table_right (r1 int, r2 int) distributed randomly partition by range (r1) ( start (0) end (300) every (100)); +NOTICE: CREATE TABLE will create partition "table_right_1_prt_1" for table "table_right" +NOTICE: CREATE TABLE will create partition "table_right_1_prt_2" for table "table_right" +NOTICE: CREATE TABLE will create partition "table_right_1_prt_3" for table "table_right" +create index table_right_idx on table_right(r1); +insert into table_right select i, i from generate_series(1, 299) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +analyze table_right; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 
= r1); + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + Filter: (NOT (l1 IS NULL)) + -> Hash + -> Result + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Sequence + -> Partition Selector for table_right (dynamic scan id: 1) + Partitions selected: 3 (out of 3) + -> Dynamic Seq Scan on table_right (dynamic scan id: 1) + Optimizer: Pivotal Optimizer (GPORCA) +(14 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +--------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Sequence + -> Partition Selector for table_right (dynamic scan id: 1) + Partitions selected: 3 (out of 3) + -> Dynamic Seq Scan on table_right (dynamic scan id: 1) + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +-- clean up +drop table table_left; +drop table table_right; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index cc592c611762..6d66f65b2c1e 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1230,3 +1230,49 @@ explain (COSTS OFF) with t0 AS ( JOIN s as t ON true ) SELECT c FROM t0; + +-- +-- Test case for ORCA semi join with random table +-- See https://github.com/greenplum-db/gpdb/issues/16611 +-- +--- case for random distribute +create table table_left (l1 int, l2 int) distributed by (l1); +create table table_right (r1 int, r2 int) distributed randomly; +create index table_right_idx on table_right(r1); +insert into table_left values (1,1); +insert into table_right select i, i from generate_series(1, 300) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; + +--- make sure the same value (1,1) rows are inserted into different segments +select count(distinct gp_segment_id) > 1 from table_right where r1 = 1; +analyze table_left; +analyze table_right; + +-- two types of semi join tests +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); +select * from table_left where exists (select 1 from table_right where l1 = r1); +explain (costs off) select * from table_left where l1 in (select r1 from table_right); +select * from table_left where exists (select 1 from table_right where l1 = r1); + +--- case for replicate distribute +alter table table_right set distributed replicated; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); +select * from table_left where exists (select 1 from table_right where l1 = r1); +explain (costs off) select * from table_left where l1 in (select r1 from table_right); +select * from table_left where exists (select 1 from table_right where l1 = r1); + +--- case for partition table with random distribute +drop table table_right; +create table table_right (r1 int, r2 int) distributed randomly partition by 
range (r1) ( start (0) end (300) every (100)); +create index table_right_idx on table_right(r1); +insert into table_right select i, i from generate_series(1, 299) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +analyze table_right; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); +select * from table_left where exists (select 1 from table_right where l1 = r1); +explain (costs off) select * from table_left where l1 in (select r1 from table_right); +select * from table_left where exists (select 1 from table_right where l1 = r1); + +-- clean up +drop table table_left; +drop table table_right; From e94e33cad73d8328f00be38cbc922adaf3fbf61a Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Tue, 24 Oct 2023 09:09:25 -0700 Subject: [PATCH 062/106] Allow absolute path for pg_file_rename If caller indicates so. Only for superuser. This is to support utility function gp_move_orphaned_files which can move files between absolute paths. So far only pg_file_rename requires it. P.S. in 7X superuser can do the same but it is achieved via a new role 'pg_read_server_files' which is not in 6X. So adding the superuser() check instead. --- src/backend/utils/adt/genfile.c | 35 ++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index 0994a6157dd1..53ce9a74b301 100644 --- a/src/backend/utils/adt/genfile.c +++ b/src/backend/utils/adt/genfile.c @@ -59,7 +59,7 @@ typedef struct * absolute paths that match DataDir or Log_directory. */ static char * -convert_and_check_filename(text *arg) +convert_and_check_filename(text *arg, bool abs_ok) { char *filename; @@ -68,6 +68,19 @@ convert_and_check_filename(text *arg) if (is_absolute_path(filename)) { + /* + * Allow absolute path if caller indicates so. Only for superuser. + * This is to support utility function gp_move_orphaned_files which + * can move files between absolute paths. So far only pg_file_rename + * requires abs_ok=true. + * + * P.S. in 7X superuser can do the same but it is achieved via a new + * role 'pg_read_server_files' which is not in 6X. So adding the + * superuser() check instead. + */ + if (abs_ok && superuser()) + return filename; + /* Disallow '/a/b/data/..' 
*/ if (path_contains_parent_reference(filename)) ereport(ERROR, @@ -237,7 +250,7 @@ pg_read_file(PG_FUNCTION_ARGS) if (PG_NARGS() >= 4) missing_ok = PG_GETARG_BOOL(3); - filename = convert_and_check_filename(filename_t); + filename = convert_and_check_filename(filename_t, false); result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok); if (result) @@ -278,7 +291,7 @@ pg_read_binary_file(PG_FUNCTION_ARGS) if (PG_NARGS() >= 4) missing_ok = PG_GETARG_BOOL(3); - filename = convert_and_check_filename(filename_t); + filename = convert_and_check_filename(filename_t, false); result = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok); @@ -345,7 +358,7 @@ pg_stat_file(PG_FUNCTION_ARGS) if (PG_NARGS() == 2) missing_ok = PG_GETARG_BOOL(1); - filename = convert_and_check_filename(filename_t); + filename = convert_and_check_filename(filename_t, false); if (stat(filename, &fst) < 0) { @@ -445,7 +458,7 @@ pg_ls_dir(PG_FUNCTION_ARGS) oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); fctx = palloc(sizeof(directory_fctx)); - fctx->location = convert_and_check_filename(PG_GETARG_TEXT_P(0)); + fctx->location = convert_and_check_filename(PG_GETARG_TEXT_P(0), false); fctx->include_dot_dirs = include_dot_dirs; fctx->dirdesc = AllocateDir(fctx->location); @@ -512,7 +525,7 @@ pg_file_write(PG_FUNCTION_ARGS) requireSuperuser(); - filename = convert_and_check_filename(PG_GETARG_TEXT_P(0)); + filename = convert_and_check_filename(PG_GETARG_TEXT_P(0), false); data = PG_GETARG_TEXT_P(1); if (!PG_GETARG_BOOL(2)) @@ -563,12 +576,12 @@ pg_file_rename(PG_FUNCTION_ARGS) if (PG_ARGISNULL(0) || PG_ARGISNULL(1)) PG_RETURN_NULL(); - fn1 = convert_and_check_filename(PG_GETARG_TEXT_P(0)); - fn2 = convert_and_check_filename(PG_GETARG_TEXT_P(1)); + fn1 = convert_and_check_filename(PG_GETARG_TEXT_P(0), true); + fn2 = convert_and_check_filename(PG_GETARG_TEXT_P(1), true); if (PG_ARGISNULL(2)) fn3 = 0; else - fn3 = convert_and_check_filename(PG_GETARG_TEXT_P(2)); + fn3 = convert_and_check_filename(PG_GETARG_TEXT_P(2), true); if (access(fn1, W_OK) < 0) { @@ -647,7 +660,7 @@ pg_file_unlink(PG_FUNCTION_ARGS) requireSuperuser(); - filename = convert_and_check_filename(PG_GETARG_TEXT_P(0)); + filename = convert_and_check_filename(PG_GETARG_TEXT_P(0), false); if (access(filename, W_OK) < 0) { @@ -818,7 +831,7 @@ pg_file_length(PG_FUNCTION_ARGS) requireSuperuser(); - filename = convert_and_check_filename(filename_t); + filename = convert_and_check_filename(filename_t, false); if (stat(filename, &fst) < 0) ereport(ERROR, From 86b688e609b6b047d6ddf50a7de5b3b111f5f7b6 Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Tue, 24 Oct 2023 08:52:13 -0700 Subject: [PATCH 063/106] UDF gp_move_orphaned_files to move orphaned files Provide a superuser-only UDF gp_move_orphaned_files in gp_check_functions extension to move orphaned files found by gp_check_orphaned_files into a location specified by user, which can be removed/backup by the user later. gp_move_orphaned_files(target_directory text) After moving, the file will be renamed to something that indicates its original location in the data directory. For example, say the file '12345' in the default tablespace is orphaned on primary segment 2, it will be moved like this: Original location: /base/13700/12345 After moving: /seg2_base_13700_12345 The function will return each moved file's before/after paths, and whether the move succeeded. 
E.g.: postgres=# select * from gp_move_orphaned_files('/home/csteam/workspace/gpdb7/files'); gp_segment_id | move_success | oldpath | newpath ---------------+--------------+----------------------------+----------------------------------- -1 | t | /data_dir/base/13715/99999 | /target_dir/seg-1_base_13715_99999 1 | t | /data_dir/base/13715/99999 | /target_dir/seg1_base_13715_99999 2 | t | /data_dir/base/13715/99999 | /target_dir/seg2_base_13715_99999 (3 rows) Note that the new version is a two-number version 1.1 instead of three-number 1.0.1, since it does not seem to make sense to have a three-number version in the first place. We cannot change the existing so just make future versions correct. --- gpcontrib/gp_check_functions/Makefile | 2 +- .../gp_check_functions--1.0.0--1.1.sql | 179 ++++++++++++++++++ ...-1.0.0.sql => gp_check_functions--1.1.sql} | 100 +++++++++- .../gp_check_functions.control | 2 +- src/test/regress/input/gp_check_files.source | 67 ++++++- src/test/regress/output/gp_check_files.source | 90 ++++++++- 6 files changed, 425 insertions(+), 15 deletions(-) create mode 100644 gpcontrib/gp_check_functions/gp_check_functions--1.0.0--1.1.sql rename gpcontrib/gp_check_functions/{gp_check_functions--1.0.0.sql => gp_check_functions--1.1.sql} (78%) diff --git a/gpcontrib/gp_check_functions/Makefile b/gpcontrib/gp_check_functions/Makefile index 3052b65bb818..ec3711801e53 100644 --- a/gpcontrib/gp_check_functions/Makefile +++ b/gpcontrib/gp_check_functions/Makefile @@ -1,5 +1,5 @@ EXTENSION = gp_check_functions -DATA = gp_check_functions--1.0.0.sql +DATA = gp_check_functions--1.1.sql gp_check_functions--1.0.0--1.1.sql MODULES = gp_check_functions # REGRESS testing is covered by the main suite test 'gp_check_files' as we need the custom tablespace directory support diff --git a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0--1.1.sql b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0--1.1.sql new file mode 100644 index 000000000000..03ff8a766987 --- /dev/null +++ b/gpcontrib/gp_check_functions/gp_check_functions--1.0.0--1.1.sql @@ -0,0 +1,179 @@ +/* gpcontrib/gp_check_functions/gp_check_functions--1.0.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION gp_check_functions UPDATE TO '1.1'" to load this file. \quit + +-- Check orphaned data files on default and user tablespaces. +-- Compared to the previous version, add gp_segment_id to show which segment it is being executed. +CREATE OR REPLACE VIEW __check_orphaned_files AS +SELECT f1.tablespace, f1.filename, f1.filepath, pg_catalog.gp_execution_segment() AS gp_segment_id +from __get_exist_files f1 +LEFT JOIN __get_expect_files f2 +ON f1.tablespace = f2.tablespace AND substring(f1.filename from '[0-9]+') = f2.filename +WHERE f2.tablespace IS NULL + AND f1.filename SIMILAR TO '[0-9]+(\.)?(\_)?%'; + +-- Function to check orphaned files. +-- Compared to the previous version, adjust the SELECT ... FROM __check_orphaned_files since we added new column to it. +-- NOTE: this function does the same lock and checks as gp_check_functions.gp_move_orphaned_files(), and it needs to be that way. 
+CREATE OR REPLACE FUNCTION __gp_check_orphaned_files_func() +RETURNS TABLE ( + gp_segment_id int, + tablespace oid, + filename text, + filepath text +) +LANGUAGE plpgsql AS $$ +BEGIN + BEGIN + -- lock pg_class so that no one will be adding/altering relfilenodes + LOCK TABLE pg_class IN SHARE MODE NOWAIT; + + -- make sure no other active/idle transaction is running + IF EXISTS ( + SELECT 1 + FROM (SELECT * from pg_stat_activity UNION ALL SELECT * FROM gp_dist_random('pg_stat_activity'))q + WHERE + sess_id <> -1 + AND sess_id <> current_setting('gp_session_id')::int -- Exclude the current session + ) THEN + RAISE EXCEPTION 'There is a client session running on one or more segment. Aborting...'; + END IF; + + -- force checkpoint to make sure we do not include files that are normally pending delete + CHECKPOINT; + + RETURN QUERY + SELECT v.gp_segment_id, v.tablespace, v.filename, v.filepath + FROM gp_dist_random('__check_orphaned_files') v + UNION ALL + SELECT -1 AS gp_segment_id, v.tablespace, v.filename, v.filepath + FROM __check_orphaned_files v; + EXCEPTION + WHEN lock_not_available THEN + RAISE EXCEPTION 'cannot obtain SHARE lock on pg_class'; + WHEN OTHERS THEN + RAISE; + END; + + RETURN; +END; +$$; + +-- Function to move orphaned files to a designated location. +-- NOTE: this function does the same lock and checks as gp_move_orphaned_files(), +-- and it needs to be that way. +CREATE OR REPLACE FUNCTION __gp_check_orphaned_files_func() +RETURNS TABLE ( + gp_segment_id int, + tablespace oid, + filename text, + filepath text +) +LANGUAGE plpgsql AS $$ +BEGIN + BEGIN + -- lock pg_class so that no one will be adding/altering relfilenodes + LOCK TABLE pg_class IN SHARE MODE NOWAIT; + + -- make sure no other active/idle transaction is running + IF EXISTS ( + SELECT 1 + FROM (SELECT * from pg_stat_activity UNION ALL SELECT * FROM gp_dist_random('pg_stat_activity'))q + WHERE + sess_id <> -1 + AND sess_id <> current_setting('gp_session_id')::int -- Exclude the current session + ) THEN + RAISE EXCEPTION 'There is a client session running on one or more segment. Aborting...'; + END IF; + + -- force checkpoint to make sure we do not include files that are normally pending delete + CHECKPOINT; + + RETURN QUERY + SELECT v.gp_segment_id, v.tablespace, v.filename, v.filepath + FROM gp_dist_random('__check_orphaned_files') v + UNION ALL + SELECT -1 AS gp_segment_id, v.tablespace, v.filename, v.filepath + FROM __check_orphaned_files v; + EXCEPTION + WHEN lock_not_available THEN + RAISE EXCEPTION 'cannot obtain SHARE lock on pg_class'; + WHEN OTHERS THEN + RAISE; + END; + + RETURN; +END; +$$; + +GRANT EXECUTE ON FUNCTION __gp_check_orphaned_files_func() TO public; + +-- UDF to move orphaned files to a designated location +-- NOTE: this function does the same lock and checks as __gp_check_orphaned_files_func(), +-- and it needs to be that way. 
+CREATE FUNCTION gp_move_orphaned_files(target_location TEXT) RETURNS TABLE ( + gp_segment_id INT, + move_success BOOL, + oldpath TEXT, + newpath TEXT +) +LANGUAGE plpgsql AS $$ +BEGIN + -- lock pg_class so that no one will be adding/altering relfilenodes + LOCK TABLE pg_class IN SHARE MODE NOWAIT; + + -- make sure no other active/idle transaction is running + IF EXISTS ( + SELECT 1 + FROM (SELECT * from pg_stat_activity UNION ALL SELECT * FROM gp_dist_random('pg_stat_activity'))q + WHERE + sess_id <> -1 + AND sess_id <> current_setting('gp_session_id')::int -- Exclude the current session + ) THEN + RAISE EXCEPTION 'There is a client session running on one or more segment. Aborting...'; + END IF; + + -- force checkpoint to make sure we do not include files that are normally pending delete + CHECKPOINT; + + RETURN QUERY + SELECT + q.gp_segment_id, + q.move_success, + q.oldpath, + q.newpath + FROM ( + WITH OrphanedFiles AS ( + -- Coordinator + SELECT + o.gp_segment_id, + s.setting || '/' || o.filepath as oldpath, + target_location || '/seg' || o.gp_segment_id::text || '_' || REPLACE(o.filepath, '/', '_') as newpath + FROM __check_orphaned_files o, pg_settings s + WHERE s.name = 'data_directory' + UNION ALL + -- Segments + SELECT + o.gp_segment_id, + s.setting || '/' || o.filepath as oldpath, + target_location || '/seg' || o.gp_segment_id::text || '_' || REPLACE(o.filepath, '/', '_') as newpath + FROM gp_dist_random('__check_orphaned_files') o + JOIN (SELECT gp_execution_segment() as gp_segment_id, * FROM gp_dist_random('pg_settings')) s on o.gp_segment_id = s.gp_segment_id + WHERE s.name = 'data_directory' + ) + SELECT + OrphanedFiles.gp_segment_id, + OrphanedFiles.oldpath, + OrphanedFiles.newpath, + pg_file_rename(OrphanedFiles.oldpath, OrphanedFiles.newpath, NULL) AS move_success + FROM OrphanedFiles + ) q ORDER BY q.gp_segment_id, q.oldpath; +EXCEPTION + WHEN lock_not_available THEN + RAISE EXCEPTION 'cannot obtain SHARE lock on pg_class'; + WHEN OTHERS THEN + RAISE; +END; +$$; + diff --git a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql b/gpcontrib/gp_check_functions/gp_check_functions--1.1.sql similarity index 78% rename from gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql rename to gpcontrib/gp_check_functions/gp_check_functions--1.1.sql index ff5ff837665d..dce2ef3171c2 100644 --- a/gpcontrib/gp_check_functions/gp_check_functions--1.0.0.sql +++ b/gpcontrib/gp_check_functions/gp_check_functions--1.1.sql @@ -234,13 +234,14 @@ GRANT SELECT ON __get_expect_files_ext TO public; -- -------------------------------------------------------------------------------- CREATE OR REPLACE VIEW __check_orphaned_files AS -SELECT f1.tablespace, f1.filename, f1.filepath +SELECT f1.tablespace, f1.filename, f1.filepath, pg_catalog.gp_execution_segment() AS gp_segment_id from __get_exist_files f1 LEFT JOIN __get_expect_files f2 ON f1.tablespace = f2.tablespace AND substring(f1.filename from '[0-9]+') = f2.filename WHERE f2.tablespace IS NULL AND f1.filename SIMILAR TO '[0-9]+(\.)?(\_)?%'; + GRANT SELECT ON __check_orphaned_files TO public; -------------------------------------------------------------------------------- @@ -261,6 +262,8 @@ GRANT SELECT ON __check_orphaned_files TO public; -- -------------------------------------------------------------------------------- +-- NOTE: this function does the same lock and checks as gp_move_orphaned_files(), +-- and it needs to be that way. 
CREATE OR REPLACE FUNCTION __gp_check_orphaned_files_func() RETURNS TABLE ( gp_segment_id int, @@ -289,11 +292,11 @@ BEGIN CHECKPOINT; RETURN QUERY - SELECT pg_catalog.gp_execution_segment() AS gp_segment_id, * - FROM gp_dist_random('__check_orphaned_files') + SELECT v.gp_segment_id, v.tablespace, v.filename, v.filepath + FROM gp_dist_random('__check_orphaned_files') v UNION ALL - SELECT -1 AS gp_segment_id, * - FROM __check_orphaned_files; + SELECT -1 AS gp_segment_id, v.tablespace, v.filename, v.filepath + FROM __check_orphaned_files v; EXCEPTION WHEN lock_not_available THEN RAISE EXCEPTION 'cannot obtain SHARE lock on pg_class'; @@ -304,8 +307,94 @@ BEGIN RETURN; END; $$; + GRANT EXECUTE ON FUNCTION __gp_check_orphaned_files_func() TO public; +-------------------------------------------------------------------------------- +-- @function: +-- gp_move_orphaned_files +-- +-- @in: +-- target_location text - directory where we move the orphaned files to +-- +-- @out: +-- gp_segment_id int - segment content ID +-- move_success bool - whether the move attempt succeeded +-- oldpath text - filepath (name included) of the orphaned file before moving +-- newpath text - filepath (name included) of the orphaned file after moving +-- +-- @doc: +-- UDF to move orphaned files to a designated location +-- +-------------------------------------------------------------------------------- + +-- NOTE: this function does the same lock and checks as __gp_check_orphaned_files_func(), +-- and it needs to be that way. +CREATE FUNCTION gp_move_orphaned_files(target_location TEXT) RETURNS TABLE ( + gp_segment_id INT, + move_success BOOL, + oldpath TEXT, + newpath TEXT +) +LANGUAGE plpgsql AS $$ +BEGIN + -- lock pg_class so that no one will be adding/altering relfilenodes + LOCK TABLE pg_class IN SHARE MODE NOWAIT; + + -- make sure no other active/idle transaction is running + IF EXISTS ( + SELECT 1 + FROM (SELECT * from pg_stat_activity UNION ALL SELECT * FROM gp_dist_random('pg_stat_activity'))q + WHERE + sess_id <> -1 + AND sess_id <> current_setting('gp_session_id')::int -- Exclude the current session + ) THEN + RAISE EXCEPTION 'There is a client session running on one or more segment. 
Aborting...'; + END IF; + + -- force checkpoint to make sure we do not include files that are normally pending delete + CHECKPOINT; + + RETURN QUERY + SELECT + q.gp_segment_id, + q.move_success, + q.oldpath, + q.newpath + FROM ( + WITH OrphanedFiles AS ( + -- Coordinator + SELECT + o.gp_segment_id, + s.setting || '/' || o.filepath as oldpath, + target_location || '/seg' || o.gp_segment_id::text || '_' || REPLACE(o.filepath, '/', '_') as newpath + FROM __check_orphaned_files o, pg_settings s + WHERE s.name = 'data_directory' + UNION ALL + -- Segments + SELECT + o.gp_segment_id, + s.setting || '/' || o.filepath as oldpath, + target_location || '/seg' || o.gp_segment_id::text || '_' || REPLACE(o.filepath, '/', '_') as newpath + FROM gp_dist_random('__check_orphaned_files') o + JOIN (SELECT gp_execution_segment() as gp_segment_id, * FROM gp_dist_random('pg_settings')) s on o.gp_segment_id = s.gp_segment_id + WHERE s.name = 'data_directory' + ) + SELECT + OrphanedFiles.gp_segment_id, + OrphanedFiles.oldpath, + OrphanedFiles.newpath, + pg_file_rename(OrphanedFiles.oldpath, OrphanedFiles.newpath, NULL) AS move_success + FROM OrphanedFiles + ) q ORDER BY q.gp_segment_id, q.oldpath; +EXCEPTION + WHEN lock_not_available THEN + RAISE EXCEPTION 'cannot obtain SHARE lock on pg_class'; + WHEN OTHERS THEN + RAISE; +END; +$$; + -------------------------------------------------------------------------------- -- @view: -- __check_missing_files @@ -393,3 +482,4 @@ SELECT -1 AS gp_segment_id, * FROM __check_missing_files; -- not checking ext on coordinator GRANT SELECT ON gp_check_missing_files_ext TO public; + diff --git a/gpcontrib/gp_check_functions/gp_check_functions.control b/gpcontrib/gp_check_functions/gp_check_functions.control index 8fe18ab67cbd..84a55eea4825 100644 --- a/gpcontrib/gp_check_functions/gp_check_functions.control +++ b/gpcontrib/gp_check_functions/gp_check_functions.control @@ -1,5 +1,5 @@ # gp_check_functions extension comment = 'various GPDB helper views/functions' -default_version = '1.0.0' +default_version = '1.1' relocatable = true diff --git a/src/test/regress/input/gp_check_files.source b/src/test/regress/input/gp_check_files.source index b4969f509700..47d078bef04a 100644 --- a/src/test/regress/input/gp_check_files.source +++ b/src/test/regress/input/gp_check_files.source @@ -7,6 +7,12 @@ -- s/aocsseg_\d+/aocsseg_xxx/g -- m/aovisimap_\d+/ -- s/aovisimap_\d+/aovisimap_xxx/g +-- m/seg1_pg_tblspc_.*/ +-- s/seg1_pg_tblspc_.*/seg1_pg_tblspc_XXX/g +-- m/ERROR\: could not rename .*/ +-- s/ERROR\: could not rename .*/ERROR\: could not rename XXX/g +-- m/ERROR\: cannot rename .*/ +-- s/ERROR\: cannot rename .*/ERROR\: cannot rename XXX/g -- end_matchsubs create extension gp_check_functions; @@ -40,10 +46,15 @@ $$ LANGUAGE plpgsql; CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@'; set default_tablespace = checkfile_ts; --- create a table that we'll delete the files +-- create a table that we'll delete the files to test missing files. +-- this have to be created beforehand in order for the tablespace directories to be created. 
CREATE TABLE checkmissing_heap(a int, b int, c int); insert into checkmissing_heap select i,i,i from generate_series(1,100)i; +-- +-- Tests for orphaned files +-- + -- go to seg1's data directory for the tablespace we just created \cd @testtablespace@ select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset @@ -62,9 +73,57 @@ set client_min_messages = ERROR; select gp_segment_id, filename from run_orphaned_files_view(); reset client_min_messages; --- remove the orphaned files so not affect subsequent tests -\! rm 987654 -\! rm 987654.3 +-- test moving the orphaned files + +-- firstly, should not move anything if the target directory doesn't exist +select * from gp_move_orphaned_files('@testtablespace@/non_exist_dir'); +select gp_segment_id, filename from run_orphaned_files_view(); + +-- should also fail to move if no proper permission to the target directory +\! mkdir @testtablespace@/moving_orphaned_file_test +\! chmod 000 @testtablespace@/moving_orphaned_file_test +select * from gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test'); +select gp_segment_id, filename from run_orphaned_files_view(); + +-- should not allow non-superuser to run, +-- though it would complain as soon as non-superuser tries to lock pg_class in gp_move_orphaned_files +create role check_file_test_role nosuperuser; +set role = check_file_test_role; +select * from gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test'); +reset role; +drop role check_file_test_role; + +\! chmod 700 @testtablespace@/moving_orphaned_file_test +-- should correctly move the orphaned files, +-- filter out exact paths as that could vary +\a +select gp_segment_id, move_success, regexp_replace(oldpath, '^.*/(.+)$', '\1') as oldpath, regexp_replace(newpath, '^.*/(.+)$', '\1') as newpath +from gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test'); +\a + +-- The moved orphaned files are in the target directory tree with a name that indicates its original location in data directory +\cd @testtablespace@/moving_orphaned_file_test/ + +-- should see the orphaned files being moved +\! ls +-- no orphaned files can be found now +select gp_segment_id, filename from run_orphaned_files_view(); + +-- should not affect existing tables +select count(*) from checkmissing_heap; + +-- go back to the valid data directory +\cd @testtablespace@ +select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset +\cd :dbid +select get_tablespace_version_directory_name() as version_dir \gset +\cd :version_dir +select oid from pg_database where datname = current_database() \gset +\cd :oid + +-- +-- Tests for missing files +-- -- Now remove the data file for the table we just created. 
-- But check to see if the working directory is what we expect (under diff --git a/src/test/regress/output/gp_check_files.source b/src/test/regress/output/gp_check_files.source index 4d638b8c24db..105601328931 100644 --- a/src/test/regress/output/gp_check_files.source +++ b/src/test/regress/output/gp_check_files.source @@ -6,6 +6,12 @@ -- s/aocsseg_\d+/aocsseg_xxx/g -- m/aovisimap_\d+/ -- s/aovisimap_\d+/aovisimap_xxx/g +-- m/seg1_pg_tblspc_.*/ +-- s/seg1_pg_tblspc_.*/seg1_pg_tblspc_XXX/g +-- m/ERROR\: could not rename .*/ +-- s/ERROR\: could not rename .*/ERROR\: could not rename XXX/g +-- m/ERROR\: cannot rename .*/ +-- s/ERROR\: cannot rename .*/ERROR\: cannot rename XXX/g -- end_matchsubs create extension gp_check_functions; -- helper function to repeatedly run gp_check_orphaned_files for up to 10 minutes, @@ -35,9 +41,13 @@ $$ LANGUAGE plpgsql; -- we'll use a specific tablespace to test CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@'; set default_tablespace = checkfile_ts; --- create a table that we'll delete the files +-- create a table that we'll delete the files to test missing files. +-- this have to be created beforehand in order for the tablespace directories to be created. CREATE TABLE checkmissing_heap(a int, b int, c int); insert into checkmissing_heap select i,i,i from generate_series(1,100)i; +-- +-- Tests for orphaned files +-- -- go to seg1's data directory for the tablespace we just created \cd @testtablespace@ select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset @@ -59,9 +69,81 @@ select gp_segment_id, filename from run_orphaned_files_view(); (2 rows) reset client_min_messages; --- remove the orphaned files so not affect subsequent tests -\! rm 987654 -\! rm 987654.3 +-- test moving the orphaned files +-- firstly, should not move anything if the target directory doesn't exist +select * from gp_move_orphaned_files('@testtablespace@/non_exist_dir'); +ERROR: could not rename XXX +select gp_segment_id, filename from run_orphaned_files_view(); + gp_segment_id | filename +---------------+---------- + 1 | 987654.3 + 1 | 987654 +(2 rows) + +-- should also fail to move if no proper permission to the target directory +\! mkdir @testtablespace@/moving_orphaned_file_test +\! chmod 000 @testtablespace@/moving_orphaned_file_test +select * from gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test'); +ERROR: cannot rename XXX +CONTEXT: PL/pgSQL function gp_move_orphaned_files(text) line 20 at RETURN QUERY +select gp_segment_id, filename from run_orphaned_files_view(); + gp_segment_id | filename +---------------+---------- + 1 | 987654.3 + 1 | 987654 +(2 rows) + +-- should not allow non-superuser to run, +-- though it would complain as soon as non-superuser tries to lock pg_class in gp_move_orphaned_files +create role check_file_test_role nosuperuser; +set role = check_file_test_role; +select * from gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test'); +ERROR: permission denied for relation pg_class +CONTEXT: SQL statement "LOCK TABLE pg_class IN SHARE MODE NOWAIT" +PL/pgSQL function gp_move_orphaned_files(text) line 4 at SQL statement +reset role; +drop role check_file_test_role; +\! 
chmod 700 @testtablespace@/moving_orphaned_file_test +-- should correctly move the orphaned files, +-- filter out exact paths as that could vary +\a +select gp_segment_id, move_success, regexp_replace(oldpath, '^.*/(.+)$', '\1') as oldpath, regexp_replace(newpath, '^.*/(.+)$', '\1') as newpath +from gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test'); +gp_segment_id|move_success|oldpath|newpath +1|t|987654|seg1_pg_tblspc_17816_GPDB_6_302307241_17470_987654 +1|t|987654.3|seg1_pg_tblspc_17816_GPDB_6_302307241_17470_987654.3 +(2 rows) +\a +-- The moved orphaned files are in the target directory tree with a name that indicates its original location in data directory +\cd @testtablespace@/moving_orphaned_file_test/ +-- should see the orphaned files being moved +\! ls +seg1_pg_tblspc_37385_GPDB_6_302307241_37039_987654 +seg1_pg_tblspc_37385_GPDB_6_302307241_37039_987654.3 +-- no orphaned files can be found now +select gp_segment_id, filename from run_orphaned_files_view(); + gp_segment_id | filename +---------------+---------- +(0 rows) + +-- should not affect existing tables +select count(*) from checkmissing_heap; + count +------- + 100 +(1 row) + +-- go back to the valid data directory +\cd @testtablespace@ +select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset +\cd :dbid +select get_tablespace_version_directory_name() as version_dir \gset +\cd :version_dir +select oid from pg_database where datname = current_database() \gset +\cd :oid +-- +-- Tests for missing files +-- -- Now remove the data file for the table we just created. -- But check to see if the working directory is what we expect (under -- the test tablespace). Also just delete one and only one file that From 460b94790f75b9aec221c13683d6e29b62f6bf01 Mon Sep 17 00:00:00 2001 From: Wenlin Zhang Date: Wed, 1 Nov 2023 10:08:57 +0800 Subject: [PATCH 064/106] [6X]Utility tool for EL7 -> EL8 OS upgrade (#16312) * This PR adds a utility tool for EL7 -> EL8 OS upgrade. It is mainly due to collated data change after the glibc upgrade. https://wiki.postgresql.org/wiki/Locale_data_changes. The utility tool includes precheck-index, precheck-table, and migrate commands. 1. precheck-index is used for detecting all indexes under collated types(like text, varchar, char) that need to be reindexed (including catalog and user indexes). 2. precheck-table is used for detecting range-partitioned tables using those types in the partition key, and the rows are not in the expected partitions after the OS upgrade. 3. migrate is used to handle the above indexes and tables. If needs to reindex, then reindex. If needs repartition, then re-partition the tables. Also, for `precheck-table` it has one more option `--pre_upgrade` Before the OS upgrade, the `precheck-table` should use the option `--pre_upgrade`, it will dump all the range-partitioned tables using built-in collatable types in the partition key to the output file. After the OS upgrade, `precheck-table` doesn't need the option, and it will do the second check by using the GUC, to verify that all rows are still in the correct partitions and it will dump the check failed tables into the output file After the user precheck-table or precheck-index and dump the results into the specified out file (eg: table.out) Users can use the migrate command to do the postfix for migrate locale or run psql -f table.out directly. Modify the makefile and install the file into the @bindir@. 
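
For reference, the post-upgrade check that `precheck-table` performs for each leaf
partition boils down to the sketch below (the leaf name is an illustrative
placeholder only; the check relies on the gp_detect_data_correctness GUC used by the
utility):

    SET gp_detect_data_correctness = 1;
    -- re-routes the rows already stored in this leaf through partition selection;
    -- a row that no longer maps to this leaf raises
    -- "trying to insert row into wrong partition"
    INSERT INTO sales_1_prt_jan SELECT * FROM sales_1_prt_jan;
    SET gp_detect_data_correctness = 0;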
--- gpMgmt/bin/Makefile | 2 +- gpMgmt/bin/el8_migrate_locale/Makefile | 17 + gpMgmt/bin/el8_migrate_locale/README.md | 213 ++++++++ .../el8_migrate_locale/el8_migrate_locale.py | 517 ++++++++++++++++++ gpMgmt/bin/el8_migrate_locale/test.sql | 261 +++++++++ 5 files changed, 1009 insertions(+), 1 deletion(-) create mode 100644 gpMgmt/bin/el8_migrate_locale/Makefile create mode 100644 gpMgmt/bin/el8_migrate_locale/README.md create mode 100644 gpMgmt/bin/el8_migrate_locale/el8_migrate_locale.py create mode 100644 gpMgmt/bin/el8_migrate_locale/test.sql diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile index 5540bd6efe46..ced907b69490 100644 --- a/gpMgmt/bin/Makefile +++ b/gpMgmt/bin/Makefile @@ -7,7 +7,7 @@ ifneq "$(wildcard $(top_builddir)/src/Makefile.global)" "" include $(top_builddir)/src/Makefile.global endif -SUBDIRS = stream gpcheckcat_modules gpconfig_modules gpssh_modules gppylib lib +SUBDIRS = stream gpcheckcat_modules gpconfig_modules gpssh_modules gppylib lib el8_migrate_locale SUBDIRS += ifaddrs $(recurse) diff --git a/gpMgmt/bin/el8_migrate_locale/Makefile b/gpMgmt/bin/el8_migrate_locale/Makefile new file mode 100644 index 000000000000..a5136f05c68c --- /dev/null +++ b/gpMgmt/bin/el8_migrate_locale/Makefile @@ -0,0 +1,17 @@ +# gpMgmt/bin/el8_migrate_locale/Makefile + +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)/el8_migrate_locale' + +install: installdirs + $(INSTALL_SCRIPT) el8_migrate_locale.py '$(DESTDIR)$(bindir)/el8_migrate_locale/'; + $(INSTALL_SCRIPT) README.md '$(DESTDIR)$(bindir)/el8_migrate_locale/'; + +uninstall: + rm -rf '$(DESTDIR)$(bindir)/el8_migrate_locale/'; + +clean distclean: + rm -f *.pyc diff --git a/gpMgmt/bin/el8_migrate_locale/README.md b/gpMgmt/bin/el8_migrate_locale/README.md new file mode 100644 index 000000000000..844203bf6baa --- /dev/null +++ b/gpMgmt/bin/el8_migrate_locale/README.md @@ -0,0 +1,213 @@ +1. use `python el8_migrate_locale.py precheck-index` to list affected indexes. +2. use `python el8_migrate_locale.py precheck-table` to list affected partitioned tables. +3. use `python el8_migrate_locale.py migrate` to run the reindex and alter partition table commands. + +(Note: For easier reading, some example output is omitted with ellipses.) + +``` +$ python el8_migrate_locale.py --help +usage: el8_migrate_locale [-h] [--host HOST] [--port PORT] + [--dbname DBNAME] [--user USER] + {precheck-index,precheck-table,migrate} ... + +positional arguments: + {precheck-index,precheck-table,migrate} + sub-command help + precheck-index list affected index + precheck-table list affected tables + migrate run the reindex and the rebuild partition commands + +optional arguments: + -h, --help show this help message and exit + --host HOST Greenplum Database hostname + --port PORT Greenplum Database port + --dbname DBNAME Greenplum Database database name + --user USER Greenplum Database user name +``` +``` +$ python el8_migrate_locale.py precheck-index --help +usage: el8_migrate_locale precheck-index [-h] --out OUT + +optional arguments: + -h, --help show this help message and exit + +required arguments: + --out OUT outfile path for the reindex commands + +Example usage: + +$ python el8_migrate_locale.py precheck-index --out index.out +2023-10-18 11:04:13,944 - INFO - There are 2 catalog indexes that needs reindex when doing OS upgrade from EL7->EL8. +2023-10-18 11:04:14,001 - INFO - There are 7 user indexes in database test that needs reindex when doing OS upgrade from EL7->EL8. 
+ +$ cat index.out +\c postgres +-- catalog indexrelid: 3597 | index name: pg_seclabel_object_index | table name: pg_seclabel | collname: default | indexdef: CREATE UNIQUE INDEX pg_seclabel_object_index ON pg_catalog.pg_seclabel USING btree (objoid, classoid, objsubid, provider) +reindex index pg_seclabel_object_index; + +-- catalog indexrelid: 3593 | index name: pg_shseclabel_object_index | table name: pg_shseclabel | collname: default | indexdef: CREATE UNIQUE INDEX pg_shseclabel_object_index ON pg_catalog.pg_shseclabel USING btree (objoid, classoid, provider) +reindex index pg_shseclabel_object_index; + +\c test +-- indexrelid: 16512 | index name: testupgrade.hash_idx1 | table name: testupgrade.hash_test1 | collname: default | indexdef: CREATE INDEX hash_idx1 ON testupgrade.hash_test1 USING btree (content) +reindex index testupgrade.hash_idx1; +... +``` +``` +$ python el8_migrate_locale.py precheck-table --help +usage: el8_migrate_locale precheck-table [-h] --out OUT [--pre_upgrade] + [--order_size_ascend] + [--nthread NTHREAD] + +optional arguments: + -h, --help show this help message and exit + --pre_upgrade check tables before os upgrade to EL8 + --order_size_ascend sort the tables by size in ascending order + --nthread NTHREAD the concurrent threads to check partition tables + +Notes: there is a new option pre_upgrade, which is used for step1 before OS upgrade, and it will print all the potential affected partition tables. + +Example usage for check before OS upgrade: +$ python el8_migrate_locale.py precheck-table --pre_upgrade --out table_pre_upgrade.out +2023-10-18 08:04:06,907 - INFO - There are 6 partitioned tables in database testupgrade that should be checked when doing OS upgrade from EL7->EL8. +2023-10-18 08:04:06,947 - WARNING - no default partition for testupgrade.partition_range_test_3 +2023-10-18 08:04:06,984 - WARNING - no default partition for testupgrade.partition_range_test_ao +2023-10-18 08:04:07,021 - WARNING - no default partition for testupgrade.partition_range_test_2 +2023-10-18 08:04:07,100 - WARNING - no default partition for testupgrade.root +--------------------------------------------- +total partition tables size : 416 KB +total partition tables : 6 +total leaf partitions : 19 +--------------------------------------------- + +Example usage for check after OS upgrade: +$ python el8_migrate_locale.py precheck-table --out table.out +2023-10-16 04:12:19,064 - WARNING - There are 2 tables in database test that the distribution key is using custom operator class, should be checked when doing OS upgrade from EL7->EL8. +--------------------------------------------- +tablename | distclass +('testdiskey', 16397) +('testupgrade.test_citext', 16454) +--------------------------------------------- +2023-10-16 04:12:19,064 - INFO - There are 6 partitioned tables in database testupgrade that should be checked when doing OS upgrade from EL7->EL8. +2023-10-16 04:12:19,066 - INFO - worker[0]: begin: +2023-10-16 04:12:19,066 - INFO - worker[0]: connect to ... +2023-10-16 04:12:19,110 - INFO - start checking table testupgrade.partition_range_test_3_1_prt_mar ... +2023-10-16 04:12:19,162 - INFO - check table testupgrade.partition_range_test_3_1_prt_mar OK. +2023-10-16 04:12:19,162 - INFO - start checking table testupgrade.partition_range_test_3_1_prt_feb ... 
+2023-10-16 04:12:19,574 - INFO - check table testupgrade.partition_range_test_3_1_prt_feb error out: ERROR: trying to insert row into wrong partition (seg1 10.0.138.96:20001 pid=3975) +DETAIL: Expected partition: partition_range_test_3_1_prt_mar, provided partition: partition_range_test_3_1_prt_feb. + +2023-10-16 04:12:19,575 - INFO - start checking table testupgrade.partition_range_test_3_1_prt_jan ... +2023-10-16 04:12:19,762 - INFO - check table testupgrade.partition_range_test_3_1_prt_jan error out: ERROR: trying to insert row into wrong partition (seg1 10.0.138.96:20001 pid=3975) +DETAIL: Expected partition: partition_range_test_3_1_prt_feb, provided partition: partition_range_test_3_1_prt_jan. + +2023-10-16 04:12:19,804 - WARNING - no default partition for testupgrade.partition_range_test_3 +... +2023-10-16 04:12:22,058 - INFO - Current progress: have 0 remaining, 2.77 seconds passed. +2023-10-16 04:12:22,058 - INFO - worker[0]: finish. +--------------------------------------------- +total partition tables size : 416 KB +total partition tables : 6 +total leaf partitions : 19 +--------------------------------------------- + +Example Usage for using nthreads (check passed example): +$ python el8_migrate_locale.py precheck-table --out table.out --nthread 3 +2023-10-18 11:19:11,717 - INFO - There are 4 partitioned tables in database test that should be checked when doing OS upgrade from EL7->EL8. +2023-10-18 11:19:11,718 - INFO - worker[0]: begin: +2023-10-18 11:19:11,718 - INFO - worker[0]: connect to ... +2023-10-18 11:19:11,718 - INFO - worker[1]: begin: +2023-10-18 11:19:11,719 - INFO - worker[1]: connect to ... +2023-10-18 11:19:11,718 - INFO - worker[2]: begin: +2023-10-18 11:19:11,719 - INFO - worker[2]: connect to ... +2023-10-18 11:19:11,744 - INFO - start checking table testupgrade.partition_range_test_1_1_prt_mar ... +2023-10-18 11:19:11,745 - INFO - start checking table testupgrade.partition_range_test_ao_1_prt_mar ... +2023-10-18 11:19:11,746 - INFO - start checking table testupgrade.partition_range_test_2_1_prt_mar ... +2023-10-18 11:19:11,749 - INFO - check table testupgrade.partition_range_test_1_1_prt_mar OK. +2023-10-18 11:19:11,749 - INFO - start checking table testupgrade.partition_range_test_1_1_prt_feb ... +2023-10-18 11:19:11,751 - INFO - check table testupgrade.partition_range_test_ao_1_prt_mar OK. +2023-10-18 11:19:11,751 - INFO - start checking table testupgrade.partition_range_test_ao_1_prt_feb ... +2023-10-18 11:19:11,751 - INFO - check table testupgrade.partition_range_test_2_1_prt_mar OK. +2023-10-18 11:19:11,751 - INFO - start checking table testupgrade.partition_range_test_2_1_prt_feb ... +2023-10-18 11:19:11,752 - INFO - check table testupgrade.partition_range_test_1_1_prt_feb OK. +2023-10-18 11:19:11,752 - INFO - start checking table testupgrade.partition_range_test_1_1_prt_others ... +2023-10-18 11:19:11,754 - INFO - check table testupgrade.partition_range_test_2_1_prt_feb OK. +2023-10-18 11:19:11,754 - INFO - start checking table testupgrade.partition_range_test_2_1_prt_jan ... +2023-10-18 11:19:11,755 - INFO - check table testupgrade.partition_range_test_1_1_prt_others OK. +2023-10-18 11:19:11,755 - INFO - check table testupgrade.partition_range_test_ao_1_prt_feb OK. +2023-10-18 11:19:11,755 - INFO - start checking table testupgrade.partition_range_test_ao_1_prt_jan ... +2023-10-18 11:19:11,756 - INFO - Current progress: have 1 remaining, 0.97 seconds passed. 
+2023-10-18 11:19:11,757 - INFO - check table testupgrade.partition_range_test_2_1_prt_jan OK. +2023-10-18 11:19:11,758 - INFO - Current progress: have 0 remaining, 0.99 seconds passed. +2023-10-18 11:19:11,758 - INFO - worker[2]: finish. +2023-10-18 11:19:11,761 - INFO - check table testupgrade.partition_range_test_ao_1_prt_jan OK. +2023-10-18 11:19:11,761 - INFO - Current progress: have 0 remaining, 1.07 seconds passed. +2023-10-18 11:19:11,761 - INFO - worker[1]: finish. +2023-10-18 11:19:11,763 - INFO - start checking table testupgrade.root_1_prt_mar ... +2023-10-18 11:19:11,766 - INFO - check table testupgrade.root_1_prt_mar OK. +2023-10-18 11:19:11,767 - INFO - start checking table testupgrade.root_1_prt_feb ... +2023-10-18 11:19:11,769 - INFO - check table testupgrade.root_1_prt_feb OK. +2023-10-18 11:19:11,770 - INFO - start checking table testupgrade.root_1_prt_jan ... +2023-10-18 11:19:11,772 - INFO - check table testupgrade.root_1_prt_jan OK. +2023-10-18 11:19:11,773 - INFO - Current progress: have 0 remaining, 1.4 seconds passed. +2023-10-18 11:19:11,773 - INFO - worker[0]: finish. +--------------------------------------------- +total partition tables size : 0 Bytes +total partition tables : 0 +total leaf partitions : 0 +--------------------------------------------- + +$ cat table.out +-- order table by size in descending order +\c testupgrade + +-- parrelid: 16649 | coll: 100 | attname: date | msg: partition table, 3 leafs, size 98304 +begin; create temp table "testupgrade.partition_range_test_3_bak" as select * from testupgrade.partition_range_test_3; truncate testupgrade.partition_range_test_3; insert into testupgrade.partition_range_test_3 select * from "testupgrade.partition_range_test_3_bak"; commit; +... + +``` +``` +$ python el8_migrate_locale.py migrate --help +usage: el8_migrate_locale migrate [-h] --input INPUT + +optional arguments: + -h, --help show this help message and exit + +required arguments: + --input INPUT the file contains reindex or rebuild partition commands + +Example usage for migrate index: +$ python el8_migrate_locale.py migrate --input index.out +2023-10-16 04:12:02,461 - INFO - db: testupgrade, total have 7 commands to execute +2023-10-16 04:12:02,467 - INFO - db: testupgrade, executing command: reindex index testupgrade.test_id1; +2023-10-16 04:12:02,541 - INFO - db: testupgrade, executing command: reindex index testupgrade.test_id2; +2023-10-16 04:12:02,566 - INFO - db: testupgrade, executing command: reindex index testupgrade.test_id3; +2023-10-16 04:12:02,592 - INFO - db: testupgrade, executing command: reindex index testupgrade.test_citext_pkey; +2023-10-16 04:12:02,623 - INFO - db: testupgrade, executing command: reindex index testupgrade.test_idx_citext; +2023-10-16 04:12:02,647 - INFO - db: testupgrade, executing command: reindex index testupgrade.hash_idx1; +2023-10-16 04:12:02,673 - INFO - db: testupgrade, executing command: reindex index testupgrade.idx_projecttag; +2023-10-16 04:12:02,692 - INFO - db: postgres, total have 2 commands to execute +2023-10-16 04:12:02,698 - INFO - db: postgres, executing command: reindex index pg_seclabel_object_index; +2023-10-16 04:12:02,730 - INFO - db: postgres, executing command: reindex index pg_shseclabel_object_index; +2023-10-16 04:12:02,754 - INFO - All done + +Example usage for migrate tables: +$ python el8_migrate_locale.py migrate --input table.out +2023-10-16 04:14:17,003 - INFO - db: testupgrade, total have 6 commands to execute +2023-10-16 04:14:17,009 - INFO - db: testupgrade, executing 
command: begin; create temp table "testupgrade.partition_range_test_3_bak" as select * from testupgrade.partition_range_test_3; truncate testupgrade.partition_range_test_3; insert into testupgrade.partition_range_test_3 select * from "testupgrade.partition_range_test_3_bak"; commit; +2023-10-16 04:14:17,175 - INFO - db: testupgrade, executing analyze command: analyze testupgrade.partition_range_test_3;; +2023-10-16 04:14:17,201 - INFO - db: testupgrade, executing command: begin; create temp table "testupgrade.partition_range_test_2_bak" as select * from testupgrade.partition_range_test_2; truncate testupgrade.partition_range_test_2; insert into testupgrade.partition_range_test_2 select * from "testupgrade.partition_range_test_2_bak"; commit; +2023-10-16 04:14:17,490 - ERROR - ERROR: no partition for partitioning key (seg1 10.0.138.96:20001 pid=4028) + +2023-10-16 04:14:17,497 - INFO - db: testupgrade, executing command: begin; create temp table "testupgrade.partition_range_test_4_bak" as select * from testupgrade.partition_range_test_4; truncate testupgrade.partition_range_test_4; insert into testupgrade.partition_range_test_4 select * from "testupgrade.partition_range_test_4_bak"; commit; +2023-10-16 04:14:17,628 - INFO - db: testupgrade, executing analyze command: analyze testupgrade.partition_range_test_4;; +2023-10-16 04:14:17,660 - INFO - db: testupgrade, executing command: begin; create temp table "testupgrade.partition_range_test_1_bak" as select * from testupgrade.partition_range_test_1; truncate testupgrade.partition_range_test_1; insert into testupgrade.partition_range_test_1 select * from "testupgrade.partition_range_test_1_bak"; commit; +2023-10-16 04:14:17,784 - INFO - db: testupgrade, executing analyze command: analyze testupgrade.partition_range_test_1;; +2023-10-16 04:14:17,808 - INFO - db: testupgrade, executing command: begin; create temp table "testupgrade.root_bak" as select * from testupgrade.root; truncate testupgrade.root; insert into testupgrade.root select * from "testupgrade.root_bak"; commit; +2023-10-16 04:14:17,928 - INFO - db: testupgrade, executing analyze command: analyze testupgrade.root;; +2023-10-16 04:14:17,952 - INFO - db: testupgrade, executing command: begin; create temp table "testupgrade.partition_range_test_ao_bak" as select * from testupgrade.partition_range_test_ao; truncate testupgrade.partition_range_test_ao; insert into testupgrade.partition_range_test_ao select * from "testupgrade.partition_range_test_ao_bak"; commit; +2023-10-16 04:14:18,276 - ERROR - ERROR: no partition for partitioning key (seg1 10.0.138.96:20001 pid=4060) + +2023-10-16 04:14:18,277 - INFO - All done +``` + diff --git a/gpMgmt/bin/el8_migrate_locale/el8_migrate_locale.py b/gpMgmt/bin/el8_migrate_locale/el8_migrate_locale.py new file mode 100644 index 000000000000..f52a762a8246 --- /dev/null +++ b/gpMgmt/bin/el8_migrate_locale/el8_migrate_locale.py @@ -0,0 +1,517 @@ +#!/usr/bin/env python +#!-*- coding: utf-8 -*- +import argparse +import sys +from pygresql.pg import DB +import logging +import signal +from multiprocessing import Queue +from threading import Thread, Lock +import time +import string +from collections import defaultdict +import os +import re +try: + from pygresql import pg +except ImportError, e: + sys.exit('ERROR: Cannot import modules. Please check that you have sourced greenplum_path.sh. 
Detail: ' + str(e)) + +class connection(object): + def __init__(self, host, port, dbname, user): + self.host = host + self.port = port + self.dbname = dbname + self.user = user + + def _get_pg_port(self, port): + if port is not None: + return port + try: + port = os.environ.get('PGPORT') + if not port: + port = self.get_port_from_conf() + return int(port) + except: + sys.exit("No port has been set, please set env PGPORT or MASTER_DATA_DIRECTORY or specify the port in the command line") + + def get_port_from_conf(self): + datadir = os.environ.get('MASTER_DATA_DIRECTORY') + if datadir: + file = datadir +'/postgresql.conf' + if os.path.isfile(file): + with open(file) as f: + for line in f.xreadlines(): + match = re.search('port=\d+',line) + if match: + match1 = re.search('\d+', match.group()) + if match1: + return match1.group() + + def get_default_db_conn(self): + db = DB(dbname=self.dbname, + host=self.host, + port=self._get_pg_port(self.port), + user=self.user) + return db + + def get_db_conn(self, dbname): + db = DB(dbname=dbname, + host=self.host, + port=self._get_pg_port(self.port), + user=self.user) + return db + + def get_db_list(self): + db = self.get_default_db_conn() + sql = "select datname from pg_database where datname not in ('template0');" + dbs = [datname for datname, in db.query(sql).getresult()] + db.close + return dbs + +class CheckIndexes(connection): + def get_affected_user_indexes(self, dbname): + db = self.get_db_conn(dbname) + # The built-in collatable data types are text,varchar,and char, and the indcollation contains the OID of the collation + # to use for the index, or zero if the column is not of a collatable data type. + sql = """ + SELECT distinct(indexrelid), indexrelid::regclass::text as indexname, indrelid::regclass::text as tablename, collname, pg_get_indexdef(indexrelid) +FROM (SELECT indexrelid, indrelid, indcollation[i] coll FROM pg_index, generate_subscripts(indcollation, 1) g(i)) s +JOIN pg_collation c ON coll=c.oid +WHERE collname != 'C' and collname != 'POSIX' and indexrelid >= 16384; + """ + index = db.query(sql).getresult() + if index: + logger.info("There are {} user indexes in database {} that needs reindex when doing OS upgrade from EL7->EL8.".format(len(index), dbname)) + db.close() + return index + + def get_affected_catalog_indexes(self): + db = self.get_default_db_conn() + sql = """ + SELECT distinct(indexrelid), indexrelid::regclass::text as indexname, indrelid::regclass::text as tablename, collname, pg_get_indexdef(indexrelid) +FROM (SELECT indexrelid, indrelid, indcollation[i] coll FROM pg_index, generate_subscripts(indcollation, 1) g(i)) s +JOIN pg_collation c ON coll=c.oid +WHERE collname != 'C' and collname != 'POSIX' and indexrelid < 16384; + """ + index = db.query(sql).getresult() + if index: + logger.info("There are {} catalog indexes that needs reindex when doing OS upgrade from EL7->EL8.".format(len(index))) + db.close() + return index + + def handle_one_index(self, name): + # no need to handle special charactor here, because the name will include the double quotes if it has special charactors. + sql = """ + reindex index {}; + """.format(name) + return sql.strip() + + def dump_index_info(self, fn): + dblist = self.get_db_list() + f = open(fn, "w") + + # print all catalog indexes that might be affected. 
+ cindex = self.get_affected_catalog_indexes() + if cindex: + print>>f, "\c ", self.dbname + for indexrelid, indexname, tablename, collname, indexdef in cindex: + print>>f, "-- catalog indexrelid:", indexrelid, "| index name:", indexname, "| table name:", tablename, "| collname:", collname, "| indexdef: ", indexdef + print>>f, self.handle_one_index(indexname) + print>>f + + # print all user indexes in all databases that might be affected. + for dbname in dblist: + index = self.get_affected_user_indexes(dbname) + if index: + print>>f, "\c ", dbname + for indexrelid, indexname, tablename, collname, indexdef in index: + print>>f, "-- indexrelid:", indexrelid, "| index name:", indexname, "| table name:", tablename, "| collname:", collname, "| indexdef: ", indexdef + print>>f, self.handle_one_index(indexname) + print>>f + + f.close() + +class CheckTables(connection): + def __init__(self, host, port, dbname, user, order_size_ascend, nthread, pre_upgrade): + self.host = host + self.port = port + self.dbname = dbname + self.user = user + self.order_size_ascend = order_size_ascend + self.nthread = nthread + self.filtertabs = [] + self.filtertabslock = Lock() + self.total_leafs = 0 + self.total_roots = 0 + self.total_root_size = 0 + self.lock = Lock() + self.qlist = Queue() + self.pre_upgrade = pre_upgrade + signal.signal(signal.SIGTERM, self.sig_handler) + signal.signal(signal.SIGINT, self.sig_handler) + + def get_affected_partitioned_tables(self, dbname): + db = self.get_db_conn(dbname) + # The built-in collatable data types are text,varchar,and char, and the defined collation of the column, or zero if the column is not of a collatable data type + # filter the partition by list, because only partiton by range might be affected. + sql = """ + WITH might_affected_tables AS ( + SELECT + prelid, + coll, + attname, + attnum, + parisdefault + FROM + ( + select + p.oid as poid, + p.parrelid as prelid, + t.attcollation coll, + t.attname as attname, + t.attnum as attnum + from + pg_partition p + join pg_attribute t on p.parrelid = t.attrelid + and t.attnum = ANY(p.paratts :: smallint[]) + and p.parkind = 'r' + ) s + JOIN pg_collation c ON coll = c.oid + JOIN pg_partition_rule r ON poid = r.paroid + WHERE + collname != 'C' and collname != 'POSIX' + ), + par_has_default AS ( + SELECT + prelid, + coll, + attname, + parisdefault + FROM + might_affected_tables group by (prelid, coll, attname, parisdefault) + ) + select prelid, prelid::regclass::text as partitionname, coll, attname, bool_or(parisdefault) as parhasdefault from par_has_default group by (prelid, coll, attname) ; + """ + tabs = db.query(sql).getresult() + db.close() + return tabs + + # get the tables which distribution column is using custom operator class, it may be affected by the OS upgrade, so give a warning. 
+ def get_custom_opclass_as_distribute_keys_tables(self, dbname): + db = self.get_db_conn(dbname) + sql = """ + select table_oid::regclass::text as tablename, max(distclass) from (select localoid , unnest(distclass::int[]) distclass from gp_distribution_policy) x(table_oid, distclass) group by table_oid having max(distclass) > 16384; + """ + tables = db.query(sql).getresult() + if tables: + logger.warning("There are {} tables in database {} that the distribution key is using custom operator class, should be checked when doing OS upgrade from EL7->EL8.".format(len(tables), dbname)) + print "---------------------------------------------" + print "tablename | distclass" + for t in tables: + print t + print "---------------------------------------------" + db.close() + + # Escape double-quotes in a string, so that the resulting string is suitable for + # embedding as in SQL. Analogouous to libpq's PQescapeIdentifier + def escape_identifier(self, str): + # Does the string need quoting? Simple strings with all-lower case ASCII + # letters don't. + SAFE_RE = re.compile('[a-z][a-z0-9_]*$') + + if SAFE_RE.match(str): + return str + + # Otherwise we have to quote it. Any double-quotes in the string need to be escaped + # by doubling them. + return '"' + str.replace('"', '""') + '"' + + def handle_one_table(self, name): + bakname = "{}".format(self.escape_identifier(name + "_bak")) + sql = """ + begin; create temp table {1} as select * from {0}; truncate {0}; insert into {0} select * from {1}; commit; + """.format(name, bakname) + return sql.strip() + + def get_table_size_info(self, dbname, parrelid): + db = self.get_db_conn(dbname) + sql_size = """ + with recursive cte(nlevel, table_oid) as ( + select 0, {}::regclass::oid + union all + select nlevel+1, pi.inhrelid + from cte, pg_inherits pi + where cte.table_oid = pi.inhparent + ) + select sum(pg_relation_size(table_oid)) as size, count(1) as nleafs + from cte where nlevel = (select max(nlevel) from cte); + """ + r = db.query(sql_size.format(parrelid)) + size = r.getresult()[0][0] + nleafs = r.getresult()[0][1] + self.lock.acquire() + self.total_root_size += size + self.total_leafs += nleafs + self.total_roots += 1 + self.lock.release() + db.close() + return "partition table, %s leafs, size %s" % (nleafs, size), size + + def dump_tables(self, fn): + dblist = self.get_db_list() + f = open(fn, "w") + + for dbname in dblist: + table_info = [] + # check tables that the distribution columns are using custom operator class + self.get_custom_opclass_as_distribute_keys_tables(dbname) + + # get all the might-affected partitioned tables + tables = self.get_affected_partitioned_tables(dbname) + + if tables: + logger.info("There are {} partitioned tables in database {} that should be checked when doing OS upgrade from EL7->EL8.".format(len(tables), dbname)) + # if check before os upgrade, it will print the SQL results and doesn't do the GUC check. 
+ if self.pre_upgrade: + for parrelid, tablename, coll, attname, has_default_partition in tables: + # get the partition table size info to estimate the time + msg, size = self.get_table_size_info(dbname, parrelid) + table_info.append((parrelid, tablename, coll, attname, msg, size)) + # if no default partition, give a warning, in case of migrate failed + if has_default_partition == 'f': + logger.warning("no default partition for {}".format(tablename)) + else: + # start multiple threads to check if the rows are still in the correct partitions after os upgrade, if check failed, add these tables to filtertabs + for t in tables: + # qlist is used by multiple threads + self.qlist.put(t) + self.concurrent_check(dbname) + table_info = self.filtertabs[:] + self.filtertabs = [] + + # dump the table info to the specified output file + if table_info: + print>>f, "-- order table by size in %s order " % 'ascending' if self.order_size_ascend else '-- order table by size in descending order' + print>>f, "\c ", dbname + print>>f + + # sort the tables by size + if self.order_size_ascend: + self.filtertabs.sort(key=lambda x: x[-1], reverse=False) + else: + self.filtertabs.sort(key=lambda x: x[-1], reverse=True) + + for result in table_info: + parrelid = result[0] + name = result[1] + coll = result[2] + attname = result[3] + msg = result[4] + print>>f, "-- parrelid:", parrelid, "| coll:", coll, "| attname:", attname, "| msg:", msg + print>>f, self.handle_one_table(name) + print>>f + + # print the total partition table size + self.print_size_summary_info() + + f.close() + + def print_size_summary_info(self): + print "---------------------------------------------" + KB = float(1024) + MB = float(KB ** 2) + GB = float(KB ** 3) + if self.total_root_size < KB: + print("total partition tables size : {} Bytes".format(int(float(self.total_root_size)))) + elif KB <= self.total_root_size < MB: + print("total partition tables size : {} KB".format(int(float(self.total_root_size) / KB))) + elif MB <= self.total_root_size < GB: + print("total partition tables size : {} MB".format(int(float(self.total_root_size) / MB))) + else: + print("total partition tables size : {} GB".format(int(float(self.total_root_size) / GB))) + + print("total partition tables : {}".format(self.total_roots)) + print("total leaf partitions : {}".format(self.total_leafs)) + print "---------------------------------------------" + + # start multiple threads to do the check + def concurrent_check(self, dbname): + threads = [] + for i in range(self.nthread): + t = Thread(target=CheckTables.check_partitiontables_by_guc, + args=[self, i, dbname]) + threads.append(t) + for t in threads: + t.start() + for t in threads: + t.join() + + def sig_handler(self, sig, arg): + sys.stderr.write("terminated by signal %s\n" % sig) + sys.exit(127) + + @staticmethod + # check these tables by using GUC gp_detect_data_correctness, dump the error tables to the output file + def check_partitiontables_by_guc(self, idx, dbname): + logger.info("worker[{}]: begin: ".format(idx)) + logger.info("worker[{}]: connect to <{}> ...".format(idx, dbname)) + start = time.time() + db = self.get_db_conn(dbname) + has_error = False + + while not self.qlist.empty(): + result = self.qlist.get() + parrelid = result[0] + tablename = result[1] + coll = result[2] + attname = result[3] + has_default_partition = result[4] + + try: + db.query("set gp_detect_data_correctness = 1;") + except Exception as e: + logger.warning("missing GUC gp_detect_data_correctness") + db.close() + + # get the leaf 
partition names + get_partitionname_sql = """ + with recursive cte(root_oid, table_oid, nlevel) as ( + select parrelid, parrelid, 0 from pg_partition where not paristemplate and parlevel = 0 + union all + select root_oid, pi.inhrelid, nlevel+1 + from cte, pg_inherits pi + where cte.table_oid = pi.inhparent + ) + select root_oid::regclass::text as tablename, table_oid::regclass::text as partitioname + from cte where nlevel = (select max(nlevel) from cte) and root_oid = {}; + """ + partitiontablenames = db.query(get_partitionname_sql.format(parrelid)).getresult() + for tablename, partitioname in partitiontablenames: + sql = "insert into {tab} select * from {tab}".format(tab=partitioname) + try: + logger.info("start checking table {tab} ...".format(tab=partitioname)) + db.query(sql) + logger.info("check table {tab} OK.".format(tab=partitioname)) + except Exception as e: + logger.info("check table {tab} error out: {err_msg}".format(tab=partitioname, err_msg=str(e))) + has_error = True + + # if check failed, dump the table to the specified out file. + if has_error: + # get the partition table size info to estimate the time + msg, size = self.get_table_size_info(dbname, parrelid) + self.filtertabslock.acquire() + self.filtertabs.append((parrelid, tablename, coll, attname, msg, size)) + self.filtertabslock.release() + has_error = False + if has_default_partition == 'f': + logger.warning("no default partition for {}".format(tablename)) + + db.query("set gp_detect_data_correctness = 0;") + + end = time.time() + total_time = end - start + logger.info("Current progress: have {} remaining, {} seconds passed.".format(self.qlist.qsize(), round(total_time, 2))) + + db.close() + logger.info("worker[{}]: finish.".format(idx)) + +class migrate(connection): + def __init__(self, dbname, port, host, user, script_file): + self.dbname = dbname + self.port = self._get_pg_port(port) + self.host = host + self.user = user + self.script_file = script_file + self.dbdict = defaultdict(list) + + self.parse_inputfile() + + def parse_inputfile(self): + with open(self.script_file) as f: + for line in f: + sql = line.strip() + if sql.startswith("\c"): + db_name = sql.split("\c")[1].strip() + if (sql.startswith("reindex") and sql.endswith(";") and sql.count(";") == 1): + self.dbdict[db_name].append(sql) + if (sql.startswith("begin;") and sql.endswith("commit;")): + self.dbdict[db_name].append(sql) + + def run(self): + try: + for db_name, commands in self.dbdict.items(): + total_counts = len(commands) + logger.info("db: {}, total have {} commands to execute".format(db_name, total_counts)) + for command in commands: + self.run_alter_command(db_name, command) + except KeyboardInterrupt: + sys.exit('\nUser Interrupted') + + logger.info("All done") + + def run_alter_command(self, db_name, command): + try: + db = self.get_db_conn(db_name) + logger.info("db: {}, executing command: {}".format(db_name, command)) + db.query(command) + + if (command.startswith("begin")): + pieces = [p for p in re.split("( |\\\".*?\\\"|'.*?')", command) if p.strip()] + index = pieces.index("truncate") + if 0 < index < len(pieces) - 1: + table_name = pieces[index+1] + analyze_sql = "analyze {};".format(table_name) + logger.info("db: {}, executing analyze command: {}".format(db_name, analyze_sql)) + db.query(analyze_sql) + + db.close() + except Exception, e: + logger.error("{}".format(str(e))) + +def parseargs(): + parser = argparse.ArgumentParser(prog='el8_migrate_locale') + parser.add_argument('--host', type=str, help='Greenplum Database hostname') + 
parser.add_argument('--port', type=int, help='Greenplum Database port') + parser.add_argument('--dbname', type=str, default='postgres', help='Greenplum Database database name') + parser.add_argument('--user', type=str, help='Greenplum Database user name') + + subparsers = parser.add_subparsers(help='sub-command help', dest='cmd') + parser_precheck_index = subparsers.add_parser('precheck-index', help='list affected index') + required = parser_precheck_index.add_argument_group('required arguments') + required.add_argument('--out', type=str, help='outfile path for the reindex commands', required=True) + + parser_precheck_table = subparsers.add_parser('precheck-table', help='list affected tables') + required = parser_precheck_table.add_argument_group('required arguments') + required.add_argument('--out', type=str, help='outfile path for the rebuild partition commands', required=True) + parser_precheck_table.add_argument('--pre_upgrade', action='store_true', help='check tables before os upgrade to EL8') + parser_precheck_table.add_argument('--order_size_ascend', action='store_true', help='sort the tables by size in ascending order') + parser_precheck_table.set_defaults(order_size_ascend=False) + parser_precheck_table.add_argument('--nthread', type=int, default=1, help='the concurrent threads to check partition tables') + + parser_run = subparsers.add_parser('migrate', help='run the reindex and the rebuild partition commands') + required = parser_run.add_argument_group('required arguments') + required.add_argument('--input', type=str, help='the file contains reindex or rebuild partition commands', required=True) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parseargs() + # initialize logger + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout, format="%(asctime)s - %(levelname)s - %(message)s") + logger = logging.getLogger() + + if args.cmd == 'precheck-index': + ci = CheckIndexes(args.host, args.port, args.dbname, args.user) + ci.dump_index_info(args.out) + elif args.cmd == 'precheck-table': + ct = CheckTables(args.host, args.port, args.dbname, args.user, args.order_size_ascend, args.nthread, args.pre_upgrade) + ct.dump_tables(args.out) + elif args.cmd == 'migrate': + cr = migrate(args.dbname, args.port, args.host, args.user, args.input) + cr.run() + else: + sys.stderr.write("unknown subcommand!") + sys.exit(127) diff --git a/gpMgmt/bin/el8_migrate_locale/test.sql b/gpMgmt/bin/el8_migrate_locale/test.sql new file mode 100644 index 000000000000..99b4ce3044c5 --- /dev/null +++ b/gpMgmt/bin/el8_migrate_locale/test.sql @@ -0,0 +1,261 @@ +-- case1 test basic table and index with char/varchar/text type +CREATE TABLE test_character_type +( + char_1 CHAR(1), + varchar_10 VARCHAR(10), + txt TEXT +); + +INSERT INTO test_character_type (char_1) +VALUES ('Y ') RETURNING *; + +INSERT INTO test_character_type (varchar_10) +VALUES ('HelloWorld ') RETURNING *; + +INSERT INTO test_character_type (txt) +VALUES ('TEXT column can store a string of any length') RETURNING txt; + +create index "test_id1 's " on test_character_type (char_1); +create index "test_id2 \ $ \\" on test_character_type (varchar_10); +create index " test_id "" 3 " on test_character_type (txt); + +-- case2 test type citext; +create extension citext; +CREATE TABLE test_citext +( + nick CITEXT PRIMARY KEY, + pass TEXT NOT NULL +); + +INSERT INTO test_citext VALUES ('larry', random()::text); +INSERT INTO test_citext VALUES ('Tom', random()::text); +INSERT INTO test_citext VALUES ('Damian', 
random()::text); +INSERT INTO test_citext VALUES ('NEAL', random()::text); +INSERT INTO test_citext VALUES ('Bjørn', random()::text); + +create index test_idx_citext on test_citext (nick); + +----- case 3 test special case with $ +create table test1 +( + content varchar +) DISTRIBUTED by (content); +insert into test1 (content) +values ('a'), + ('$a'), + ('a$'), + ('b'), + ('$b'), + ('b$'), + ('A'), + ('B'); +create index id1 on test1 (content); + +---- case4 test speical case with '""' +CREATE TABLE hash_test +( + id int, + date text +) DISTRIBUTED BY (date); +insert into hash_test values (1, '01'); +insert into hash_test values (1, '"01"'); +insert into hash_test values (2, '"02"'); +insert into hash_test values (3, '02'); +insert into hash_test values (4, '03'); + +---- case5 test speical case with 1-1 vs 11 +CREATE TABLE test2 +( + id int, + date text +) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) +( START (text '01-01') INCLUSIVE + END (text '11-01') EXCLUSIVE + ); + +insert into test2 +values (2, '02-1'), + (2, '03-1'), + (2, '08-1'), + (2, '09-01'), + (1, '11'), + (1, '1-1'); + +--- case6 test range partition with special character '“”' +CREATE TABLE partition_range_test +( + id int, + date text +) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) + (PARTITION Jan START ( '01') INCLUSIVE , + PARTITION Feb START ( '02') INCLUSIVE , + PARTITION Mar START ( '03') INCLUSIVE + END ( '04') EXCLUSIVE); + +insert into partition_range_test values (1, '01'); +insert into partition_range_test values (1, '"01"'); +insert into partition_range_test values (2, '"02"'); +insert into partition_range_test values (2, '02'); +insert into partition_range_test values (3, '03'); +insert into partition_range_test values (3, '"03"'); + +-- case7 test range partition with default partition. 
+CREATE TABLE partition_range_test_default (id int, date text) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) + (PARTITION feb START ( '02') INCLUSIVE , + PARTITION Mar START ( '03') INCLUSIVE, + Default partition others); + +insert into partition_range_test_default values (1, '01'), (1, '"01"'), (2, '"02"'), (2, '02'), (3, '03'), (3, '"03"'), (4, '04'), (4, '"04"'); + +-- case8 for testing insert into root select * from partition_range_test where date > '"02"'; +create table root +( + id int, + date text +) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) +(PARTITION Jan START ( '01') INCLUSIVE , +PARTITION Feb START ( '02') INCLUSIVE , +PARTITION Mar START ( '03') INCLUSIVE +END ( '04') EXCLUSIVE); + +insert into root +select * +from partition_range_test +where date > '"02"'; + +--- case9 test range partition with special character '“”' with ao +CREATE TABLE partition_range_test_ao +( + id int, + date text +) + WITH (appendonly = true) + DISTRIBUTED BY (id) + PARTITION BY RANGE (date) + (PARTITION Jan START ('01') INCLUSIVE , + PARTITION Feb START ('02') INCLUSIVE , + PARTITION Mar START ('03') INCLUSIVE + END ('04') EXCLUSIVE); + +insert into partition_range_test_ao values (1, '01'); +insert into partition_range_test_ao values (1, '"01"'); +insert into partition_range_test_ao values (1, '"01-1"'); +insert into partition_range_test_ao values (2, '"02-1"'); +insert into partition_range_test_ao values (2, '"02"'); +insert into partition_range_test_ao values (2, '02'); + +--- case10 for index constraint violation +CREATE TABLE repository +( + id integer, + slug character varying(100), + name character varying(100), + project_id character varying(100) +) DISTRIBUTED BY (slug, project_id); + +insert into repository values (793, 'text-rnn', 'text-rnn', 146); +insert into repository values (812, 'ink_data', 'ink_data', 146); + +-- case11 for index unique constraint violation +create table gitrefresh +( + projecttag text, + state character(1), + analysis_started timestamp without time zone, + analysis_ended timestamp without time zone, + counter_requested integer, + customer_id integer, + id int, + constraint idx_projecttag unique (projecttag) +); +create index pk_gitrefresh on gitrefresh (id); +INSERT INTO gitrefresh(projecttag, state, analysis_started, counter_requested, customer_id) +VALUES ('npm@randombytes', 'Q', NOW(), 1, 0); + +-- case12 for partition range list and special characters +CREATE TABLE rank +( + id int, + gender char(1) +) DISTRIBUTED BY (id) +PARTITION BY LIST (gender) +( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other ); + +CREATE TABLE "rank $ % &" +( + id int, + gender char(1) +) DISTRIBUTED BY (id) +PARTITION BY LIST (gender) +( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other ); + +CREATE TABLE "rank $ % & ! 
*" +( + id int, + gender char(1) +) DISTRIBUTED BY (id) +PARTITION BY LIST (gender) +( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other ); + +CREATE TABLE "rank 's " +( + id int, + gender char(1) +) DISTRIBUTED BY (id) +PARTITION BY LIST (gender) +( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other ); + +CREATE TABLE "rank 's' " +( + id int, + gender char(1) +) DISTRIBUTED BY (id) +PARTITION BY LIST (gender) +( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other ); + +CREATE TABLE "rank b c" +( + id int, + gender char(1) +) DISTRIBUTED BY (id) +PARTITION BY LIST (gender) +( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other ); + +-- case13 for testing partition key is type date +CREATE TABLE sales (id int, time date, amt decimal(10,2)) +DISTRIBUTED BY (id) +PARTITION BY RANGE (time) +( START (date '2022-01-01') INCLUSIVE + END (date '2023-01-01') EXCLUSIVE + EVERY (INTERVAL '1 month') ); + +-- case14 for testing partition range with special characters in name +CREATE TABLE "partition_range_ 's " (id int, date text) +DISTRIBUTED BY (id) +PARTITION BY RANGE (date) + (PARTITION feb START ( '02') INCLUSIVE , + PARTITION Mar START ( '03') INCLUSIVE, + Default partition others); + +CREATE TABLE "partition_range_ 's' " (id int, date text) +DISTRIBUTED BY (id) +PARTITION BY RANGE (date) + (PARTITION feb START ( '02') INCLUSIVE , + PARTITION Mar START ( '03') INCLUSIVE, + Default partition others); From a74d3040b2dec43ea3cbdcbab54fb8c4078b82e3 Mon Sep 17 00:00:00 2001 From: Shirisha SN Date: Thu, 2 Nov 2023 09:31:55 +0530 Subject: [PATCH 065/106] Fix error seen in CM utilities when invalid argument is provided (#16544) **Issue:** CM utilities like gpstart, gpstop, gpstate, gprecoverseg, gpaddmirror error out with an exception stack trace, when - Arguments are provided without any option - or, too many arguments are provided for an option. **RCA:** Ideally when too many arguments are provided or arguments are provided without any option, then utilities should raise an exception, error out gracefully and print CLI help messages on console. There is stack trace seen here because while handling this exception another exception occurred where a non-existing attribute is accessed by a NoneType object. This exception stack trace is seen in `simple_main_locked` function. This is a regression caused by #16433 As part of this commit the parameters of the function `simple_main_locked` were changed. Earlier `createOptionParserFn` function was passed as a parameter and its return value parser was used to print help for the utility in case of an exception. With these changes, neither function `createOptionParserFn` is passed as parameter nor its return value parser , so parser remains None. Hence while using parser to call a function, it leads to another exception dumping a stack trace. 
**Fix:** Passing `parser` as a parameter to function `simple_main_locked` --- gpMgmt/bin/gppylib/mainUtils.py | 5 ++--- gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature | 12 ++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/gpMgmt/bin/gppylib/mainUtils.py b/gpMgmt/bin/gppylib/mainUtils.py index c43b31d41602..c48972c92c59 100644 --- a/gpMgmt/bin/gppylib/mainUtils.py +++ b/gpMgmt/bin/gppylib/mainUtils.py @@ -294,13 +294,13 @@ def simple_main_internal(createOptionParserFn, createCommandFn, mainOptions): # at this point we have whatever lock we require try: - simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions) + simple_main_locked(parser, parserOptions, parserArgs, createCommandFn, mainOptions) finally: if sml is not None: sml.release() -def simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions): +def simple_main_locked(parser, parserOptions, parserArgs, createCommandFn, mainOptions): """ Not to be called externally -- use simple_main instead """ @@ -313,7 +313,6 @@ def simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions): faultProberInterface.registerFaultProber(faultProberImplGpdb.GpFaultProberImplGpdb()) commandObject = None - parser = None forceQuiet = mainOptions is not None and mainOptions.get("forceQuietOutput") diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index a1bf16e5630c..f2800963ed74 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -626,6 +626,18 @@ Feature: gprecoverseg tests And gprecoverseg should return a return code of 0 Then the cluster is rebalanced + Scenario: gprecoverseg errors out with restricted options + Given the database is running + And user stops all primary processes + And user can start transactions + When the user runs "gprecoverseg xyz" + Then gprecoverseg should return a return code of 2 + And gprecoverseg should print "Recovers a primary or mirror segment instance" to stdout + And gprecoverseg should print "too many arguments: only options may be specified" to stdout + When the user runs "gprecoverseg -a" + Then gprecoverseg should return a return code of 0 + And the segments are synchronized + And the cluster is rebalanced ########################### @concourse_cluster tests ########################### # The @concourse_cluster tag denotes the scenario that requires a remote cluster From fb31623d2b88cd9cbbd58184d61095aad409c92c Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Thu, 2 Nov 2023 21:40:22 +0000 Subject: [PATCH 066/106] Docs: added two new diskquota gucs (#16683) * Docs: added two new diskquota gucs * Changes from review --------- Co-authored-by: Mireia Perez Fuster --- .../ref_guide/modules/diskquota.html.md | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/gpdb-doc/markdown/ref_guide/modules/diskquota.html.md b/gpdb-doc/markdown/ref_guide/modules/diskquota.html.md index 6be439dcf729..a1af4cac603a 100644 --- a/gpdb-doc/markdown/ref_guide/modules/diskquota.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/diskquota.html.md @@ -140,6 +140,8 @@ Views available in the `diskquota` module include: - [diskquota.hard\_limit](#hardlimit) - Activates or deactivates the hard limit enforcement of disk usage. - [diskquota.max\_workers](#maxworkers) - Specifies the maximum number of diskquota worker processes that may be running at any one time. 
- [diskquota.max\_table\_segments](#maxtableseg) - Specifies the maximum number of *table segments* in the cluster. +- [diskquota.max_quota_probes](#maxquotaprobes) - Specifies the maximum number of of quota probes pre-allocated at the cluster level. +- [diskquota.max_monitored_databases](#maxmonitoreddatabases) - Specifies the maximum number of database that the module can monitor. You use the `gpconfig` command to set these parameters in the same way that you would set any Greenplum Database server configuration parameter. @@ -193,6 +195,23 @@ A Greenplum table \(including a partitioned table’s child tables\) is distribu The runtime value of `diskquota.max_table_segments` equals the maximum number of tables multiplied by \(number\_of\_segments + 1\). The default value is `10 * 1024 * 1024`. +### Specifying the Maximum Number of Quota Probes + +The `diskquota.max_quota_probes` server configuration parameter specifies the number of quota probes allowed at the cluster level. `diskquota` requires thousands of probes to collect different quota usage in the cluster, and each quota probe is only used to monitor a specific quota usage, such as how much disk space a role uses on a certain tablespace in a certain database. Even if you do not define its corresponding disk quota rule, its corresponding quota probe runs in the background. For example, if you have 100 roles in a cluster, but you only defined disk quota rules for 10 of the roles' disk usage, Greenplum still requires quota probes for the 100 roles in the cluster. + +You may calculate the number of maximum active probes for a cluster using the following formula: + +``` +role_num * database_num + schema_num + role_num * tablespace_num * database_num + schema_num * tablespace_num +``` + +where `role_num` is the number of roles in the cluster, `tablespace_number` is the number of tablespaces in the cluster, and `schema_num` is the total number of schemas in all databases. + +You must set `diskquota.max_quota_probes` to a number greater than the calculated maximum number of active quota probes: the higher the value, the more memory is used. The memory used by the probes can be calculated as `diskquota.max_quota_probes * 48` (in bytes). The default value of `diskquota.max_quota_probes` is `1048576`, which means that the memory used by the probes by default is `1048576 * 48`, which is approximately 50MB. + +### Specifying the Maximum Number of Databases + +The `diskquota.max_monitored_databases` server configuration parameter specifies the maximum number of databases that can be monitored by `diskquota`. The default value is 50 and the maximum value is 1024. ## Using the diskquota Module @@ -446,7 +465,7 @@ The `diskquota` module has the following limitations and known issues: ## Notes -The `diskquota` module can detect a newly created table inside of an uncommitted transaction. The size of the new table is included in the disk usage calculated for the corresponding schema or role. Hard limit enforcement of disk usage must enabled for a quota-exceeding operation to trigger a `quota exceeded` error in this scenario. +The `diskquota` module can detect a newly created table inside of an uncommitted transaction. The size of the new table is included in the disk usage calculated for its corresponding schema or role. Hard limit enforcement of disk usage must enabled for a quota-exceeding operation to trigger a `quota exceeded` error in this scenario. 
Deleting rows or running `VACUUM` on a table does not release disk space, so these operations cannot alone remove a schema or role from the `diskquota` denylist. The disk space used by a table can be reduced by running `VACUUM FULL` or `TRUNCATE TABLE`. From 9dd095e7bdbe680f2e8204e5fdfd3824a4c7b33f Mon Sep 17 00:00:00 2001 From: Lisa Owen Date: Mon, 6 Nov 2023 14:03:00 -0700 Subject: [PATCH 067/106] docs - gp_move_orphaned_files() UDF (6x) (#16692) * docs - gp_move_orphaned_files() UDF (6x) * remove unneeded backtick * use generic data directory * refine statement --- .../modules/gp_check_functions.html.md | 42 ++++++++++++++++++- .../markdown/ref_guide/modules/intro.html.md | 2 +- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md b/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md index 949a2886dc30..db0c04b409cc 100644 --- a/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/gp_check_functions.html.md @@ -1,6 +1,6 @@ # gp_check_functions -The `gp_check_functions` module implements views that identify missing and orphaned relation files. +The `gp_check_functions` module implements views that identify missing and orphaned relation files. The module also exposes a user-defined function that you can use to move orphaned files. The `gp_check_functions` module is a Greenplum Database extension. @@ -39,7 +39,7 @@ The `gp_check_orphaned_files` view scans the default and user-defined tablespace | gp_segment_id | The Greenplum Database segment identifier. | | tablespace | The identifier of the tablespace in which the orphaned file resides. | | filename | The file name of the orphaned data file. | -| filepath | The file system path of the orphaned data file, relative to `$MASTER_DATA_DIRECTORY`. | +| filepath | The file system path of the orphaned data file, relative to the data directory of the master or segment. | > **Caution** Use this view as one of many data points to identify orphaned data files. Do not delete files based solely on results from querying this view. @@ -68,6 +68,38 @@ The `gp_check_missing_files_ext` view scans only append-optimized, column-orient | filename | The file name of the missing extended data file. | +## Moving Orphaned Data Files + +The `gp_move_orphaned_files()` user-defined function (UDF) moves orphaned files found by the [gp_check_orphaned_files](#orphaned) view into a file system location that you specify. + +The function signature is: `gp_move_orphaned_files( TEXT )`. + +`` must exist on all segment hosts before you move the files, and the specified directory must be accessible by the `gpadmin` user. If you specify a relative path for ``, it is considered relative to the data directory of the master or segment. + +Greenplum Database renames each moved data file to one that reflects the original location of the file in the data directory. 
The file name format differs depending on the tablespace in which the orphaned file resides: + +| Tablespace | Renamed File Format| +|------|-----------| +| default | `seg_base__` | +| global | `seg_global_` | +| user-defined | `seg_pg_tblspc____` | + +For example, if a file named `12345` in the default tablespace is orphaned on primary segment 2, + +``` +SELECT * FROM gp_move_orphaned_files('/home/gpadmin/orphaned'); +``` + +moves and renames the file as follows: + +| Original Location | New Location and File Name | +|------|-----------| +| `/base/13700/12345` | `/home/gpadmin/orphaned/seg2_base_13700_12345` | + +`gp_move_orphaned_files()` returns both the original and the new file system locations for each file that it moves, and also provides an indication of the success or failure of the move operation. + +Once you move the orphaned files, you may choose to remove them or to back them up. + ## Examples Check for missing and orphaned non-extended files: @@ -83,3 +115,9 @@ Check for missing extended data files for append-optimized, column-oriented tabl SELECT * FROM gp_check_missing_files_ext; ``` +Move orphaned files to the `/home/gpadmin/orphaned` directory: + +``` sql +SELECT * FROM gp_move_orphaned_files('/home/gpadmin/orphaned'); +``` + diff --git a/gpdb-doc/markdown/ref_guide/modules/intro.html.md b/gpdb-doc/markdown/ref_guide/modules/intro.html.md index f18978b82e8e..68cd5fce7c25 100644 --- a/gpdb-doc/markdown/ref_guide/modules/intro.html.md +++ b/gpdb-doc/markdown/ref_guide/modules/intro.html.md @@ -16,7 +16,7 @@ The following Greenplum Database and PostgreSQL `contrib` modules are installed; - [diskquota](diskquota.html) - Allows administrators to set disk usage quotas for Greenplum Database roles and schemas. - [fuzzystrmatch](fuzzystrmatch.html) - Determines similarities and differences between strings. - [gp\_array\_agg](gp_array_agg.html) - Implements a parallel `array_agg()` aggregate function for Greenplum Database. -- [gp\_check\_functions](gp_check_functions.html) - Provides views to check for orphaned and missing relation files. +- [gp\_check\_functions](gp_check_functions.html) - Provides views to check for orphaned and missing relation files and a user-defined function to move orphaned files. - [gp\_legacy\_string\_agg](gp_legacy_string_agg.html) - Implements a legacy, single-argument `string_agg()` aggregate function that was present in Greenplum Database 5. - [gp\_parallel\_retrieve\_cursor](gp_parallel_retrieve_cursor.html) - Provides extended cursor functionality to retrieve data, in parallel, directly from Greenplum Database segments. - [gp\_percentile\_agg](gp_percentile_agg.html) - Improves GPORCA performance for ordered-set aggregate functions. From 283eea57100c690cb05b672b14eef7d0382e4e16 Mon Sep 17 00:00:00 2001 From: Yongtao Huang <99629139+hyongtao-db@users.noreply.github.com> Date: Tue, 7 Nov 2023 09:37:05 +0800 Subject: [PATCH 068/106] Fix bug of very long UTF-8 encoded query string truncation [6X] (#16674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pr cherry-picks #11946 and #16669 into 6X_STABLE. Long query log will be truncated when writing log.csv. UTF-8 is a variable-length encoding of 1~4 bytes, so UTF-8 characters may be truncated in the middle. 
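As a concrete illustration (a Python sketch of the idea, not the C change to elog.c shown below), cutting a UTF-8 byte string at an arbitrary byte offset can land in the middle of a multibyte character, while a boundary-aware clip, roughly what `pg_mbcliplen` does on the server side, backs up to the start of the last complete character:

```python
text = "床前明月光" * 100           # each CJK character is 3 bytes in UTF-8
raw = text.encode("utf-8")

# Naive truncation at a fixed byte length can split a character in half.
naive = raw[:100]                   # 100 is not a multiple of 3, so the cut is mid-character
try:
    naive.decode("utf-8")
except UnicodeDecodeError as exc:
    print("invalid byte sequence:", exc)   # analogous to the ERROR below

# Boundary-aware clip: back up while the byte at the cut point is a UTF-8
# continuation byte (bit pattern 10xxxxxx).
def clip_utf8(data, limit):
    end = min(limit, len(data))
    while 0 < end < len(data) and (data[end] & 0xC0) == 0x80:
        end -= 1
    return data[:end]

safe = clip_utf8(raw, 100)          # keeps 99 bytes = 33 whole characters
safe.decode("utf-8")                # decodes cleanly
```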
Without code change, the test case will meet encoding ERROR: ``` ERROR: invalid byte sequence for encoding "UTF8": 0xe9 0xa3 0x22 ``` because the long query log is truncated, please refer to the special char in the log.csv `�` as below: ``` 床前明月光疑是地上霜举头望明月低头思故乡 独坐幽篁里弹琴复长啸深林人不知明月来相照 千山鸟�",,,,,"PL/Python function " ``` Co-authored-by: t1mursadykov Co-authored-by: Yongtao Huang --- src/backend/utils/error/elog.c | 2 +- src/pl/plpython/expected/plpython_test.out | 30 ++++++++++++++++++++++ src/pl/plpython/sql/plpython_test.sql | 20 +++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index fcb63a9a918d..d4e6b39a12c2 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -3667,7 +3667,7 @@ append_string_to_pipe_chunk(PipeProtoChunk *buffer, const char* input) */ if (len >= PIPE_MAX_PAYLOAD * 20) { - len = PIPE_MAX_PAYLOAD * 20 - 1; + len = pg_mbcliplen(input, len, PIPE_MAX_PAYLOAD * 20 - 1); } char *data = buffer->data + buffer->hdr.len; diff --git a/src/pl/plpython/expected/plpython_test.out b/src/pl/plpython/expected/plpython_test.out index f377e614ed3d..81641de2952a 100755 --- a/src/pl/plpython/expected/plpython_test.out +++ b/src/pl/plpython/expected/plpython_test.out @@ -78,3 +78,33 @@ CONTEXT: Traceback (most recent call last): PL/Python function "elog_test", line 10, in plpy.error('error') PL/Python function "elog_test" +-- Long query Log will be truncated when writing log.csv. +-- If a UTF-8 character is truncated in its middle, +-- the encoding ERROR will appear due to the half of a UTF-8 character, like �. +-- PR-11946 can fix it. +-- If you want more detail, please refer to ISSUE-15319 +SET client_encoding TO 'UTF8'; +CREATE FUNCTION elog_test_string_truncate() RETURNS void +AS $$ +plpy.log("1"+("床前明月光疑是地上霜举头望明月低头思故乡\n"+ +"独坐幽篁里弹琴复长啸深林人不知明月来相照\n"+ +"千山鸟飞绝万径人踪灭孤舟蓑笠翁独钓寒江雪\n"+ +"白日依山尽黄河入海流欲穷千里目更上一层楼\n"+ +"好雨知时节当春乃发生随风潜入夜润物细无声\n")*267) +$$ LANGUAGE plpythonu; +SELECT elog_test_string_truncate(); + elog_test_string_truncate +--------------------------- + +(1 row) + +SELECT logseverity FROM gp_toolkit.__gp_log_master_ext order by logtime desc limit 5; + logseverity +------------- + LOG + LOG + LOG + LOG + LOG +(5 rows) + diff --git a/src/pl/plpython/sql/plpython_test.sql b/src/pl/plpython/sql/plpython_test.sql index 3a761047a091..449c211e890c 100644 --- a/src/pl/plpython/sql/plpython_test.sql +++ b/src/pl/plpython/sql/plpython_test.sql @@ -51,3 +51,23 @@ plpy.error('error') $$ LANGUAGE plpythonu; SELECT elog_test(); + +-- Long query Log will be truncated when writing log.csv. +-- If a UTF-8 character is truncated in its middle, +-- the encoding ERROR will appear due to the half of a UTF-8 character, like �. +-- PR-11946 can fix it. 
+-- If you want more detail, please refer to ISSUE-15319 +SET client_encoding TO 'UTF8'; + +CREATE FUNCTION elog_test_string_truncate() RETURNS void +AS $$ +plpy.log("1"+("床前明月光疑是地上霜举头望明月低头思故乡\n"+ +"独坐幽篁里弹琴复长啸深林人不知明月来相照\n"+ +"千山鸟飞绝万径人踪灭孤舟蓑笠翁独钓寒江雪\n"+ +"白日依山尽黄河入海流欲穷千里目更上一层楼\n"+ +"好雨知时节当春乃发生随风潜入夜润物细无声\n")*267) +$$ LANGUAGE plpythonu; + +SELECT elog_test_string_truncate(); + +SELECT logseverity FROM gp_toolkit.__gp_log_master_ext order by logtime desc limit 5; From e1dd93d7ab069d30acfb487c1f9edb0885d5c5ea Mon Sep 17 00:00:00 2001 From: Rakesh Sharma Date: Tue, 7 Nov 2023 11:08:22 +0530 Subject: [PATCH 069/106] Fix Differential recovery tablespace issue (#16195) (#16412) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: 1. Post differential recovery more tablespace directory is created on the target segment. [gpadmin@cdw ~]$ gpssh -f segment_host_list -v [WARN] Reference default values as $COORDINATOR_DATA_DIRECTORY/gpssh.conf could not be found Using delaybeforesend 0.05 and prompt_validation_timeout 1.0 [Reset ...] [INFO] login sdw2 [INFO] login sdw1 => ls /tmp/mytblespace/ ls /tmp/mytblespace/ [sdw2] 4 5 6 7 [sdw1] 2 3 8 9 => exit [gpadmin@cdw ~]$ gpssh -f segment_host_list -v [WARN] Reference default values as $COORDINATOR_DATA_DIRECTORY/gpssh.conf could not be found Using delaybeforesend 0.05 and prompt_validation_timeout 1.0 [Reset ...] [INFO] login sdw1 [INFO] login sdw2 => ls /tmp/mytblespace/ ls /tmp/mytblespace/ [sdw1] 2 3 4 5 6 7 8 9 [sdw2] 4 5 6 7 => exit Steps to reproduce ( tested on demo cluster): 1. make sure that the cluster is up and running in a balance state. 2. create a tablespace to a location outside of the data directory, add some tables just to have some data. 3. check the content of tablespace location(/tmp/mytblespace/) for all host. 4. pick any of the segments and make primary down and wait until mirror gets promoted. 5. run differential recovery. 6. post recover check the content of tablespace location(/tmp/mytblespace/) for all host. you will find that source tablespace data also getting populated in the target data dir. RCA: In differential recovery when we perform the step of sync_tablespaces() there is a bug because of that we are copying all the directories under the tablespace location. which was not intended as the get_segment_tablespace returning only base tablespace location, not the segment-specific target link location. Fix: Updated algorithm to copy tablespaces data directory and updating the symlink. following are the steps 1. First get the tablespace location using get_segment_tablespace_oid_locations() 2. clean all the symlink files available in $DATADIR/pg_tblspc/ for target data directory. 3. loopthrough all oid,location for the segment and do rsync from source to target tablespace dir. 4. create a symlink for target tablespace directory under $DATADIR/pg_tblspc. 2. Post differential recovery tablespace symlink is pointing to the wrong target path(source). for dbid 3: 17270 -> /tmp/testtblspc/3 (primary) for dbid 6: 17270 -> /tmp/testtblspc/3 (mirror) Steps to reproduce: 1. make sure that the cluster is up and running in a balanced state. 2. create a tablespace to a location outside of the data directory, add some tables just to have some data. 3. pick any one segment pair and check the symlink for primary and mirror at location $PG_DATA/pg_tblspc/{oid}. you will find that both are pointing to different locations, such as tablespace_location/{dbid}. 4. 
now make primary down and wait until mirror gets promoted. 5. run differential recovery. 6. post recovery check tablespace symlink for the current primary and mirror, you will find that mirror is pointing to the source tablespace location (tablespace_location/{primary_dbid}) or in other words both are pointing to same location. RCA: From the code when we are doing sync_pgdata we are copying all data from $PG_DATA/pg_tblspc/ to the target data directory, which is an overwriting symlink in the target data directory. Fix: recreated the symlink in sync_tablespaces() step. Some observation: when we create checkpoint before recovery via pg_start_backup() it creates tablespace_map file along with backup_label that also holds the entries for tablespace symlink. on segment_start it reads symlink from the tablespace_map file and creates the same. As we wanted the fix to be consistent with gp_expand and gpupgrade instead of updating tablespaace_map file we have updated the symlinkthat is the reason in this fix, we have excluded tablespace_map file from the file list. we have also excluded pg_tblspc dir in sync_pg_data(), this will help us to run rsync for pg_data and tablespace in parallel. Unit Test: Updated the unit test impacted by the changes. Behave test: extended the current tablespace verification step to verify tablespace to check for the symlink duplication and wrong entry. also updated tablespace to make it little more complex scenario. --- gpMgmt/bin/gppylib/commands/unix.py | 14 ++++ .../segment_tablespace_locations.py | 6 +- .../test_unit_segment_tablespace_locations.py | 10 +-- .../test/unit/test_unit_gpsegrecovery.py | 22 ++++-- gpMgmt/sbin/gpsegrecovery.py | 68 ++++++++++++++----- .../behave/mgmt_utils/gprecoverseg.feature | 9 ++- .../mgmt_utils/steps/tablespace_mgmt_utils.py | 33 ++++++++- 7 files changed, 127 insertions(+), 35 deletions(-) diff --git a/gpMgmt/bin/gppylib/commands/unix.py b/gpMgmt/bin/gppylib/commands/unix.py index 8bb1b0dca71d..d54ffa8d8a3a 100644 --- a/gpMgmt/bin/gppylib/commands/unix.py +++ b/gpMgmt/bin/gppylib/commands/unix.py @@ -173,6 +173,20 @@ def kill_sequence(pid): logandkill(pid, signal.SIGABRT) +def get_remote_link_path(path, host): + """ + Function to get symlink target path for a given path on given host. + :param path: path for which symlink has to be found + :param host: host on which the given path is available + :return: returns symlink target path + """ + + cmdStr = """python -c 'import os; print(os.readlink("%s"))'""" % path + cmd = Command('get remote link path', cmdStr=cmdStr, ctxt=REMOTE, + remoteHost=host) + cmd.run(validateAfter=True) + return cmd.get_stdout() + # ---------------Platform Framework-------------------- """ The following platform framework is used to handle any differences between diff --git a/gpMgmt/bin/gppylib/operations/segment_tablespace_locations.py b/gpMgmt/bin/gppylib/operations/segment_tablespace_locations.py index 06ee46e39d21..135d4dd410d9 100644 --- a/gpMgmt/bin/gppylib/operations/segment_tablespace_locations.py +++ b/gpMgmt/bin/gppylib/operations/segment_tablespace_locations.py @@ -46,7 +46,7 @@ def get_tablespace_locations(all_hosts, mirror_data_directory): return tablespace_locations -def get_segment_tablespace_locations(primary_hostname, primary_port): +def get_segment_tablespace_oid_locations(primary_hostname, primary_port): """ to get user defined tablespace locations for a specific primary segment. 
This function is called by gprecoverseg --differential to get the tablespace locations by connecting to primary while mirror is down. @@ -54,9 +54,9 @@ def get_segment_tablespace_locations(primary_hostname, primary_port): as parameter and it is called before mirrors are moved to new location by gpmovemirrors. :param primary_hostname: string type primary hostname :param primary_port: int type primary segment port - :return: list of tablespace locations + :return: list of tablespace oids and locations """ - sql = "SELECT distinct(tblspc_loc) FROM ( SELECT oid FROM pg_tablespace WHERE spcname NOT IN " \ + sql = "SELECT distinct(oid),tblspc_loc FROM ( SELECT oid FROM pg_tablespace WHERE spcname NOT IN " \ "('pg_default', 'pg_global')) AS q,LATERAL gp_tablespace_location(q.oid);" try: query = RemoteQueryCommand("Get segment tablespace locations", sql, primary_hostname, primary_port) diff --git a/gpMgmt/bin/gppylib/operations/test/unit/test_unit_segment_tablespace_locations.py b/gpMgmt/bin/gppylib/operations/test/unit/test_unit_segment_tablespace_locations.py index 13dbc482b195..2cf90b00e665 100644 --- a/gpMgmt/bin/gppylib/operations/test/unit/test_unit_segment_tablespace_locations.py +++ b/gpMgmt/bin/gppylib/operations/test/unit/test_unit_segment_tablespace_locations.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from mock import Mock, patch, call -from gppylib.operations.segment_tablespace_locations import get_tablespace_locations, get_segment_tablespace_locations +from gppylib.operations.segment_tablespace_locations import get_tablespace_locations, get_segment_tablespace_oid_locations from test.unit.gp_unittest import GpTestCase class GetTablespaceDirTestCase(GpTestCase): @@ -39,9 +39,9 @@ def test_validate_data_with_mirror_data_directory_get_tablespace_locations(self) self.assertEqual(expected, get_tablespace_locations(False, mirror_data_directory)) @patch('gppylib.db.catalog.RemoteQueryCommand.run', side_effect=Exception()) - def test_get_segment_tablespace_locations_exception(self, mock1): + def test_get_segment_tablespace_oid_locations_exception(self, mock1): with self.assertRaises(Exception) as ex: - get_segment_tablespace_locations('sdw1', 40000) + get_segment_tablespace_oid_locations('sdw1', 40000) self.assertEqual(0, self.mock_logger.debug.call_count) self.assertTrue('Failed to get segment tablespace locations for segment with host sdw1 and port 40000' in str(ex.exception)) @@ -49,8 +49,8 @@ def test_get_segment_tablespace_locations_exception(self, mock1): @patch('gppylib.db.catalog.RemoteQueryCommand.__init__', return_value=None) @patch('gppylib.db.catalog.RemoteQueryCommand.run') @patch('gppylib.db.catalog.RemoteQueryCommand.get_results') - def test_get_segment_tablespace_locations_success(self, mock1, mock2, mock3): - get_segment_tablespace_locations('sdw1', 40000) + def test_get_segment_tablespace_oid_locations_success(self, mock1, mock2, mock3): + get_segment_tablespace_oid_locations('sdw1', 40000) self.assertEqual(1, self.mock_logger.debug.call_count) self.assertEqual([call('Successfully got tablespace locations for segment with host sdw1, port 40000')], self.mock_logger.debug.call_args_list) diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py index e1918fa60b1a..56ba305cfa94 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py @@ -478,8 +478,12 @@ def test_pg_stop_backup_success(self, mock1, mock2): 
self.mock_logger.debug.call_args_list) @patch('gppylib.db.catalog.RemoteQueryCommand.get_results', - return_value=[['/data/mytblspace1'], ['/data/mytblspace2']]) - def test_sync_tablespaces_outside_data_dir(self, mock): + return_value=[['1111','/data/mytblspace1'], ['2222','/data/mytblspace2']]) + @patch('gpsegrecovery.get_remote_link_path', + return_value='/data/mytblspace1/2') + @patch('os.listdir') + @patch('os.symlink') + def test_sync_tablespaces_outside_data_dir(self, mock1,mock2,mock3,mock4): self.diff_recovery_cmd.sync_tablespaces() self.assertEqual(2, self.mock_rsync_init.call_count) self.assertEqual(2, self.mock_rsync_run.call_count) @@ -488,8 +492,10 @@ def test_sync_tablespaces_outside_data_dir(self, mock): self.mock_logger.debug.call_args_list) @patch('gppylib.db.catalog.RemoteQueryCommand.get_results', - return_value=[['/data/mirror0']]) - def test_sync_tablespaces_within_data_dir(self, mock): + return_value=[['1234','/data/primary0']]) + @patch('os.listdir') + @patch('os.symlink') + def test_sync_tablespaces_within_data_dir(self, mock, mock2,mock3): self.diff_recovery_cmd.sync_tablespaces() self.assertEqual(0, self.mock_rsync_init.call_count) self.assertEqual(0, self.mock_rsync_run.call_count) @@ -497,8 +503,12 @@ def test_sync_tablespaces_within_data_dir(self, mock): self.mock_logger.debug.call_args_list) @patch('gppylib.db.catalog.RemoteQueryCommand.get_results', - return_value=[['/data/mirror0'], ['/data/mytblspace1']]) - def test_sync_tablespaces_mix_data_dir(self, mock): + return_value=[['1111','/data/primary0'], ['2222','/data/mytblspace1']]) + @patch('gpsegrecovery.get_remote_link_path', + return_value='/data/mytblspace1/2') + @patch('os.listdir') + @patch('os.symlink') + def test_sync_tablespaces_mix_data_dir(self, mock1, mock2, mock3,mock4): self.diff_recovery_cmd.sync_tablespaces() self.assertEqual(1, self.mock_rsync_init.call_count) self.assertEqual(1, self.mock_rsync_run.call_count) diff --git a/gpMgmt/sbin/gpsegrecovery.py b/gpMgmt/sbin/gpsegrecovery.py index 181d378fa212..d38b72f2ca4a 100644 --- a/gpMgmt/sbin/gpsegrecovery.py +++ b/gpMgmt/sbin/gpsegrecovery.py @@ -11,9 +11,11 @@ from gppylib.commands.gp import SegmentStart from gppylib.gparray import Segment from gppylib.commands.gp import ModifyConfSetting +from gppylib.db import dbconn from gppylib.db.catalog import RemoteQueryCommand from gppylib.operations.get_segments_in_recovery import is_seg_in_backup_mode -from gppylib.operations.segment_tablespace_locations import get_segment_tablespace_locations +from gppylib.operations.segment_tablespace_locations import get_segment_tablespace_oid_locations +from gppylib.commands.unix import get_remote_link_path class FullRecovery(Command): @@ -174,17 +176,25 @@ def sync_pg_data(self): "postmaster.opts", "internal.auto.conf", "pg_dynshmem", + # tablespace_map file is generated on call of pg_start_backup on primary, this file contains the target link + # of the tablespace like 17264 /tmp/testtblspc/6.if we do not add this in exclude list the file will get + # copied to the mirror.and after recovery, if we start the segment, because of the presence of the tablespace_map + # file in mirror data_directory, it honors the file and recreates the symlinks as available in the tabespace_map file. + # but the problem here is as the tablespace_map file has the content from the primary segment + # it will create a wrong symlink for table space. 
+ "tablespace_map", "pg_notify/*", "pg_replslot/*", "pg_serial/*", "pg_stat_tmp/*", "pg_snapshots/*", "pg_subtrans/*", + "pg_tblspc/*", # excluding as the tablespace is handled in sync_tablespaces() "backups/*", "/db_dumps", # as we exclude during pg_basebackup "gpperfmon/data", # as we exclude during pg_basebackup "gpperfmon/logs", # as we exclude during pg_basebackup - "/promote", # Need to check why do we exclude it during pg_basebackup + "/promote", # as we exclude during pg_basebackup ] """ Rsync options used: @@ -268,24 +278,46 @@ def sync_tablespaces(self): "Syncing tablespaces of dbid {} which are outside of data_dir".format( self.recovery_info.target_segment_dbid)) - # get the tablespace locations - tablespaces = get_segment_tablespace_locations(self.recovery_info.source_hostname, + # get the oid and tablespace locations + tablespaces = get_segment_tablespace_oid_locations(self.recovery_info.source_hostname, self.recovery_info.source_port) - for tablespace_location in tablespaces: - if tablespace_location[0].startswith(self.recovery_info.target_datadir): - continue - # os.path.join(dir, "") will append a '/' at the end of dir. When using "/" at the end of source, - # rsync will copy the content of the last directory. When not using "/" at the end of source, rsync - # will copy the last directory and the content of the directory. - cmd = Rsync(name="Sync tablespace", - srcFile=os.path.join(tablespace_location[0], ""), - dstFile=tablespace_location[0], - srcHost=self.recovery_info.source_hostname, - progress=True, - checksum=True, - progress_file=self.recovery_info.progress_file) - cmd.run(validateAfter=True) + # clear all tablespace symlink for target. + for file in os.listdir(os.path.join(self.recovery_info.target_datadir,"pg_tblspc")): + file_path = os.path.join(self.recovery_info.target_datadir,"pg_tblspc",file) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + except Exception as e: + raise Exception("Failed to remove link {} for dbid {} : {}". + format(file_path,self.recovery_info.target_segment_dbid, str(e))) + + for oid, tablespace_location in tablespaces: + # tablespace_location is the link path who's symlink is created at $DATADIR/pg_tblspc/{oid} + # tablespace_location is the base path in which datafiles are stored in respective dbid directory. + targetOidPath = os.path.join(self.recovery_info.target_datadir, "pg_tblspc", str(oid)) + targetPath = os.path.join(tablespace_location, str(self.recovery_info.target_segment_dbid)) + + #if tablespace is not inside the datadir do rsync for copy, if it is inside datadirectory + #files would have been copied while doing rsync for data dir. + if not tablespace_location.startswith(self.recovery_info.source_datadir): + srcOidPath = os.path.join(self.recovery_info.source_datadir, "pg_tblspc", str(oid)) + srcPath = get_remote_link_path(srcOidPath,self.recovery_info.source_hostname) + + # os.path.join(dir, "") will append a '/' at the end of dir. When using "/" at the end of source, + # rsync will copy the content of the last directory. When not using "/" at the end of source, rsync + # will copy the last directory and the content of the directory. + cmd = Rsync(name="Sync tablespace", + srcFile=os.path.join(srcPath, ""), + dstFile=targetPath, + srcHost=self.recovery_info.source_hostname, + progress=True, + checksum=True, + progress_file=self.recovery_info.progress_file) + cmd.run(validateAfter=True) + + # create tablespace symlink for target data directory. 
+ os.symlink(targetPath, targetOidPath) def start_segment(recovery_info, logger, era): diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index f2800963ed74..a1fbd6039b2d 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -3,14 +3,15 @@ Feature: gprecoverseg tests Scenario Outline: recovery works with tablespaces Given the database is running - And a tablespace is created with data And user stops all primary processes And user can start transactions + And a tablespace is created with data When the user runs "gprecoverseg " Then gprecoverseg should return a return code of 0 And the segments are synchronized And verify replication slot internal_wal_replication_slot is available on all the segments And the tablespace is valid + And the tablespace has valid symlink And the database segments are in execute mode Given another tablespace is created with data @@ -19,6 +20,7 @@ Feature: gprecoverseg tests And the segments are synchronized And verify replication slot internal_wal_replication_slot is available on all the segments And the tablespace is valid + And the tablespace has valid symlink And the other tablespace is valid And the database segments are in execute mode Examples: @@ -685,13 +687,14 @@ Feature: gprecoverseg tests @concourse_cluster Scenario Outline: incremental recovery works with tablespaces on a multi-host environment Given the database is running - And a tablespace is created with data And user stops all primary processes And user can start transactions + And a tablespace is created with data When the user runs "gprecoverseg " Then gprecoverseg should return a return code of 0 And the segments are synchronized And the tablespace is valid + And the tablespace has valid symlink And the database segments are in execute mode Given another tablespace is created with data @@ -700,6 +703,7 @@ Feature: gprecoverseg tests And the segments are synchronized And verify replication slot internal_wal_replication_slot is available on all the segments And the tablespace is valid + And the tablespace has valid symlink And the other tablespace is valid And the database segments are in execute mode Examples: @@ -747,6 +751,7 @@ Feature: gprecoverseg tests # verify the data And the tablespace is valid + And the tablespace has valid symlink And the row count from table "public.before_host_is_down" in "gptest" is verified against the saved data And the row count from table "public.after_host_is_down" in "gptest" is verified against the saved data diff --git a/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py index fc05cb3ce8c4..41f924c0d871 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py @@ -1,6 +1,6 @@ import pipes import tempfile -import time +import os from behave import given, then from pygresql import pg @@ -9,6 +9,8 @@ from gppylib.gparray import GpArray from test.behave_utils.utils import run_cmd,wait_for_database_dropped from gppylib.commands.base import Command, REMOTE +from gppylib.commands.unix import get_remote_link_path +from contextlib import closing class Tablespace: def __init__(self, name): @@ -72,6 +74,32 @@ def verify(self, hostname=None, port=0): raise Exception("Tablespace data is not identically distributed. 
Expected:\n%r\n but found:\n%r" % ( sorted(self.initial_data), sorted(data))) + def verify_symlink(self, hostname=None, port=0): + url = dbconn.DbURL(hostname=hostname, port=port, dbname=self.dbname) + gparray = GpArray.initFromCatalog(url) + all_segments = gparray.getDbList() + + # fetching oid of available user created tablespaces + with closing(dbconn.connect(url, unsetSearchPath=False)) as conn: + tblspc_oids = dbconn.execSQL(conn, "SELECT oid FROM pg_tablespace WHERE spcname NOT IN ('pg_default', 'pg_global')").fetchall() + + if not tblspc_oids: + return None # no table space is present + + # keeping a list to check if any of the symlink has duplicate entry + tblspc = [] + for seg in all_segments: + for tblspc_oid in tblspc_oids: + symlink_path = os.path.join(seg.getSegmentTableSpaceDirectory(), str(tblspc_oid[0])) + target_path = get_remote_link_path(symlink_path, seg.getSegmentHostName()) + segDbId = seg.getSegmentDbId() + #checking for duplicate and wrong symlink target + if target_path in tblspc or os.path.basename(target_path) != str(segDbId): + raise Exception("tablespac has invalid/duplicate symlink for oid {0} in segment dbid {1}".\ + format(str(tblspc_oid[0]),str(segDbId))) + + tblspc.append(target_path) + def verify_for_gpexpand(self, hostname=None, port=0): """ For gpexpand, we need make sure: @@ -191,6 +219,9 @@ def _create_tablespace_with_data(context, name): def impl(context): context.tablespaces["outerspace"].verify() +@then('the tablespace has valid symlink') +def impl(context): + context.tablespaces["outerspace"].verify_symlink() @then('the tablespace is valid on the standby master') def impl(context): From 3063cd2f7f3d179ba446a21a7c0d7acb73ee3f73 Mon Sep 17 00:00:00 2001 From: David Kimura Date: Tue, 7 Nov 2023 13:34:48 -0800 Subject: [PATCH 070/106] [ORCA] Relax client/server CTYPE encoding requirement (#16684) ORCA represents strings using wide characters. That requires converting to wide format using `vswprintf()`. However, that API may fail if the string to covert is incompatible with the set LC_CTYPE character set. That can happen if the database defined LC_CTYPE character set doesn't support the client character set. For example, if the database has LC_CTYPE set to 'C', but the client has 'ko_KR.UTF-8'. In that case, ORCA currently falls back to PLANNER because converting to wide format fails. A few possible solutions were considered: 1) Remove wide characters from ORCA 2) Use a 'good' LC_CTYPE while calling `vswprintf()` 3) Return a generic wide character string on failure Solution 1 is very invasive. Solution 2 is tricky to pick a 'good' LC_CTYPE. Solution 3 seems the simplest and is the implemented approach. Solution 3 required re-thinking commit 6f0737379c9. Instead of a fall back, we allow ORCA use a generic string. Then translate it back to the actual string during DXLToPlStmt translation. That handles the case where the offending name is in the column project list (see added regress testcase). 
--- .../gpopt/translate/CDXLTranslateContext.cpp | 7 +- .../translate/CTranslatorDXLToPlStmt.cpp | 63 ++++++++++++- src/backend/gpopt/utils/COptTasks.cpp | 12 +-- .../libgpos/include/gpos/error/CException.h | 3 - .../src/unittest/gpos/string/CWStringTest.cpp | 19 ++-- .../gporca/libgpos/src/common/clibwrapper.cpp | 6 +- .../gporca/libgpos/src/error/CMessage.cpp | 10 --- .../gpopt/translate/CDXLTranslateContext.h | 17 +++- .../gpopt/translate/CTranslatorDXLToPlStmt.h | 1 + src/include/gpopt/utils/COptTasks.h | 5 +- src/test/regress/expected/gp_locale.out | 90 +++++++++++++++++++ src/test/regress/greenplum_schedule | 2 +- src/test/regress/sql/gp_locale.sql | 61 +++++++++++++ 13 files changed, 256 insertions(+), 40 deletions(-) create mode 100644 src/test/regress/expected/gp_locale.out create mode 100644 src/test/regress/sql/gp_locale.sql diff --git a/src/backend/gpopt/translate/CDXLTranslateContext.cpp b/src/backend/gpopt/translate/CDXLTranslateContext.cpp index 7c870054c343..6ccc12d6abcb 100644 --- a/src/backend/gpopt/translate/CDXLTranslateContext.cpp +++ b/src/backend/gpopt/translate/CDXLTranslateContext.cpp @@ -27,8 +27,9 @@ using namespace gpos; // //--------------------------------------------------------------------------- CDXLTranslateContext::CDXLTranslateContext(CMemoryPool *mp, - BOOL is_child_agg_node) - : m_mp(mp), m_is_child_agg_node(is_child_agg_node) + BOOL is_child_agg_node, + const Query *query) + : m_mp(mp), m_is_child_agg_node(is_child_agg_node), m_query(query) { // initialize hash table m_colid_to_target_entry_map = GPOS_NEW(m_mp) ULongToTargetEntryMap(m_mp); @@ -46,7 +47,7 @@ CDXLTranslateContext::CDXLTranslateContext(CMemoryPool *mp, CDXLTranslateContext::CDXLTranslateContext(CMemoryPool *mp, BOOL is_child_agg_node, ULongToColParamMap *original) - : m_mp(mp), m_is_child_agg_node(is_child_agg_node) + : m_mp(mp), m_is_child_agg_node(is_child_agg_node), m_query(NULL) { m_colid_to_target_entry_map = GPOS_NEW(m_mp) ULongToTargetEntryMap(m_mp); m_colid_to_paramid_map = GPOS_NEW(m_mp) ULongToColParamMap(m_mp); diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 465e29422020..d173655f1422 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -210,11 +210,12 @@ CTranslatorDXLToPlStmt::InitTranslators() //--------------------------------------------------------------------------- PlannedStmt * CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, + const Query *orig_query, bool can_set_tag) { GPOS_ASSERT(NULL != dxlnode); - CDXLTranslateContext dxl_translate_ctxt(m_mp, false); + CDXLTranslateContext dxl_translate_ctxt(m_mp, false, orig_query); CDXLTranslationContextArray *ctxt_translation_prev_siblings = GPOS_NEW(m_mp) CDXLTranslationContextArray(m_mp); @@ -4742,6 +4743,51 @@ CTranslatorDXLToPlStmt::TranslateDXLTblDescrToRangeTblEntry( return rte; } +//--------------------------------------------------------------------------- +// @function: +// update_unknown_locale_walker +// +// @doc: +// Given an expression tree and a TargetEntry pointer context, look for a +// matching target entry in the expression tree and overwrite the given +// TargetEntry context's resname with the original found in the expression +// tree. 
+// +//--------------------------------------------------------------------------- +static bool +update_unknown_locale_walker(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + TargetEntry *unknown_target_entry = (TargetEntry *) context; + + if (IsA(node, TargetEntry)) + { + TargetEntry *te = (TargetEntry *) node; + + if (te->resorigtbl == unknown_target_entry->resorigtbl && + te->resno == unknown_target_entry->resno) + { + unknown_target_entry->resname = te->resname; + return false; + } + } + else if (IsA(node, Query)) + { + Query *query = (Query *) node; + + return gpdb::WalkExpressionTree( + (Node *) query->targetList, + (bool (*)()) update_unknown_locale_walker, (void *) context); + } + + return gpdb::WalkExpressionTree( + node, (bool (*)()) update_unknown_locale_walker, (void *) context); +} + //--------------------------------------------------------------------------- // @function: // CTranslatorDXLToPlStmt::TranslateDXLProjList @@ -4846,6 +4892,21 @@ CTranslatorDXLToPlStmt::TranslateDXLProjList( } target_entry->resorigtbl = pteOriginal->resorigtbl; target_entry->resorigcol = pteOriginal->resorigcol; + + // ORCA represents strings using wide characters. That can + // require converting from multibyte characters using + // vswprintf(). However, vswprintf() is dependent on the system + // locale which is set at the database level. When that locale + // cannot interpret the string correctly, it fails. ORCA + // bypasses the failure by using a generic "UNKNOWN" string. + // When that happens, the following code translates it back to + // the original multibyte string. + if (strcmp(target_entry->resname, "UNKNOWN") == 0) + { + update_unknown_locale_walker( + (Node *) output_context->GetQuery(), + (void *) target_entry); + } } } diff --git a/src/backend/gpopt/utils/COptTasks.cpp b/src/backend/gpopt/utils/COptTasks.cpp index f435ece2c273..2e4c060133fb 100644 --- a/src/backend/gpopt/utils/COptTasks.cpp +++ b/src/backend/gpopt/utils/COptTasks.cpp @@ -285,8 +285,9 @@ COptTasks::LogExceptionMessageAndDelete(CHAR *err_buf, ULONG severity_level) //--------------------------------------------------------------------------- PlannedStmt * COptTasks::ConvertToPlanStmtFromDXL( - CMemoryPool *mp, CMDAccessor *md_accessor, const CDXLNode *dxlnode, - bool can_set_tag, DistributionHashOpsKind distribution_hashops) + CMemoryPool *mp, CMDAccessor *md_accessor, const Query *orig_query, + const CDXLNode *dxlnode, bool can_set_tag, + DistributionHashOpsKind distribution_hashops) { GPOS_ASSERT(NULL != md_accessor); GPOS_ASSERT(NULL != dxlnode); @@ -305,8 +306,8 @@ COptTasks::ConvertToPlanStmtFromDXL( // translate DXL -> PlannedStmt CTranslatorDXLToPlStmt dxl_to_plan_stmt_translator( mp, md_accessor, &dxl_to_plan_stmt_ctxt, gpdb::GetGPSegmentCount()); - return dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL(dxlnode, - can_set_tag); + return dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL( + dxlnode, orig_query, can_set_tag); } @@ -603,7 +604,8 @@ COptTasks::OptimizeTask(void *ptr) // that may not have the correct can_set_tag opt_ctxt->m_plan_stmt = (PlannedStmt *) gpdb::CopyObject(ConvertToPlanStmtFromDXL( - mp, &mda, plan_dxl, opt_ctxt->m_query->canSetTag, + mp, &mda, opt_ctxt->m_query, plan_dxl, + opt_ctxt->m_query->canSetTag, query_to_dxl_translator->GetDistributionHashOpsKind())); } diff --git a/src/backend/gporca/libgpos/include/gpos/error/CException.h b/src/backend/gporca/libgpos/include/gpos/error/CException.h index d616de5415ec..83d2ba9cd3cd 100644 --- 
a/src/backend/gporca/libgpos/include/gpos/error/CException.h +++ b/src/backend/gporca/libgpos/include/gpos/error/CException.h @@ -135,9 +135,6 @@ class CException // unknown exception ExmiUnhandled, - // illegal byte sequence - ExmiIllegalByteSequence, - ExmiSentinel }; diff --git a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp index a216a82160b6..483b8a47e07f 100644 --- a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp +++ b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp @@ -178,30 +178,23 @@ CWStringTest::EresUnittest_AppendFormatInvalidLocale() CAutoMemoryPool amp(CAutoMemoryPool::ElcExc); CMemoryPool *mp = amp.Pmp(); + CWStringDynamic *expected = + GPOS_NEW(mp) CWStringDynamic(mp, GPOS_WSZ_LIT("UNKNOWN")); + CHAR *oldLocale = setlocale(LC_CTYPE, NULL); CWStringDynamic *pstr1 = GPOS_NEW(mp) CWStringDynamic(mp); GPOS_RESULT eres = GPOS_OK; setlocale(LC_CTYPE, "C"); - GPOS_TRY - { - pstr1->AppendFormat(GPOS_WSZ_LIT("%s"), (CHAR *) "ÃË", 123); - - eres = GPOS_FAILED; - } - GPOS_CATCH_EX(ex) - { - GPOS_ASSERT(GPOS_MATCH_EX(ex, CException::ExmaSystem, - CException::ExmiIllegalByteSequence)); + pstr1->AppendFormat(GPOS_WSZ_LIT("%s"), (CHAR *) "ÃË", 123); - GPOS_RESET_EX; - } - GPOS_CATCH_END; + pstr1->Equals(expected); // cleanup setlocale(LC_CTYPE, oldLocale); GPOS_DELETE(pstr1); + GPOS_DELETE(expected); return eres; } diff --git a/src/backend/gporca/libgpos/src/common/clibwrapper.cpp b/src/backend/gporca/libgpos/src/common/clibwrapper.cpp index f5dabb720a68..dc516515e1c5 100644 --- a/src/backend/gporca/libgpos/src/common/clibwrapper.cpp +++ b/src/backend/gporca/libgpos/src/common/clibwrapper.cpp @@ -358,7 +358,11 @@ gpos::clib::Vswprintf(WCHAR *wcstr, SIZE_T max_len, const WCHAR *format, { // Invalid multibyte character encountered. This can happen if the byte sequence does not // match with the server encoding. - GPOS_RAISE(CException::ExmaSystem, CException::ExmiIllegalByteSequence); + // + // Rather than fail/fall-back here, ORCA uses a generic "UNKNOWN" + // string. 
During DXL to PlStmt translation this will be translated + // back using the original query tree (see TranslateDXLProjList) + res = swprintf(wcstr, max_len, format, "UNKNOWN"); } return res; diff --git a/src/backend/gporca/libgpos/src/error/CMessage.cpp b/src/backend/gporca/libgpos/src/error/CMessage.cpp index 34d6a2b568d2..d9337dc564c8 100644 --- a/src/backend/gporca/libgpos/src/error/CMessage.cpp +++ b/src/backend/gporca/libgpos/src/error/CMessage.cpp @@ -272,16 +272,6 @@ CMessage::GetMessage(ULONG index) CException(CException::ExmaUnhandled, CException::ExmiUnhandled), CException::ExsevError, GPOS_WSZ_WSZLEN("Unhandled exception"), 0, GPOS_WSZ_WSZLEN("Unhandled exception")), - - CMessage( - CException(CException::ExmaSystem, - CException::ExmiIllegalByteSequence), - CException::ExsevError, - GPOS_WSZ_WSZLEN( - "Invalid multibyte character for locale encountered in metadata name"), - 0, - GPOS_WSZ_WSZLEN( - "Invalid multibyte character for locale encountered in metadata name")), }; return &msg[index]; diff --git a/src/include/gpopt/translate/CDXLTranslateContext.h b/src/include/gpopt/translate/CDXLTranslateContext.h index 31e07e681945..bd9b59ca144f 100644 --- a/src/include/gpopt/translate/CDXLTranslateContext.h +++ b/src/include/gpopt/translate/CDXLTranslateContext.h @@ -17,6 +17,12 @@ #ifndef GPDXL_CDXLTranslateContext_H #define GPDXL_CDXLTranslateContext_H +extern "C" { +#include "postgres.h" + +#include "nodes/plannodes.h" +} + #include "gpos/base.h" #include "gpos/common/CHashMap.h" #include "gpos/common/CHashMapIter.h" @@ -78,12 +84,15 @@ class CDXLTranslateContext // to use OUTER instead of 0 for Var::varno in Agg target lists (MPP-12034) BOOL m_is_child_agg_node; + const Query *m_query; + // copy the params hashmap void CopyParamHashmap(ULongToColParamMap *original); public: // ctor/dtor - CDXLTranslateContext(CMemoryPool *mp, BOOL is_child_agg_node); + CDXLTranslateContext(CMemoryPool *mp, BOOL is_child_agg_node, + const Query *query); CDXLTranslateContext(CMemoryPool *mp, BOOL is_child_agg_node, ULongToColParamMap *original); @@ -100,6 +109,12 @@ class CDXLTranslateContext return m_colid_to_paramid_map; } + const Query * + GetQuery() + { + return m_query; + } + // return the target entry corresponding to the given ColId const TargetEntry *GetTargetEntry(ULONG colid) const; diff --git a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h index 3a8cf515c140..881fc2400eab 100644 --- a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h +++ b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h @@ -178,6 +178,7 @@ class CTranslatorDXLToPlStmt // main translation routine for DXL tree -> PlannedStmt PlannedStmt *GetPlannedStmtFromDXL(const CDXLNode *dxlnode, + const Query *orig_query, bool can_set_tag); // translate the join types from its DXL representation to the GPDB one diff --git a/src/include/gpopt/utils/COptTasks.h b/src/include/gpopt/utils/COptTasks.h index ea6d30650e4f..5c70e589db8a 100644 --- a/src/include/gpopt/utils/COptTasks.h +++ b/src/include/gpopt/utils/COptTasks.h @@ -132,8 +132,9 @@ class COptTasks // translate a DXL tree into a planned statement static PlannedStmt *ConvertToPlanStmtFromDXL( - CMemoryPool *mp, CMDAccessor *md_accessor, const CDXLNode *dxlnode, - bool can_set_tag, DistributionHashOpsKind distribution_hashops); + CMemoryPool *mp, CMDAccessor *md_accessor, const Query *orig_query, + const CDXLNode *dxlnode, bool can_set_tag, + DistributionHashOpsKind distribution_hashops); // load search strategy 
from given path static CSearchStageArray *LoadSearchStrategy(CMemoryPool *mp, char *path); diff --git a/src/test/regress/expected/gp_locale.out b/src/test/regress/expected/gp_locale.out new file mode 100644 index 000000000000..0d916a93d70c --- /dev/null +++ b/src/test/regress/expected/gp_locale.out @@ -0,0 +1,90 @@ +-- ORCA uses functions (e.g. vswprintf) to translation to wide character +-- format. But those libraries may fail if the current locale cannot handle the +-- character set. This test checks that even when those libraries fail, ORCA is +-- still able to generate plans. +-- +-- Create a database that sets the minimum locale +-- +DROP DATABASE IF EXISTS test_locale; +CREATE DATABASE test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c test_locale +-- +-- drop/add/remove columns +-- +CREATE TABLE hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +ALTER TABLE hi_안녕세계 DROP COLUMN 안녕세계2; +ALTER TABLE hi_안녕세계 ADD COLUMN 안녕세계2_ADD_COLUMN text; +ALTER TABLE hi_안녕세계 RENAME COLUMN 안녕세계3 TO こんにちわ3; +INSERT INTO hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +INSERT INTO hi_안녕세계 VALUES(42, '안녕세계1 second', '안녕세2 second', '안녕세계3 second'); +-- +-- Try various queries containing multibyte character set and check the column +-- name output +-- +SET optimizer_trace_fallback=on; +-- DELETE +DELETE FROM hi_안녕세계 WHERE a=42; +-- UPDATE +UPDATE hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +-- SELECT +SELECT * FROM hi_안녕세계; + a | 안녕세계1 | こんにちわ3 | 안녕세계2_add_column +---+------------------------+---------------+---------------------- + 1 | 안녕세계1 first UPDATE | 안녕세2 first | 안녕세계3 first +(1 row) + +SELECT 안녕세계1 || こんにちわ3 FROM hi_안녕세계; + ?column? +------------------------------------- + 안녕세계1 first UPDATE안녕세2 first +(1 row) + +-- SELECT ALIAS +SELECT 안녕세계1 AS 안녕세계1_Alias FROM hi_안녕세계; + 안녕세계1_alias +------------------------ + 안녕세계1 first UPDATE +(1 row) + +-- SUBQUERY +SELECT * FROM (SELECT 안녕세계1 FROM hi_안녕세계) t; + 안녕세계1 +------------------------ + 안녕세계1 first UPDATE +(1 row) + +SELECT (SELECT こんにちわ3 FROM hi_안녕세계) FROM (SELECT 1) AS q; + こんにちわ3 +--------------- + 안녕세2 first +(1 row) + +SELECT (SELECT (SELECT こんにちわ3 FROM hi_안녕세계) FROM hi_안녕세계) FROM (SELECT 1) AS q; + こんにちわ3 +--------------- + 안녕세2 first +(1 row) + +-- CTE +WITH cte AS +(SELECT 안녕세계1, こんにちわ3 FROM hi_안녕세계) SELECT * FROM cte WHERE 안녕세계1 LIKE '안녕세계1%'; + 안녕세계1 | こんにちわ3 +------------------------+--------------- + 안녕세계1 first UPDATE | 안녕세2 first +(1 row) + +WITH cte(안녕세계x, こんにちわx) AS +(SELECT 안녕세계1, こんにちわ3 FROM hi_안녕세계) SELECT * FROM cte WHERE 안녕세계x LIKE '안녕세계1%'; + 안녕세계x | こんにちわx +------------------------+--------------- + 안녕세계1 first UPDATE | 안녕세2 first +(1 row) + +-- JOIN +SELECT * FROM hi_안녕세계 hi_안녕세계1, hi_안녕세계 hi_안녕세계2 WHERE hi_안녕세계1.안녕세계1 LIKE '%UPDATE'; + a | 안녕세계1 | こんにちわ3 | 안녕세계2_add_column | a | 안녕세계1 | こんにちわ3 | 안녕세계2_add_column +---+------------------------+---------------+----------------------+---+------------------------+---------------+---------------------- + 1 | 안녕세계1 first UPDATE | 안녕세2 first | 안녕세계3 first | 1 | 안녕세계1 first UPDATE | 안녕세2 first | 안녕세계3 first +(1 row) + +RESET optimizer_trace_fallback; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 01d5cb60258f..d1782f2c5a31 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -41,7 +41,7 @@ test: instr_in_shmem_setup test: instr_in_shmem test: createdb -test: gp_aggregates gp_metadata variadic_parameters default_parameters 
function_extensions spi gp_xml update_gp returning_gp resource_queue_with_rule gp_types gp_index gp_lock +test: gp_aggregates gp_metadata variadic_parameters default_parameters function_extensions spi gp_xml update_gp returning_gp resource_queue_with_rule gp_types gp_index gp_lock gp_locale test: shared_scan test: spi_processed64bit test: python_processed64bit diff --git a/src/test/regress/sql/gp_locale.sql b/src/test/regress/sql/gp_locale.sql new file mode 100644 index 000000000000..444352c9eddf --- /dev/null +++ b/src/test/regress/sql/gp_locale.sql @@ -0,0 +1,61 @@ +-- ORCA uses functions (e.g. vswprintf) to translation to wide character +-- format. But those libraries may fail if the current locale cannot handle the +-- character set. This test checks that even when those libraries fail, ORCA is +-- still able to generate plans. + +-- +-- Create a database that sets the minimum locale +-- +DROP DATABASE IF EXISTS test_locale; +CREATE DATABASE test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c test_locale + +-- +-- drop/add/remove columns +-- +CREATE TABLE hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +ALTER TABLE hi_안녕세계 DROP COLUMN 안녕세계2; +ALTER TABLE hi_안녕세계 ADD COLUMN 안녕세계2_ADD_COLUMN text; +ALTER TABLE hi_안녕세계 RENAME COLUMN 안녕세계3 TO こんにちわ3; + +INSERT INTO hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +INSERT INTO hi_안녕세계 VALUES(42, '안녕세계1 second', '안녕세2 second', '안녕세계3 second'); + +-- +-- Try various queries containing multibyte character set and check the column +-- name output +-- +SET optimizer_trace_fallback=on; + +-- DELETE +DELETE FROM hi_안녕세계 WHERE a=42; + +-- UPDATE +UPDATE hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; + +-- SELECT +SELECT * FROM hi_안녕세계; + +SELECT 안녕세계1 || こんにちわ3 FROM hi_안녕세계; + +-- SELECT ALIAS +SELECT 안녕세계1 AS 안녕세계1_Alias FROM hi_안녕세계; + +-- SUBQUERY +SELECT * FROM (SELECT 안녕세계1 FROM hi_안녕세계) t; + +SELECT (SELECT こんにちわ3 FROM hi_안녕세계) FROM (SELECT 1) AS q; + +SELECT (SELECT (SELECT こんにちわ3 FROM hi_안녕세계) FROM hi_안녕세계) FROM (SELECT 1) AS q; + +-- CTE +WITH cte AS +(SELECT 안녕세계1, こんにちわ3 FROM hi_안녕세계) SELECT * FROM cte WHERE 안녕세계1 LIKE '안녕세계1%'; + +WITH cte(안녕세계x, こんにちわx) AS +(SELECT 안녕세계1, こんにちわ3 FROM hi_안녕세계) SELECT * FROM cte WHERE 안녕세계x LIKE '안녕세계1%'; + +-- JOIN +SELECT * FROM hi_안녕세계 hi_안녕세계1, hi_안녕세계 hi_안녕세계2 WHERE hi_안녕세계1.안녕세계1 LIKE '%UPDATE'; + +RESET optimizer_trace_fallback; From 02e224666fc98c8d6330c0d9d0b15cf40a5e959c Mon Sep 17 00:00:00 2001 From: Praveen Kumar Date: Thu, 9 Nov 2023 22:17:48 +0530 Subject: [PATCH 071/106] Added support for gpstate tracking for differential recovery (#16639) Backport of https://github.com/greenplum-db/gpdb/pull/16460/ Issue : The "gpstate -e" command allows us to track full and incremental recovery progress. However, this functionality is not available for differential recovery. Approach : Show the progress stage-wise with rsync version 3.1.0 or above. We are synchronizing data for several components of the segment data directory, including pg_data, tablespace, pg_wal, and pg_control. Multiple rsync sessions will run sequentially for these components, and we want to track the progress of each stage. In rsync version 3.1.0 and above, the --info=progress2 option is available, which allows us to display cumulative progress information for a running rsync session. As each rsync session progresses, the cumulative progress information will be displayed on the terminal. 
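For a rough sketch of what that looks like (illustrative Python, not the actual gpstate parsing added in this patch), a cumulative progress line tagged with its stage name, and a pattern that pulls out the bytes, percentage, and stage, might be handled like this:

```python
import re

# A --info=progress2 line after the sed step has appended the stage name as a suffix.
line = "    298,338,995  32%   39.90MB/s    0:00:24  :Syncing pg_data of dbid 5"

# "<bytes>  <percent>%  <rate>kB/s|MB/s ...  :<stage>"
pattern = re.compile(r"\s*([\d,]+)\s+(\d+)%\s+[\d.]+(?:kB|MB)/s.*:(.+)$")

m = pattern.match(line)
if m:
    completed_bytes, percent, stage = m.group(1), int(m.group(2)), m.group(3).strip()
    print(stage, completed_bytes, "%d%%" % percent)
    # -> Syncing pg_data of dbid 5 298,338,995 32%
```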
The --info=progress2 option details the bytes transferred and the percentage completed for each session. Using --info=progress2 we can monitor the progress stage-wise of rsync session. The displayed information includes the remote segment, port, recovery type, stage name, completed bytes, and percentage completed. Implementation: The approach now includes a validation check for rsync version 3.1.0 or above. If the current version is below this threshold, an exception will be raised. Added a suffix of the stage name to each line with a 'kB/s' or 'MB/s' pattern in it of rsync.*.dbid5.out file. Implemented filtering of relevant lines using grep while tailing data from rsync..dbid5.out and writing to the recovery_progress file to prevent race conditions. In gpsate utility parse the recovery_progress data and process differential recovery progress. So the stage-wise progress looks like : gpstate:pkumar144Q6L7:-[INFO]:- Segment Port Recovery type Stage Completed bytes (kB) Percentage completed gpstate:pkumar144Q6L7:-[INFO]:- pkumar144Q6L7.vmware.com 6005 differential Syncing pg_data of dbid 5 298,338,995 32% gpstate:pkumar144Q6L7:-[INFO]:- pkumar144Q6L7.vmware.com 6007 differential Syncing pg_data of dbid 7 157,475,653 18% Test: Added unit and behave test cases --- .../commands/test/unit/test_unit_unix.py | 16 +++++ gpMgmt/bin/gppylib/commands/unix.py | 35 +++++++++- .../gppylib/operations/buildMirrorSegments.py | 64 ++++++++++++++++--- .../bin/gppylib/programs/clsRecoverSegment.py | 8 +++ gpMgmt/bin/gppylib/programs/clsSystemState.py | 51 +++++++++++---- .../gppylib/test/unit/test_unit_gpstate.py | 39 +++++++++-- gpMgmt/sbin/gpsegrecovery.py | 7 +- .../behave/mgmt_utils/gprecoverseg.feature | 45 +++++++++++++ gpMgmt/test/behave/mgmt_utils/gpstate.feature | 18 ++++++ .../behave/mgmt_utils/steps/gpstate_utils.py | 19 +++++- .../behave/mgmt_utils/steps/mgmt_utils.py | 42 +++++++++++- .../mgmt_utils/steps/tablespace_mgmt_utils.py | 13 ++++ 12 files changed, 325 insertions(+), 32 deletions(-) diff --git a/gpMgmt/bin/gppylib/commands/test/unit/test_unit_unix.py b/gpMgmt/bin/gppylib/commands/test/unit/test_unit_unix.py index c436e5186113..4f8a1ba5dd1c 100644 --- a/gpMgmt/bin/gppylib/commands/test/unit/test_unit_unix.py +++ b/gpMgmt/bin/gppylib/commands/test/unit/test_unit_unix.py @@ -65,5 +65,21 @@ def test_kill_9_segment_processes_kill_error(self): self.subject.logger.info.assert_called_once_with('Terminating processes for segment /data/primary/gpseg0') self.subject.logger.error.assert_called_once_with('Failed to kill process 789 for segment /data/primary/gpseg0: Kill Error') + + @patch('gppylib.commands.unix.get_rsync_version', return_value='rsync version 3.2.7') + @patch('gppylib.commands.unix.LooseVersion', side_effect=['3.2.7', '3.1.0']) + def test_compare_rsync_version(self, mock_parse_version, mock_get_cmd_version): + + result = self.subject.validate_rsync_version("3.2.7") + self.assertTrue(result) + + + @patch('gppylib.commands.unix.get_rsync_version', return_value='rsync version 2.6.9') + @patch('gppylib.commands.unix.LooseVersion', side_effect=['2.6.9', '3.1.0']) + def test_validate_rsync_version_false(self, mock_parse_version, mock_get_cmd_version): + + result =self.subject.validate_rsync_version("2.6.9") + self.assertFalse(result) + if __name__ == '__main__': run_tests() diff --git a/gpMgmt/bin/gppylib/commands/unix.py b/gpMgmt/bin/gppylib/commands/unix.py index d54ffa8d8a3a..92f45e1ae710 100644 --- a/gpMgmt/bin/gppylib/commands/unix.py +++ b/gpMgmt/bin/gppylib/commands/unix.py @@ -13,6 
+13,8 @@ import signal import uuid import pipes +import re +from distutils.version import LooseVersion from gppylib.gplog import get_default_logger from gppylib.commands.base import * @@ -534,8 +536,10 @@ def __init__(self, name, srcFile, dstFile, srcHost=None, dstHost=None, recursive if checksum: cmd_tokens.append('-c') + # Shows the progress of the whole transfer, + # Note : It is only supported with rsync 3.1.0 or above if progress: - cmd_tokens.append('--progress') + cmd_tokens.append('--info=progress2,name0') # To show file transfer stats if stats: @@ -568,11 +572,14 @@ def __init__(self, name, srcFile, dstFile, srcHost=None, dstHost=None, recursive cmd_tokens.extend(exclude_str) + # Combines output streams, uses 'sed' to find lines with 'kB/s' or 'MB/s' and appends ':%s' as suffix to the end + # of each line and redirects it to progress_file if progress_file: - cmd_tokens.append('> %s 2>&1' % pipes.quote(progress_file)) + cmd_tokens.append( + '2>&1 | tr "\\r" "\\n" |sed -E "/[0-9]+%/ s/$/ :{0}/" > {1}'.format(name, pipes.quote(progress_file))) cmdStr = ' '.join(cmd_tokens) - + cmdStr = "set -o pipefail; {}".format(cmdStr) self.command_tokens = cmd_tokens Command.__init__(self, name, cmdStr, ctxt, remoteHost) @@ -812,3 +819,25 @@ def isScpEnabled(hostlist): return False return True + + + +def validate_rsync_version(min_ver): + """ + checks the version of the 'rsync' command and compares it with a required version. + If the current version is lower than the required version, it raises an exception + """ + rsync_version_info = get_rsync_version() + pattern = r"version (\d+\.\d+\.\d+)" + match = re.search(pattern, rsync_version_info) + current_rsync_version = match.group(1) + if LooseVersion(current_rsync_version) < LooseVersion(min_ver): + return False + return True + +def get_rsync_version(): + """ get the rsync current version """ + cmdStr = findCmdInPath("rsync") + " --version" + cmd = Command("get rsync version", cmdStr=cmdStr) + cmd.run(validateAfter=True) + return cmd.get_stdout() diff --git a/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py b/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py index 92705a8e1cb8..a3197223eb28 100644 --- a/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py +++ b/gpMgmt/bin/gppylib/operations/buildMirrorSegments.py @@ -70,7 +70,7 @@ def get_recovery_progress_pattern(recovery_type='incremental'): progress of rsync looks like: "1,036,923,510 99% 39.90MB/s 0:00:24" """ if recovery_type == 'differential': - return r" +\d+%\ +\d+.\d+(kB|mB)\/s" + return r" +\d+%\ +\d+.\d+(kB|MB)\/s" return r"\d+\/\d+ (kB|mB) \(\d+\%\)" @@ -459,18 +459,66 @@ def print_progress(): os.remove(combined_progress_filepath) - def _get_progress_cmd(self, progressFile, targetSegmentDbId, targetHostname): + def _get_progress_cmd(self, progressFile, targetSegmentDbId, targetHostname, isDifferentialRecovery): """ # There is race between when the recovery process creates the progressFile # when this progress cmd is run. Thus, the progress command touches # the file to ensure its presence before tailing. 
""" if self.__progressMode != GpMirrorListToBuild.Progress.NONE: - return GpMirrorListToBuild.ProgressCommand("tail the last line of the file", - "set -o pipefail; touch -a {0}; tail -1 {0} | tr '\\r' '\\n' |" - " tail -1".format(pipes.quote(progressFile)), - targetSegmentDbId, progressFile, ctxt=base.REMOTE, - remoteHost=targetHostname) + cmd_desc = "tail the last line of the file" + if isDifferentialRecovery: + # For differential recovery, use sed to filter lines with specific patterns to avoid race condition. + + # Set the option to make the pipeline fail if any command within it fails; + # Example: set -o pipefail; + + # Create or update a file with the name specified in {0}; + # Example: touch -a 'rsync.20230926_145006.dbid2.out'; + + # Display the last 3 lines of the file specified in {0} and pass them to the next command; + # Example: If {0} contains: + # receiving incremental file list + # + # 0 0% 0.00kB/s 0:00:00 :Syncing pg_control file of dbid 5 + # 8,192 100% 7.81MB/s 0:00:00 (xfr#1, to-chk=0/1) :Syncing pg_control file of dbid 5 + # 8,192 100% 7.81MB/s 0:00:00 (xfr#1, to-chk=0/1) :Syncing pg_control file of dbid 5 + # + # This command will pass the above lines (excluding the first) to the next command. + + # Process the output using sed (stream editor), printing lines that match certain patterns; + # Example: If the output is " 8,192 100% 7.81MB/s 0:00:00 (xfr#1, to-chk=0/1) :Syncing pg_control file of dbid 5", + # this command will print: + # 8,192 100% 7.81MB/s 0:00:00 (xfr#1, to-chk=0/1) :Syncing pg_control file of dbid 5 + # + # It will print lines that contain ":Syncing.*dbid", "error:", or "total". + + # Translate carriage return characters to newline characters; + # Example: If the output contains '\r' characters, they will be replaced with '\n'. + + # Display only the last line of the processed output. + # Example: If the output after the previous command is: + # 8,192 100% 7.81MB/s 0:00:00 (xfr#1, to-chk=0/1) :Syncing pg_control file of dbid 5 + # This command will output the same line. + + cmd_str = ( + "set -o pipefail; touch -a {0}; tail -3 {0} | sed -n -e '/:Syncing.*dbid/p; /error:/p; /total/p' | tr '\\r' '\\n' | tail -1" + .format(pipes.quote(progressFile)) + ) + else: + # For full and incremental recovery, simply tail the last line. 
+ cmd_str = ( + "set -o pipefail; touch -a {0}; tail -1 {0} | tr '\\r' '\\n' | tail -1" + .format(pipes.quote(progressFile)) + ) + + progress_command = GpMirrorListToBuild.ProgressCommand( + cmd_desc, cmd_str, + targetSegmentDbId, progressFile, ctxt=base.REMOTE, + remoteHost=targetHostname + ) + + return progress_command return None def _get_remove_cmd(self, remove_file, target_host): @@ -533,7 +581,7 @@ def _do_recovery(self, recovery_info_by_host, gpEnv): era = read_era(gpEnv.getMasterDataDir(), logger=self.__logger) for hostName, recovery_info_list in recovery_info_by_host.items(): for ri in recovery_info_list: - progressCmd = self._get_progress_cmd(ri.progress_file, ri.target_segment_dbid, hostName) + progressCmd = self._get_progress_cmd(ri.progress_file, ri.target_segment_dbid, hostName, ri.is_differential_recovery) if progressCmd: progress_cmds.append(progressCmd) diff --git a/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py b/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py index 80f7cee8d37b..e7088f758c06 100644 --- a/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py +++ b/gpMgmt/bin/gppylib/programs/clsRecoverSegment.py @@ -256,6 +256,14 @@ def run(self): if self.__options.replayLag and not self.__options.rebalanceSegments: raise ProgramArgumentValidationException("--replay-lag should be used only with -r") + # Checking rsync version before performing a differential recovery operation. + # the --info=progress2 option, which provides whole file transfer progress, requires rsync 3.1.0 or above + min_rsync_ver = "3.1.0" + if self.__options.differentialResynchronization and not unix.validate_rsync_version(min_rsync_ver): + raise ProgramArgumentValidationException("To perform a differential recovery, a minimum rsync version " + "of {0} is required. 
Please ensure that rsync is updated to " + "version {0} or higher.".format(min_rsync_ver)) + faultProberInterface.getFaultProber().initializeProber(gpEnv.getMasterPort()) confProvider = configInterface.getConfigurationProvider().initializeProvider(gpEnv.getMasterPort()) diff --git a/gpMgmt/bin/gppylib/programs/clsSystemState.py b/gpMgmt/bin/gppylib/programs/clsSystemState.py index b9d3b49d00c7..7b147a3996d8 100644 --- a/gpMgmt/bin/gppylib/programs/clsSystemState.py +++ b/gpMgmt/bin/gppylib/programs/clsSystemState.py @@ -77,6 +77,7 @@ def __str__(self): return self.__name VALUE_RECOVERY_TOTAL_BYTES = FieldDefinition("Total bytes (kB)", "recovery_total_bytes", "int") VALUE_RECOVERY_PERCENTAGE = FieldDefinition("Percentage completed", "recovery_percentage", "int") VALUE_RECOVERY_TYPE = FieldDefinition("Recovery type", "recovery_type", "int") +VALUE_RECOVERY_STAGE = FieldDefinition("Stage", "recovery_stage", "text") CATEGORY__STATUS = "Status" VALUE__MASTER_REPORTS_STATUS = FieldDefinition("Configuration reports status as", "status_in_config", "text", "Config status") @@ -165,7 +166,7 @@ def __init__(self ): VALUE__ACTIVE_PID_INT, VALUE__POSTMASTER_PID_VALUE_INT, VALUE__POSTMASTER_PID_FILE, VALUE__POSTMASTER_PID_VALUE, VALUE__LOCK_FILES, VALUE_RECOVERY_COMPLETED_BYTES, VALUE_RECOVERY_TOTAL_BYTES, VALUE_RECOVERY_PERCENTAGE, - VALUE_RECOVERY_TYPE + VALUE_RECOVERY_TYPE, VALUE_RECOVERY_STAGE ]: self.__allValues[k] = True @@ -692,8 +693,14 @@ def logSegments(segments, logAsPairs, additionalFieldsToLog=[]): if segments_under_recovery: logger.info("----------------------------------------------------") logger.info("Segments in recovery") - logSegments(segments_under_recovery, False, [VALUE_RECOVERY_TYPE, VALUE_RECOVERY_COMPLETED_BYTES, VALUE_RECOVERY_TOTAL_BYTES, - VALUE_RECOVERY_PERCENTAGE]) + if data.getStrValue(segments_under_recovery[0], VALUE_RECOVERY_TYPE) == "differential": + logSegments(segments_under_recovery, False, + [VALUE_RECOVERY_TYPE, VALUE_RECOVERY_STAGE, VALUE_RECOVERY_COMPLETED_BYTES, + VALUE_RECOVERY_PERCENTAGE]) + else: + logSegments(segments_under_recovery, False, + [VALUE_RECOVERY_TYPE, VALUE_RECOVERY_COMPLETED_BYTES, VALUE_RECOVERY_TOTAL_BYTES, + VALUE_RECOVERY_PERCENTAGE]) exitCode = 1 # final output -- no errors, then log this message @@ -975,12 +982,26 @@ def _parse_recovery_progress_data(data, recovery_progress_file, gpArray): with open(recovery_progress_file, 'r') as fp: for line in fp: recovery_type, dbid, progress = line.strip().split(':',2) - pattern = re.compile(get_recovery_progress_pattern()) - if re.search(pattern, progress): - bytes, units, precentage_str = progress.strip().split(' ',2) - completed_bytes, total_bytes = bytes.split('/') - percentage = re.search(r'(\d+\%)', precentage_str).group() - recovery_progress_by_dbid[int(dbid)] = [recovery_type, completed_bytes, total_bytes, percentage] + # Define patterns for identifying different recovery types + rewind_bb_pattern = re.compile(get_recovery_progress_pattern()) + diff_pattern = re.compile(get_recovery_progress_pattern('differential')) + + # Check if the progress matches full,incremental or differential recovery patterns + if re.search(rewind_bb_pattern, progress) or re.search(diff_pattern, progress): + stage, total_bytes = "", "" + if recovery_type == "differential": + # Process differential recovery progress. + progress_parts = progress.strip().split(':') + stage = progress_parts[-1] + completed_bytes, percentage = progress_parts[0].split()[:2] + else: + # Process full or incremental recovery progress. 
+ bytes, units, precentage_str = progress.strip().split(' ', 2) + completed_bytes, total_bytes = bytes.split('/') + percentage = re.search(r'(\d+\%)', precentage_str).group() + + recovery_progress_by_dbid[int(dbid)] = [recovery_type, completed_bytes, total_bytes, percentage, + stage] # Now the catalog update happens before we run recovery, # so now when we query gpArray here, it will have new address/port for the recovering segments @@ -990,12 +1011,20 @@ def _parse_recovery_progress_data(data, recovery_progress_file, gpArray): if dbid in recovery_progress_by_dbid.keys(): data.switchSegment(seg) recovery_progress_segs.append(seg) - recovery_type, completed_bytes, total_bytes, percentage = recovery_progress_by_dbid[dbid] + recovery_type, completed_bytes, total_bytes, percentage, stage = recovery_progress_by_dbid[dbid] + + # Add recovery progress values to GpstateData data.addValue(VALUE_RECOVERY_TYPE, recovery_type) data.addValue(VALUE_RECOVERY_COMPLETED_BYTES, completed_bytes) - data.addValue(VALUE_RECOVERY_TOTAL_BYTES, total_bytes) data.addValue(VALUE_RECOVERY_PERCENTAGE, percentage) + if recovery_type == "differential": + # If differential recovery, add stage information. + data.addValue(VALUE_RECOVERY_STAGE, stage) + else: + # If full or incremental, add total bytes' information. + data.addValue(VALUE_RECOVERY_TOTAL_BYTES, total_bytes) + return recovery_progress_segs diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpstate.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpstate.py index bd5b24383464..8be17eb0b020 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpstate.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpstate.py @@ -61,11 +61,14 @@ def setUp(self): self.gpArrayMock = mock.MagicMock(spec=gparray.GpArray) self.gpArrayMock.getSegDbList.return_value = [self.primary1, self.primary2, self.primary3] - def check_recovery_fields(self, segment, type, completed, total, percentage): + def check_recovery_fields(self, segment, type, completed, total, percentage, stage=None): self.assertEqual(type, self.data.getStrValue(segment, VALUE_RECOVERY_TYPE)) self.assertEqual(completed, self.data.getStrValue(segment, VALUE_RECOVERY_COMPLETED_BYTES)) - self.assertEqual(total, self.data.getStrValue(segment, VALUE_RECOVERY_TOTAL_BYTES)) self.assertEqual(percentage, self.data.getStrValue(segment, VALUE_RECOVERY_PERCENTAGE)) + if type == "differential": + self.assertEqual(stage, self.data.getStrValue(segment, VALUE_RECOVERY_STAGE)) + else: + self.assertEqual(total, self.data.getStrValue(segment, VALUE_RECOVERY_TOTAL_BYTES)) def test_parse_recovery_progress_data_returns_empty_when_file_does_not_exist(self): self.assertEqual([], GpSystemStateProgram._parse_recovery_progress_data(self.data, '/file/does/not/exist', self.gpArrayMock)) @@ -88,12 +91,16 @@ def test_parse_recovery_progress_data_adds_recovery_progress_data_during_multipl with tempfile.NamedTemporaryFile() as f: f.write("full:1: 1164848/1371715 kB (0%), 0/1 tablespace (...t1/demoDataDir0/base/16384/40962)\n".encode("utf-8")) f.write("incremental:2: 1171384/1371875 kB (85%)anything can appear here".encode('utf-8')) + f.write("incremental:2: 1171384/1371875 kB (85%)anything can appear here\n".encode('utf-8')) + f.write( + "differential:3: 122,017,543 74% 74.02MB/s 0:00:01 (xfr#1994, to-chk=963/2979) :Syncing pg_data of dbid 1\n".encode( + "utf-8")) f.flush() - self.assertEqual([self.primary1, self.primary2], GpSystemStateProgram._parse_recovery_progress_data(self.data, f.name, self.gpArrayMock)) + self.assertEqual([self.primary1, 
self.primary2, self.primary3], GpSystemStateProgram._parse_recovery_progress_data(self.data, f.name, self.gpArrayMock)) self.check_recovery_fields(self.primary1,'full', '1164848', '1371715', '0%') self.check_recovery_fields(self.primary2, 'incremental', '1171384', '1371875', '85%') - self.check_recovery_fields(self.primary3, '', '', '', '') + self.check_recovery_fields(self.primary3, 'differential', '122,017,543', '', '74%', 'Syncing pg_data of dbid 1') def test_parse_recovery_progress_data_doesnt_adds_recovery_progress_data_only_for_completed_recoveries(self): with tempfile.NamedTemporaryFile() as f: @@ -126,6 +133,30 @@ def test_parse_recovery_progress_data_doesnt_adds_recovery_progress_data_only_fo self.check_recovery_fields(self.primary3, '', '', '', '') + def test_parse_recovery_progress_data_adds_differential_recovery_progress_data_during_single_recovery(self): + with tempfile.NamedTemporaryFile() as f: + f.write("differential:1: 38,861,653 7% 43.45MB/s 0:00:00 (xfr#635, ir-chk=9262/9919) :Syncing pg_data of dbid 1\n".encode("utf-8")) + f.flush() + self.assertEqual([self.primary1], GpSystemStateProgram._parse_recovery_progress_data(self.data, f.name, self.gpArrayMock)) + + self.check_recovery_fields(self.primary1, 'differential', '38,861,653', '', '7%', "Syncing pg_data of dbid 1") + self.check_recovery_fields(self.primary2, '', '', '', '') + self.check_recovery_fields(self.primary3, '', '', '', '') + + + def test_parse_recovery_progress_data_adds_differential_recovery_progress_data_during_multiple_recovery(self): + with tempfile.NamedTemporaryFile() as f: + f.write("differential:1: 38,861,653 7% 43.45MB/s 0:00:00 (xfr#635, ir-chk=9262/9919) :Syncing pg_data of dbid 1\n".encode("utf-8")) + f.write("differential:2: 122,017,543 74% 74.02MB/s 0:00:01 (xfr#1994, to-chk=963/2979) :Syncing tablespace of dbid 2 for oid 17934\n".encode("utf-8")) + f.write("differential:3: 122,017,543 (74%) 74.02MB/s 0:00:01 (xfr#1994, to-chk=963/2979) :Invalid format\n".encode("utf-8")) + f.flush() + self.assertEqual([self.primary1, self.primary2], GpSystemStateProgram._parse_recovery_progress_data(self.data, f.name, self.gpArrayMock)) + + self.check_recovery_fields(self.primary1, 'differential', '38,861,653', '', '7%', "Syncing pg_data of dbid 1") + self.check_recovery_fields(self.primary2, 'differential', '122,017,543', '', '74%', "Syncing tablespace of dbid 2 for oid 17934") + self.check_recovery_fields(self.primary3, '', '', '', '') + + class ReplicationInfoTestCase(unittest.TestCase): """ A test case for GpSystemStateProgram._add_replication_info(). diff --git a/gpMgmt/sbin/gpsegrecovery.py b/gpMgmt/sbin/gpsegrecovery.py index d38b72f2ca4a..65ccc57c091e 100644 --- a/gpMgmt/sbin/gpsegrecovery.py +++ b/gpMgmt/sbin/gpsegrecovery.py @@ -210,7 +210,8 @@ def sync_pg_data(self): # os.path.join(dir, "") will append a '/' at the end of dir. When using "/" at the end of source, # rsync will copy the content of the last directory. When not using "/" at the end of source, rsync # will copy the last directory and the content of the directory. 
- cmd = Rsync(name="Sync pg data_dir", srcFile=os.path.join(self.recovery_info.source_datadir, ""), + cmd = Rsync(name='Syncing pg_data of dbid {}'.format(self.recovery_info.target_segment_dbid), + srcFile=os.path.join(self.recovery_info.source_datadir, ""), dstFile=self.recovery_info.target_datadir, srcHost=self.recovery_info.source_hostname, exclude_list=rsync_exclude_list, delete=True, checksum=True, progress=True, progress_file=self.recovery_info.progress_file) @@ -259,7 +260,7 @@ def sync_xlog_and_control_file(self): # os.path.join(dir, "") will append a '/' at the end of dir. When using "/" at the end of source, # rsync will copy the content of the last directory. When not using "/" at the end of source, rsync # will copy the last directory and the content of the directory. - cmd = Rsync(name="Sync pg_xlog files", srcFile=os.path.join(self.recovery_info.source_datadir, "pg_xlog", ""), + cmd = Rsync(name="Syncing pg_xlog files of dbid {}".format(self.recovery_info.target_segment_dbid), srcFile=os.path.join(self.recovery_info.source_datadir, "pg_xlog", ""), dstFile=os.path.join(self.recovery_info.target_datadir, "pg_xlog", ""), progress=True, checksum=True, srcHost=self.recovery_info.source_hostname, progress_file=self.recovery_info.progress_file) @@ -307,7 +308,7 @@ def sync_tablespaces(self): # os.path.join(dir, "") will append a '/' at the end of dir. When using "/" at the end of source, # rsync will copy the content of the last directory. When not using "/" at the end of source, rsync # will copy the last directory and the content of the directory. - cmd = Rsync(name="Sync tablespace", + cmd = Rsync(name="Syncing tablespace of dbid {0} for oid {1}" .format(self.recovery_info.target_segment_dbid, str(oid)), srcFile=os.path.join(srcPath, ""), dstFile=targetPath, srcHost=self.recovery_info.source_hostname, diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index a1fbd6039b2d..0eb4cce138dd 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -110,6 +110,51 @@ Feature: gprecoverseg tests And verify replication slot internal_wal_replication_slot is available on all the segments And the cluster is rebalanced + @concourse_cluster + Scenario: gpstate track of differential recovery for single host + Given the database is running + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And user immediately stops all mirror processes for content 0 + And the user waits until mirror on content 0 is down + And user can start transactions + And sql "DROP TABLE IF EXISTS test_recoverseg; CREATE TABLE test_recoverseg AS SELECT generate_series(1,100000000) AS a;" is executed in "postgres" db + And sql "DROP TABLE IF EXISTS test_recoverseg_1; CREATE TABLE test_recoverseg_1 AS SELECT generate_series(1,100000000) AS a;" is executed in "postgres" db + When the user asynchronously runs "gprecoverseg -a --differential" and the process is saved + Then the user waits until recovery_progress.file is created in gpAdminLogs and verifies that all dbids progress with pg_data are present + When the user runs "gpstate -e" + Then gpstate should print "Segments in recovery" to stdout + And gpstate output contains "differential" entries for mirrors of content 0 + And gpstate output looks like + | Segment | Port | Recovery type | Stage | Completed bytes \(kB\) | Percentage completed | + | \S+ | [0-9]+ | differential | Syncing pg_data of dbid 6 | ([\d,]+)[ \t] 
| \d+% | + And the user waits until saved async process is completed + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And sql "DROP TABLE IF EXISTS test_recoverseg;" is executed in "postgres" db + And sql "DROP TABLE IF EXISTS test_recoverseg_1;" is executed in "postgres" db + And the cluster is rebalanced + + + @concourse_cluster + Scenario: check Tablespace Recovery Progress with gpstate + Given the database is running + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And user immediately stops all mirror processes for content 0 + And user can start transactions + And a tablespace is created with data + And insert additional data into the tablespace + When the user asynchronously runs "gprecoverseg -a --differential" and the process is saved + Then the user waits until recovery_progress.file is created in gpAdminLogs and verifies that all dbids progress with tablespace are present + When the user runs "gpstate -e" + Then gpstate should print "Segments in recovery" to stdout + And gpstate output contains "differential" entries for mirrors of content 0 + And gpstate output looks like + | Segment | Port | Recovery type | Stage | Completed bytes \(kB\) | Percentage completed | + | \S+ | [0-9]+ | differential | Syncing tablespace of dbid 6 for oid \d+ | ([\d,]+)[ \t] | \d+% | + And the user waits until saved async process is completed + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And the cluster is rebalanced + + Scenario: full recovery works with tablespaces Given the database is running And a tablespace is created with data diff --git a/gpMgmt/test/behave/mgmt_utils/gpstate.feature b/gpMgmt/test/behave/mgmt_utils/gpstate.feature index 6df5e71861e1..869b5c29abc1 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstate.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstate.feature @@ -656,3 +656,21 @@ Feature: gpstate tests And the user runs command "unset PGDATABASE && $GPHOME/bin/gpstate -e -v" Then command should print "pg_isready -q -h .* -p .* -d postgres" to stdout And command should print "All segments are running normally" to stdout + + + Scenario: gpstate -e shows information about segments with ongoing differential recovery + Given a standard local demo cluster is running + Given all files in gpAdminLogs directory are deleted + And a sample recovery_progress.file is created with ongoing differential recoveries in gpAdminLogs + And we run a sample background script to generate a pid on "master" segment + And a sample gprecoverseg.lock directory is created using the background pid in master_data_directory + When the user runs "gpstate -e" + Then gpstate should print "Segments in recovery" to stdout + And gpstate output contains "differential,differential" entries for mirrors of content 0,1 + And gpstate output looks like + | Segment | Port | Recovery type | Stage | Completed bytes \(kB\) | Percentage completed | + | \S+ | [0-9]+ | differential | Syncing pg_data of dbid 5 | 16,454,866 | 4% | + | \S+ | [0-9]+ | differential | Syncing tablespace of dbid 6 for oid 20516 | 8,192 | 100% | + And all files in gpAdminLogs directory are deleted + And the background pid is killed on "master" segment + And the gprecoverseg lock directory is removed diff --git a/gpMgmt/test/behave/mgmt_utils/steps/gpstate_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/gpstate_utils.py index e8778493852c..caefcd36623a 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/gpstate_utils.py +++ 
b/gpMgmt/test/behave/mgmt_utils/steps/gpstate_utils.py @@ -66,8 +66,12 @@ def impl(context, recovery_types, contents): for index, seg_to_display in enumerate(segments_to_display): hostname = seg_to_display.getSegmentHostName() port = seg_to_display.getSegmentPort() - expected_msg = "{}[ \t]+{}[ \t]+{}[ \t]+[0-9]+[ \t]+[0-9]+[ \t]+[0-9]+\%".format(hostname, port, - recovery_types[index]) + if recovery_types[index] == "differential": + expected_msg = "{}[ \t]+{}[ \t]+{}[ \t]+(.+?)[ \t]+([\d,]+)[ \t]+[0-9]+\%".format(hostname, port, + recovery_types[index]) + else: + expected_msg = "{}[ \t]+{}[ \t]+{}[ \t]+[0-9]+[ \t]+[0-9]+[ \t]+[0-9]+\%".format(hostname, port, + recovery_types[index]) check_stdout_msg(context, expected_msg) #TODO assert that only segments_to_display are printed to the console @@ -125,3 +129,14 @@ def check_stdout_msg_in_order(context, msg): context.stdout_position = match.end() + +@given('a sample recovery_progress.file is created with ongoing differential recoveries in gpAdminLogs') +def impl(context): + with open('{}/gpAdminLogs/recovery_progress.file'.format(os.path.expanduser("~")), 'w+') as fp: + fp.write( + "differential:5: 16,454,866 4% 16.52MB/s 0:00:00 (xfr#216, ir-chk=9669/9907) :Syncing pg_data " + "of dbid 5\n") + fp.write("differential:6: 8,192 100% 7.81MB/s 0:00:00 (xfr#1, to-chk=0/1) :Syncing tablespace of " + "dbid 6 for oid 20516") + + diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index de545ca93a7e..bc6fa3a1c204 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -459,7 +459,7 @@ def impl(context, logdir): with open(recovery_progress_file, 'r') as fp: context.recovery_lines = fp.readlines() for line in context.recovery_lines: - recovery_type, dbid, progress = line.strip().split(':', 2) + recovery_type, dbid, progress = line.strip().split(':')[:3] progress_pattern = re.compile(get_recovery_progress_pattern(recovery_type)) # TODO: assert progress line in the actual hosts bb/rewind progress file if re.search(progress_pattern, progress) and dbid.isdigit() and recovery_type in ['full', 'differential', 'incremental']: @@ -4338,3 +4338,43 @@ def impl(context, dbname): context.db_name = datname context.db_oid = oid + + + +@then('the user waits until recovery_progress.file is created in {logdir} and verifies that all dbids progress with {stage} are present') +def impl(context, logdir, stage): + all_segments = GpArray.initFromCatalog(dbconn.DbURL()).getDbList() + failed_segments = filter(lambda seg: seg.getSegmentStatus() == 'd', all_segments) + stage_patterns = [] + for seg in failed_segments: + dbid = seg.getSegmentDbId() + if stage == "tablespace": + pat = "Syncing tablespace of dbid {} for oid".format(dbid) + else: + pat = "differential:{}" .format(dbid) + stage_patterns.append(pat) + if len(stage_patterns) == 0: + raise Exception('Failed to get the details of down segment') + attempt = 0 + num_retries = 9000 + log_dir = _get_gpAdminLogs_directory() if logdir == 'gpAdminLogs' else logdir + recovery_progress_file = '{}/recovery_progress.file'.format(log_dir) + while attempt < num_retries: + attempt += 1 + if os.path.exists(recovery_progress_file): + if verify_elements_in_file(recovery_progress_file, stage_patterns): + return + time.sleep(0.1) + if attempt == num_retries: + raise Exception('Timed out after {} retries'.format(num_retries)) + + +def verify_elements_in_file(filename, elements): + with open(filename, 'r') as file: 
+ content = file.read() + for element in elements: + if element not in content: + return False + + return True + diff --git a/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py index 41f924c0d871..a5b4a9d35443 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/tablespace_mgmt_utils.py @@ -127,6 +127,14 @@ def verify_for_gpexpand(self, hostname=None, port=0): "Expected pre-gpexpand data:\n%\n but found post-gpexpand data:\n%r" % ( sorted(self.initial_data), sorted(data))) + def insert_more_data(self): + with dbconn.connect(dbconn.DbURL(dbname=self.dbname), unsetSearchPath=False) as conn: + db = pg.DB(conn) + db.query("CREATE TABLE tbl_1 (i int) DISTRIBUTED RANDOMLY") + db.query("INSERT INTO tbl_1 VALUES (GENERATE_SERIES(0, 100000000))") + db.query("CREATE TABLE tbl_2 (i int) DISTRIBUTED RANDOMLY") + db.query("INSERT INTO tbl_2 VALUES (GENERATE_SERIES(0, 100000000))") + def _checkpoint_and_wait_for_replication_replay(db): """ @@ -243,3 +251,8 @@ def impl(context): for tablespace in context.tablespaces.values(): tablespace.cleanup() context.tablespaces = {} + +@given('insert additional data into the tablespace') +def impl(context): + context.tablespaces["outerspace"].insert_more_data() + From 17f4527c8d3791bf45e570cd8071e645a7cb6463 Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Tue, 14 Nov 2023 00:07:19 +0000 Subject: [PATCH 072/106] Docs: relax requirements for anti-virus (#16681) * Docs: relax requirements for anti-virus * updated link * Changes from review --------- Co-authored-by: Mireia Perez Fuster --- gpdb-doc/markdown/admin_guide/perf_intro.html.md | 2 +- .../install_guide/platform-requirements-overview.md.hbs | 2 +- gpdb-doc/markdown/install_guide/prep_os.html.md | 2 -- gpdb-doc/markdown/security-guide/topics/preface.html.md | 6 ++++++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/gpdb-doc/markdown/admin_guide/perf_intro.html.md b/gpdb-doc/markdown/admin_guide/perf_intro.html.md index c2a419aaad52..31c87e9b2de0 100644 --- a/gpdb-doc/markdown/admin_guide/perf_intro.html.md +++ b/gpdb-doc/markdown/admin_guide/perf_intro.html.md @@ -22,7 +22,7 @@ Several key performance factors influence database performance. Understanding th Database performance relies heavily on disk I/O and memory usage. To accurately set performance expectations, you need to know the baseline performance of the hardware on which your DBMS is deployed. Performance of hardware components such as CPUs, hard disks, disk controllers, RAM, and network interfaces will significantly affect how fast your database performs. -> **Caution** Do not install anti-virus software of any type on Greenplum Database hosts. VMware Greenplum is not supported for use with anti-virus software because the additional CPU and IO load interferes with Greenplum Database operations. +> **Note** If you use endpoint security software on your Greenplum Database hosts, it may affect your database performance and stability. See [About Endpoint Security Sofware](../security-guide/topics/preface.html#endpoint_security) for more information. 
### Workload diff --git a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs index 5ec295c14e50..65378a7ba9b6 100644 --- a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs +++ b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs @@ -16,7 +16,7 @@ Greenplum Database 6 runs on the following operating system platforms: -> **Caution** Do not install anti-virus software of any type on Greenplum Database hosts. VMware Greenplum is not supported for use with anti-virus software because the additional CPU and IO load interferes with Greenplum Database operations. +> **Note** If you use endpoint security software on your Greenplum Database hosts, it may affect your database performance and stability. See [About Endpoint Security Sofware](../security-guide/topics/preface.html#endpoint_security) for more information. > **Caution** A kernel issue in Red Hat Enterprise Linux 8.5 and 8.6 can cause I/O freezes and synchronization problems with XFS filesystems. This issue is fixed in RHEL 8.7. See [RHEL8: xfs_buf deadlock between inode deletion and block allocation](https://access.redhat.com/solutions/6984334). > Significant Greenplum Database performance degradation has been observed when enabling resource group-based workload management on RedHat 6.x and CentOS 6.x systems. This issue is caused by a Linux cgroup kernel bug. This kernel bug has been fixed in CentOS 7.x and Red Hat 7.x/8.x systems. diff --git a/gpdb-doc/markdown/install_guide/prep_os.html.md b/gpdb-doc/markdown/install_guide/prep_os.html.md index d8577dfbbf5d..036098661f60 100644 --- a/gpdb-doc/markdown/install_guide/prep_os.html.md +++ b/gpdb-doc/markdown/install_guide/prep_os.html.md @@ -4,8 +4,6 @@ title: Configuring Your Systems Describes how to prepare your operating system environment for Greenplum Database software installation. -> **Caution** Do not install anti-virus software of any type on Greenplum Database hosts. VMware Greenplum is not supported for use with anti-virus software because the additional CPU and IO load interferes with Greenplum Database operations. - Perform the following tasks in order: 1. Make sure your host systems meet the requirements described in [Platform Requirements](platform-requirements-overview.html). diff --git a/gpdb-doc/markdown/security-guide/topics/preface.html.md b/gpdb-doc/markdown/security-guide/topics/preface.html.md index 5dcb79813a48..704e600cf57b 100644 --- a/gpdb-doc/markdown/security-guide/topics/preface.html.md +++ b/gpdb-doc/markdown/security-guide/topics/preface.html.md @@ -25,3 +25,9 @@ Describes how to encrypt data at rest in the database or in transit over the net - **[Security Best Practices](../topics/BestPractices.html)** Describes basic security best practices that you should follow to ensure the highest level of system security.  +## About Endpoint Security Software + +If you install any endpoint security software on your Greenplum Database hosts, such as anti-virus, data protection, network security, or other security related software, the additional CPU, IO, network or memory load can interfere with Greenplum Database operations and may affect database performance and stability. + +Refer to your endpoint security vendor and perform careful testing in a non-production environment to ensure it does not have any negative impact on Greenplum Database operations. 
+ From 6439c425d2df9d64e918f511f43e8ab22748cab2 Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Tue, 14 Nov 2023 00:10:08 +0000 Subject: [PATCH 073/106] Docs: updated 6.X documentation to include EL 9 requirements (#16682) Co-authored-by: Mireia Perez Fuster --- .../platform-requirements-overview.md.hbs | 6 ++++++ .../markdown/install_guide/prep_os.html.md | 20 +++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs index 65378a7ba9b6..576d7bca9406 100644 --- a/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs +++ b/gpdb-doc/markdown/install_guide/platform-requirements-overview.md.hbs @@ -6,9 +6,12 @@ This topic describes the Greenplum Database 6 platform and operating system soft Greenplum Database 6 runs on the following operating system platforms: +- Red Hat Enterprise Linux 64-bit 9.x - Red Hat Enterprise Linux 64-bit 8.7 or later (As of Greenplum Database version 6.20. See the following [Note](#rhel-issues)) - Red Hat Enterprise Linux 64-bit 7.x \(See the following [Note](#rhel-issues).\) - Red Hat Enterprise Linux 64-bit 6.x +- Rocky Linux 9.x +- Rocky Linux 8.7 or later - CentOS 64-bit 7.x - CentOS 64-bit 6.x - Ubuntu 18.04 LTS @@ -40,6 +43,7 @@ Greenplum Database 6 requires the following software packages on RHEL/CentOS 6/7 - bash - bzip2 - curl +- compat-openssl11 (RHEL/Rocky 9) - iproute - krb5-devel - libcgroup (RHEL/CentOS 6) @@ -50,6 +54,7 @@ Greenplum Database 6 requires the following software packages on RHEL/CentOS 6/7 - libuuid - libxml2 - libyaml +- libzstd (RHEL/Rocky 9) - less - net-tools (Debian/Fedora) - openldap @@ -59,6 +64,7 @@ Greenplum Database 6 requires the following software packages on RHEL/CentOS 6/7 - openssl - openssl-libs (RHEL/CentOS 7 and RHEL/Rocky 8) - perl +- python3 (RHEL/Rocky 9) - readline - rsync - sed diff --git a/gpdb-doc/markdown/install_guide/prep_os.html.md b/gpdb-doc/markdown/install_guide/prep_os.html.md index 036098661f60..69ccfbc175fc 100644 --- a/gpdb-doc/markdown/install_guide/prep_os.html.md +++ b/gpdb-doc/markdown/install_guide/prep_os.html.md @@ -62,7 +62,7 @@ If you choose to enable SELinux in `Enforcing` mode, then Greenplum processes an ## Deactivate or Configure Firewall Software -You should also deactivate firewall software such as `iptables` \(on systems such as RHEL 6.x and CentOS 6.x \), `firewalld` \(on systems such as RHEL 7.x and CentOS 7.x\), or `ufw` \(on Ubuntu systems, deactivated by default\). If firewall software is not deactivated, you must instead configure your software to allow required communication between Greenplum hosts. +You should also deactivate firewall software such as `iptables` \(on systems such as RHEL 6.x and CentOS 6.x \), `firewalld` \(on systems such as RHEL 7.x and CentOS 7.x and later\), or `ufw` \(on Ubuntu systems, deactivated by default\). If firewall software is not deactivated, you must instead configure your software to allow required communication between Greenplum hosts. To deactivate `iptables`: @@ -300,7 +300,7 @@ Set the following parameters in the `/etc/security/limits.conf` file: * hard nproc 131072 ``` -For Red Hat Enterprise Linux \(RHEL\) and CentOS systems, parameter values in the `/etc/security/limits.d/90-nproc.conf` file \(RHEL/CentOS 6\) or `/etc/security/limits.d/20-nproc.conf` file \(RHEL/CentOS 7\) override the values in the `limits.conf` file. 
Ensure that any parameters in the override file are set to the required value. The Linux module `pam_limits` sets user limits by reading the values from the `limits.conf` file and then from the override file. For information about PAM and user limits, see the documentation on PAM and `pam_limits`. +For Red Hat Enterprise Linux \(RHEL\) and CentOS systems, parameter values in the `/etc/security/limits.d/90-nproc.conf` file \(RHEL/CentOS 6\) or `/etc/security/limits.d/20-nproc.conf` file \(RHEL/CentOS 7 and later\) override the values in the `limits.conf` file. Ensure that any parameters in the override file are set to the required value. The Linux module `pam_limits` sets user limits by reading the values from the `limits.conf` file and then from the override file. For information about PAM and user limits, see the documentation on PAM and `pam_limits`. Run the `ulimit -u` command on each segment host to display the maximum number of processes that are available to each user. Validate that the return value is 131072. @@ -333,7 +333,7 @@ XFS is the preferred data storage file system on Linux platforms. Use the `mount rw,nodev,noatime,nobarrier,inode64 ``` -The `nobarrier` option is not supported on RHEL 8 or Ubuntu systems. Use only the options: +The `nobarrier` option is not supported on RHEL 8 or Ubuntu systems or later. Use only the options: ``` rw,nodev,noatime,inode64 @@ -412,7 +412,7 @@ The XFS options can also be set in the `/etc/fstab` file. This example entry fro Non-Volatile Memory Express (NVMe) - RHEL 7
      RHEL 8
      Ubuntu + RHEL 7
      RHEL 8
      RHEL 9
      Ubuntu none @@ -421,7 +421,7 @@ The XFS options can also be set in the `/etc/fstab` file. This example entry fro noop - RHEL 8
      Ubuntu + RHEL 8
      RHEL 9
      Ubuntu none @@ -430,7 +430,7 @@ The XFS options can also be set in the `/etc/fstab` file. This example entry fro deadline - RHEL 8
      Ubuntu + RHEL 8
      RHEL 9
      Ubuntu mq-deadline @@ -450,7 +450,7 @@ The XFS options can also be set in the `/etc/fstab` file. This example entry fro > **Note** Using the `echo` command to set the disk I/O scheduler policy is not persistent; you must ensure that you run the command whenever the system reboots. How to run the command will vary based on your system. - To specify the I/O scheduler at boot time on systems that use `grub2` such as RHEL 7.x or CentOS 7.x, use the system utility `grubby`. This command adds the parameter when run as `root`: + To specify the I/O scheduler at boot time on systems that use `grub2` such as RHEL 7.x or CentOS 7.x and later, use the system utility `grubby`. This command adds the parameter when run as `root`: ``` # grubby --update-kernel=ALL --args="elevator=deadline" @@ -464,9 +464,9 @@ The XFS options can also be set in the `/etc/fstab` file. This example entry fro # grubby --info=ALL ``` - Refer to your operating system documentation for more information about the `grubby` utility. If you used the `grubby` command to configure the disk scheduler on a RHEL or CentOS 7.x system and it does not update the kernels, see the [Note](#grubby_note) at the end of the section. + Refer to your operating system documentation for more information about the `grubby` utility. If you used the `grubby` command to configure the disk scheduler on a RHEL or CentOS 7.x system and later and it does not update the kernels, see the [Note](#grubby_note) at the end of the section. - For additional information about configuring the disk scheduler, refer to the RedHat Enterprise Linux documentation for [RHEL 7](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/performance_tuning_guide/sect-red_hat_enterprise_linux-performance_tuning_guide-storage_and_file_systems-configuration_tools#sect-Red_Hat_Enterprise_Linux-Performance_Tuning_Guide-Configuration_tools-Setting_the_default_IO_scheduler) or [RHEL 8](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/monitoring_and_managing_system_status_and_performance/setting-the-disk-scheduler_monitoring-and-managing-system-status-and-performance). The Ubuntu wiki [IOSchedulers](https://wiki.ubuntu.com/Kernel/Reference/IOSchedulers) topic describes the I/O schedulers available on Ubuntu systems. + For additional information about configuring the disk scheduler, refer to the RedHat Enterprise Linux documentation for [RHEL 7](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/performance_tuning_guide/sect-red_hat_enterprise_linux-performance_tuning_guide-storage_and_file_systems-configuration_tools#sect-Red_Hat_Enterprise_Linux-Performance_Tuning_Guide-Configuration_tools-Setting_the_default_IO_scheduler), [RHEL 8](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/monitoring_and_managing_system_status_and_performance/setting-the-disk-scheduler_monitoring-and-managing-system-status-and-performance), or [RHEL 9](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html/monitoring_and_managing_system_status_and_performance/setting-the-disk-scheduler_monitoring-and-managing-system-status-and-performance). The Ubuntu wiki [IOSchedulers](https://wiki.ubuntu.com/Kernel/Reference/IOSchedulers) topic describes the I/O schedulers available on Ubuntu systems. 
### Networking @@ -496,7 +496,7 @@ kernel /vmlinuz-2.6.18-274.3.1.el5 ro root=LABEL=/ initrd /initrd-2.6.18-274.3.1.el5.img ``` -On systems that use `grub2` such as RHEL 7.x or CentOS 7.x, use the system utility `grubby`. This command adds the parameter when run as root. +On systems that use `grub2` such as RHEL 7.x or CentOS 7.x and later, use the system utility `grubby`. This command adds the parameter when run as root. ``` # grubby --update-kernel=ALL --args="transparent_hugepage=never" From df3e0b0aaa60b8ae9f839bfc5a52f8a3a8424493 Mon Sep 17 00:00:00 2001 From: mperezfuster Date: Tue, 14 Nov 2023 00:11:33 +0000 Subject: [PATCH 074/106] Docs: edit MAX_LINE_LENGHT description for 6X (#16713) Co-authored-by: Mireia Perez Fuster --- gpdb-doc/markdown/utility_guide/ref/gpfdist.html.md | 2 +- gpdb-doc/markdown/utility_guide/ref/gpload.html.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gpdb-doc/markdown/utility_guide/ref/gpfdist.html.md b/gpdb-doc/markdown/utility_guide/ref/gpfdist.html.md index 934b3a1c5e22..baceff9c2661 100644 --- a/gpdb-doc/markdown/utility_guide/ref/gpfdist.html.md +++ b/gpdb-doc/markdown/utility_guide/ref/gpfdist.html.md @@ -62,7 +62,7 @@ Most likely, you will want to run `gpfdist` on your ETL machines rather than the : Sets the number of seconds that `gpfdist` waits before cleaning up the session when there are no `POST` requests from the segments. Default is 300. Allowed values are 300 to 86400. You may increase its value when experiencing heavy network traffic. -m max\_length -: Sets the maximum allowed data row length in bytes. Default is 32768. Should be used when user data includes very wide rows \(or when `line too long` error message occurs\). Should not be used otherwise as it increases resource allocation. Valid range is 32K to 256MB. \(The upper limit is 1MB on Windows systems.\) +: Sets the maximum allowed data row length in bytes. Default is 32768. Should be used when user data includes very wide rows \(or when `line too long` error message occurs\). Should not be used otherwise as it increases resource allocation. Valid range is 32K to 256MB. The upper limit is 1MB on Windows systems. : > **Note** Memory issues might occur if you specify a large maximum row length and run a large number of `gpfdist` concurrent connections. For example, setting this value to the maximum of 256MB with 96 concurrent `gpfdist` processes requires approximately 24GB of memory \(`(96 + 1) x 246MB`\). diff --git a/gpdb-doc/markdown/utility_guide/ref/gpload.html.md b/gpdb-doc/markdown/utility_guide/ref/gpload.html.md index 752182a66aae..5b265eca42e9 100644 --- a/gpdb-doc/markdown/utility_guide/ref/gpload.html.md +++ b/gpdb-doc/markdown/utility_guide/ref/gpload.html.md @@ -231,7 +231,7 @@ GPLOAD : Required when `TRANSFORM` is specified. Specifies the location of the transformation configuration file that is specified in the `TRANSFORM` parameter, above. MAX\_LINE\_LENGTH - : Optional. An integer that specifies the maximum length of a line in the XML transformation data passed to `gpload`. + : Optional. Sets the maximum allowed data row length in bytes. Default is 32768. Should be used when user data includes very wide rows (or when `line too long` error message occurs). Should not be used otherwise as it increases resource allocation. Valid range is 32K to 256MB. The upper limit is 1MB on Windows systems. FORMAT : Optional. Specifies the format of the source data file\(s\) - either plain text \(`TEXT`\) or comma separated values \(`CSV`\) format. 
Defaults to `TEXT` if not specified. For more information about the format of the source data, see [Loading and Unloading Data](../../admin_guide/load/topics/g-loading-and-unloading-data.html). From 4486d3e7f33f540972d73de384e08af4c37d9d0a Mon Sep 17 00:00:00 2001 From: Jingwen Yang Date: Tue, 14 Nov 2023 10:13:47 +0800 Subject: [PATCH 075/106] [6X backport] Fix issue that segment index might be wrong in slice->segments (#16701) The element in the list slice->segments[i] is the index of gpdb actual segments, so it can't be more than the number of segments. To make sure that segment index is valid, we modify the method to decide segment index when gangType is GANGTYPE_SINGLETON_READER in this commit. Co-authored-by: Yongtao Huang --- src/backend/executor/execMain.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 1b7222710a59..8ee53ecc2e96 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -5018,9 +5018,19 @@ FillSliceGangInfo(Slice *slice, int numsegments) slice->segments = list_make1_int(-1); break; case GANGTYPE_SINGLETON_READER: - slice->gangSize = 1; - slice->segments = list_make1_int(gp_session_id % numsegments); - break; + { + int gp_segment_count = getgpsegmentCount(); + slice->gangSize = 1; + /* + * numsegments might be larger than the number of gpdb actual segments for foreign table. + * For example, for gp2gp, when remote gpdb cluster has more segments than local gpdb, + * numsegments will be larger than getgpsegmentCount(). + * + * So we need to use the minimum of numsegments and getgpsegmentCount() here. + */ + slice->segments = list_make1_int(gp_session_id % Min(numsegments, gp_segment_count)); + break; + } default: elog(ERROR, "unexpected gang type"); } From 68f62192673ec4a8ec598c32e12f10ad651e7559 Mon Sep 17 00:00:00 2001 From: Hongxu Ma Date: Tue, 14 Nov 2023 16:04:06 +0800 Subject: [PATCH 076/106] 6X: Check peer listener failed in IC-PROXY mode (#16553) This commit backported #16438 to 6x: In IC-PROXY mode, the user doesn't get a notification when peer listener bind/listen failed. Because the related code is in IC-PROXY process, it only records warning logs. So, the user's query just hangs here silently. This behavior may make the user very confused. In the commit, introduced a SHM variable to check the failure and give notification in time. 
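As an illustration, a minimal shell sketch for triaging this failure mode; the `gp_interconnect_proxy_addresses` lookup mirrors the new isolation2 test, while the `ss`-based port check is only an assumed triage step, not something the patch itself runs:

```
# Extract the peer-listener port from the first entry of
# gp_interconnect_proxy_addresses (same lookup the new test uses), then check
# whether another process is already bound to that port.
ic_proxy_port=$(psql postgres -Atc "show gp_interconnect_proxy_addresses;" | awk -F ',' '{print $1}' | awk -F ':' '{print $4}')
ss -ltn | grep ":${ic_proxy_port}\b" && echo "port ${ic_proxy_port} is already in use"

# With the shared-memory flag in place, a query that needs the interconnect on
# that segment now fails fast instead of hanging silently:
#   ERROR: Failed to setup ic_proxy interconnect
#   DETAIL: The ic_proxy process failed to bind or listen.
#   HINT: Please check the server log for related WARNING messages.
```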
--- src/backend/cdb/motion/ic_proxy_backend.c | 1 + src/backend/cdb/motion/ic_proxy_backend.h | 4 +- src/backend/cdb/motion/ic_proxy_bgworker.c | 26 ++++++++ src/backend/cdb/motion/ic_proxy_main.c | 13 +++- src/backend/cdb/motion/ic_tcp.c | 10 +++ src/backend/postmaster/postmaster.c | 4 -- src/backend/storage/ipc/ipci.c | 9 +++ src/include/cdb/ic_proxy_bgworker.h | 6 +- .../expected/ic_proxy_listen_failed.out | 64 +++++++++++++++++++ .../isolation2/isolation2_ic_proxy_schedule | 3 + .../isolation2/script/start_py_httpserver.sh | 13 ++++ .../isolation2/sql/ic_proxy_listen_failed.sql | 33 ++++++++++ 12 files changed, 179 insertions(+), 7 deletions(-) create mode 100644 src/test/isolation2/expected/ic_proxy_listen_failed.out create mode 100755 src/test/isolation2/script/start_py_httpserver.sh create mode 100644 src/test/isolation2/sql/ic_proxy_listen_failed.sql diff --git a/src/backend/cdb/motion/ic_proxy_backend.c b/src/backend/cdb/motion/ic_proxy_backend.c index 7269372b4545..40b07d2a35f5 100644 --- a/src/backend/cdb/motion/ic_proxy_backend.c +++ b/src/backend/cdb/motion/ic_proxy_backend.c @@ -34,6 +34,7 @@ #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" #include "executor/execdesc.h" +#include "storage/shmem.h" #include "ic_proxy.h" #include "ic_proxy_backend.h" diff --git a/src/backend/cdb/motion/ic_proxy_backend.h b/src/backend/cdb/motion/ic_proxy_backend.h index 3006837bdb3c..579441572e11 100644 --- a/src/backend/cdb/motion/ic_proxy_backend.h +++ b/src/backend/cdb/motion/ic_proxy_backend.h @@ -13,7 +13,7 @@ #define IC_PROXY_BACKEND_H #include "postgres.h" - +#include "port/atomics.h" #include "cdb/cdbinterconnect.h" #include @@ -38,6 +38,8 @@ typedef struct ICProxyBackendContext ChunkTransportState *transportState; } ICProxyBackendContext; +extern pg_atomic_uint32 *ic_proxy_peer_listener_failed; + extern void ic_proxy_backend_connect(ICProxyBackendContext *context, ChunkTransportStateEntry *pEntry, MotionConn *conn, bool isSender); diff --git a/src/backend/cdb/motion/ic_proxy_bgworker.c b/src/backend/cdb/motion/ic_proxy_bgworker.c index 5427e85bc867..6c5de0ba4b43 100644 --- a/src/backend/cdb/motion/ic_proxy_bgworker.c +++ b/src/backend/cdb/motion/ic_proxy_bgworker.c @@ -16,6 +16,7 @@ #include "postgres.h" #include "storage/ipc.h" +#include "storage/shmem.h" #include "cdb/ic_proxy_bgworker.h" #include "ic_proxy_server.h" @@ -35,3 +36,28 @@ ICProxyMain(Datum main_arg) /* main loop */ proc_exit(ic_proxy_server_main()); } + +/* + * the size of ICProxy SHM structure + */ +Size +ICProxyShmemSize(void) +{ + Size size = 0; + size = add_size(size, sizeof(*ic_proxy_peer_listener_failed)); + return size; +} + +/* + * initialize ICProxy's SHM structure: only one flag variable + */ +void +ICProxyShmemInit(void) +{ + bool found; + ic_proxy_peer_listener_failed = ShmemInitStruct("IC_PROXY Listener Failure Flag", + sizeof(*ic_proxy_peer_listener_failed), + &found); + if (!found) + pg_atomic_init_u32(ic_proxy_peer_listener_failed, 0); +} \ No newline at end of file diff --git a/src/backend/cdb/motion/ic_proxy_main.c b/src/backend/cdb/motion/ic_proxy_main.c index 027041897f88..84b1163ebda4 100644 --- a/src/backend/cdb/motion/ic_proxy_main.c +++ b/src/backend/cdb/motion/ic_proxy_main.c @@ -18,6 +18,8 @@ #include "storage/ipc.h" #include "utils/guc.h" #include "utils/memutils.h" +#include "storage/shmem.h" +#include "port/atomics.h" #include "ic_proxy_server.h" #include "ic_proxy_addr.h" @@ -36,6 +38,8 @@ static uv_timer_t ic_proxy_server_timer; static uv_tcp_t ic_proxy_peer_listener; static bool 
ic_proxy_peer_listening; +/* flag (in SHM) for incidaing if peer listener bind/listen failed */ +pg_atomic_uint32 *ic_proxy_peer_listener_failed; static uv_pipe_t ic_proxy_client_listener; static bool ic_proxy_client_listening; @@ -144,8 +148,12 @@ ic_proxy_server_peer_listener_init(uv_loop_t *loop) if (ic_proxy_addrs == NIL) return; + Assert(ic_proxy_peer_listener_failed != NULL); if (ic_proxy_peer_listening) + { + Assert(pg_atomic_read_u32(ic_proxy_peer_listener_failed) == 0); return; + } /* Get the addr from the gp_interconnect_proxy_addresses */ addr = ic_proxy_get_my_addr(); @@ -185,6 +193,7 @@ ic_proxy_server_peer_listener_init(uv_loop_t *loop) { elog(WARNING, "ic-proxy: tcp: fail to bind: %s", uv_strerror(ret)); + pg_atomic_exchange_u32(ic_proxy_peer_listener_failed, 1); return; } @@ -194,6 +203,7 @@ ic_proxy_server_peer_listener_init(uv_loop_t *loop) { elog(WARNING, "ic-proxy: tcp: fail to listen: %s", uv_strerror(ret)); + pg_atomic_exchange_u32(ic_proxy_peer_listener_failed, 1); return; } @@ -201,6 +211,7 @@ ic_proxy_server_peer_listener_init(uv_loop_t *loop) elogif(gp_log_interconnect >= GPVARS_VERBOSITY_VERBOSE, LOG, "ic-proxy: tcp: listening on socket %d", fd); + pg_atomic_exchange_u32(ic_proxy_peer_listener_failed, 0); ic_proxy_peer_listening = true; } @@ -431,10 +442,10 @@ int ic_proxy_server_main(void) { char path[MAXPGPATH]; - elogif(gp_log_interconnect >= GPVARS_VERBOSITY_TERSE, LOG, "ic-proxy: server setting up"); + pg_atomic_exchange_u32(ic_proxy_peer_listener_failed, 0); ic_proxy_pkt_cache_init(IC_PROXY_MAX_PKT_SIZE); uv_loop_init(&ic_proxy_server_loop); diff --git a/src/backend/cdb/motion/ic_tcp.c b/src/backend/cdb/motion/ic_tcp.c index 4f05d79516c6..6d4c5a523146 100644 --- a/src/backend/cdb/motion/ic_tcp.c +++ b/src/backend/cdb/motion/ic_tcp.c @@ -1277,6 +1277,16 @@ SetupTCPInterconnect(EState *estate) interconnect_context->doSendStopMessage = doSendStopMessageTCP; #ifdef ENABLE_IC_PROXY + /* check if current Segment's ICProxy listener failed */ + if (pg_atomic_read_u32(ic_proxy_peer_listener_failed) > 0) + { + ereport(ERROR, + (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), + errmsg("Failed to setup ic_proxy interconnect"), + errdetail("The ic_proxy process failed to bind or listen."), + errhint("Please check the server log for related WARNING messages."))); + } + ic_proxy_backend_init_context(interconnect_context); #endif /* ENABLE_IC_PROXY */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index f50c7bd4945b..9468499aec5f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -421,11 +421,7 @@ static BackgroundWorker PMAuxProcList[MaxPMAuxProc] = #ifdef ENABLE_IC_PROXY {"ic proxy process", -#ifdef FAULT_INJECTOR BGWORKER_SHMEM_ACCESS, -#else - 0, -#endif BgWorkerStart_RecoveryFinished, 0, /* restart immediately if ic proxy process exits with non-zero code */ ICProxyMain, {0}, {0}, 0, 0, diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index e06441cbbf7d..3c664ccf93eb 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -67,6 +67,7 @@ #include "utils/session_state.h" #include "cdb/cdbendpoint.h" #include "replication/gp_replication.h" +#include "cdb/ic_proxy_bgworker.h" shmem_startup_hook_type shmem_startup_hook = NULL; @@ -185,6 +186,10 @@ CreateSharedMemoryAndSemaphores(int port) size = add_size(size, FaultInjector_ShmemSize()); #endif +#ifdef ENABLE_IC_PROXY + size = add_size(size, ICProxyShmemSize()); +#endif + /* This elog happens 
before we know the name of the log file we are supposed to use */ elog(DEBUG1, "Size not including the buffer pool %lu", (unsigned long) size); @@ -337,6 +342,10 @@ CreateSharedMemoryAndSemaphores(int port) FaultInjector_ShmemInit(); #endif +#ifdef ENABLE_IC_PROXY + ICProxyShmemInit(); +#endif + /* * Set up other modules that need some shared memory space */ diff --git a/src/include/cdb/ic_proxy_bgworker.h b/src/include/cdb/ic_proxy_bgworker.h index d30a73c285d5..a9a9a4b2d49a 100644 --- a/src/include/cdb/ic_proxy_bgworker.h +++ b/src/include/cdb/ic_proxy_bgworker.h @@ -13,10 +13,14 @@ #ifndef IC_PROXY_BGWORKER_H #define IC_PROXY_BGWORKER_H -#include "postgres.h" +#include "port/atomics.h" +/* flag (in SHM) for incidaing if peer listener bind/listen failed */ +extern pg_atomic_uint32 *ic_proxy_peer_listener_failed; extern bool ICProxyStartRule(Datum main_arg); extern void ICProxyMain(Datum main_arg); +extern Size ICProxyShmemSize(void); +extern void ICProxyShmemInit(void); #endif /* IC_PROXY_BGWORKER_H */ diff --git a/src/test/isolation2/expected/ic_proxy_listen_failed.out b/src/test/isolation2/expected/ic_proxy_listen_failed.out new file mode 100644 index 000000000000..74b914a4deb0 --- /dev/null +++ b/src/test/isolation2/expected/ic_proxy_listen_failed.out @@ -0,0 +1,64 @@ +-- Test case for the scenario which ic-proxy peer listener port has been occupied + +-- start_matchsubs +-- m/ic_tcp.c:\d+/ +-- s/ic_tcp.c:\d+/ic_tcp.c:LINE/ +-- end_matchsubs + +1:create table PR_16438 (i int); +CREATE +1:insert into PR_16438 select generate_series(1,100); +INSERT 100 +1q: ... + +-- get one port and occupy it (start_py_httpserver.sh), then restart cluster +!\retcode ic_proxy_port=`psql postgres -Atc "show gp_interconnect_proxy_addresses;" | awk -F ',' '{print $1}' | awk -F ':' '{print $4}'` && gpstop -ai > /dev/null && ./script/start_py_httpserver.sh $ic_proxy_port; +-- start_ignore +started a http server + +-- end_ignore +(exited with code 0) +!\retcode sleep 2 && gpstart -a > /dev/null; +-- start_ignore + +-- end_ignore +(exited with code 0) + +-- this output is hard to match, let's ignore it +-- start_ignore +2&:select count(*) from PR_16438; +FAILED: Forked command is not blocking; got output: ERROR: Failed to setup ic_proxy interconnect +DETAIL: The ic_proxy process failed to bind or listen. +HINT: Please check the server log for related WARNING messages. +2<: <... completed> +FAILED: Execution failed +2q: ... +-- end_ignore + +-- execute a query (should failed) +3:select count(*) from PR_16438; +ERROR: Failed to setup ic_proxy interconnect +DETAIL: The ic_proxy process failed to bind or listen. +HINT: Please check the server log for related WARNING messages. 
+ +-- kill the script to release port and execute query again (should succeed) +-- Note: different from 7x here, we have to restart cluster (no need in 7x) +-- because 6x's icproxy code doesn't align with 7x: https://github.com/greenplum-db/gpdb/issues/14485 +!\retcode ps aux | grep SimpleHTTPServer | grep -v grep | awk '{print $2}' | xargs kill; +-- start_ignore + +-- end_ignore +(exited with code 0) +!\retcode sleep 2 && gpstop -ari > /dev/null; +-- start_ignore + +-- end_ignore +(exited with code 0) + +4:select count(*) from PR_16438; + count +------- + 100 +(1 row) +4:drop table PR_16438; +DROP diff --git a/src/test/isolation2/isolation2_ic_proxy_schedule b/src/test/isolation2/isolation2_ic_proxy_schedule index 4b1255784407..3187f0184566 100644 --- a/src/test/isolation2/isolation2_ic_proxy_schedule +++ b/src/test/isolation2/isolation2_ic_proxy_schedule @@ -7,3 +7,6 @@ test: tcp_ic_teardown # test TCP proxy peer shutdown test: ic_proxy_peer_shutdown + +# test ic-proxy listen failed +test: ic_proxy_listen_failed diff --git a/src/test/isolation2/script/start_py_httpserver.sh b/src/test/isolation2/script/start_py_httpserver.sh new file mode 100755 index 000000000000..31457d0ff220 --- /dev/null +++ b/src/test/isolation2/script/start_py_httpserver.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# start a python http server (port is $1) in background + +which python2 > /dev/null +if [ $? -eq 0 ] +then + python2 -m SimpleHTTPServer $1 >/dev/null 2>&1 & + echo "started a http server" + exit 0 +fi + +echo "no python found" +exit 1 diff --git a/src/test/isolation2/sql/ic_proxy_listen_failed.sql b/src/test/isolation2/sql/ic_proxy_listen_failed.sql new file mode 100644 index 000000000000..8ac255879cf3 --- /dev/null +++ b/src/test/isolation2/sql/ic_proxy_listen_failed.sql @@ -0,0 +1,33 @@ +-- Test case for the scenario in which the ic-proxy peer listener port has been occupied + +-- start_matchsubs +-- m/ic_tcp.c:\d+/ +-- s/ic_tcp.c:\d+/ic_tcp.c:LINE/ +-- end_matchsubs + +1:create table PR_16438 (i int); +1:insert into PR_16438 select generate_series(1,100); +1q: + +-- get one port and occupy it (start_py_httpserver.sh), then restart cluster +!\retcode ic_proxy_port=`psql postgres -Atc "show gp_interconnect_proxy_addresses;" | awk -F ',' '{print $1}' | awk -F ':' '{print $4}'` && gpstop -ai > /dev/null && ./script/start_py_httpserver.sh $ic_proxy_port; +!\retcode sleep 2 && gpstart -a > /dev/null; + +-- this output is hard to match, let's ignore it +-- start_ignore +2&:select count(*) from PR_16438; +2<: +2q: +-- end_ignore + +-- execute a query (should fail) +3:select count(*) from PR_16438; + +-- kill the script to release port and execute query again (should succeed) +-- Note: different from 7x here, we have to restart cluster (no need in 7x) +-- because 6x's icproxy code doesn't align with 7x: https://github.com/greenplum-db/gpdb/issues/14485 +!\retcode ps aux | grep SimpleHTTPServer | grep -v grep | awk '{print $2}' | xargs kill; +!\retcode sleep 2 && gpstop -ari > /dev/null; + +4:select count(*) from PR_16438; +4:drop table PR_16438; From c96ea775f866a45d0ffdf6d8c8282155c376b8e4 Mon Sep 17 00:00:00 2001 From: Darvin Harutyunyan <12006323+DarvinHarutyunyan@users.noreply.github.com> Date: Tue, 14 Nov 2023 12:09:40 +0400 Subject: [PATCH 077/106] Upgrade PgBouncer (#16734) Upgrade the PgBouncer version to the latest, which adds the new "Encrypt LDAP Authentication" feature.
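For context, a minimal, hypothetical sketch of how encrypted LDAP authentication could be wired up through PgBouncer's HBA support is shown below. The commit itself only bumps the submodule, so none of the configuration surface appears in this diff; the ldap options here mirror PostgreSQL's pg_hba.conf LDAP syntax (ldapserver, ldapscheme, ldapbasedn, ldapsearchattribute) and are assumptions rather than confirmed knobs of the bundled fork:

    ; pgbouncer.ini (hypothetical sketch): delegate auth decisions to an HBA file
    [pgbouncer]
    auth_type = hba
    auth_hba_file = /etc/pgbouncer/hba.conf

    # hba.conf entry (hypothetical): authenticate users against LDAP over ldaps://
    host all all 0.0.0.0/0 ldap ldapserver=ldap.example.com ldapscheme=ldaps ldapbasedn="dc=example,dc=com" ldapsearchattribute=uid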
--- gpAux/extensions/pgbouncer/source | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpAux/extensions/pgbouncer/source b/gpAux/extensions/pgbouncer/source index 331c06ed27a8..cbbdde1aa631 160000 --- a/gpAux/extensions/pgbouncer/source +++ b/gpAux/extensions/pgbouncer/source @@ -1 +1 @@ -Subproject commit 331c06ed27a89fd0d460552713b852b8b6bc9d3d +Subproject commit cbbdde1aa631256294336da5a05f4c8519b1c964 From 99181e5ec826aa5e7d18926dce2f24a061ff8183 Mon Sep 17 00:00:00 2001 From: Karen Huddleston Date: Wed, 15 Nov 2023 11:34:29 -0800 Subject: [PATCH 078/106] Revert "[6X] Fix segmentation fault during dispatch interrupt (#16602)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 70270654da95e02927c4817040458bc5695e385d. Reverts #16602. It still caused flaky failures in the compile jobs for the pipeline, and it can be reproduced in the pipeline environment (a weird SIGUSR1 signal): [root@9d6f784a-5910-4c76-4d4d-a36edf86f096 test]# trap 'echo "Be patient"' SIGUSR1 [root@9d6f784a-5910-4c76-4d4d-a36edf86f096 test]# ./cdbdisp_query.t [ OK ] test__CdbDispatchPlan_may_be_interrupted [=============] 1 tests ran [ PASSED ] 1 tests [root@9d6f784a-5910-4c76-4d4d-a36edf86f096 test]# ./cdbdisp_query.t [ OK ] test__CdbDispatchPlan_may_be_interrupted [=============] 1 tests ran [ PASSED ] 1 tests Be patient --- src/backend/cdb/dispatcher/test/Makefile | 10 +- .../cdb/dispatcher/test/cdbdisp_query_test.c | 316 ------------------ .../cdb/dispatcher/test/gpsegconfig_dump | 4 - src/interfaces/libpq/fe-connect.c | 6 +- src/interfaces/libpq/fe-misc.c | 3 - 5 files changed, 3 insertions(+), 336 deletions(-) delete mode 100644 src/backend/cdb/dispatcher/test/cdbdisp_query_test.c delete mode 100644 src/backend/cdb/dispatcher/test/gpsegconfig_dump diff --git a/src/backend/cdb/dispatcher/test/Makefile b/src/backend/cdb/dispatcher/test/Makefile index 77fc66e34433..8ff64dcedf2d 100644 --- a/src/backend/cdb/dispatcher/test/Makefile +++ b/src/backend/cdb/dispatcher/test/Makefile @@ -3,8 +3,7 @@ top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global TARGETS=cdbdispatchresult \ - cdbgang \ - cdbdisp_query + cdbgang include $(top_builddir)/src/backend/mock.mk @@ -24,9 +23,4 @@ cdbgang.t: \ $(MOCK_DIR)/backend/utils/mmgr/redzone_handler_mock.o \ $(MOCK_DIR)/backend/utils/misc/faultinjector_mock.o -cdbdisp_query.t: \ - $(MOCK_DIR)/backend/access/transam/xlog_mock.o \ - $(MOCK_DIR)/backend/libpq/fe-exec_mock.o \ - $(MOCK_DIR)/backend/libpq/fe-misc_mock.o \ - $(MOCK_DIR)/backend/cdb/cdbfts_mock.o \ - $(MOCK_DIR)/backend/utils/misc/gpexpand_mock.o +include $(top_builddir)/src/backend/mock.mk diff --git a/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c b/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c deleted file mode 100644 index 8355f71b5f59..000000000000 --- a/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c +++ /dev/null @@ -1,316 +0,0 @@ -#include -#include -#include -#include "cmockery.h" -#include "postgres.h" - -#include "storage/ipc.h" -#include "storage/proc.h" - -#include "../cdbdisp_query.c" - - -#undef PG_RE_THROW -#define PG_RE_THROW() siglongjmp(*PG_exception_stack, 1) - - -int __wrap_errmsg(const char *fmt,...); -int __wrap_errcode(int sqlerrcode); -bool __wrap_errstart(int elevel, const char *filename, int lineno, - const char *funcname, const char *domain); -void __wrap_errfinish(int dummy __attribute__((unused)),...); -Gang *__wrap_cdbgang_createGang_async(List *segments, SegmentType segmentType); -int __wrap_pqPutMsgStart(char msg_type, bool force_len, PGconn *conn); -int __wrap_PQcancel(PGcancel *cancel, char *errbuf, int errbufsize); -char *__wrap_serializeNode(Node *node, int *size, int *uncompressed_size_out); -char *__wrap_qdSerializeDtxContextInfo(int *size, bool wantSnapshot, bool inCursor, int txnOptions, char *debugCaller); -void __wrap_VirtualXactLockTableInsert(VirtualTransactionId vxid); -void __wrap_AcceptInvalidationMessages(void); -static void terminate_process(); - - -int -__wrap_errmsg(const char *fmt,...) -{ - check_expected(fmt); - optional_assignment(fmt); - return (int) mock(); -} - - -int -__wrap_errcode(int sqlerrcode) -{ - check_expected(sqlerrcode); - return (int) mock(); -} - - -bool -__wrap_errstart(int elevel, const char *filename, int lineno, - const char *funcname, const char *domain) -{ - check_expected(elevel); - check_expected(filename); - check_expected(lineno); - check_expected(funcname); - check_expected(domain); - optional_assignment(filename); - optional_assignment(funcname); - optional_assignment(domain); - return (bool) mock(); -} - - -void __wrap_errfinish(int dummy __attribute__((unused)),...) 
-{ - PG_RE_THROW(); -} - - -static void -expect_ereport(int expect_elevel) -{ - expect_any(__wrap_errmsg, fmt); - will_be_called(__wrap_errmsg); - - expect_any(__wrap_errcode, sqlerrcode); - will_be_called(__wrap_errcode); - - expect_value(__wrap_errstart, elevel, expect_elevel); - expect_any(__wrap_errstart, filename); - expect_any(__wrap_errstart, lineno); - expect_any(__wrap_errstart, funcname); - expect_any(__wrap_errstart, domain); - if (expect_elevel < ERROR) - { - will_return(__wrap_errstart, false); - } - else - { - will_return(__wrap_errstart, true); - } -} - - -Gang * -__wrap_cdbgang_createGang_async(List *segments, SegmentType segmentType) -{ - MemoryContext oldContext = MemoryContextSwitchTo(DispatcherContext); - Gang *gang = buildGangDefinition(segments, segmentType); - - MemoryContextSwitchTo(oldContext); - - PGconn *conn = (PGconn *) malloc(sizeof(PGconn)); - - MemSet(conn, 0, sizeof(PGconn)); - initPQExpBuffer(&conn->errorMessage); - initPQExpBuffer(&conn->workBuffer); - gang->db_descriptors[0]->conn = conn; - - return gang; -} - - -int -__wrap_pqPutMsgStart(char msg_type, bool force_len, PGconn *conn) -{ - if (conn->outBuffer_shared) - fail_msg("Mustn't send something else during dispatch!"); - check_expected(msg_type); - check_expected(force_len); - check_expected(conn); - optional_assignment(conn); - return (int) mock(); -} - - -int -__wrap_PQcancel(PGcancel *cancel, char *errbuf, int errbufsize) -{ - return (int) mock(); -} - - -char * -__wrap_serializeNode(Node *node, int *size, int *uncompressed_size_out) -{ - const int alloc_size = 1024; - - if (size != NULL) - *size = alloc_size; - if (uncompressed_size_out != NULL) - *uncompressed_size_out = alloc_size; - - return (char *) palloc(alloc_size); -} - - -char * -__wrap_qdSerializeDtxContextInfo(int *size, bool wantSnapshot, bool inCursor, int txnOptions, char *debugCaller) -{ - const int alloc_size = 1024; - - assert_int_not_equal(size, NULL); - *size = alloc_size; - - return (char *) palloc(alloc_size); -} - - -void -__wrap_VirtualXactLockTableInsert(VirtualTransactionId vxid) -{ - mock(); -} - -void -__wrap_AcceptInvalidationMessages(void) -{ - mock(); -} - - -static void -terminate_process() -{ - die(SIGTERM); -} - -/* - * Test query may be interrupted during plan dispatching - */ -static void -test__CdbDispatchPlan_may_be_interrupted(void **state) -{ - PlannedStmt *plannedstmt = (PlannedStmt *) palloc(sizeof(PlannedStmt)); - QueryDesc *queryDesc = (QueryDesc *) palloc(sizeof(QueryDesc)); - - queryDesc->plannedstmt = plannedstmt; - /* ddesc->secContext is filled in cdbdisp_buildPlanQueryParms() */ - queryDesc->ddesc = (QueryDispatchDesc *) palloc(sizeof(QueryDispatchDesc)); - /* source text is required for buildGpQueryString() */ - queryDesc->sourceText = "select a from t1;"; - - /* slice table is needed to allocate gang */ - SliceTable *table = (SliceTable *) palloc(sizeof(SliceTable)); - Slice *slice = makeNode(Slice); - - slice->sliceIndex = 1; - slice->gangType = GANGTYPE_PRIMARY_READER; - slice->segments = list_make1_int(0); - table->slices = lappend(table->slices, slice); - - queryDesc->estate = CreateExecutorState(); - queryDesc->estate->es_sliceTable = table; - - /* cdbcomponent_getCdbComponents() mocks */ - will_be_called(FtsNotifyProber); - will_return(getFtsVersion, 1); - will_return(GetGpExpandVersion, 1); - - /* StartTransactionCommand() mocks */ - will_return(RecoveryInProgress, false); - will_be_called(__wrap_VirtualXactLockTableInsert); - will_be_called(__wrap_AcceptInvalidationMessages); - 
will_be_called(initialize_wal_bytes_written); - - /* - * cdbdisp_dispatchToGang() - * - * start sending MPP query to QE inside PQsendGpQuery_shared() replace - * connection buffer with the shared one - */ - expect_any(PQsendQueryStart, conn); - will_return(PQsendQueryStart, true); - - /* first try to flush MPP query inside PQsendGpQuery_shared() */ - expect_any(pqFlushNonBlocking, conn); - will_return(pqFlushNonBlocking, 1); - - /* - * cdbdisp_waitDispatchFinish() - * - * query will be interrupted before poll() - */ - expect_any(pqFlushNonBlocking, conn); - will_return_with_sideeffect(pqFlushNonBlocking, 1, &terminate_process, NULL); - - /* process was terminated by administrative command */ - expect_ereport(FATAL); - - /* QD will trying to cancel queries on QEs */ - will_return(__wrap_PQcancel, TRUE); - - /* during close and free connection */ - expect_any_count(pqClearAsyncResult, conn, 2); - will_be_called_count(pqClearAsyncResult, 2); - - /* - * BUT! pqPutMsgStart mustn't be called - * - * we can't send termination message (X) until shared message isn't sent - * out the buffer completely - */ - - /* - * dirty hack. cluster topology needed to allocate gangs is loaded from - * gpsegconfig_dump outside of transaction - */ - cdbcomponent_getCdbComponents(); - - StartTransactionCommand(); - - PG_TRY(); - { - CdbDispatchPlan(queryDesc, false, false); - fail(); - } - PG_CATCH(); - { - /* - * SIGTERM handling emulation gpdb bail out from CheckDispatchResult - * without flushing unsent messages in case of process exit in - * progress AtAbort_DispatcherState will be called during transaction - * abort - */ - proc_exit_inprogress = true; - - AtAbort_DispatcherState(); - } - PG_END_TRY(); -} - -int -main(int argc, char *argv[]) -{ - pqsignal(SIGUSR1, SIG_IGN); - pqsignal(SIGUSR2, SIG_IGN); - - cmockery_parse_arguments(argc, argv); - - const UnitTest tests[] = - { - unit_test(test__CdbDispatchPlan_may_be_interrupted) - }; - - Gp_role = GP_ROLE_DISPATCH; - /* to start transaction */ - PGPROC proc; - - MyBackendId = 7; - proc.backendId = MyBackendId; - MyProc = &proc; - /* to build cdb components info */ - GpIdentity.dbid = 1; - GpIdentity.segindex = -1; - - MemoryContextInit(); - - /* to avoid mocking cdbtm.c functions */ - MyTmGxactLocal = (TMGXACTLOCAL *) MemoryContextAllocZero(TopMemoryContext, sizeof(TMGXACTLOCAL)); - - SetSessionUserId(1000, true); - - return run_tests(tests); -} \ No newline at end of file diff --git a/src/backend/cdb/dispatcher/test/gpsegconfig_dump b/src/backend/cdb/dispatcher/test/gpsegconfig_dump deleted file mode 100644 index c033071faae6..000000000000 --- a/src/backend/cdb/dispatcher/test/gpsegconfig_dump +++ /dev/null @@ -1,4 +0,0 @@ -1 -1 p p n u 6000 localhost localhost -2 0 p p n u 6002 localhost localhost -3 1 p p n u 6003 localhost localhost -4 2 p p n u 6004 localhost localhost diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index c42c8d684f07..c418da5615f0 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -3220,12 +3220,8 @@ sendTerminateConn(PGconn *conn) /* * Note that the protocol doesn't allow us to send Terminate messages * during the startup phase. - * - * GPDB: we won't manage to send any pq messages until dispatch isn't - * finished. But we can be here during dispatch interruption. 
*/ - if (conn->sock != PGINVALID_SOCKET && conn->status == CONNECTION_OK && - !conn->outBuffer_shared) + if (conn->sock != PGINVALID_SOCKET && conn->status == CONNECTION_OK) { /* * Try to send "close connection" message to backend. Ignore any diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index 99ba3954f302..98a06c287272 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -563,9 +563,6 @@ pqCheckInBufferSpace(size_t bytes_needed, PGconn *conn) int pqPutMsgStart(char msg_type, bool force_len, PGconn *conn) { - /* GPDB: we won't manage to send new message during dispatch */ - Assert(!conn->outBuffer_shared); - int lenPos; int endPos; From 38c8f80aad843ef11966e3b44f17d2fc5b528363 Mon Sep 17 00:00:00 2001 From: Georgy Shelkovy Date: Fri, 17 Nov 2023 13:26:42 +0500 Subject: [PATCH 079/106] Implement vacuum/analyze ordering functions for arenadata_toolkit (#641) This patch extends the functionality of the arenadata_toolkit extension by adding two functions to obtain different table orders for vacuum/analyze. These two functions are planned to be used in the vacuum command in the operation.py script. By default, the bundle will use the current approach, in which new tables are placed at the beginning of the list. But users will be able to reconfigure it to use a second function that places new tables at the end of the list, or even use a custom query to determine the order. --- gpcontrib/arenadata_toolkit/Makefile | 7 ++- .../arenadata_toolkit--1.2--1.3.sql | 62 +++++++++++++++++++ .../arenadata_toolkit.control | 2 +- .../expected/adb_vacuum_strategy_test.out | 56 +++++++++++++++++ .../expected/arenadata_toolkit_test.out | 20 ++++-- .../sql/adb_vacuum_strategy_test.sql | 40 ++++++++++++ 6 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 gpcontrib/arenadata_toolkit/arenadata_toolkit--1.2--1.3.sql create mode 100644 gpcontrib/arenadata_toolkit/expected/adb_vacuum_strategy_test.out create mode 100644 gpcontrib/arenadata_toolkit/sql/adb_vacuum_strategy_test.sql diff --git a/gpcontrib/arenadata_toolkit/Makefile b/gpcontrib/arenadata_toolkit/Makefile index 97a38ebfdcc2..98698fd9f123 100644 --- a/gpcontrib/arenadata_toolkit/Makefile +++ b/gpcontrib/arenadata_toolkit/Makefile @@ -3,11 +3,12 @@ MODULES = arenadata_toolkit EXTENSION = arenadata_toolkit -EXTENSION_VERSION = 1.2 +EXTENSION_VERSION = 1.3 DATA = \ arenadata_toolkit--1.0.sql \ arenadata_toolkit--1.0--1.1.sql \ - arenadata_toolkit--1.1--1.2.sql + arenadata_toolkit--1.1--1.2.sql \ + arenadata_toolkit--1.2--1.3.sql DATA_built = $(EXTENSION)--$(EXTENSION_VERSION).sql @@ -15,7 +16,7 @@ $(DATA_built): $(DATA) cat $(DATA) > $(DATA_built) REGRESS = arenadata_toolkit_test arenadata_toolkit_skew_test adb_get_relfilenodes_test \ - adb_collect_table_stats_test + adb_collect_table_stats_test adb_vacuum_strategy_test REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.2--1.3.sql b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.2--1.3.sql new file mode 100644 index 000000000000..bd787bc4bd92 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.2--1.3.sql @@ -0,0 +1,62 @@ +/* gpcontrib/arenadata_toolkit/arenadata_toolkit--1.2--1.3.sql */ + +/* + * Returns columns (table_schema, table_name) ordered by increasing vacuum time. 
In this + * list, if newest_first is true, then tables that are not yet vacuumed are located first, + * and already vacuumed - at the end, else (newest_first is false) tables that are already + * vacuumed are located first, and tables that are not yet vacuumed are located at the end. + */ +CREATE FUNCTION arenadata_toolkit.adb_vacuum_strategy(actionname TEXT, newest_first BOOLEAN) +RETURNS TABLE (table_schema NAME, table_name NAME) AS +$func$ +BEGIN + RETURN query EXECUTE format($$ + SELECT nspname, relname + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON relnamespace = n.oid + LEFT JOIN pg_catalog.pg_partition_rule ON parchildrelid = c.oid + LEFT JOIN pg_catalog.pg_stat_last_operation ON staactionname = UPPER(%L) + AND objid = c.oid AND classid = 'pg_catalog.pg_class'::pg_catalog.regclass + WHERE relkind = 'r' AND relstorage != 'x' AND parchildrelid IS NULL + AND nspname NOT IN (SELECT schema_name FROM arenadata_toolkit.operation_exclude) + ORDER BY statime ASC NULLS %s + $$, actionname, CASE WHEN newest_first THEN 'FIRST' ELSE 'LAST' END); +END; +$func$ LANGUAGE plpgsql STABLE EXECUTE ON MASTER; + +/* + * Only for admin usage. + */ +REVOKE ALL ON FUNCTION arenadata_toolkit.adb_vacuum_strategy(TEXT, BOOLEAN) FROM public; + +/* + * Returns columns (table_schema, table_name) ordered by increasing vacuum time. + * In this list, tables that are not yet vacuumed are located first, + * and already vacuumed - at the end (default strategy). + */ +CREATE FUNCTION arenadata_toolkit.adb_vacuum_strategy_newest_first(actionname TEXT) +RETURNS TABLE (table_schema NAME, table_name NAME) AS +$$ + SELECT arenadata_toolkit.adb_vacuum_strategy(actionname, true); +$$ LANGUAGE sql STABLE EXECUTE ON MASTER; + +/* + * Only for admin usage. + */ +REVOKE ALL ON FUNCTION arenadata_toolkit.adb_vacuum_strategy_newest_first(TEXT) FROM public; + +/* + * Returns columns (table_schema, table_name) ordered by increasing vacuum time. + * In this list, tables that are already vacuumed are located first, + * and tables that are not yet vacuumed are located at the end. + */ +CREATE FUNCTION arenadata_toolkit.adb_vacuum_strategy_newest_last(actionname TEXT) +RETURNS TABLE (table_schema NAME, table_name NAME) AS +$$ + SELECT arenadata_toolkit.adb_vacuum_strategy(actionname, false); +$$ LANGUAGE sql STABLE EXECUTE ON MASTER; + +/* + * Only for admin usage. 
+ */ +REVOKE ALL ON FUNCTION arenadata_toolkit.adb_vacuum_strategy_newest_last(TEXT) FROM public; diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit.control b/gpcontrib/arenadata_toolkit/arenadata_toolkit.control index 69986410bab2..505f3ad1ad62 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit.control +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit.control @@ -1,5 +1,5 @@ # arenadata_toolkit extension comment = 'extension is used for manipulation of objects created by adb-bundle' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/arenadata_toolkit' relocatable = false diff --git a/gpcontrib/arenadata_toolkit/expected/adb_vacuum_strategy_test.out b/gpcontrib/arenadata_toolkit/expected/adb_vacuum_strategy_test.out new file mode 100644 index 000000000000..0c577198c042 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/adb_vacuum_strategy_test.out @@ -0,0 +1,56 @@ +CREATE EXTENSION arenadata_toolkit; +SELECT arenadata_toolkit.adb_create_tables(); + adb_create_tables +------------------- + +(1 row) + +CREATE SCHEMA test_vacuum; +CREATE TABLE test_vacuum.vacuumed (a int) DISTRIBUTED BY (a); +CREATE TABLE test_vacuum.not_vacuumed (a int) DISTRIBUTED BY (a); +-- Disable multiple notifications about the creation of multiple subpartitions. +SET client_min_messages=WARNING; +CREATE TABLE test_vacuum.part_table (id INT, a INT, b INT, c INT, d INT, str TEXT) +DISTRIBUTED BY (id) +PARTITION BY RANGE (a) + SUBPARTITION BY RANGE (b) + SUBPARTITION TEMPLATE (START (1) END (3) EVERY (1)) + SUBPARTITION BY RANGE (c) + SUBPARTITION TEMPLATE (START (1) END (3) EVERY (1)) + SUBPARTITION BY RANGE (d) + SUBPARTITION TEMPLATE (START (1) END (3) EVERY (1)) + SUBPARTITION BY LIST (str) + SUBPARTITION TEMPLATE ( + SUBPARTITION sub_prt1 VALUES ('sub_prt1'), + SUBPARTITION sub_prt2 VALUES ('sub_prt2')) + (START (1) END (3) EVERY (1)); +RESET client_min_messages; +INSERT INTO test_vacuum.vacuumed SELECT generate_series(1, 10); +INSERT INTO test_vacuum.not_vacuumed SELECT generate_series(1, 10); +DELETE FROM test_vacuum.vacuumed WHERE a >= 5; +DELETE FROM test_vacuum.not_vacuumed WHERE a >= 5; +VACUUM test_vacuum.vacuumed; +-- default strategy +SELECT * FROM arenadata_toolkit.adb_vacuum_strategy_newest_first('VACUUM') WHERE table_schema = 'test_vacuum'; + table_schema | table_name +--------------+-------------- + test_vacuum | part_table + test_vacuum | not_vacuumed + test_vacuum | vacuumed +(3 rows) + +-- reversed strategy +SELECT * FROM arenadata_toolkit.adb_vacuum_strategy_newest_last('VACUUM') WHERE table_schema = 'test_vacuum'; + table_schema | table_name +--------------+-------------- + test_vacuum | vacuumed + test_vacuum | not_vacuumed + test_vacuum | part_table +(3 rows) + +DROP SCHEMA test_vacuum CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table test_vacuum.vacuumed +drop cascades to table test_vacuum.not_vacuumed +drop cascades to table test_vacuum.part_table +DROP EXTENSION arenadata_toolkit; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index f4e2ad78f3df..4977ab778832 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -96,6 +96,9 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o adb_relation_storage_size | proc | - | {=X/owner,owner=X/owner} adb_relation_storage_size_on_segments | proc | - | 
{=X/owner,owner=X/owner} adb_skew_coefficients | table | v | {owner=arwdDxt/owner,=r/owner} + adb_vacuum_strategy | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_first | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_last | proc | - | {owner=X/owner} arenadata_toolkit | schema | - | {owner=UC/owner,=U/owner} daily_operation | table | a | db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} @@ -103,7 +106,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_history_1_prt_default_part | table | a | db_files_history_1_prt_pYYYYMM | table | a | operation_exclude | table | a | -(16 rows) +(19 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -121,7 +124,10 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; adb_relation_storage_size | proc | arenadata_toolkit | e adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e adb_skew_coefficients | table | arenadata_toolkit | e -(9 rows) + adb_vacuum_strategy | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e +(12 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit cascade; @@ -149,6 +155,9 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o adb_relation_storage_size | proc | - | {=X/owner,owner=X/owner} adb_relation_storage_size_on_segments | proc | - | {=X/owner,owner=X/owner} adb_skew_coefficients | table | v | {owner=arwdDxt/owner,=r/owner} + adb_vacuum_strategy | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_first | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_last | proc | - | {owner=X/owner} arenadata_toolkit | schema | - | {owner=UC/owner,=U/owner} daily_operation | table | a | {owner=arwdDxt/owner} db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} @@ -156,7 +165,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} db_files_history_1_prt_pYYYYMM | table | a | {owner=arwdDxt/owner} operation_exclude | table | a | {owner=arwdDxt/owner} -(16 rows) +(19 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -174,7 +183,10 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; adb_relation_storage_size | proc | arenadata_toolkit | e adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e adb_skew_coefficients | table | arenadata_toolkit | e -(9 rows) + adb_vacuum_strategy | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e +(12 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit cascade; diff --git a/gpcontrib/arenadata_toolkit/sql/adb_vacuum_strategy_test.sql b/gpcontrib/arenadata_toolkit/sql/adb_vacuum_strategy_test.sql new file mode 100644 index 000000000000..9d2ccb03a8cd --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/adb_vacuum_strategy_test.sql @@ -0,0 +1,40 @@ +CREATE EXTENSION arenadata_toolkit; +SELECT arenadata_toolkit.adb_create_tables(); + +CREATE SCHEMA test_vacuum; + +CREATE TABLE test_vacuum.vacuumed (a int) DISTRIBUTED BY (a); +CREATE TABLE test_vacuum.not_vacuumed (a int) DISTRIBUTED BY (a); +-- Disable multiple 
notifications about the creation of multiple subpartitions. +SET client_min_messages=WARNING; +CREATE TABLE test_vacuum.part_table (id INT, a INT, b INT, c INT, d INT, str TEXT) +DISTRIBUTED BY (id) +PARTITION BY RANGE (a) + SUBPARTITION BY RANGE (b) + SUBPARTITION TEMPLATE (START (1) END (3) EVERY (1)) + SUBPARTITION BY RANGE (c) + SUBPARTITION TEMPLATE (START (1) END (3) EVERY (1)) + SUBPARTITION BY RANGE (d) + SUBPARTITION TEMPLATE (START (1) END (3) EVERY (1)) + SUBPARTITION BY LIST (str) + SUBPARTITION TEMPLATE ( + SUBPARTITION sub_prt1 VALUES ('sub_prt1'), + SUBPARTITION sub_prt2 VALUES ('sub_prt2')) + (START (1) END (3) EVERY (1)); +RESET client_min_messages; + +INSERT INTO test_vacuum.vacuumed SELECT generate_series(1, 10); +INSERT INTO test_vacuum.not_vacuumed SELECT generate_series(1, 10); + +DELETE FROM test_vacuum.vacuumed WHERE a >= 5; +DELETE FROM test_vacuum.not_vacuumed WHERE a >= 5; + +VACUUM test_vacuum.vacuumed; + +-- default strategy +SELECT * FROM arenadata_toolkit.adb_vacuum_strategy_newest_first('VACUUM') WHERE table_schema = 'test_vacuum'; +-- reversed strategy +SELECT * FROM arenadata_toolkit.adb_vacuum_strategy_newest_last('VACUUM') WHERE table_schema = 'test_vacuum'; + +DROP SCHEMA test_vacuum CASCADE; +DROP EXTENSION arenadata_toolkit; From 9f71b94c4af923f88259c790e990503c7be96cdc Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:11:09 +0200 Subject: [PATCH 080/106] Revert "Add more locales to docker image and add tests" to resove conflicts with 02e2246 and reapplied later This reverts commit 6298e778e6f610000da1848a117702d729306697. --- arenadata/Dockerfile | 6 ------ gpMgmt/test/behave/mgmt_utils/gpstop.feature | 6 ------ .../behave/mgmt_utils/steps/mgmt_utils.py | 21 +------------------ 3 files changed, 1 insertion(+), 32 deletions(-) diff --git a/arenadata/Dockerfile b/arenadata/Dockerfile index ee0fb40d6a8c..2b885287efcc 100644 --- a/arenadata/Dockerfile +++ b/arenadata/Dockerfile @@ -3,12 +3,6 @@ FROM centos:centos7 as base ARG sigar=https://downloads.adsw.io/ADB/6.22.0_arenadata38/centos/7/community/x86_64/sigar-1.6.5-1056.git2932df5.el7.x86_64.rpm ARG sigar_headers=http://downloads.adsw.io/ADB/6.22.0_arenadata38/centos/7/community/x86_64/sigar-headers-1.6.5-1056.git2932df5.el7.x86_64.rpm -# Reinstall glibc-common. This is necessary to get langpacks in docker -# because docker images don't contain them. 
-RUN sed -i 's/\(override_install_langs*\)/# \1/' /etc/yum.conf && \ - yum -y reinstall glibc-common && \ - yum clean all - # Install some basic utilities and build tools RUN yum makecache && yum update -y ca-certificates && \ rpm --import https://mirror.yandex.ru/centos/RPM-GPG-KEY-CentOS-7 && \ diff --git a/gpMgmt/test/behave/mgmt_utils/gpstop.feature b/gpMgmt/test/behave/mgmt_utils/gpstop.feature index bc1bae6f029c..410ff6625c2c 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstop.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstop.feature @@ -189,9 +189,3 @@ Feature: gpstop behave tests And the user runs gpstop -a and selects f And gpstop should return a return code of 0 - @demo_cluster - Scenario: gpstop gpstop should not print "Failed to kill processes for segment" when locale is different from English - Given the database is running - And "LC_ALL" is different from English - When the user runs "gpstop -a" - Then gpstop should not print "Failed to kill processes for segment" diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 12ae3912ce8c..d4e0ddb20b74 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -43,7 +43,7 @@ from gppylib.commands.base import Command, REMOTE from gppylib import pgconf -default_locale = None + master_data_dir = os.environ.get('MASTER_DATA_DIRECTORY') if master_data_dir is None: raise Exception('Please set MASTER_DATA_DIRECTORY in environment') @@ -4280,22 +4280,3 @@ def impl(context, table, dbname, count): if int(count) != sum(current_row_count): raise Exception( "%s table in %s has %d rows, expected %d rows." % (table, dbname, sum(current_row_count), int(count))) - -@given('"LC_ALL" is different from English') -def step_impl(context): - default_locale = os.environ.get('LC_ALL') - - try: - os.system('sudo localedef -i ru_RU -f UTF-8 ru_RU.UTF-8 > /dev/null') - except FileNotFoundError: - raise Exception("Failed to generate Russian locale") - - os.environ['LC_ALL'] = 'ru_RU.utf8' - -@then('gpstop should not print "Failed to kill processes for segment"') -def impl(context): - check_string_not_present_stdout(context, 'Failed to kill processes for segment') - if default_locale is not None: - os.environ['LC_ALL'] = default_locale - else: - del os.environ['LC_ALL'] From 1c4bdabcfc002156d95210a27969cc28a3b2a05c Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:33:17 +0200 Subject: [PATCH 081/106] Revert "Fix ORCA invalid processing of nested SubLinks under aggregates. (#588)" to be replaced with upstream 720464e This reverts commit 1c2c49c78e9de67ec2abe5ffe12759a4ec550f23. --- .../gpopt/translate/CQueryMutators.cpp | 30 +++++++--------- .../regress/expected/aggregates_optimizer.out | 2 ++ src/test/regress/expected/subselect.out | 30 ---------------- .../regress/expected/subselect_optimizer.out | 36 ------------------- src/test/regress/sql/subselect.sql | 8 ----- 5 files changed, 15 insertions(+), 91 deletions(-) diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp b/src/backend/gpopt/translate/CQueryMutators.cpp index 37cd1dc34411..caf60da8e7ed 100644 --- a/src/backend/gpopt/translate/CQueryMutators.cpp +++ b/src/backend/gpopt/translate/CQueryMutators.cpp @@ -687,26 +687,22 @@ CQueryMutators::RunExtractAggregatesMutator(Node *node, // Handle other top-level outer references in the project element. 
if (var->varlevelsup == context->m_current_query_level) { - if (var->varlevelsup >= context->m_agg_levels_up) + if (var->varlevelsup == context->m_agg_levels_up) { - // We previously started to mutate the Aggref, that references - // the top level query. This Aggref is going to be moved to the - // derived query (see comments in Aggref if-case above). - // Therefore, if we are mutating Vars inside the Aggref, and - // these Vars reference the top level query (varlevelsup = m_current_query_level) - // as well, we must change their varlevelsup field in order to preserve - // correct reference level. i.e these Vars are pulled up as the part of - // the Aggref by the m_agg_levels_up. + // If Var references the top level query inside an Aggref that also + // references top level query, the Aggref is moved to the derived query + // (see comments in Aggref if-case above). Thus, these Var references + // are brought up to the top-query level. // e.g: - // select (select max((select foo.a))) from foo; + // explain select (select sum(foo.a) from jazz) from foo group by a, b; // is transformed into - // select (select fnew.max_t) - // from (select max((select foo.a)) max_t from foo) fnew; - // Here the foo.a inside max referenced top level RTE foo at - // varlevelsup = 2 inside the Aggref at agglevelsup 1. Then the - // Aggref is brought up to the top-query-level of fnew and foo.a - // inside Aggref is decreased by original Aggref's level. - var->varlevelsup -= context->m_agg_levels_up; + // select (select fnew.sum_t from jazz) + // from (select foo.a, foo.b, sum(foo.a) sum_t + // from foo group by foo.a, foo.b) fnew; + // + // Note the foo.a var which is in sum() in a subquery must now become a + // var referencing the current query level. + var->varlevelsup = 0; return (Node *) var; } diff --git a/src/test/regress/expected/aggregates_optimizer.out b/src/test/regress/expected/aggregates_optimizer.out index 3935f1874b92..20fcffbc19a5 100644 --- a/src/test/regress/expected/aggregates_optimizer.out +++ b/src/test/regress/expected/aggregates_optimizer.out @@ -373,6 +373,8 @@ LINE 4: where sum(distinct a.four + b.four) = b.four)... select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Query-to-DXL Translation: No variable entry found due to incorrect normalization of query max ------ 9999 diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index a375efe83766..156db314a435 100755 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1037,34 +1037,4 @@ group by j, q1; 2 | 1 | 2 (1 row) --- Ensure that both planners produce valid plans for the query with the nested --- SubLink, and this SubLink is under the aggregation. For ORCA the fallback --- shouldn't occur. 
-explain (verbose, costs off) -select (select max((select t.i))) from t; - QUERY PLAN ------------------------------------------------- - Aggregate - Output: (SubPlan 2) - -> Gather Motion 3:1 (slice1; segments: 3) - Output: (max((SubPlan 1))) - -> Aggregate - Output: max((SubPlan 1)) - -> Seq Scan on public.t - Output: t.i - SubPlan 1 (slice1; segments: 1) - -> Result - Output: t.i - SubPlan 2 (slice0) - -> Result - Output: max((max((SubPlan 1)))) - Optimizer: Postgres query optimizer -(15 rows) - -select (select max((select t.i))) from t; - max ------ - 1 -(1 row) - drop table t; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index c869799d6f41..cd6014e0cfea 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1101,40 +1101,4 @@ group by j, q1; 2 | 1 | 2 (1 row) --- Ensure that both planners produce valid plans for the query with the nested --- SubLink, and this SubLink is under the aggregation. For ORCA the fallback --- shouldn't occur. -explain (verbose, costs off) -select (select max((select t.i))) from t; - QUERY PLAN ------------------------------------------------------- - Result - Output: (SubPlan 2) - -> Aggregate - Output: max((max((SubPlan 1)))) - -> Gather Motion 3:1 (slice1; segments: 3) - Output: (max((SubPlan 1))) - -> Aggregate - Output: max((SubPlan 1)) - -> Seq Scan on public.t - Output: i - SubPlan 1 (slice1; segments: 3) - -> Result - Output: t.i - -> Result - Output: true - SubPlan 2 (slice0) - -> Result - Output: (max((max((SubPlan 1))))) - -> Result - Output: true - Optimizer: Pivotal Optimizer (GPORCA) -(21 rows) - -select (select max((select t.i))) from t; - max ------ - 1 -(1 row) - drop table t; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 45da8e2f6fd6..b5905caea92f 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -553,12 +553,4 @@ select j, 1 as c, from t group by j, q1; --- Ensure that both planners produce valid plans for the query with the nested --- SubLink, and this SubLink is under the aggregation. For ORCA the fallback --- shouldn't occur. -explain (verbose, costs off) -select (select max((select t.i))) from t; - -select (select max((select t.i))) from t; - drop table t; From 9849afce9ebbf826dfcc7d83cda19a39e0ea48eb Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:33:50 +0200 Subject: [PATCH 082/106] Revert "Fix ORCA invalid processing of nested SubLinks referenced in GROUP BY clause" to be replaced with upstream 720464e This reverts commit bef7c8d9a3b393e98e9f213845cad71e597e55b2. --- .../gpopt/translate/CQueryMutators.cpp | 11 ++--- src/test/regress/expected/subselect.out | 39 --------------- .../regress/expected/subselect_optimizer.out | 47 ------------------- src/test/regress/sql/subselect.sql | 15 ------ 4 files changed, 5 insertions(+), 107 deletions(-) diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp b/src/backend/gpopt/translate/CQueryMutators.cpp index caf60da8e7ed..8c12567f8442 100644 --- a/src/backend/gpopt/translate/CQueryMutators.cpp +++ b/src/backend/gpopt/translate/CQueryMutators.cpp @@ -436,12 +436,11 @@ CQueryMutators::RunGroupingColMutator(Node *node, GPOS_ASSERT(IsA(old_sublink->subselect, Query)); - // One need to call the Query mutator for subselect and take into - // account that SubLink can be multi-level. 
Therefore, the - // context->m_current_query_level must be modified properly - // while diving into such nested SubLink. - new_sublink->subselect = - RunGroupingColMutator(old_sublink->subselect, context); + new_sublink->subselect = gpdb::MutateQueryOrExpressionTree( + old_sublink->subselect, + (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, context, + 0 // flags -- mutate into cte-lists + ); context->m_current_query_level--; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 156db314a435..f59fde3dc423 100755 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -998,43 +998,4 @@ group by i, j; 2 | 2 (1 row) --- Ensure that both planners produce valid plans for the query with the nested --- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is --- not grouping column, is added to query targetList to make ORCA perform query --- normalization. For ORCA the fallback shouldn't occur. -explain (verbose, costs off) -select j, 1 as c, -(select j from (select j) q2) q1 -from t -group by j, q1; - QUERY PLAN ------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) - Output: t.j, 1, ((SubPlan 1)) - -> HashAggregate - Output: t.j, 1, ((SubPlan 1)) - Group Key: t.j, ((SubPlan 1)) - -> Redistribute Motion 3:3 (slice1; segments: 3) - Output: t.j, ((SubPlan 1)) - Hash Key: t.j, ((SubPlan 1)) - -> HashAggregate - Output: t.j, ((SubPlan 1)) - Group Key: t.j, (SubPlan 1) - -> Seq Scan on public.t - Output: t.j, (SubPlan 1) - SubPlan 1 (slice1; segments: 1) - -> Result - Output: t.j - Optimizer: Postgres query optimizer -(17 rows) - -select j, 1 as c, -(select j from (select j) q2) q1 -from t -group by j, q1; - j | c | q1 ----+---+---- - 2 | 1 | 2 -(1 row) - drop table t; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index cd6014e0cfea..3ff680c6d3fa 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1054,51 +1054,4 @@ group by i, j; 2 | 2 (1 row) --- Ensure that both planners produce valid plans for the query with the nested --- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is --- not grouping column, is added to query targetList to make ORCA perform query --- normalization. For ORCA the fallback shouldn't occur. 
-explain (verbose, costs off) -select j, 1 as c, -(select j from (select j) q2) q1 -from t -group by j, q1; - QUERY PLAN ------------------------------------------------------------------------- - Result - Output: j, 1, ((SubPlan 1)) - -> Gather Motion 3:1 (slice2; segments: 3) - Output: j, ((SubPlan 1)) - -> GroupAggregate - Output: j, ((SubPlan 1)) - Group Key: t.j, ((SubPlan 1)) - -> Sort - Output: j, ((SubPlan 1)) - Sort Key: t.j, ((SubPlan 1)) - -> Redistribute Motion 3:3 (slice1; segments: 3) - Output: j, ((SubPlan 1)) - Hash Key: j, ((SubPlan 1)) - -> Result - Output: j, ((SubPlan 1)) - -> Result - Output: (SubPlan 1), j - -> Seq Scan on public.t - Output: j - SubPlan 1 (slice1; segments: 3) - -> Result - Output: t.j - -> Result - Output: true - Optimizer: Pivotal Optimizer (GPORCA) -(25 rows) - -select j, 1 as c, -(select j from (select j) q2) q1 -from t -group by j, q1; - j | c | q1 ----+---+---- - 2 | 1 | 2 -(1 row) - drop table t; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index b5905caea92f..d03556538576 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -538,19 +538,4 @@ select j, from t group by i, j; --- Ensure that both planners produce valid plans for the query with the nested --- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is --- not grouping column, is added to query targetList to make ORCA perform query --- normalization. For ORCA the fallback shouldn't occur. -explain (verbose, costs off) -select j, 1 as c, -(select j from (select j) q2) q1 -from t -group by j, q1; - -select j, 1 as c, -(select j from (select j) q2) q1 -from t -group by j, q1; - drop table t; From 0e44cc7bb3596670239bd0262aa8a18fc3e0a19f Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:34:21 +0200 Subject: [PATCH 083/106] Revert "Fix ORCA invalid processing of nested SubLinks with GROUP BY attributes" to be replaced with upstream commit 720464e This reverts commit ca89de312ed32d602ebec69d3f84196ddf437a77. --- .../gpopt/translate/CQueryMutators.cpp | 36 ++------------- .../regress/expected/bfv_olap_optimizer.out | 2 + src/test/regress/expected/subselect.out | 39 ---------------- .../regress/expected/subselect_optimizer.out | 46 ------------------- src/test/regress/sql/subselect.sql | 22 --------- 5 files changed, 7 insertions(+), 138 deletions(-) diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp b/src/backend/gpopt/translate/CQueryMutators.cpp index 8c12567f8442..665c3bfc78df 100644 --- a/src/backend/gpopt/translate/CQueryMutators.cpp +++ b/src/backend/gpopt/translate/CQueryMutators.cpp @@ -798,43 +798,17 @@ CQueryMutators::RunExtractAggregatesMutator(Node *node, GPOS_ASSERT(IsA(old_sublink->subselect, Query)); - // One need to call the Query mutator for subselect and take into - // account that SubLink can be multi-level. Therefore, the - // context->m_current_query_level must be modified properly - // while diving into such nested SubLink. 
- new_sublink->subselect = - RunExtractAggregatesMutator(old_sublink->subselect, context); + new_sublink->subselect = gpdb::MutateQueryOrExpressionTree( + old_sublink->subselect, + (MutatorWalkerFn) RunExtractAggregatesMutator, (void *) context, + 0 // mutate into cte-lists + ); context->m_current_query_level--; return (Node *) new_sublink; } - if (IsA(node, Query)) - { - Query *query = gpdb::MutateQueryTree( - (Query *) node, (MutatorWalkerFn) RunExtractAggregatesMutator, - context, QTW_IGNORE_RT_SUBQUERIES); - - ListCell *lc; - ForEach(lc, query->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); - - if (RTE_SUBQUERY == rte->rtekind) - { - Query *subquery = rte->subquery; - context->m_current_query_level++; - rte->subquery = (Query *) RunExtractAggregatesMutator( - (Node *) subquery, context); - context->m_current_query_level--; - gpdb::GPDBFree(subquery); - } - } - - return (Node *) query; - } - return gpdb::MutateExpressionTree( node, (MutatorWalkerFn) RunExtractAggregatesMutator, context); } diff --git a/src/test/regress/expected/bfv_olap_optimizer.out b/src/test/regress/expected/bfv_olap_optimizer.out index bf4134c10e26..1a55c4fe8b4f 100644 --- a/src/test/regress/expected/bfv_olap_optimizer.out +++ b/src/test/regress/expected/bfv_olap_optimizer.out @@ -647,6 +647,8 @@ select (select rn from (select row_number() over () as rn, name ,sum(sum(a.salary)) over() from t2_github_issue_10143 a group by a.code; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Query-to-DXL Translation: No variable entry found due to incorrect normalization of query dongnm | sum --------+------ | 2100 diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index f59fde3dc423..295aad6d500b 100755 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -960,42 +960,3 @@ fetch backward all in c1; commit; --end_ignore --- Ensure that both planners produce valid plans for the query with the nested --- SubLink, which contains attributes referenced in query's GROUP BY clause. --- The inner part of SubPlan should contain only t.j. --- start_ignore -drop table if exists t; -NOTICE: table "t" does not exist, skipping --- end_ignore -create table t (i int, j int) distributed by (i); -insert into t values (1, 2); -explain (verbose, costs off) -select j, -(select j from (select j) q2) -from t -group by i, j; - QUERY PLAN ------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: t.j, ((SubPlan 1)), t.i - -> HashAggregate - Output: t.j, (SubPlan 1), t.i - Group Key: t.i, t.j - -> Seq Scan on public.t - Output: t.j, t.i - SubPlan 1 (slice1; segments: 1) - -> Result - Output: t.j - Optimizer: Postgres query optimizer -(11 rows) - -select j, -(select j from (select j) q2) -from t -group by i, j; - j | j ----+--- - 2 | 2 -(1 row) - -drop table t; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index 3ff680c6d3fa..ce71ba62a90e 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -1009,49 +1009,3 @@ fetch backward all in c1; ERROR: backward scan is not supported in this version of Greenplum Database commit; --end_ignore --- Ensure that both planners produce valid plans for the query with the nested --- SubLink, which contains attributes referenced in query's GROUP BY clause. --- The inner part of SubPlan should contain only t.j. 
--- start_ignore -drop table if exists t; -NOTICE: table "t" does not exist, skipping --- end_ignore -create table t (i int, j int) distributed by (i); -insert into t values (1, 2); -explain (verbose, costs off) -select j, -(select j from (select j) q2) -from t -group by i, j; - QUERY PLAN ----------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: j, ((SubPlan 1)) - -> Result - Output: j, (SubPlan 1) - -> GroupAggregate - Output: j, i - Group Key: t.i, t.j - -> Sort - Output: i, j - Sort Key: t.i, t.j - -> Seq Scan on public.t - Output: i, j - SubPlan 1 (slice1; segments: 3) - -> Result - Output: t.j - -> Result - Output: true - Optimizer: Pivotal Optimizer (GPORCA) -(18 rows) - -select j, -(select j from (select j) q2) -from t -group by i, j; - j | j ----+--- - 2 | 2 -(1 row) - -drop table t; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index d03556538576..0d84d0316bc5 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -517,25 +517,3 @@ fetch backward all in c1; commit; --end_ignore - --- Ensure that both planners produce valid plans for the query with the nested --- SubLink, which contains attributes referenced in query's GROUP BY clause. --- The inner part of SubPlan should contain only t.j. --- start_ignore -drop table if exists t; --- end_ignore -create table t (i int, j int) distributed by (i); -insert into t values (1, 2); - -explain (verbose, costs off) -select j, -(select j from (select j) q2) -from t -group by i, j; - -select j, -(select j from (select j) q2) -from t -group by i, j; - -drop table t; From 4624af071d8f8556eeacc4fdc18fd04d4c839a69 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:35:18 +0200 Subject: [PATCH 084/106] Revert "Fix plans for queries to replicated table with volatile function (#383)" to resolve conflicts with c381149 This reverts commit cc35273a7a3d2e5c7c69bde151560601aa76a857. --- src/backend/cdb/cdbllize.c | 8 - src/backend/cdb/cdbmutate.c | 8 +- src/backend/optimizer/plan/createplan.c | 13 +- src/backend/optimizer/plan/subselect.c | 12 +- src/test/regress/expected/rpt.out | 216 +------------------- src/test/regress/expected/rpt_optimizer.out | 216 +------------------- src/test/regress/sql/rpt.sql | 35 ---- 7 files changed, 10 insertions(+), 498 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 73be0f430eb2..9212152fe21e 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -582,14 +582,6 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC if (ctx->movement == MOVEMENT_BROADCAST) { Assert (NULL != ctx->currentPlanFlow); - - if (scanPlan->flow->locustype == CdbLocusType_SegmentGeneral && - contain_volatile_functions((Node *) scanPlan->qual)) - { - scanPlan->flow->locustype = CdbLocusType_SingleQE; - scanPlan->flow->flotype = FLOW_SINGLETON; - } - broadcastPlan(scanPlan, false /* stable */ , false /* rescannable */, ctx->currentPlanFlow->numsegments /* numsegments */); } diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index fedee52c7f00..bb679da3517b 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -849,13 +849,7 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) if (IsA(newnode, Motion) &&flow->req_move != MOVEMENT_NONE) { plan = ((Motion *) newnode)->plan.lefttree; - - /* We'll recreate this motion later below. 
But we should save motion - * request to create appropriate motion above the child node. - * Original flow for the child node will be restored - * after motion creation. */ - flow->flow_before_req_move = plan->flow; - plan->flow = flow; + flow = plan->flow; newnode = (Node *) plan; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index fe03c8df7ce9..923046462c1e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -6588,15 +6588,12 @@ adjust_modifytable_flow(PlannerInfo *root, ModifyTable *node, List *is_split_upd * Obviously, tmp_tab in new segments can't get data if we don't * add a broadcast here. */ - if (subplan->flow->flotype == FLOW_SINGLETON && - subplan->flow->locustype == CdbLocusType_SegmentGeneral) + if (optimizer_replicated_table_insert && + subplan->flow->flotype == FLOW_SINGLETON && + subplan->flow->locustype == CdbLocusType_SegmentGeneral && + !contain_volatile_functions((Node *)subplan->targetlist)) { - if (contain_volatile_functions((Node *)subplan->targetlist)) - { - subplan->flow->locustype = CdbLocusType_SingleQE; - } - else if (optimizer_replicated_table_insert && - subplan->flow->numsegments >= targetPolicy->numsegments) + if (subplan->flow->numsegments >= targetPolicy->numsegments) { /* * A query to reach here: diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ceb711d5e93e..15a585695a3b 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -41,11 +41,9 @@ #include "utils/syscache.h" #include "cdb/cdbmutate.h" -#include "cdb/cdbsetop.h" #include "cdb/cdbsubselect.h" #include "cdb/cdbvars.h" - typedef struct convert_testexpr_context { PlannerInfo *root; @@ -669,7 +667,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, &subroot, config); - if (plan->flow->locustype == CdbLocusType_General && + if ((plan->flow->locustype == CdbLocusType_SegmentGeneral || + plan->flow->locustype == CdbLocusType_General) && (contain_volatile_functions((Node *) plan->targetlist) || contain_volatile_functions(subquery->havingQual))) { @@ -677,13 +676,6 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, plan->flow->flotype = FLOW_SINGLETON; } - if (plan->flow->locustype == CdbLocusType_SegmentGeneral && - (contain_volatile_functions((Node *) plan->targetlist) || - contain_volatile_functions(subquery->havingQual))) - { - plan = (Plan *) make_motion_gather(subroot, plan, NIL, CdbLocusType_SingleQE); - } - /* Isolate the params needed by this specific subplan */ plan_params = root->plan_params; root->plan_params = NIL; diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index b0a595e468df..4785e821fae1 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -875,7 +875,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c SubPlan 1 (slice2; segments: 3) -> Materialize - Output: (random()) + Output: random() -> Broadcast Motion 1:3 (slice1; segments: 1) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 @@ -985,51 +985,6 @@ explain (costs off) select a from t_replicate_volatile union all select * from n Optimizer: Postgres query optimizer (6 rows) --- insert into table with serial column -create table t_replicate_dst(id serial, i integer) 
distributed replicated; -create table t_replicate_src(i integer) distributed replicated; -insert into t_replicate_src select i from generate_series(1, 5) i; -explain (costs off, verbose) insert into t_replicate_dst (i) select i from t_replicate_src; - QUERY PLAN ---------------------------------------------------------------------------------------------- - Insert on rpt.t_replicate_dst - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), t_replicate_src.i - -> Seq Scan on rpt.t_replicate_src - Output: nextval('t_replicate_dst_id_seq'::regclass), t_replicate_src.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off -(7 rows) - -explain (costs off, verbose) with s as (select i from t_replicate_src group by i having random() > 0) insert into t_replicate_dst (i) select i from s; - QUERY PLAN ----------------------------------------------------------------------------------------- - Insert on rpt.t_replicate_dst - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), "*SELECT*".i - -> Subquery Scan on "*SELECT*" - Output: nextval('t_replicate_dst_id_seq'::regclass), "*SELECT*".i - -> HashAggregate - Output: t_replicate_src.i - Group Key: t_replicate_src.i - Filter: (random() > '0'::double precision) - -> Seq Scan on rpt.t_replicate_src - Output: t_replicate_src.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off -(13 rows) - -insert into t_replicate_dst (i) select i from t_replicate_src; -select distinct id from gp_dist_random('t_replicate_dst') order by id; - id ----- - 1 - 2 - 3 - 4 - 5 -(5 rows) - -- update & delete explain (costs off) update t_replicate_volatile set a = 1 where b > random(); ERROR: could not devise a plan (cdbpath.c:2074) @@ -1350,173 +1305,6 @@ select c from rep_tab where c in (select distinct d from rand_tab); 2 (2 rows) --- --- Check sub-selects with distributed replicated tables and volatile functions --- -create table t (i int) distributed replicated; -create table t1 (a int) distributed by (a); -create table t2 (a int, b float) distributed replicated; -create or replace function f(i int) returns int language sql security definer as $$ select i; $$; --- ensure we make gather motion when volatile functions in subplan -explain (costs off, verbose) select (select f(i) from t); - QUERY PLAN ------------------------------------------------------ - Result - Output: $0 - InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice1; segments: 1) - Output: (f(i)) - -> Seq Scan on rpt.t - Output: f(i) - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(9 rows) - -explain (costs off, verbose) select (select f(i) from t group by f(i)); - QUERY PLAN ------------------------------------------------------ - Result - Output: $0 - InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice1; segments: 1) - Output: (f(i)) - -> HashAggregate - Output: (f(i)) - Group Key: f(t.i) - -> Seq Scan on rpt.t - Output: i, f(i) - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(12 rows) - -explain (costs off, verbose) select (select i from t group by i having f(i) > 0); - QUERY PLAN ------------------------------------------------------ - Result - Output: $0 - InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice1; segments: 1) - Output: i - -> HashAggregate - Output: 
i - Group Key: t.i - Filter: (f(t.i) > 0) - -> Seq Scan on rpt.t - Output: i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(13 rows) - --- ensure we do not make broadcast motion -explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a group by i); - QUERY PLAN --------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: t1.a - -> Seq Scan on rpt.t1 - Output: t1.a - Filter: (SubPlan 1) - SubPlan 1 (slice1; segments: 1) - -> Result - Output: random(), t.i - Filter: (t.i = t1.a) - -> Materialize - Output: t.i, t.i - -> Seq Scan on rpt.t - Output: t.i, t.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(15 rows) - -explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a); - QUERY PLAN ------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) - Output: t1.a - -> Seq Scan on rpt.t1 - Output: t1.a - Filter: (SubPlan 1) - SubPlan 1 (slice1; segments: 1) - -> Result - Output: random() - Filter: (t.i = t1.a) - -> Materialize - Output: t.i - -> Seq Scan on rpt.t - Output: t.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(15 rows) - --- ensure we make broadcast motion when volatile function in deleting motion flow -explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; - QUERY PLAN ------------------------------------------------------ - Insert on rpt.t2 - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: t.i, (random()) - -> Seq Scan on rpt.t - Output: t.i, random() - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(7 rows) - --- ensure we make broadcast motion when volatile function in correlated subplan qual -explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); - QUERY PLAN ------------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) - Output: t1.a - -> Seq Scan on rpt.t1 - Output: t1.a - Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Result - Output: f(t.i) - -> Result - Output: t.i - Filter: (t.i = t1.a) - -> Materialize - Output: t.i, t.i - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: t.i, t.i - -> Seq Scan on rpt.t - Output: t.i, t.i - Filter: (f(t.i) > 0) - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(20 rows) - --- ensure we do not break broadcast motion -explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); - QUERY PLAN ------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) - Output: t1.a - -> Result - Output: t1.a - One-Time Filter: (SubPlan 1) - -> Seq Scan on rpt.t1 - Output: t1.a - SubPlan 1 (slice2; segments: 3) - -> Materialize - Output: t.i - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: t.i - -> HashAggregate - Output: t.i - Group Key: t.i - Filter: (random() > '0'::double precision) - -> Seq Scan on rpt.t - Output: t.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(20 rows) - -drop table if exists t; -drop table if exists t1; -drop table if exists t2; -drop function if exists f(i int); -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 13 other 
objects @@ -1530,8 +1318,6 @@ drop cascades to table minmaxtest drop cascades to table t_hashdist drop cascades to table t_replicate_volatile drop cascades to sequence seq_for_insert_replicated_table -drop cascades to table t_replicate_dst -drop cascades to table t_replicate_src drop cascades to table rtbl drop cascades to table t1_13532 drop cascades to table t2_13532 diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index 490a94274753..884b3d2d9c72 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -866,7 +866,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c SubPlan 1 (slice2; segments: 3) -> Materialize - Output: (random()) + Output: random() -> Broadcast Motion 1:3 (slice1; segments: 1) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 @@ -976,51 +976,6 @@ explain (costs off) select a from t_replicate_volatile union all select * from n Optimizer: Postgres query optimizer (6 rows) --- insert into table with serial column -create table t_replicate_dst(id serial, i integer) distributed replicated; -create table t_replicate_src(i integer) distributed replicated; -insert into t_replicate_src select i from generate_series(1, 5) i; -explain (costs off, verbose) insert into t_replicate_dst (i) select i from t_replicate_src; - QUERY PLAN ---------------------------------------------------------------------------------------------- - Insert on rpt.t_replicate_dst - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), t_replicate_src.i - -> Seq Scan on rpt.t_replicate_src - Output: nextval('t_replicate_dst_id_seq'::regclass), t_replicate_src.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off -(7 rows) - -explain (costs off, verbose) with s as (select i from t_replicate_src group by i having random() > 0) insert into t_replicate_dst (i) select i from s; - QUERY PLAN ----------------------------------------------------------------------------------------- - Insert on rpt.t_replicate_dst - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), "*SELECT*".i - -> Subquery Scan on "*SELECT*" - Output: nextval('t_replicate_dst_id_seq'::regclass), "*SELECT*".i - -> HashAggregate - Output: t_replicate_src.i - Group Key: t_replicate_src.i - Filter: (random() > '0'::double precision) - -> Seq Scan on rpt.t_replicate_src - Output: t_replicate_src.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off -(13 rows) - -insert into t_replicate_dst (i) select i from t_replicate_src; -select distinct id from gp_dist_random('t_replicate_dst') order by id; - id ----- - 1 - 2 - 3 - 4 - 5 -(5 rows) - -- update & delete explain (costs off) update t_replicate_volatile set a = 1 where b > random(); ERROR: could not devise a plan (cdbpath.c:2089) @@ -1350,173 +1305,6 @@ select c from rep_tab where c in (select distinct d from rand_tab); 2 (2 rows) --- --- Check sub-selects with distributed replicated tables and volatile functions --- -create table t (i int) distributed replicated; -create table t1 (a int) distributed by (a); -create table t2 (a int, b float) distributed replicated; -create or replace function f(i int) returns int language 
sql security definer as $$ select i; $$; --- ensure we make gather motion when volatile functions in subplan -explain (costs off, verbose) select (select f(i) from t); - QUERY PLAN ------------------------------------------------------ - Result - Output: $0 - InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice1; segments: 1) - Output: (f(i)) - -> Seq Scan on rpt.t - Output: f(i) - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(9 rows) - -explain (costs off, verbose) select (select f(i) from t group by f(i)); - QUERY PLAN ------------------------------------------------------ - Result - Output: $0 - InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice1; segments: 1) - Output: (f(i)) - -> HashAggregate - Output: (f(i)) - Group Key: f(t.i) - -> Seq Scan on rpt.t - Output: i, f(i) - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(12 rows) - -explain (costs off, verbose) select (select i from t group by i having f(i) > 0); - QUERY PLAN ------------------------------------------------------ - Result - Output: $0 - InitPlan 1 (returns $0) (slice2) - -> Gather Motion 1:1 (slice1; segments: 1) - Output: i - -> HashAggregate - Output: i - Group Key: t.i - Filter: (f(t.i) > 0) - -> Seq Scan on rpt.t - Output: i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(13 rows) - --- ensure we do not make broadcast motion -explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a group by i); - QUERY PLAN --------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: t1.a - -> Seq Scan on rpt.t1 - Output: t1.a - Filter: (SubPlan 1) - SubPlan 1 (slice1; segments: 1) - -> Result - Output: random(), t.i - Filter: (t.i = t1.a) - -> Materialize - Output: t.i, t.i - -> Seq Scan on rpt.t - Output: t.i, t.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(15 rows) - -explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a); - QUERY PLAN ------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) - Output: t1.a - -> Seq Scan on rpt.t1 - Output: t1.a - Filter: (SubPlan 1) - SubPlan 1 (slice1; segments: 1) - -> Result - Output: random() - Filter: (t.i = t1.a) - -> Materialize - Output: t.i - -> Seq Scan on rpt.t - Output: t.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(15 rows) - --- ensure we make broadcast motion when volatile function in deleting motion flow -explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; - QUERY PLAN ------------------------------------------------------ - Insert on rpt.t2 - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: t.i, (random()) - -> Seq Scan on rpt.t - Output: t.i, random() - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(7 rows) - --- ensure we make broadcast motion when volatile function in correlated subplan qual -explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); - QUERY PLAN ------------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) - Output: t1.a - -> Seq Scan on rpt.t1 - Output: t1.a - Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Result - Output: f(t.i) - -> Result - 
Output: t.i - Filter: (t.i = t1.a) - -> Materialize - Output: t.i, t.i - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: t.i, t.i - -> Seq Scan on rpt.t - Output: t.i, t.i - Filter: (f(t.i) > 0) - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(20 rows) - --- ensure we do not break broadcast motion -explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); - QUERY PLAN ------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) - Output: t1.a - -> Result - Output: t1.a - One-Time Filter: (SubPlan 1) - -> Seq Scan on rpt.t1 - Output: t1.a - SubPlan 1 (slice2; segments: 3) - -> Materialize - Output: t.i - -> Broadcast Motion 1:3 (slice1; segments: 1) - Output: t.i - -> HashAggregate - Output: t.i - Group Key: t.i - Filter: (random() > '0'::double precision) - -> Seq Scan on rpt.t - Output: t.i - Optimizer: Postgres query optimizer - Settings: enable_bitmapscan=off, enable_seqscan=off -(20 rows) - -drop table if exists t; -drop table if exists t1; -drop table if exists t2; -drop function if exists f(i int); -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 13 other objects @@ -1530,8 +1318,6 @@ drop cascades to table minmaxtest drop cascades to table t_hashdist drop cascades to table t_replicate_volatile drop cascades to sequence seq_for_insert_replicated_table -drop cascades to table t_replicate_dst -drop cascades to table t_replicate_src drop cascades to table rtbl drop cascades to table t1_13532 drop cascades to table t2_13532 diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index 1e1b479d286c..a558dbae1128 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -424,16 +424,6 @@ explain (costs off) insert into t_replicate_volatile select random(), a, a from create sequence seq_for_insert_replicated_table; explain (costs off) insert into t_replicate_volatile select nextval('seq_for_insert_replicated_table'); explain (costs off) select a from t_replicate_volatile union all select * from nextval('seq_for_insert_replicated_table'); - --- insert into table with serial column -create table t_replicate_dst(id serial, i integer) distributed replicated; -create table t_replicate_src(i integer) distributed replicated; -insert into t_replicate_src select i from generate_series(1, 5) i; -explain (costs off, verbose) insert into t_replicate_dst (i) select i from t_replicate_src; -explain (costs off, verbose) with s as (select i from t_replicate_src group by i having random() > 0) insert into t_replicate_dst (i) select i from s; -insert into t_replicate_dst (i) select i from t_replicate_src; -select distinct id from gp_dist_random('t_replicate_dst') order by id; - -- update & delete explain (costs off) update t_replicate_volatile set a = 1 where b > random(); explain (costs off) update t_replicate_volatile set a = 1 from t_replicate_volatile x where x.a + random() = t_replicate_volatile.b; @@ -542,31 +532,6 @@ select c from rep_tab where c in (select distinct a from dist_tab); explain select c from rep_tab where c in (select distinct d from rand_tab); select c from rep_tab where c in (select distinct d from rand_tab); --- --- Check sub-selects with distributed replicated tables and volatile functions --- -create table t (i int) distributed replicated; -create table t1 (a int) distributed by (a); -create table t2 (a int, b float) distributed replicated; -create or replace function 
f(i int) returns int language sql security definer as $$ select i; $$; --- ensure we make gather motion when volatile functions in subplan -explain (costs off, verbose) select (select f(i) from t); -explain (costs off, verbose) select (select f(i) from t group by f(i)); -explain (costs off, verbose) select (select i from t group by i having f(i) > 0); --- ensure we do not make broadcast motion -explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a group by i); -explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a); --- ensure we make broadcast motion when volatile function in deleting motion flow -explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; --- ensure we make broadcast motion when volatile function in correlated subplan qual -explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); --- ensure we do not break broadcast motion -explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); -drop table if exists t; -drop table if exists t1; -drop table if exists t2; -drop function if exists f(i int); - -- start_ignore drop schema rpt cascade; -- end_ignore From bde50c2c0226c323dd2daef44fa1ff5480bcbe9e Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:35:51 +0200 Subject: [PATCH 085/106] Revert "Avoid rescan of modifying operations inside correlated Subplans." to resolve coflicts with 2189e1f This reverts commit c1645466534bedbc12ac52bca133f0ff73576636. --- src/backend/cdb/cdbllize.c | 11 +- src/test/regress/expected/subselect_gp.out | 232 ------------------ src/test/regress/expected/subselect_gp_1.out | 232 ------------------ .../expected/subselect_gp_optimizer.out | 232 ------------------ src/test/regress/sql/subselect_gp.sql | 104 -------- 5 files changed, 1 insertion(+), 810 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 9212152fe21e..7e61cc10e4ca 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -455,18 +455,9 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC } } - /* - * If the ModifyTable node appears inside the correlated Subplan, it has - * to be handled the same way as various *Scan nodes. Currently such - * situation may occur only for modifying CTE cases, and, therefore, - * mutator shouldn't go under ModifyTable's plans and should broadcast or - * focus the result of modifying operation if needed. - */ if (IsA(node, SeqScan) ||IsA(node, ShareInputScan) - ||IsA(node, ExternalScan) - ||(IsA(node, SubqueryScan) && IsA(((SubqueryScan *) node)->subplan, ModifyTable)) - ||IsA(node,ModifyTable)) + ||IsA(node, ExternalScan)) { Plan *scanPlan = (Plan *) node; diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index ce9060e971e3..46a854888eb4 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3228,235 +3228,3 @@ select * from t1; drop table t2; drop table t1; --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. 
--- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Result - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (cte.i > 0) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(18 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - ---start_ignore -drop table if exists t_repl; -NOTICE: table "t_repl" does not exist, skipping ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 1) - -> Result - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (cte.i > 0) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(17 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 10 -(1 row) - ---start_ignore -drop table if exists t3; -NOTICE: table "t3" does not exist, skipping ---end_ignore -create table t3 (i int, j int) distributed randomly; -insert into t3 values (1, 1), (2, 2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice4; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice4; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t3.i) - -> Result - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> 
Function Scan on generate_series i - -> Hash - -> Result - Filter: (t3.j = t2.i) - -> Materialize - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on t3 - Optimizer: Postgres query optimizer -(23 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 15 -(1 row) - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice4; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice4; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t3.i) - -> Result - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - -> Hash - -> Result - Filter: (t3.j = t_repl.i) - -> Materialize - -> Gather Motion 3:1 (slice3; segments: 3) - -> Seq Scan on t3 - Optimizer: Postgres query optimizer -(22 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 20 -(1 row) - -drop table t3; -drop table t_repl; -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 25655d29153a..79d28c66b77a 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3228,235 +3228,3 @@ select * from t1; drop table t2; drop table t1; --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. 
--- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Result - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (cte.i > 0) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(18 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - ---start_ignore -drop table if exists t_repl; -NOTICE: table "t_repl" does not exist, skipping ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 1) - -> Result - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (cte.i > 0) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(17 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 10 -(1 row) - ---start_ignore -drop table if exists t3; -NOTICE: table "t3" does not exist, skipping ---end_ignore -create table t3 (i int, j int) distributed randomly; -insert into t3 values (1, 1), (2, 2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice4; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice4; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t3.i) - -> Result - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> 
Function Scan on generate_series i - -> Hash - -> Result - Filter: (t3.j = t2.i) - -> Materialize - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on t3 - Optimizer: Postgres query optimizer -(23 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 15 -(1 row) - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice4; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice4; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t3.i) - -> Result - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - -> Hash - -> Result - Filter: (t3.j = t_repl.i) - -> Materialize - -> Gather Motion 3:1 (slice3; segments: 3) - -> Seq Scan on t3 - Optimizer: Postgres query optimizer -(22 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 20 -(1 row) - -drop table t3; -drop table t_repl; -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index b81ffb0be78b..2bf26f5458ae 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3369,235 +3369,3 @@ select * from t1; drop table t2; drop table t1; --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. 
--- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Result - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (cte.i > 0) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(18 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - ---start_ignore -drop table if exists t_repl; -NOTICE: table "t_repl" does not exist, skipping ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 1) - -> Result - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (cte.i > 0) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(17 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 10 -(1 row) - ---start_ignore -drop table if exists t3; -NOTICE: table "t3" does not exist, skipping ---end_ignore -create table t3 (i int, j int) distributed randomly; -insert into t3 values (1, 1), (2, 2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice4; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice4; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t3.i) - -> Result - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> 
Function Scan on generate_series i - -> Hash - -> Result - Filter: (t3.j = t2.i) - -> Materialize - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on t3 - Optimizer: Postgres query optimizer -(23 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 15 -(1 row) - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice4; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice4; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t3.i) - -> Result - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - -> Hash - -> Result - Filter: (t3.j = t_repl.i) - -> Materialize - -> Gather Motion 3:1 (slice3; segments: 3) - -> Seq Scan on t3 - Optimizer: Postgres query optimizer -(22 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 20 -(1 row) - -drop table t3; -drop table t_repl; -drop table t2; -drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 1e50eb45904b..da31c0f442a6 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1279,107 +1279,3 @@ select * from t1; drop table t2; drop table t1; - --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. 
--- start_ignore -drop table if exists t1; -drop table if exists t2; ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) -order by i; - -select count(*) from t1; - ---start_ignore -drop table if exists t_repl; ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) -order by i; - -select count(*) from t1; - ---start_ignore -drop table if exists t3; ---end_ignore -create table t3 (i int, j int) distributed randomly; -insert into t3 values (1, 1), (2, 2); - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) -order by i; - -select count(*) from t1; - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) -order by i; - -select count(*) from t1; - -drop table t3; -drop table t_repl; -drop table t2; -drop table t1; From 75a62bb06a8ae6ecf555069309b53b6c52619b2f Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:44:20 +0200 Subject: [PATCH 086/106] Revert "Revert "Avoid rescan of modifying operations inside the correlated SubPlans. (#596)"" to resolve conflicts with upstream 2189e1f This reverts commit 96e6caadc5e77752f08c56eaccb47166be1c0b47. 
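The shape of query this re-applied change targets is shown by the regression
tests restored below (t1 and t2 are plain hash-distributed int tables created
in src/test/regress/sql/subselect_gp.sql); roughly, a writable CTE referenced
from a correlated SubPlan:

    with cte as
    (insert into t1
      select i from generate_series(1, 5) i
      returning *)
    select * from t2
    where t2.i in (select i from cte where t2.i = cte.i)
    order by i;

As the ParallelizeCorrelatedSubPlanMutator hunk in cdbllize.c below shows, the
mutator broadcasts (or focuses) the ModifyTable result and places a
Materialize above the Motion, so a rescan of the SubPlan replays the stored
rows rather than re-executing the modifying operation.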
--- src/backend/cdb/cdbllize.c | 13 + src/test/regress/expected/subselect_gp.out | 280 ++++++++++++++++++ src/test/regress/expected/subselect_gp_1.out | 280 ++++++++++++++++++ .../expected/subselect_gp_optimizer.out | 280 ++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 121 ++++++++ 5 files changed, 974 insertions(+) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 7e61cc10e4ca..a1253653151a 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -651,6 +651,19 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC return ParallelizeCorrelatedSubPlanMutator(node, ctx); } + if (IsA(node, ModifyTable)) + { + Plan *mplan = (Plan *) node; + + if (ctx->movement == MOVEMENT_BROADCAST) + broadcastPlan(mplan, false /* stable */ , false /* rescannable */ , + ctx->currentPlanFlow->numsegments); + else + focusPlan(mplan, false /* stable */ , false /* rescannable */ ); + + return (Node *) materialize_subplan((PlannerInfo *) ctx->base.node, mplan); + } + Node *result = plan_tree_mutator(node, ParallelizeCorrelatedSubPlanMutator, ctx); /* diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 46a854888eb4..c2039ac94d3a 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3172,6 +3172,286 @@ select count(*) from t1; 0 (1 row) +drop table t2; +drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. +-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(16 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + 
Optimizer: Postgres query optimizer +(14 rows) + +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 5 +(1 row) + +select count(*) from t1 where i = 1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(11 rows) + +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(15 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(14 rows) + +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 5 +(1 row) + +select count(*) from t1 where i = 1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 1:1 (slice2; segments: 1) + -> Seq Scan on t_repl + 
Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(11 rows) + +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +drop table t_repl; drop table t2; drop table t1; -- Test that executor does not treat InitPlans as rescannable diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 79d28c66b77a..942d9ec8d3ce 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3172,6 +3172,286 @@ select count(*) from t1; 0 (1 row) +drop table t2; +drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. +-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(16 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(14 rows) + +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 5 +(1 row) + +select count(*) from t1 where i = 1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + QUERY PLAN 
+----------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(11 rows) + +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(15 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(14 rows) + +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 5 +(1 row) + +select count(*) from t1 where i = 1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 1:1 (slice2; segments: 1) + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(11 rows) + +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + 
+drop table t_repl; drop table t2; drop table t1; -- Test that executor does not treat InitPlans as rescannable diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 2bf26f5458ae..0be11b2664fa 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3313,6 +3313,286 @@ select count(*) from t1; 0 (1 row) +drop table t2; +drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. +-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(16 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(14 rows) + +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 5 +(1 row) + +select count(*) from t1 where i = 1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(11 rows) + +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where 
t2.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(15 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(14 rows) + +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 5 +(1 row) + +select count(*) from t1 where i = 1; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 1:1 (slice2; segments: 1) + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(11 rows) + +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +drop table t_repl; drop table t2; drop table t1; -- Test that executor does not treat InitPlans as rescannable diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index da31c0f442a6..6a85f78e5f9d 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1249,6 +1249,127 @@ select count(*) from t1; drop table t2; drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed 
correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. +-- start_ignore +drop table if exists t1; +drop table if exists t2; +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i) +order by i; + +select count(*) from t1; + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + +with cte as +(update t1 set i = 1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + +select count(*) from t1; +select count(*) from t1 where i = 1; + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + +with cte as +(delete from t1 + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i); + +select count(*) from t1; + +--start_ignore +drop table if exists t_repl; +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i) +order by i; + +select count(*) from t1; + +explain (costs off) +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + +with cte as +(update t1 set i = 1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + +select count(*) from t1; +select count(*) from t1 where i = 1; + +explain (costs off) +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + +with cte as +(delete from t1 + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i); + +select count(*) from t1; + +drop table t_repl; +drop table t2; +drop table t1; + -- Test that executor does not treat InitPlans as rescannable -- while initializing executor state. Otherwise, for InitPlan containing -- non-rescannable operations (like Split Update node) executor may From 5f36c9792960ade5c0bb348ceaca7dc9660f19a3 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:45:45 +0200 Subject: [PATCH 087/106] Revert "Make InitPlans non-rescannable during estate initialization (#595)" to resolve conflicts with upstream 2189e1f This reverts commit d0a5bc0bbd4703cf5242dd178aa8ea1edf082a4f. 
--- src/backend/executor/execMain.c | 6 +-- .../translate/CTranslatorDXLToPlStmt.cpp | 17 ++---- src/backend/optimizer/plan/subselect.c | 11 ++-- .../gpopt/translate/CTranslatorDXLToPlStmt.h | 4 +- src/test/regress/expected/subselect_gp.out | 54 ------------------- src/test/regress/expected/subselect_gp_1.out | 54 ------------------- .../expected/subselect_gp_optimizer.out | 54 ------------------- src/test/regress/sql/subselect_gp.sql | 31 ----------- 8 files changed, 13 insertions(+), 218 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 3dcdbb6f9578..08d2e0f1aa5f 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2264,13 +2264,11 @@ InitPlan(QueryDesc *queryDesc, int eflags) /* * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. * - * GPDB: We always set the REWIND flag, except InitPlans, - * to delay eagerfree. + * GPDB: We always set the REWIND flag, to delay eagerfree. */ sp_eflags = eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); - if (bms_is_member(subplan_idx + 1, plannedstmt->rewindPlanIDs)) - sp_eflags |= EXEC_FLAG_REWIND; + sp_eflags |= EXEC_FLAG_REWIND; Plan *subplan = (Plan *) lfirst(l); subplanstate = ExecInitNode(subplan, estate, sp_eflags); diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 1cc796434dac..fc357c46d5b3 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -279,7 +279,7 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, // pplstmt->intoClause = m_pctxdxltoplstmt->Pintocl(); planned_stmt->intoPolicy = m_dxl_to_plstmt_context->GetDistributionPolicy(); - SetSubPlanVariables(planned_stmt); + SetInitPlanVariables(planned_stmt); if (CMD_SELECT == m_cmd_type && NULL != dxlnode->GetDXLDirectDispatchInfo()) { @@ -344,16 +344,14 @@ CTranslatorDXLToPlStmt::TranslateDXLOperatorToPlan( //--------------------------------------------------------------------------- // @function: -// CTranslatorDXLToPlStmt::SetSubPlanVariables +// CTranslatorDXLToPlStmt::SetInitPlanVariables // // @doc: // Iterates over the plan to set the qDispSliceId that is found in the plan // as well as its subplans. Set the number of parameters used in the plan. -// Simultaneously fills rewindPlanIDs bitmapset in PlannedStmt with plan_id -// of all SubPlans, except InitPlans. //--------------------------------------------------------------------------- void -CTranslatorDXLToPlStmt::SetSubPlanVariables(PlannedStmt *planned_stmt) +CTranslatorDXLToPlStmt::SetInitPlanVariables(PlannedStmt *planned_stmt) { if (1 != m_dxl_to_plstmt_context @@ -370,9 +368,6 @@ CTranslatorDXLToPlStmt::SetSubPlanVariables(PlannedStmt *planned_stmt) List *subplan_list = gpdb::ExtractNodesPlan(planned_stmt->planTree, T_SubPlan, true); - // set of plan_ids of any SubPlan except InitPLan - Bitmapset *planIds = NULL; - ListCell *lc = NULL; ForEach(lc, subplan_list) @@ -382,8 +377,6 @@ CTranslatorDXLToPlStmt::SetSubPlanVariables(PlannedStmt *planned_stmt) { SetInitPlanSliceInformation(subplan); } - else - planIds = gpdb::BmsAddMember(planIds, subplan->plan_id); } // InitPlans can also be defined in subplans. 
We therefore have to iterate @@ -404,12 +397,8 @@ CTranslatorDXLToPlStmt::SetSubPlanVariables(PlannedStmt *planned_stmt) { SetInitPlanSliceInformation(subplan); } - else - planIds = gpdb::BmsAddMember(planIds, subplan->plan_id); } } - - planned_stmt->rewindPlanIDs = planIds; } //--------------------------------------------------------------------------- diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 15a585695a3b..ee82690d4f09 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -979,12 +979,13 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, root->init_plans = lappend(root->init_plans, splan); /* - * Executor passes EXEC_REWIND flag to the plan nodes in order to indicate - * that underlying node or subplan are likely to be rescanned. Moreover, - * for any SubPlan, except InitPlans, rescan is expected and EXEC_REWIND - * should be set for them. EXEC_REWIND also allows to delay the eager free. + * A parameterless subplan (not initplan) should be prepared to handle + * REWIND efficiently. If it has direct parameters then there's no point + * since it'll be reset on each scan anyway; and if it's an initplan then + * there's no point since it won't get re-run without parameter changes + * anyway. The input of a hashed subplan doesn't need REWIND either. */ - if (!splan->is_initplan) + if (splan->parParam == NIL && !splan->is_initplan && !splan->useHashTable) root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs, splan->plan_id); diff --git a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h index 809cdae278fa..00666a645f70 100644 --- a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h +++ b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h @@ -193,8 +193,8 @@ class CTranslatorDXLToPlStmt // Set the qDispSliceId in the subplans defining an initplan void SetInitPlanSliceInformation(SubPlan *); - // Set InitPlanVariable and fill rewindPlanIDs in PlannedStmt - void SetSubPlanVariables(PlannedStmt *); + // Set InitPlanVariable in PlannedStmt + void SetInitPlanVariables(PlannedStmt *); // translate DXL table scan node into a SeqScan node Plan *TranslateDXLTblScan( diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index c2039ac94d3a..e8cbd15ee21e 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3454,57 +3454,3 @@ select count(*) from t1; drop table t_repl; drop table t2; drop table t1; --- Test that executor does not treat InitPlans as rescannable --- while initializing executor state. Otherwise, for InitPlan containing --- non-rescannable operations (like Split Update node) executor may --- fail with an assertion error. 
--- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); -explain (costs off) -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - QUERY PLAN --------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - -> Result - One-Time Filter: (0 = $1) - InitPlan 1 (returns $1) (slice4) - -> Gather Motion 3:1 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - -> Seq Scan on t2 - Optimizer: Postgres query optimizer -(12 rows) - -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - i ---- - 1 -(1 row) - -select * from t1; - i ---- - 0 -(1 row) - -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 942d9ec8d3ce..da9f01ca9002 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3454,57 +3454,3 @@ select count(*) from t1; drop table t_repl; drop table t2; drop table t1; --- Test that executor does not treat InitPlans as rescannable --- while initializing executor state. Otherwise, for InitPlan containing --- non-rescannable operations (like Split Update node) executor may --- fail with an assertion error. --- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); -explain (costs off) -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - QUERY PLAN --------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - -> Result - One-Time Filter: (0 = $1) - InitPlan 1 (returns $1) (slice4) - -> Gather Motion 3:1 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - -> Seq Scan on t2 - Optimizer: Postgres query optimizer -(12 rows) - -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - i ---- - 1 -(1 row) - -select * from t1; - i ---- - 0 -(1 row) - -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 0be11b2664fa..bb47175efbb1 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3595,57 +3595,3 @@ select count(*) from t1; drop table t_repl; drop table t2; drop table t1; --- Test that executor does not treat InitPlans as rescannable --- while initializing executor state. Otherwise, for InitPlan containing --- non-rescannable operations (like Split Update node) executor may --- fail with an assertion error. 
--- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); -explain (costs off) -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - QUERY PLAN --------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - -> Result - One-Time Filter: (0 = $1) - InitPlan 1 (returns $1) (slice4) - -> Gather Motion 3:1 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - -> Seq Scan on t2 - Optimizer: Postgres query optimizer -(12 rows) - -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - i ---- - 1 -(1 row) - -select * from t1; - i ---- - 0 -(1 row) - -drop table t2; -drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 6a85f78e5f9d..5cd4e6b2c483 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1369,34 +1369,3 @@ select count(*) from t1; drop table t_repl; drop table t2; drop table t1; - --- Test that executor does not treat InitPlans as rescannable --- while initializing executor state. Otherwise, for InitPlan containing --- non-rescannable operations (like Split Update node) executor may --- fail with an assertion error. --- start_ignore -drop table if exists t1; -drop table if exists t2; ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); - -explain (costs off) -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - -with cte as -(update t1 set i = 0 - returning i) -select i from t2 -where 0 = (select i from cte); - -select * from t1; - -drop table t2; -drop table t1; From 2e6cbb49c8becd8641d3f6c929cc53b516c4429a Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:47:31 +0200 Subject: [PATCH 088/106] Revert "Avoid rescan of modifying operations inside the correlated SubPlans. (#596)" to solve conflicts with upstream 2189e1f This reverts commit 0b4466da9859a3762465512f2514af27f60da3b2. 
--- src/backend/cdb/cdbllize.c | 13 - src/test/regress/expected/subselect_gp.out | 280 ------------------ src/test/regress/expected/subselect_gp_1.out | 280 ------------------ .../expected/subselect_gp_optimizer.out | 280 ------------------ src/test/regress/sql/subselect_gp.sql | 121 -------- 5 files changed, 974 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index a1253653151a..7e61cc10e4ca 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -651,19 +651,6 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC return ParallelizeCorrelatedSubPlanMutator(node, ctx); } - if (IsA(node, ModifyTable)) - { - Plan *mplan = (Plan *) node; - - if (ctx->movement == MOVEMENT_BROADCAST) - broadcastPlan(mplan, false /* stable */ , false /* rescannable */ , - ctx->currentPlanFlow->numsegments); - else - focusPlan(mplan, false /* stable */ , false /* rescannable */ ); - - return (Node *) materialize_subplan((PlannerInfo *) ctx->base.node, mplan); - } - Node *result = plan_tree_mutator(node, ParallelizeCorrelatedSubPlanMutator, ctx); /* diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index e8cbd15ee21e..63a827a1728e 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3174,283 +3174,3 @@ select count(*) from t1; drop table t2; drop table t1; --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. --- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(16 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - QUERY PLAN --------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - Optimizer: 
Postgres query optimizer -(14 rows) - -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 5 -(1 row) - -select count(*) from t1 where i = 1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - QUERY PLAN ------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Delete on t1 - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(11 rows) - -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - ---start_ignore -drop table if exists t_repl; -NOTICE: table "t_repl" does not exist, skipping ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(15 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - QUERY PLAN --------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(14 rows) - -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 5 -(1 row) - -select count(*) from t1 where i = 1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - QUERY PLAN --------------------------------------------------------------------- - Gather Motion 1:1 (slice2; segments: 1) - -> Seq Scan on t_repl - Filter: 
(SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) - -> Delete on t1 - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(11 rows) - -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - -drop table t_repl; -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index da9f01ca9002..3676b7866c4e 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3174,283 +3174,3 @@ select count(*) from t1; drop table t2; drop table t1; --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. --- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(16 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - QUERY PLAN --------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(14 rows) - -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 5 -(1 row) - -select count(*) from t1 where i = 1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - QUERY PLAN ------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) - -> Seq Scan on t2 - 
Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Delete on t1 - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(11 rows) - -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - ---start_ignore -drop table if exists t_repl; -NOTICE: table "t_repl" does not exist, skipping ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(15 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - QUERY PLAN --------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(14 rows) - -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 5 -(1 row) - -select count(*) from t1 where i = 1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - QUERY PLAN --------------------------------------------------------------------- - Gather Motion 1:1 (slice2; segments: 1) - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) - -> Delete on t1 - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(11 rows) - -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - -drop table t_repl; -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out 
b/src/test/regress/expected/subselect_gp_optimizer.out index bb47175efbb1..551f06d9e248 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3315,283 +3315,3 @@ select count(*) from t1; drop table t2; drop table t1; --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. --- start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - Merge Key: t2.i - -> Sort - Sort Key: t2.i - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(16 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - QUERY PLAN --------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice3; segments: 3) - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(14 rows) - -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 5 -(1 row) - -select count(*) from t1 where i = 1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - QUERY PLAN ------------------------------------------------------------------------ - Gather Motion 3:1 (slice2; segments: 3) - -> Seq Scan on t2 - Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (t2.i = cte.i) - -> Materialize - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Delete on t1 - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(11 rows) - -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - ---start_ignore -drop table if exists t_repl; -NOTICE: table "t_repl" does not exist, 
skipping ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Sort - Sort Key: t_repl.i - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Insert on t1 - -> Redistribute Motion 1:3 (slice1; segments: 1) - Hash Key: i.i - -> Function Scan on generate_series i - Optimizer: Postgres query optimizer -(15 rows) - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - i ---- - 1 - 2 -(2 rows) - -select count(*) from t1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - QUERY PLAN --------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice3; segments: 1) - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice3; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice2; segments: 3) - -> Update on t1 - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "outer".i - -> Split - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(14 rows) - -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 5 -(1 row) - -select count(*) from t1 where i = 1; - count -------- - 5 -(1 row) - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - QUERY PLAN --------------------------------------------------------------------- - Gather Motion 1:1 (slice2; segments: 1) - -> Seq Scan on t_repl - Filter: (SubPlan 1) - SubPlan 1 (slice2; segments: 3) - -> Subquery Scan on cte - Filter: (t_repl.i = cte.i) - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) - -> Delete on t1 - -> Seq Scan on t1 - Optimizer: Postgres query optimizer -(11 rows) - -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - -drop table t_repl; -drop table t2; -drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 5cd4e6b2c483..39f6d76f26a1 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1248,124 +1248,3 @@ select count(*) from t1; drop table t2; drop table t1; - --- Test correlated SubPlans containing writable operation are --- planned and executed correctly. The result of modifying operations --- should be broadcasted (or focused) and materialized. 
--- start_ignore -drop table if exists t1; -drop table if exists t2; ---end_ignore -create table t1(i int) distributed by (i); -create table t2(i int) distributed by (i); -insert into t2 values (1), (2); - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i) -order by i; - -select count(*) from t1; - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - -with cte as -(update t1 set i = 1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - -select count(*) from t1; -select count(*) from t1 where i = 1; - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - -with cte as -(delete from t1 - returning *) -select * from t2 -where t2.i in (select i from cte where t2.i = cte.i); - -select count(*) from t1; - ---start_ignore -drop table if exists t_repl; ---end_ignore -create table t_repl (i int) distributed replicated; -insert into t_repl values (1), (2); - -explain (costs off) -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - -with cte as -(insert into t1 - select i from generate_series(1, 5) i - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i) -order by i; - -select count(*) from t1; - -explain (costs off) -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - -with cte as -(update t1 set i = 1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - -select count(*) from t1; -select count(*) from t1 where i = 1; - -explain (costs off) -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - -with cte as -(delete from t1 - returning *) -select * from t_repl -where t_repl.i in (select i from cte where t_repl.i = cte.i); - -select count(*) from t1; - -drop table t_repl; -drop table t2; -drop table t1; From 0ec868f56f675ebfd92e68a978c99dae4e17e358 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 14:48:02 +0200 Subject: [PATCH 089/106] Revert "Support Explicit Redistribute Motion inside the SubPlans" to solve conflicts with upstream commit 2189e1f This reverts commit 4a9aac4ceb0d1ed296b19c3d6a603e8d101d999a. 
--- src/backend/nodes/copyfuncs.c | 1 - src/backend/nodes/equalfuncs.c | 1 - src/backend/nodes/outfuncs.c | 4 +- src/backend/nodes/readfast.c | 1 - src/test/regress/expected/subselect_gp.out | 62 ------------------- src/test/regress/expected/subselect_gp_1.out | 62 ------------------- .../expected/subselect_gp_optimizer.out | 62 ------------------- src/test/regress/sql/subselect_gp.sql | 33 ---------- 8 files changed, 1 insertion(+), 225 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 34acc5cb98af..f865a436be80 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2321,7 +2321,6 @@ _copyFlow(const Flow *from) COPY_SCALAR_FIELD(numsegments); COPY_NODE_FIELD(hashExprs); COPY_NODE_FIELD(hashOpfamilies); - COPY_SCALAR_FIELD(segidColIdx); COPY_NODE_FIELD(flow_before_req_move); return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 0eb552d90011..e6a246cbb9c4 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -769,7 +769,6 @@ _equalFlow(const Flow *a, const Flow *b) COMPARE_SCALAR_FIELD(numsegments); COMPARE_NODE_FIELD(hashExprs); COMPARE_NODE_FIELD(hashOpfamilies); - COMPARE_SCALAR_FIELD(segidColIdx); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 65a3f618444c..0dfe3360ab66 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -60,8 +60,6 @@ #define WRITE_INT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) -#define WRITE_INT16_FIELD(fldname) WRITE_INT_FIELD(fldname) - /* Write an unsigned integer field (anything written as ":fldname %u") */ #define WRITE_UINT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname) @@ -1957,7 +1955,7 @@ _outFlow(StringInfo str, const Flow *node) WRITE_NODE_FIELD(hashExprs); WRITE_NODE_FIELD(hashOpfamilies); - WRITE_INT16_FIELD(segidColIdx); + WRITE_NODE_FIELD(flow_before_req_move); } diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index 17b1bd9548a4..de6c0c2c011a 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -2187,7 +2187,6 @@ _readFlow(void) READ_NODE_FIELD(hashExprs); READ_NODE_FIELD(hashOpfamilies); - READ_INT16_FIELD(segidColIdx); READ_NODE_FIELD(flow_before_req_move); READ_DONE(); diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 63a827a1728e..89f31abdb1cd 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3112,65 +3112,3 @@ select * from r where b in (select b from s where c=10 order by c limit 2); 1 | 2 | 3 (1 row) --- Test that Explicit Redistribute Motion is applied properly for --- queries that have modifying operation inside a SubPlan. That --- requires the ModifyTable's top Flow node to be copied correctly inside --- ParallelizeSubPlan function. 
---start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1 (i int) distributed randomly; -create table t2 (i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); -explain (costs off) -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice4; segments: 3) - -> Result - One-Time Filter: $1 - InitPlan 1 (returns $1) (slice5) - -> Limit - -> Gather Motion 3:1 (slice3; segments: 3) - -> Limit - -> Delete on t1 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t2_1.i) - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: t1.i - -> Seq Scan on t1 - -> Hash - -> Seq Scan on t2 t2_1 - -> Seq Scan on t2 - Optimizer: Postgres query optimizer -(18 rows) - -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 3676b7866c4e..398aa5e7f6a0 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3112,65 +3112,3 @@ select * from r where b in (select b from s where c=10 order by c limit 2); 1 | 2 | 3 (1 row) --- Test that Explicit Redistribute Motion is applied properly for --- queries that have modifying operation inside a SubPlan. That --- requires the ModifyTable's top Flow node to be copied correctly inside --- ParallelizeSubPlan function. 
---start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1 (i int) distributed randomly; -create table t2 (i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); -explain (costs off) -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice4; segments: 3) - -> Result - One-Time Filter: $1 - InitPlan 1 (returns $1) (slice5) - -> Limit - -> Gather Motion 3:1 (slice3; segments: 3) - -> Limit - -> Delete on t1 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t2_1.i) - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: t1.i - -> Seq Scan on t1 - -> Hash - -> Seq Scan on t2 t2_1 - -> Seq Scan on t2 - Optimizer: Postgres query optimizer -(18 rows) - -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - -drop table t2; -drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 551f06d9e248..706dca15d9b4 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3253,65 +3253,3 @@ select * from r where b in (select b from s where c=10 order by c limit 2); 1 | 2 | 3 (1 row) --- Test that Explicit Redistribute Motion is applied properly for --- queries that have modifying operation inside a SubPlan. That --- requires the ModifyTable's top Flow node to be copied correctly inside --- ParallelizeSubPlan function. 
---start_ignore -drop table if exists t1; -NOTICE: table "t1" does not exist, skipping -drop table if exists t2; -NOTICE: table "t2" does not exist, skipping ---end_ignore -create table t1 (i int) distributed randomly; -create table t2 (i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); -explain (costs off) -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice4; segments: 3) - -> Result - One-Time Filter: $1 - InitPlan 1 (returns $1) (slice5) - -> Limit - -> Gather Motion 3:1 (slice3; segments: 3) - -> Limit - -> Delete on t1 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> Hash Join - Hash Cond: (t1.i = t2_1.i) - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: t1.i - -> Seq Scan on t1 - -> Hash - -> Seq Scan on t2 t2_1 - -> Seq Scan on t2 - Optimizer: Postgres query optimizer -(18 rows) - -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - i ---- - 1 -(1 row) - -select count(*) from t1; - count -------- - 0 -(1 row) - -drop table t2; -drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 39f6d76f26a1..67d8a149a507 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1215,36 +1215,3 @@ explain (costs off) select * from r where b in (select b from s where c=10 order select * from r where b in (select b from s where c=10 order by c); explain (costs off) select * from r where b in (select b from s where c=10 order by c limit 2); select * from r where b in (select b from s where c=10 order by c limit 2); - --- Test that Explicit Redistribute Motion is applied properly for --- queries that have modifying operation inside a SubPlan. That --- requires the ModifyTable's top Flow node to be copied correctly inside --- ParallelizeSubPlan function. 
---start_ignore -drop table if exists t1; -drop table if exists t2; ---end_ignore -create table t1 (i int) distributed randomly; -create table t2 (i int) distributed by (i); -insert into t1 values (1); -insert into t2 values (1); - -explain (costs off) -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - -with cte as -(delete from t1 - using t2 where t2.i = t1.i - returning t1.i) -select i from t2 -where exists (select i from cte); - -select count(*) from t1; - -drop table t2; -drop table t1; From 56f634a46fe8effca95608d088b085842ad7155a Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 15:56:54 +0200 Subject: [PATCH 090/106] fix subselect_gp test on power apply 720464e to our custom power answer file git show 720464e -- src/test/regress/expected/subselect_gp.out > /tmp/subselect.patch patch -p1 --merge src/test/regress/expected/subselect_gp_1.out < /tmp/subselect.patch Co-authored-by: bhari Co-authored-by: Alexander Kondakov --- src/test/regress/expected/subselect_gp_1.out | 34 ++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 398aa5e7f6a0..5f1d9a6089ce 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3112,3 +3112,37 @@ select * from r where b in (select b from s where c=10 order by c limit 2); 1 | 2 | 3 (1 row) +-- Test nested query with aggregate inside a sublink, +-- ORCA should correctly normalize the aggregate expression inside the +-- sublink's nested query and the column variable accessed in aggregate should +-- be accessible to the aggregate after the normalization of query. 
+-- If the query is not supported, ORCA should gracefully fallback to postgres +explain (COSTS OFF) with t0 AS ( + SELECT + ROW_TO_JSON((SELECT x FROM (SELECT max(t.b)) x)) + AS c + FROM r + JOIN s ON true + JOIN s as t ON true + ) +SELECT c FROM t0; + QUERY PLAN +--------------------------------------------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice3; segments: 3) + -> Aggregate + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Nested Loop + -> Seq Scan on r + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on s + -> Materialize + -> Seq Scan on s t + SubPlan 1 (slice0) + -> Subquery Scan on x + -> Result + Optimizer: Postgres query optimizer +(16 rows) + From d7bab765a0a0f98004d919456432158b1d715d38 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Fri, 24 Nov 2023 16:01:07 +0200 Subject: [PATCH 091/106] fix subselect_gp test on power apply 2189e1f to our custom power answer file git show 2189e1f -- src/test/regress/expected/subselect_gp.out > /tmp/subselect.patch patch -p1 --merge src/test/regress/expected/subselect_gp_1.out < /tmp/subselect.patch Co-authored-by: "Kevin.wyh" Co-authored-by: wuyuhao28 --- src/test/regress/expected/subselect_gp_1.out | 170 +++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 5f1d9a6089ce..16a7153b0b08 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3146,3 +3146,173 @@ SELECT c FROM t0; Optimizer: Postgres query optimizer (16 rows) +-- +-- Test case for ORCA semi join with random table +-- See https://github.com/greenplum-db/gpdb/issues/16611 +-- +--- case for random distribute +create table table_left (l1 int, l2 int) distributed by (l1); +create table table_right (r1 int, r2 int) distributed randomly; +create index table_right_idx on table_right(r1); +insert into table_left values (1,1); +insert into table_right select i, i from generate_series(1, 300) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +--- make sure the same value (1,1) rows are inserted into different segments +select count(distinct gp_segment_id) > 1 from table_right where r1 = 1; + ?column? 
+---------- + t +(1 row) + +analyze table_left; +analyze table_right; +-- two types of semi join tests +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for replicate distribute +alter table table_right set distributed replicated; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(9 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (table_right.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right.r1 + -> Seq Scan on table_right + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(9 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for partition table with random distribute +drop table table_right; +create table table_right (r1 int, r2 int) distributed randomly partition by range (r1) ( start (0) end (300) every (100)); +NOTICE: CREATE TABLE will create partition "table_right_1_prt_1" for table "table_right" +NOTICE: CREATE TABLE will create partition "table_right_1_prt_2" for table "table_right" +NOTICE: CREATE TABLE will create partition "table_right_1_prt_3" for table "table_right" +create index table_right_idx on table_right(r1); +insert into table_right select i, i from generate_series(1, 299) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +analyze table_right; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right_1_prt_1.r1 = 
table_left.l1) + -> HashAggregate + Group Key: table_right_1_prt_1.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right_1_prt_1.r1 + -> Append + -> Seq Scan on table_right_1_prt_1 + -> Seq Scan on table_right_1_prt_2 + -> Seq Scan on table_right_1_prt_3 + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(14 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (table_right_1_prt_1.r1 = table_left.l1) + -> HashAggregate + Group Key: table_right_1_prt_1.r1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: table_right_1_prt_1.r1 + -> Append + -> Seq Scan on table_right_1_prt_1 + -> Seq Scan on table_right_1_prt_2 + -> Seq Scan on table_right_1_prt_3 + -> Hash + -> Seq Scan on table_left + Optimizer: Postgres query optimizer +(14 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +-- clean up +drop table table_left; +drop table table_right; From 55e15112df80c0762eea920b72af8c22cdd037cf Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 24 Aug 2023 22:37:18 +0300 Subject: [PATCH 092/106] Support Explicit Redistribute Motion inside the SubPlans Flow structure has a segidColIdx field, which referencing a gp_segment_id column in plan's targetList when the Explicit Redistribute Motion is requested. There was a problem, that _copyFlow, _equalFlow, _outFlow and _readFlow functions did not handle the segidColIdx field. Therefore, the Flow node could not be serialized and deserialized, or copied correctly. The problem manifested itself when a query had UPDATE/DELETE operation, that required Explicit Redistribute, inside the SubPlan (or InitPlan). The Explicit Redistribute did not applied correctly inside the apply_motion_mutator function because by that moment the value segidColIdx had been lost. The problem occured because previously SubPlans had been mutated inside the ParallelizeSubplan function, where the SubPlan's plan had been copied via copyObject function. This function copies the whole plan including the Flow node, which is copied via _copyFlow function. However the Flow node copying did not include the copying of segidColIdx Flow field, which is used for valid performance of Explicit Redistribute Motion. Therefore, this patch solves the issue by adding segidColIdx to the list of fields to copy, serialize, deserialize and compare in the _copyFlow, _outFlow, _readFlow and _equalFlow functions respectively. 
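
For readers less familiar with the node support functions: copyfuncs.c, outfuncs.c, readfast.c and equalfuncs.c each list every field of a node explicitly, so any field that is left out is silently dropped whenever the node is copied or serialized. The standalone sketch below is illustrative only; DemoFlow, copy_demo_flow and the simplified COPY_SCALAR_FIELD macro are hypothetical stand-ins rather than GPDB symbols, and the authoritative change is the diff that follows. It shows how a scalar field that the copy routine never assigns comes back as zero in the copy, which is how segidColIdx was being lost.

```
/*
 * Illustrative sketch only: a toy version of the copyfuncs.c pattern.
 * DemoFlow, copy_demo_flow and this COPY_SCALAR_FIELD are hypothetical
 * stand-ins for the real Flow node and macros.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

typedef struct DemoFlow
{
	int		flotype;		/* stand-ins for the real Flow fields */
	int		numsegments;
	short	segidColIdx;	/* the field the patch adds to _copyFlow & co. */
} DemoFlow;

/* simplified stand-in for the COPY_SCALAR_FIELD macro used in copyfuncs.c */
#define COPY_SCALAR_FIELD(fldname) (newnode->fldname = from->fldname)

static DemoFlow *
copy_demo_flow(const DemoFlow *from, bool copy_segid_col)
{
	/* like makeNode(), start from a zeroed node */
	DemoFlow   *newnode = calloc(1, sizeof(DemoFlow));

	COPY_SCALAR_FIELD(flotype);
	COPY_SCALAR_FIELD(numsegments);
	if (copy_segid_col)
		COPY_SCALAR_FIELD(segidColIdx);		/* with the fix */
	/* without the fix the field is never assigned and stays 0 */

	return newnode;
}

int
main(void)
{
	DemoFlow	orig = { .flotype = 2, .numsegments = 3, .segidColIdx = 4 };
	DemoFlow   *before = copy_demo_flow(&orig, false);
	DemoFlow   *after = copy_demo_flow(&orig, true);

	/* before the fix the copy reports 0: the column index is lost */
	printf("copy without segidColIdx handling: %d\n", before->segidColIdx);
	printf("copy with    segidColIdx handling: %d\n", after->segidColIdx);

	free(before);
	free(after);
	return 0;
}
```

The real fix applies the same idea by adding segidColIdx to the field lists of _copyFlow, _outFlow, _readFlow and _equalFlow, as shown in the diff below.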
Cherry-picked from: 4a9aac4 --- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/nodes/outfuncs.c | 4 +- src/backend/nodes/readfast.c | 1 + src/test/regress/expected/subselect_gp.out | 62 +++++++++++++++++++ src/test/regress/expected/subselect_gp_1.out | 62 +++++++++++++++++++ .../expected/subselect_gp_optimizer.out | 62 +++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 33 ++++++++++ 8 files changed, 225 insertions(+), 1 deletion(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f865a436be80..34acc5cb98af 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2321,6 +2321,7 @@ _copyFlow(const Flow *from) COPY_SCALAR_FIELD(numsegments); COPY_NODE_FIELD(hashExprs); COPY_NODE_FIELD(hashOpfamilies); + COPY_SCALAR_FIELD(segidColIdx); COPY_NODE_FIELD(flow_before_req_move); return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e6a246cbb9c4..0eb552d90011 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -769,6 +769,7 @@ _equalFlow(const Flow *a, const Flow *b) COMPARE_SCALAR_FIELD(numsegments); COMPARE_NODE_FIELD(hashExprs); COMPARE_NODE_FIELD(hashOpfamilies); + COMPARE_SCALAR_FIELD(segidColIdx); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 0dfe3360ab66..65a3f618444c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -60,6 +60,8 @@ #define WRITE_INT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) +#define WRITE_INT16_FIELD(fldname) WRITE_INT_FIELD(fldname) + /* Write an unsigned integer field (anything written as ":fldname %u") */ #define WRITE_UINT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname) @@ -1955,7 +1957,7 @@ _outFlow(StringInfo str, const Flow *node) WRITE_NODE_FIELD(hashExprs); WRITE_NODE_FIELD(hashOpfamilies); - + WRITE_INT16_FIELD(segidColIdx); WRITE_NODE_FIELD(flow_before_req_move); } diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index de6c0c2c011a..17b1bd9548a4 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -2187,6 +2187,7 @@ _readFlow(void) READ_NODE_FIELD(hashExprs); READ_NODE_FIELD(hashOpfamilies); + READ_INT16_FIELD(segidColIdx); READ_NODE_FIELD(flow_before_req_move); READ_DONE(); diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 79b34b08e782..497ebba10f95 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3316,3 +3316,65 @@ select * from table_left where exists (select 1 from table_right where l1 = r1); -- clean up drop table table_left; drop table table_right; +-- Test that Explicit Redistribute Motion is applied properly for +-- queries that have modifying operation inside a SubPlan. That +-- requires the ModifyTable's top Flow node to be copied correctly inside +-- ParallelizeSubPlan function. 
+--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1 (i int) distributed randomly; +create table t2 (i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); +explain (costs off) +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + -> Result + One-Time Filter: $1 + InitPlan 1 (returns $1) (slice5) + -> Limit + -> Gather Motion 3:1 (slice3; segments: 3) + -> Limit + -> Delete on t1 + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t2_1.i) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t1.i + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 t2_1 + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(18 rows) + +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +drop table t2; +drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 16a7153b0b08..bac4922ee0a3 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3316,3 +3316,65 @@ select * from table_left where exists (select 1 from table_right where l1 = r1); -- clean up drop table table_left; drop table table_right; +-- Test that Explicit Redistribute Motion is applied properly for +-- queries that have modifying operation inside a SubPlan. That +-- requires the ModifyTable's top Flow node to be copied correctly inside +-- ParallelizeSubPlan function. 
+--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1 (i int) distributed randomly; +create table t2 (i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); +explain (costs off) +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + -> Result + One-Time Filter: $1 + InitPlan 1 (returns $1) (slice5) + -> Limit + -> Gather Motion 3:1 (slice3; segments: 3) + -> Limit + -> Delete on t1 + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t2_1.i) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t1.i + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 t2_1 + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(18 rows) + +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +drop table t2; +drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 9357f32f8c9c..9749fbd73c62 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3457,3 +3457,65 @@ select * from table_left where exists (select 1 from table_right where l1 = r1); -- clean up drop table table_left; drop table table_right; +-- Test that Explicit Redistribute Motion is applied properly for +-- queries that have modifying operation inside a SubPlan. That +-- requires the ModifyTable's top Flow node to be copied correctly inside +-- ParallelizeSubPlan function. 
+--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1 (i int) distributed randomly; +create table t2 (i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); +explain (costs off) +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + -> Result + One-Time Filter: $1 + InitPlan 1 (returns $1) (slice5) + -> Limit + -> Gather Motion 3:1 (slice3; segments: 3) + -> Limit + -> Delete on t1 + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t2_1.i) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t1.i + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 t2_1 + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(18 rows) + +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + i +--- + 1 +(1 row) + +select count(*) from t1; + count +------- + 0 +(1 row) + +drop table t2; +drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 6d66f65b2c1e..ce306cb8b16a 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1276,3 +1276,36 @@ select * from table_left where exists (select 1 from table_right where l1 = r1); -- clean up drop table table_left; drop table table_right; + +-- Test that Explicit Redistribute Motion is applied properly for +-- queries that have modifying operation inside a SubPlan. That +-- requires the ModifyTable's top Flow node to be copied correctly inside +-- ParallelizeSubPlan function. +--start_ignore +drop table if exists t1; +drop table if exists t2; +--end_ignore +create table t1 (i int) distributed randomly; +create table t2 (i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); + +explain (costs off) +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + +with cte as +(delete from t1 + using t2 where t2.i = t1.i + returning t1.i) +select i from t2 +where exists (select i from cte); + +select count(*) from t1; + +drop table t2; +drop table t1; From 120aaef599cf4cce1fac430b833a1b74b4a5d8bd Mon Sep 17 00:00:00 2001 From: Viktor Kurilko Date: Fri, 24 Nov 2023 20:04:24 +0300 Subject: [PATCH 093/106] Fix CTEs with volatile target list for case of SegmentGeneral and General locuses (#633) Plan nodes with the General and SegmentGeneral locus type should return the same result on each of the segments. But a volatile function does not guarantee the same result for different calls, and therefore if the target list of a plan node with General or SegmentGeneral locus type contains a volatile function, then the locus of such a plan node should be brought to SingleQE. In the current implementation, plan node with volatile function in target list is brought to SingleQE not for all subqueries. If volatile function is in target list of CTE, multiset functions and union/except/intersect then they can hide the volatile function in the underlying nodes and current implementation can't detect them. 
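
One of the shapes added to the regression tests in this patch illustrates the
General-locus case (t_hashdist is assumed to be the suite's ordinary
hash-distributed test table):

```sql
-- generate_series() has General locus, but the CTE target list calls random(),
-- so the CTE has to be brought to SingleQE instead of running on every segment.
with cte as (
    select a, a * random() from generate_series(1, 5) a
)
select * from cte join t_hashdist using(a);
```
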
The volatile function can be executed on more than one segment and each of them will work with a different result set which is not correct. This patch fixes the error by moving the processing of volatile functions in the target list for plan nodes with the General and SegmentGeneral locus type to subquery_planner so that processing is performed for all subqueries. --- src/backend/optimizer/path/allpaths.c | 15 -- src/backend/optimizer/plan/planner.c | 18 +++ src/backend/optimizer/plan/subselect.c | 15 -- src/test/regress/expected/bfv_planner.out | 152 ++++++++++++++++++++ src/test/regress/expected/rpt.out | 124 ++++++++++++++++ src/test/regress/expected/rpt_optimizer.out | 124 ++++++++++++++++ src/test/regress/sql/bfv_planner.sql | 50 +++++++ src/test/regress/sql/rpt.sql | 48 +++++++ 8 files changed, 516 insertions(+), 30 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 4319d5973827..f43745159e48 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1717,21 +1717,6 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, /* XXX rel->onerow = ??? */ } - if (rel->subplan->flow->locustype == CdbLocusType_General && - (contain_volatile_functions((Node *) rel->subplan->targetlist) || - contain_volatile_functions(subquery->havingQual))) - { - rel->subplan->flow->locustype = CdbLocusType_SingleQE; - rel->subplan->flow->flotype = FLOW_SINGLETON; - } - - if (rel->subplan->flow->locustype == CdbLocusType_SegmentGeneral && - (contain_volatile_functions((Node *) rel->subplan->targetlist) || - contain_volatile_functions(subquery->havingQual))) - { - rel->subplan = (Plan *) make_motion_gather(subroot, rel->subplan, NIL, CdbLocusType_SingleQE); - } - rel->subroot = subroot; /* Isolate the params needed by this specific subplan */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 215d43f7b60c..e0b998b9671f 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -923,6 +923,24 @@ subquery_planner(PlannerGlobal *glob, Query *parse, SS_finalize_plan(root, plan, true); } + /* + * If plan contains volatile functions in the target list, then we need + * bring it to SingleQE + */ + if (plan->flow->locustype == CdbLocusType_General && + (contain_volatile_functions((Node *) plan->targetlist) || + contain_volatile_functions(parse->havingQual))) + { + plan->flow->locustype = CdbLocusType_SingleQE; + plan->flow->flotype = FLOW_SINGLETON; + } + else if (plan->flow->locustype == CdbLocusType_SegmentGeneral && + (contain_volatile_functions((Node *) plan->targetlist) || + contain_volatile_functions(parse->havingQual))) + { + plan = (Plan *) make_motion_gather(root, plan, NIL, CdbLocusType_SingleQE); + } + /* Return internal info if caller wants it */ if (subroot) *subroot = root; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ceb711d5e93e..1d7253ec35d5 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -669,21 +669,6 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, &subroot, config); - if (plan->flow->locustype == CdbLocusType_General && - (contain_volatile_functions((Node *) plan->targetlist) || - contain_volatile_functions(subquery->havingQual))) - { - plan->flow->locustype = CdbLocusType_SingleQE; - plan->flow->flotype = FLOW_SINGLETON; - } - - if (plan->flow->locustype == 
CdbLocusType_SegmentGeneral && - (contain_volatile_functions((Node *) plan->targetlist) || - contain_volatile_functions(subquery->havingQual))) - { - plan = (Plan *) make_motion_gather(subroot, plan, NIL, CdbLocusType_SingleQE); - } - /* Isolate the params needed by this specific subplan */ plan_params = root->plan_params; root->plan_params = NIL; diff --git a/src/test/regress/expected/bfv_planner.out b/src/test/regress/expected/bfv_planner.out index 6e4f499ae099..d648bcbab34c 100644 --- a/src/test/regress/expected/bfv_planner.out +++ b/src/test/regress/expected/bfv_planner.out @@ -566,6 +566,158 @@ explain (costs off) select * from t_hashdist cross join (select * from generate_ Optimizer: Postgres query optimizer (8 rows) +set gp_cte_sharing = on; +-- ensure that the volatile function is executed on one segment if it is in the CTE target list +explain (costs off, verbose) with cte as ( + select a * random() as a from generate_series(1, 5) a +) +select * from cte join (select * from t_hashdist join cte using(a)) b using(a); + QUERY PLAN +--------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Output: share0_ref1.a, b.b, b.c + -> Hash Join + Output: share0_ref1.a, b.b, b.c + Hash Cond: (b.a = share0_ref1.a) + -> Subquery Scan on b + Output: b.b, b.c, b.a + -> Hash Join + Output: share0_ref2.a, t_hashdist.b, t_hashdist.c + Hash Cond: ((t_hashdist.a)::double precision = share0_ref2.a) + -> Seq Scan on public.t_hashdist + Output: t_hashdist.b, t_hashdist.c, t_hashdist.a + -> Hash + Output: share0_ref2.a + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: share0_ref2.a + -> Shared Scan (share slice:id 1:0) + Output: share0_ref2.a + -> Hash + Output: share0_ref1.a + -> Broadcast Motion 1:3 (slice2; segments: 1) + Output: share0_ref1.a + -> Shared Scan (share slice:id 2:0) + Output: share0_ref1.a + -> Materialize + Output: (((a.a)::double precision * random())) + -> Function Scan on pg_catalog.generate_series a + Output: ((a.a)::double precision * random()) + Function Call: generate_series(1, 5) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, gp_cte_sharing=on, optimizer=off +(31 rows) + +set gp_cte_sharing = off; +explain (costs off, verbose) with cte as ( + select a, a * random() from generate_series(1, 5) a +) +select * from cte join t_hashdist using(a); + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: a.a, (((a.a)::double precision * random())), t_hashdist.b, t_hashdist.c + -> Hash Join + Output: a.a, (((a.a)::double precision * random())), t_hashdist.b, t_hashdist.c + Hash Cond: (t_hashdist.a = a.a) + -> Seq Scan on public.t_hashdist + Output: t_hashdist.b, t_hashdist.c, t_hashdist.a + -> Hash + Output: a.a, (((a.a)::double precision * random())) + -> Redistribute Motion 1:3 (slice1; segments: 1) + Output: a.a, (((a.a)::double precision * random())) + Hash Key: a.a + -> Function Scan on pg_catalog.generate_series a + Output: a.a, ((a.a)::double precision * random()) + Function Call: generate_series(1, 5) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, gp_cte_sharing=off, optimizer=off +(17 rows) + +reset gp_cte_sharing; +-- ensure that the volatile function is executed on one segment if it is in the union target list +explain (costs off, verbose) select * from ( + select random() as a from generate_series(1, 5) + 
union + select random() as a from generate_series(1, 5) +) +a join t_hashdist on a.a = t_hashdist.a; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: (random()), t_hashdist.a, t_hashdist.b, t_hashdist.c + -> Hash Join + Output: (random()), t_hashdist.a, t_hashdist.b, t_hashdist.c + Hash Cond: ((t_hashdist.a)::double precision = (random())) + -> Seq Scan on public.t_hashdist + Output: t_hashdist.a, t_hashdist.b, t_hashdist.c + -> Hash + Output: (random()) + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: (random()) + -> HashAggregate + Output: (random()) + Group Key: (random()) + -> Append + -> Function Scan on pg_catalog.generate_series + Output: random() + Function Call: generate_series(1, 5) + -> Function Scan on pg_catalog.generate_series generate_series_1 + Output: random() + Function Call: generate_series(1, 5) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(23 rows) + +-- ensure that the volatile function is executed on one segment if it is in target list of subplan of multiset function +explain (costs off, verbose) select * from ( + SELECT count(*) as a FROM anytable_out( TABLE( SELECT random()::int from generate_series(1, 5) a ) ) +) a join t_hashdist using(a); + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: (count(*)), t_hashdist.b, t_hashdist.c + -> Hash Join + Output: (count(*)), t_hashdist.b, t_hashdist.c + Hash Cond: (t_hashdist.a = (count(*))) + -> Seq Scan on public.t_hashdist + Output: t_hashdist.b, t_hashdist.c, t_hashdist.a + -> Hash + Output: (count(*)) + -> Redistribute Motion 1:3 (slice1; segments: 1) + Output: (count(*)) + Hash Key: (count(*)) + -> Aggregate + Output: count(*) + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Function Scan on pg_catalog.generate_series a + Output: (random())::integer + Function Call: generate_series(1, 5) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(21 rows) + +-- if there is a volatile function in the target list of a plan with the locus type +-- General or Segment General, then such a plan should be executed on single +-- segment, since it is assumed that nodes with such locus types will give the same +-- result on all segments, which is impossible for a volatile function. 
+-- start_ignore +drop table if exists d; +-- end_ignore +create table d (b int, a int default 1) distributed by (b); +insert into d select * from generate_series(0, 20) j; +-- change distribution without reorganize +alter table d set distributed randomly; +with cte as ( + select a as a, a * random() as rand from generate_series(0, 3)a +) +select count(distinct(rand)) from cte join d on cte.a = d.a; + count +------- + 1 +(1 row) + +drop table d; -- CTAS on general locus into replicated table create temp SEQUENCE test_seq; explain (costs off) create table t_rep as select nextval('test_seq') from (select generate_series(1,10)) t1 distributed replicated; diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index b0a595e468df..d61092721457 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -1513,6 +1513,130 @@ explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t gr Settings: enable_bitmapscan=off, enable_seqscan=off (20 rows) +set gp_cte_sharing = on; +-- ensure that the volatile function is executed on one segment if it is in the CTE target list +explain (costs off, verbose) with cte as ( + select a * random() as a from t2 +) +select * from cte join (select * from t1 join cte using(a)) b using(a); + QUERY PLAN +--------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + Output: share0_ref1.a + -> Hash Join + Output: share0_ref1.a + Hash Cond: (share0_ref2.a = share0_ref1.a) + -> Hash Join + Output: share0_ref2.a + Hash Cond: ((t1.a)::double precision = share0_ref2.a) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t1.a + Hash Key: (t1.a)::double precision + -> Seq Scan on rpt.t1 + Output: t1.a + -> Hash + Output: share0_ref2.a + -> Redistribute Motion 1:3 (slice2; segments: 1) + Output: share0_ref2.a + Hash Key: share0_ref2.a + -> Shared Scan (share slice:id 2:0) + Output: share0_ref2.a + -> Hash + Output: share0_ref1.a + -> Redistribute Motion 1:3 (slice3; segments: 1) + Output: share0_ref1.a + Hash Key: share0_ref1.a + -> Shared Scan (share slice:id 3:0) + Output: share0_ref1.a + -> Materialize + Output: (((t2.a)::double precision * random())) + -> Seq Scan on rpt.t2 + Output: ((t2.a)::double precision * random()) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, gp_cte_sharing=on, optimizer=off +(33 rows) + +set gp_cte_sharing = off; +explain (costs off, verbose) with cte as ( + select a, a * random() from t2 +) +select * from cte join t1 using(a); + QUERY PLAN +---------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: t2.a, (((t2.a)::double precision * random())) + -> Hash Join + Output: t2.a, (((t2.a)::double precision * random())) + Hash Cond: (t1.a = t2.a) + -> Seq Scan on rpt.t1 + Output: t1.a + -> Hash + Output: t2.a, (((t2.a)::double precision * random())) + -> Redistribute Motion 1:3 (slice1; segments: 1) + Output: t2.a, (((t2.a)::double precision * random())) + Hash Key: t2.a + -> Seq Scan on rpt.t2 + Output: t2.a, ((t2.a)::double precision * random()) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, gp_cte_sharing=off, optimizer=off +(16 rows) + +reset gp_cte_sharing; +-- ensure that the volatile function is executed on one segment if it is in target list of subplan of multiset function +explain (costs off, verbose) select * from ( + 
SELECT count(*) as a FROM anytable_out( TABLE( SELECT random()::int from t2 ) ) +) a join t1 using(a); + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + Output: (count(*)) + -> Hash Join + Output: (count(*)) + Hash Cond: (t1.a = (count(*))) + -> Seq Scan on rpt.t1 + Output: t1.a + -> Hash + Output: (count(*)) + -> Redistribute Motion 1:3 (slice1; segments: 1) + Output: (count(*)) + Hash Key: (count(*)) + -> Aggregate + Output: count(*) + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Seq Scan on rpt.t2 + Output: (random())::integer + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(20 rows) + +-- if there is a volatile function in the target list of a plan with the locus type +-- General or Segment General, then such a plan should be executed on single +-- segment, since it is assumed that nodes with such locus types will give the same +-- result on all segments, which is impossible for a volatile function. +-- start_ignore +drop table if exists d; +NOTICE: table "d" does not exist, skipping +drop table if exists r; +NOTICE: table "r" does not exist, skipping +-- end_ignore +create table r (a int, b int) distributed replicated; +create table d (b int, a int default 1) distributed by (b); +insert into d select * from generate_series(0, 20) j; +-- change distribution without reorganize +alter table d set distributed randomly; +insert into r values (1, 1), (2, 2), (3, 3); +with cte as ( + select a, b * random() as rand from r +) +select count(distinct(rand)) from cte join d on cte.a = d.a; + count +------- + 1 +(1 row) + +drop table r; +drop table d; drop table if exists t; drop table if exists t1; drop table if exists t2; diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index 490a94274753..36e4d859430d 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -1513,6 +1513,130 @@ explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t gr Settings: enable_bitmapscan=off, enable_seqscan=off (20 rows) +set gp_cte_sharing = on; +-- ensure that the volatile function is executed on one segment if it is in the CTE target list +explain (costs off, verbose) with cte as ( + select a * random() as a from t2 +) +select * from cte join (select * from t1 join cte using(a)) b using(a); + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + Output: share0_ref1.a + -> Hash Join + Output: share0_ref1.a + Hash Cond: (share0_ref2.a = share0_ref1.a) + -> Hash Join + Output: share0_ref2.a + Hash Cond: ((t1.a)::double precision = share0_ref2.a) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t1.a + Hash Key: (t1.a)::double precision + -> Seq Scan on rpt.t1 + Output: t1.a + -> Hash + Output: share0_ref2.a + -> Redistribute Motion 1:3 (slice2; segments: 1) + Output: share0_ref2.a + Hash Key: share0_ref2.a + -> Shared Scan (share slice:id 2:0) + Output: share0_ref2.a + -> Hash + Output: share0_ref1.a + -> Redistribute Motion 1:3 (slice3; segments: 1) + Output: share0_ref1.a + Hash Key: share0_ref1.a + -> Shared Scan (share slice:id 3:0) + Output: share0_ref1.a + -> Materialize + Output: (((t2.a)::double precision * random())) + -> Seq Scan on rpt.t2 + Output: ((t2.a)::double precision * random()) + Optimizer: Postgres 
query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, gp_cte_sharing=on +(33 rows) + +set gp_cte_sharing = off; +explain (costs off, verbose) with cte as ( + select a, a * random() from t2 +) +select * from cte join t1 using(a); + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: t2.a, (((t2.a)::double precision * random())) + -> Hash Join + Output: t2.a, (((t2.a)::double precision * random())) + Hash Cond: (t1.a = t2.a) + -> Seq Scan on rpt.t1 + Output: t1.a + -> Hash + Output: t2.a, (((t2.a)::double precision * random())) + -> Redistribute Motion 1:3 (slice1; segments: 1) + Output: t2.a, (((t2.a)::double precision * random())) + Hash Key: t2.a + -> Seq Scan on rpt.t2 + Output: t2.a, ((t2.a)::double precision * random()) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, gp_cte_sharing=off +(16 rows) + +reset gp_cte_sharing; +-- ensure that the volatile function is executed on one segment if it is in target list of subplan of multiset function +explain (costs off, verbose) select * from ( + SELECT count(*) as a FROM anytable_out( TABLE( SELECT random()::int from t2 ) ) +) a join t1 using(a); + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + Output: (count(*)) + -> Hash Join + Output: (count(*)) + Hash Cond: (t1.a = (count(*))) + -> Seq Scan on rpt.t1 + Output: t1.a + -> Hash + Output: (count(*)) + -> Redistribute Motion 1:3 (slice1; segments: 1) + Output: (count(*)) + Hash Key: (count(*)) + -> Aggregate + Output: count(*) + -> Table Function Scan on pg_catalog.anytable_out + Output: anytable_out + -> Seq Scan on rpt.t2 + Output: (random())::integer + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(20 rows) + +-- if there is a volatile function in the target list of a plan with the locus type +-- General or Segment General, then such a plan should be executed on single +-- segment, since it is assumed that nodes with such locus types will give the same +-- result on all segments, which is impossible for a volatile function. 
+-- start_ignore +drop table if exists d; +NOTICE: table "d" does not exist, skipping +drop table if exists r; +NOTICE: table "r" does not exist, skipping +-- end_ignore +create table r (a int, b int) distributed replicated; +create table d (b int, a int default 1) distributed by (b); +insert into d select * from generate_series(0, 20) j; +-- change distribution without reorganize +alter table d set distributed randomly; +insert into r values (1, 1), (2, 2), (3, 3); +with cte as ( + select a, b * random() as rand from r +) +select count(distinct(rand)) from cte join d on cte.a = d.a; + count +------- + 1 +(1 row) + +drop table r; +drop table d; drop table if exists t; drop table if exists t1; drop table if exists t2; diff --git a/src/test/regress/sql/bfv_planner.sql b/src/test/regress/sql/bfv_planner.sql index c0a34800a9b8..c0fbdd03d84f 100644 --- a/src/test/regress/sql/bfv_planner.sql +++ b/src/test/regress/sql/bfv_planner.sql @@ -316,6 +316,56 @@ explain (costs off) select * from t_hashdist cross join (select a, count(1) as s -- limit explain (costs off) select * from t_hashdist cross join (select * from generate_series(1, 10) limit 1) x; +set gp_cte_sharing = on; + +-- ensure that the volatile function is executed on one segment if it is in the CTE target list +explain (costs off, verbose) with cte as ( + select a * random() as a from generate_series(1, 5) a +) +select * from cte join (select * from t_hashdist join cte using(a)) b using(a); + +set gp_cte_sharing = off; + +explain (costs off, verbose) with cte as ( + select a, a * random() from generate_series(1, 5) a +) +select * from cte join t_hashdist using(a); + +reset gp_cte_sharing; + +-- ensure that the volatile function is executed on one segment if it is in the union target list +explain (costs off, verbose) select * from ( + select random() as a from generate_series(1, 5) + union + select random() as a from generate_series(1, 5) +) +a join t_hashdist on a.a = t_hashdist.a; + +-- ensure that the volatile function is executed on one segment if it is in target list of subplan of multiset function +explain (costs off, verbose) select * from ( + SELECT count(*) as a FROM anytable_out( TABLE( SELECT random()::int from generate_series(1, 5) a ) ) +) a join t_hashdist using(a); + +-- if there is a volatile function in the target list of a plan with the locus type +-- General or Segment General, then such a plan should be executed on single +-- segment, since it is assumed that nodes with such locus types will give the same +-- result on all segments, which is impossible for a volatile function. 
+-- start_ignore +drop table if exists d; +-- end_ignore +create table d (b int, a int default 1) distributed by (b); + +insert into d select * from generate_series(0, 20) j; +-- change distribution without reorganize +alter table d set distributed randomly; + +with cte as ( + select a as a, a * random() as rand from generate_series(0, 3)a +) +select count(distinct(rand)) from cte join d on cte.a = d.a; + +drop table d; + -- CTAS on general locus into replicated table create temp SEQUENCE test_seq; explain (costs off) create table t_rep as select nextval('test_seq') from (select generate_series(1,10)) t1 distributed replicated; diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index 1e1b479d286c..adc1aa4ece6f 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -562,6 +562,54 @@ explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); -- ensure we do not break broadcast motion explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); + +set gp_cte_sharing = on; + +-- ensure that the volatile function is executed on one segment if it is in the CTE target list +explain (costs off, verbose) with cte as ( + select a * random() as a from t2 +) +select * from cte join (select * from t1 join cte using(a)) b using(a); + +set gp_cte_sharing = off; + +explain (costs off, verbose) with cte as ( + select a, a * random() from t2 +) +select * from cte join t1 using(a); + +reset gp_cte_sharing; + +-- ensure that the volatile function is executed on one segment if it is in target list of subplan of multiset function +explain (costs off, verbose) select * from ( + SELECT count(*) as a FROM anytable_out( TABLE( SELECT random()::int from t2 ) ) +) a join t1 using(a); + +-- if there is a volatile function in the target list of a plan with the locus type +-- General or Segment General, then such a plan should be executed on single +-- segment, since it is assumed that nodes with such locus types will give the same +-- result on all segments, which is impossible for a volatile function. +-- start_ignore +drop table if exists d; +drop table if exists r; +-- end_ignore +create table r (a int, b int) distributed replicated; +create table d (b int, a int default 1) distributed by (b); + +insert into d select * from generate_series(0, 20) j; +-- change distribution without reorganize +alter table d set distributed randomly; + +insert into r values (1, 1), (2, 2), (3, 3); + +with cte as ( + select a, b * random() as rand from r +) +select count(distinct(rand)) from cte join d on cte.a = d.a; + +drop table r; +drop table d; + drop table if exists t; drop table if exists t1; drop table if exists t2; From f5215ad206d5328ff94a27376490a1abe62677bd Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Tue, 28 Nov 2023 17:49:24 +0500 Subject: [PATCH 094/106] Fix ORCA's triggers on debug build (#650) An attempt to call DML over a table with trigger may give an assertion error in `CFunctionProp` constructor. `func_stability` passed to the constructor may be more than max value (`IMDFunction::EfsSentinel`). This happens, because `CLogicalRowTrigger` class has no default value for `m_efs` in its constructor. This means that `m_efs` may contain any value (for example 2139062143). 
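
A minimal reproducer might look like the sketch below; the object names are
hypothetical, and per the note further down any trigger on a DML target table
should exercise the same code path when ORCA plans the statement:

```sql
-- Hypothetical sketch, not part of this patch: any trigger and any DML should do.
create table trig_t (a int) distributed by (a);
create or replace function trig_fn() returns trigger as $$
begin
    return new;
end;
$$ language plpgsql;
create trigger trig_t_before_ins before insert on trig_t
    for each row execute procedure trig_fn();

set optimizer = on;
set optimizer_enable_dml_triggers = on;
insert into trig_t values (1);  -- may hit the CFunctionProp assertion on debug builds
```
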
Later, when `InitFunctionProperties()` tries to calculate less strict stability level, `m_efs` is not overwritten, because it already has greater value. The patch fixes the error by adding default `m_efs` value to constructor. The planing may fall on any type of trigger on any DML operaion, so you may test it with any trigger and `optimizer_enable_dml_triggers` set to enabled. --- .../gporca/libgpopt/src/operators/CLogicalRowTrigger.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalRowTrigger.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalRowTrigger.cpp index 01cb44d1a59b..7793a9816527 100644 --- a/src/backend/gporca/libgpopt/src/operators/CLogicalRowTrigger.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CLogicalRowTrigger.cpp @@ -33,7 +33,8 @@ CLogicalRowTrigger::CLogicalRowTrigger(CMemoryPool *mp) m_rel_mdid(NULL), m_type(0), m_pdrgpcrOld(NULL), - m_pdrgpcrNew(NULL) + m_pdrgpcrNew(NULL), + m_efs(IMDFunction::EfsImmutable) { m_fPattern = true; } @@ -53,7 +54,8 @@ CLogicalRowTrigger::CLogicalRowTrigger(CMemoryPool *mp, IMDId *rel_mdid, m_rel_mdid(rel_mdid), m_type(type), m_pdrgpcrOld(pdrgpcrOld), - m_pdrgpcrNew(pdrgpcrNew) + m_pdrgpcrNew(pdrgpcrNew), + m_efs(IMDFunction::EfsImmutable) { GPOS_ASSERT(rel_mdid->IsValid()); GPOS_ASSERT(0 != type); From 32ad64effda0e108ad07254c6dc7f0361965e6a0 Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Thu, 28 Oct 2021 12:11:13 +0800 Subject: [PATCH 095/106] Fix 'DROP OWNED BY' failure when some protocol is accessible by the user. Cherry picked from 3f71222 This PR helps resolve #12748. When the access privilege is granted to some user, the 'DROP OWNED BY' clause cannot be executed with an error message saying: 'ERROR: unexpected object class XXXX (aclchk.c:XXX)'. Simple SQL to reproduce: ```sql -- Create read, write functions. CREATE OR REPLACE FUNCTION dummy_read() RETURNS INTEGER AS $$ SELECT 0 $$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION dummy_write() RETURNS INTEGER AS $$ SELECT 0 $$ LANGUAGE SQL; -- Create protocol. CREATE TRUSTED PROTOCOL dummy_proto ( readfunc = dummy_read, writefunc=dummy_write ); -- Create a user. CREATE ROLE test_role1; -- Grant access privilege. GRANT ALL ON PROTOCOL dummy_proto TO test_role1; DROP OWNED BY test_role1; ``` --- src/backend/catalog/aclchk.c | 3 +++ src/test/regress/input/external_table.source | 10 ++++++++++ src/test/regress/output/external_table.source | 10 ++++++++++ 3 files changed, 23 insertions(+) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 6a3c8b20a4a1..736e379d04ec 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -1580,6 +1580,9 @@ RemoveRoleFromObjectACL(Oid roleid, Oid classid, Oid objid) case ForeignDataWrapperRelationId: istmt.objtype = ACL_OBJECT_FDW; break; + case ExtprotocolRelationId: + istmt.objtype = ACL_OBJECT_EXTPROTOCOL; + break; default: elog(ERROR, "unexpected object class %u", classid); break; diff --git a/src/test/regress/input/external_table.source b/src/test/regress/input/external_table.source index 757be9c1ef4d..3724d4d899f8 100644 --- a/src/test/regress/input/external_table.source +++ b/src/test/regress/input/external_table.source @@ -293,6 +293,16 @@ SELECT encoding from gp_dist_random('pg_exttable') where urilocation='{gpfdist:/ DROP EXTERNAL TABLE issue_9727; RESET client_encoding; +-- Test "DROP OWNED BY" when everything of the protocol is granted to some user. 
+-- GitHub Issue #12748: https://github.com/greenplum-db/gpdb/issues/12748 +CREATE TRUSTED PROTOCOL dummy_protocol_issue_12748 (readfunc = 'read_from_file', writefunc = 'write_to_file'); +CREATE ROLE test_role_issue_12748; +GRANT ALL ON PROTOCOL dummy_protocol_issue_12748 TO test_role_issue_12748; +DROP OWNED BY test_role_issue_12748; +-- Clean up. +DROP ROLE test_role_issue_12748; +DROP PROTOCOL dummy_protocol_issue_12748; + -- -- WET tests -- diff --git a/src/test/regress/output/external_table.source b/src/test/regress/output/external_table.source index 9d78d6a30468..71b91bcf95f0 100644 --- a/src/test/regress/output/external_table.source +++ b/src/test/regress/output/external_table.source @@ -394,6 +394,16 @@ SELECT encoding from gp_dist_random('pg_exttable') where urilocation='{gpfdist:/ DROP EXTERNAL TABLE issue_9727; RESET client_encoding; +-- Test "DROP OWNED BY" when everything of the protocol is granted to some user. +-- GitHub Issue #12748: https://github.com/greenplum-db/gpdb/issues/12748 +CREATE TRUSTED PROTOCOL dummy_protocol_issue_12748 (readfunc = 'read_from_file', writefunc = 'write_to_file'); +CREATE ROLE test_role_issue_12748; +NOTICE: resource queue required -- using default resource queue "pg_default" +GRANT ALL ON PROTOCOL dummy_protocol_issue_12748 TO test_role_issue_12748; +DROP OWNED BY test_role_issue_12748; +-- Clean up. +DROP ROLE test_role_issue_12748; +DROP PROTOCOL dummy_protocol_issue_12748; -- -- WET tests -- From ebd310c0854b8767e3b760b7f42124c167f6084b Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 20 Sep 2023 13:50:57 +0300 Subject: [PATCH 096/106] Make InitPlans non-rescannable during estate initialization (#595) The executor enabled the EXEC_FLAG_REWIND flag for all types of SubPlans: either for InitPlans or correlated/uncorrelated SubPlans. This flag represents that the rescan is expected and is used during initialization of executor state. However, if a query had InitPlans, which contained non-rescannable nodes (like Split Update in the tests), the executor failed with assertion error (for example, when calling ExecInitSplitUpdate when initializing executor state inside the ExecInitNode for the InitPlan). Because InitPlans are essentially executed only once, there is no need to expect a rescan of the InitPlan. Therefore, in order to support non-rescannable operations inside the InitPlans, this patch disables EXEC_FLAG_REWIND flag for InitPlans. This patch partially returns vanilla postgres logic, which used plannedstmt->rewindPlanIDs bitmapset for making a decision whether current SubPlan should be executed with EXEC_REWIND flag. This bitmapset used to be filled with the ids of such SubPlans, that could optimize the rescan operation if the EXEC_REWIND is set, like parameterless subplans. Other types of SubPlans were considered rescannable by default and there were no need to set the EXEC_REWIND flag for them. However, GPDB interprets the EXEC_REWIND flag as an indicator that the node is likely to be rescanned, and also used this flag to delay the eager free. Therefore, this patch proposes to fill plannedstmt->rewindPlanIDs set with all the subplans ids, except InitPlans, and to set EXEC_REWIND flag only for those subplans that are in the rewindPlanIDs bitmapset. As for legacy optimizer, the if-clause influencing the filling of the bitmapset is changed inside the build_subplan function in order to filter out any InitPlans. 
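
The query shape exercised by the added regression test, where the modifying CTE
is referenced only through an uncorrelated scalar SubPlan and therefore becomes
an InitPlan whose plan contains a Split Update, is:

```sql
-- From the added test: the InitPlan contains a Split Update node and must not
-- be initialized with EXEC_FLAG_REWIND.
create table t1(i int) distributed by (i);
create table t2(i int) distributed by (i);
insert into t1 values (1);
insert into t2 values (1);

with cte as
(update t1 set i = 0
 returning i)
select i from t2
where 0 = (select i from cte);
```
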
As for ORCA optimizer, rewindPlanIDs was not previously used, and this patch adds a bunch of logic to fill this bitmapset with subplan ids. This patch extends existing SetInitPlanVariables function and renames it to SetSubPlanVariables. This function has originally been setting nInitPlans and nParamExec in PlannedStmt, and also has been setting qDispSliceId for each InitPlan, that is found during plan tree traversal. This patch extends this behaviour and additionally fills the rewindPlanIDs bitmapset for each SubPlan found, execept InitPlans. At executor side, the condition checking whether the SubPlan is in the planned_stmt->rewindPlanIDs is added to the InitPlan function. From that point, SubPlans will be initialized with EXEC_REWIND flag only if they are not InitPlans. Ticket: ADBDEV-4059 Cherry-picked from: d0a5bc0 --- src/backend/executor/execMain.c | 6 ++- .../translate/CTranslatorDXLToPlStmt.cpp | 17 ++++-- src/backend/optimizer/plan/subselect.c | 11 ++-- .../gpopt/translate/CTranslatorDXLToPlStmt.h | 4 +- src/test/regress/expected/subselect_gp.out | 54 +++++++++++++++++++ src/test/regress/expected/subselect_gp_1.out | 54 +++++++++++++++++++ .../expected/subselect_gp_optimizer.out | 54 +++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 31 +++++++++++ 8 files changed, 218 insertions(+), 13 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index bd644e9f5f2a..348b41bb7927 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2264,11 +2264,13 @@ InitPlan(QueryDesc *queryDesc, int eflags) /* * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. * - * GPDB: We always set the REWIND flag, to delay eagerfree. + * GPDB: We always set the REWIND flag, except InitPlans, + * to delay eagerfree. */ sp_eflags = eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); - sp_eflags |= EXEC_FLAG_REWIND; + if (bms_is_member(subplan_idx + 1, plannedstmt->rewindPlanIDs)) + sp_eflags |= EXEC_FLAG_REWIND; Plan *subplan = (Plan *) lfirst(l); subplanstate = ExecInitNode(subplan, estate, sp_eflags); diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index cc916ac657aa..6d3ee370bcce 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -280,7 +280,7 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, // pplstmt->intoClause = m_pctxdxltoplstmt->Pintocl(); planned_stmt->intoPolicy = m_dxl_to_plstmt_context->GetDistributionPolicy(); - SetInitPlanVariables(planned_stmt); + SetSubPlanVariables(planned_stmt); if (CMD_SELECT == m_cmd_type && NULL != dxlnode->GetDXLDirectDispatchInfo()) { @@ -345,14 +345,16 @@ CTranslatorDXLToPlStmt::TranslateDXLOperatorToPlan( //--------------------------------------------------------------------------- // @function: -// CTranslatorDXLToPlStmt::SetInitPlanVariables +// CTranslatorDXLToPlStmt::SetSubPlanVariables // // @doc: // Iterates over the plan to set the qDispSliceId that is found in the plan // as well as its subplans. Set the number of parameters used in the plan. +// Simultaneously fills rewindPlanIDs bitmapset in PlannedStmt with plan_id +// of all SubPlans, except InitPlans. 
//--------------------------------------------------------------------------- void -CTranslatorDXLToPlStmt::SetInitPlanVariables(PlannedStmt *planned_stmt) +CTranslatorDXLToPlStmt::SetSubPlanVariables(PlannedStmt *planned_stmt) { if (1 != m_dxl_to_plstmt_context @@ -369,6 +371,9 @@ CTranslatorDXLToPlStmt::SetInitPlanVariables(PlannedStmt *planned_stmt) List *subplan_list = gpdb::ExtractNodesPlan(planned_stmt->planTree, T_SubPlan, true); + // set of plan_ids of any SubPlan except InitPLan + Bitmapset *planIds = NULL; + ListCell *lc = NULL; ForEach(lc, subplan_list) @@ -378,6 +383,8 @@ CTranslatorDXLToPlStmt::SetInitPlanVariables(PlannedStmt *planned_stmt) { SetInitPlanSliceInformation(subplan); } + else + planIds = gpdb::BmsAddMember(planIds, subplan->plan_id); } // InitPlans can also be defined in subplans. We therefore have to iterate @@ -398,8 +405,12 @@ CTranslatorDXLToPlStmt::SetInitPlanVariables(PlannedStmt *planned_stmt) { SetInitPlanSliceInformation(subplan); } + else + planIds = gpdb::BmsAddMember(planIds, subplan->plan_id); } } + + planned_stmt->rewindPlanIDs = planIds; } //--------------------------------------------------------------------------- diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ee82690d4f09..15a585695a3b 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -979,13 +979,12 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, root->init_plans = lappend(root->init_plans, splan); /* - * A parameterless subplan (not initplan) should be prepared to handle - * REWIND efficiently. If it has direct parameters then there's no point - * since it'll be reset on each scan anyway; and if it's an initplan then - * there's no point since it won't get re-run without parameter changes - * anyway. The input of a hashed subplan doesn't need REWIND either. + * Executor passes EXEC_REWIND flag to the plan nodes in order to indicate + * that underlying node or subplan are likely to be rescanned. Moreover, + * for any SubPlan, except InitPlans, rescan is expected and EXEC_REWIND + * should be set for them. EXEC_REWIND also allows to delay the eager free. */ - if (splan->parParam == NIL && !splan->is_initplan && !splan->useHashTable) + if (!splan->is_initplan) root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs, splan->plan_id); diff --git a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h index 26bb3cb7ce44..279ff83c706a 100644 --- a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h +++ b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h @@ -194,8 +194,8 @@ class CTranslatorDXLToPlStmt // Set the qDispSliceId in the subplans defining an initplan void SetInitPlanSliceInformation(SubPlan *); - // Set InitPlanVariable in PlannedStmt - void SetInitPlanVariables(PlannedStmt *); + // Set InitPlanVariable and fill rewindPlanIDs in PlannedStmt + void SetSubPlanVariables(PlannedStmt *); // translate DXL table scan node into a SeqScan node Plan *TranslateDXLTblScan( diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 497ebba10f95..89a502656065 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3378,3 +3378,57 @@ select count(*) from t1; drop table t2; drop table t1; +-- Test that executor does not treat InitPlans as rescannable +-- while initializing executor state. 
Otherwise, for InitPlan containing +-- non-rescannable operations (like Split Update node) executor may +-- fail with an assertion error. +-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); +explain (costs off) +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + -> Result + One-Time Filter: (0 = $1) + InitPlan 1 (returns $1) (slice4) + -> Gather Motion 3:1 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(12 rows) + +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + i +--- + 1 +(1 row) + +select * from t1; + i +--- + 0 +(1 row) + +drop table t2; +drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index bac4922ee0a3..6c5d6dfe280a 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3378,3 +3378,57 @@ select count(*) from t1; drop table t2; drop table t1; +-- Test that executor does not treat InitPlans as rescannable +-- while initializing executor state. Otherwise, for InitPlan containing +-- non-rescannable operations (like Split Update node) executor may +-- fail with an assertion error. +-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); +explain (costs off) +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + -> Result + One-Time Filter: (0 = $1) + InitPlan 1 (returns $1) (slice4) + -> Gather Motion 3:1 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(12 rows) + +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + i +--- + 1 +(1 row) + +select * from t1; + i +--- + 0 +(1 row) + +drop table t2; +drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 9749fbd73c62..dbc6056272aa 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3519,3 +3519,57 @@ select count(*) from t1; drop table t2; drop table t1; +-- Test that executor does not treat InitPlans as rescannable +-- while initializing executor state. Otherwise, for InitPlan containing +-- non-rescannable operations (like Split Update node) executor may +-- fail with an assertion error. 
+-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); +explain (costs off) +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + -> Result + One-Time Filter: (0 = $1) + InitPlan 1 (returns $1) (slice4) + -> Gather Motion 3:1 (slice2; segments: 3) + -> Update on t1 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: "outer".i + -> Split + -> Seq Scan on t1 + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(12 rows) + +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + i +--- + 1 +(1 row) + +select * from t1; + i +--- + 0 +(1 row) + +drop table t2; +drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index ce306cb8b16a..a47c6915e49d 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1309,3 +1309,34 @@ select count(*) from t1; drop table t2; drop table t1; + +-- Test that executor does not treat InitPlans as rescannable +-- while initializing executor state. Otherwise, for InitPlan containing +-- non-rescannable operations (like Split Update node) executor may +-- fail with an assertion error. +-- start_ignore +drop table if exists t1; +drop table if exists t2; +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t1 values (1); +insert into t2 values (1); + +explain (costs off) +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + +with cte as +(update t1 set i = 0 + returning i) +select i from t2 +where 0 = (select i from cte); + +select * from t1; + +drop table t2; +drop table t1; From 611ff8a9ba533dc354bc022d3131a8edeef844d8 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 12 Sep 2023 18:00:21 +0300 Subject: [PATCH 097/106] Avoid rescan of modifying operations inside correlated Subplans. When a query had a modifying command inside the correlated SubPlan, the ModifyTable node could be rescanned for each outer tuple. That lead to execution errors (rescan of specific nodes is not supported). This happened because the ParallelizeCorrelatedSubplanMutator function did not expect the ModifyTable node inside the correlated SubPlans. This patch adds the support of the ModifyTable nodes for correlated SubPlans. Currently, ModifyTable node can get into the SubPlan only as the part of CTE query, therefore, it can either be wrapped in the SubqueryScan node or be standalone, depending on the SubqueryScan being trivial or not. This patch affects ParallelizeCorrelatedSubplanMutator function. The patch extends the if-clause dedicated to the choice of plan nodes, that need to be broadcasted or focused, and then materialized. The specific conditions related to modifying operations were added. These conditions checks whether current node is the SubqueryScan with ModifyTable just under it or current node is a standalone ModifyTable. If condition is satisfied, node is then processed the same way as any other Scan-type nodes. 
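
For reference, the basic shape from the added regression test, where the
INSERT ... RETURNING CTE is referenced from a SubPlan correlated with the outer
query, is:

```sql
-- From the added test: the correlation (t2.i = cte.i) makes the SubPlan
-- correlated, so the ModifyTable result is broadcast (or focused) and
-- materialized instead of being rescanned.
create table t1(i int) distributed by (i);
create table t2(i int) distributed by (i);
insert into t2 values (1), (2);

with cte as
(insert into t1
 select i from generate_series(1, 5) i
 returning *)
select * from t2
where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0)
order by i;
```
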
Next, the result of ModifyTable is either broadcasted or focused depending on the target flow type. Then the result is materialized in order to avoid rescan of underlying nodes. Cherry-picked-from: c164546 --- src/backend/cdb/cdbllize.c | 11 +- src/test/regress/expected/subselect_gp.out | 232 ++++++++++++++++++ src/test/regress/expected/subselect_gp_1.out | 232 ++++++++++++++++++ .../expected/subselect_gp_optimizer.out | 232 ++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 104 ++++++++ 5 files changed, 810 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 7e61cc10e4ca..9212152fe21e 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -455,9 +455,18 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC } } + /* + * If the ModifyTable node appears inside the correlated Subplan, it has + * to be handled the same way as various *Scan nodes. Currently such + * situation may occur only for modifying CTE cases, and, therefore, + * mutator shouldn't go under ModifyTable's plans and should broadcast or + * focus the result of modifying operation if needed. + */ if (IsA(node, SeqScan) ||IsA(node, ShareInputScan) - ||IsA(node, ExternalScan)) + ||IsA(node, ExternalScan) + ||(IsA(node, SubqueryScan) && IsA(((SubqueryScan *) node)->subplan, ModifyTable)) + ||IsA(node,ModifyTable)) { Plan *scanPlan = (Plan *) node; diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 89a502656065..2109d91f2009 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -3432,3 +3432,235 @@ select * from t1; drop table t2; drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. 
+-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Result + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (cte.i > 0) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(18 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 1) + -> Result + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (cte.i > 0) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(17 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 10 +(1 row) + +--start_ignore +drop table if exists t3; +NOTICE: table "t3" does not exist, skipping +--end_ignore +create table t3 (i int, j int) distributed randomly; +insert into t3 values (1, 1), (2, 2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t3.i) + -> Result + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> 
Function Scan on generate_series i + -> Hash + -> Result + Filter: (t3.j = t2.i) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on t3 + Optimizer: Postgres query optimizer +(23 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 15 +(1 row) + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice4; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t3.i) + -> Result + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + -> Hash + -> Result + Filter: (t3.j = t_repl.i) + -> Materialize + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on t3 + Optimizer: Postgres query optimizer +(22 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 20 +(1 row) + +drop table t3; +drop table t_repl; +drop table t2; +drop table t1; diff --git a/src/test/regress/expected/subselect_gp_1.out b/src/test/regress/expected/subselect_gp_1.out index 6c5d6dfe280a..a84cf6c0b411 100644 --- a/src/test/regress/expected/subselect_gp_1.out +++ b/src/test/regress/expected/subselect_gp_1.out @@ -3432,3 +3432,235 @@ select * from t1; drop table t2; drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. 
+-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Result + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (cte.i > 0) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(18 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 1) + -> Result + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (cte.i > 0) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(17 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 10 +(1 row) + +--start_ignore +drop table if exists t3; +NOTICE: table "t3" does not exist, skipping +--end_ignore +create table t3 (i int, j int) distributed randomly; +insert into t3 values (1, 1), (2, 2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t3.i) + -> Result + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> 
Function Scan on generate_series i + -> Hash + -> Result + Filter: (t3.j = t2.i) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on t3 + Optimizer: Postgres query optimizer +(23 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 15 +(1 row) + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice4; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t3.i) + -> Result + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + -> Hash + -> Result + Filter: (t3.j = t_repl.i) + -> Materialize + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on t3 + Optimizer: Postgres query optimizer +(22 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 20 +(1 row) + +drop table t3; +drop table t_repl; +drop table t2; +drop table t1; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index dbc6056272aa..f48c204b78a0 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3573,3 +3573,235 @@ select * from t1; drop table t2; drop table t1; +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. 
+-- start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists t2; +NOTICE: table "t2" does not exist, skipping +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice3; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 3) + -> Result + Filter: (t2.i = cte.i) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (cte.i > 0) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(18 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 5 +(1 row) + +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice3; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice3; segments: 1) + -> Result + Filter: (t_repl.i = cte.i) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Subquery Scan on cte + Filter: (cte.i > 0) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(17 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 10 +(1 row) + +--start_ignore +drop table if exists t3; +NOTICE: table "t3" does not exist, skipping +--end_ignore +create table t3 (i int, j int) distributed randomly; +insert into t3 values (1, 1), (2, 2); +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice4; segments: 3) + Merge Key: t2.i + -> Sort + Sort Key: t2.i + -> Seq Scan on t2 + Filter: (SubPlan 1) + SubPlan 1 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t3.i) + -> Result + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> 
Function Scan on generate_series i + -> Hash + -> Result + Filter: (t3.j = t2.i) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on t3 + Optimizer: Postgres query optimizer +(23 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 15 +(1 row) + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice4; segments: 1) + -> Sort + Sort Key: t_repl.i + -> Seq Scan on t_repl + Filter: (SubPlan 1) + SubPlan 1 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1.i = t3.i) + -> Result + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Insert on t1 + -> Redistribute Motion 1:3 (slice1; segments: 1) + Hash Key: i.i + -> Function Scan on generate_series i + -> Hash + -> Result + Filter: (t3.j = t_repl.i) + -> Materialize + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on t3 + Optimizer: Postgres query optimizer +(22 rows) + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + i +--- + 1 + 2 +(2 rows) + +select count(*) from t1; + count +------- + 20 +(1 row) + +drop table t3; +drop table t_repl; +drop table t2; +drop table t1; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index a47c6915e49d..d41f299a2592 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1340,3 +1340,107 @@ select * from t1; drop table t2; drop table t1; + +-- Test correlated SubPlans containing writable operation are +-- planned and executed correctly. The result of modifying operations +-- should be broadcasted (or focused) and materialized. 
+-- start_ignore +drop table if exists t1; +drop table if exists t2; +--end_ignore +create table t1(i int) distributed by (i); +create table t2(i int) distributed by (i); +insert into t2 values (1), (2); + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte where t2.i = cte.i and cte.i > 0) +order by i; + +select count(*) from t1; + +--start_ignore +drop table if exists t_repl; +--end_ignore +create table t_repl (i int) distributed replicated; +insert into t_repl values (1), (2); + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte where t_repl.i = cte.i and cte.i > 0) +order by i; + +select count(*) from t1; + +--start_ignore +drop table if exists t3; +--end_ignore +create table t3 (i int, j int) distributed randomly; +insert into t3 values (1, 1), (2, 2); + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t2 +where t2.i in (select i from cte join t3 using (i) where t3.j = t2.i) +order by i; + +select count(*) from t1; + +explain (costs off) +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + +with cte as +(insert into t1 + select i from generate_series(1, 5) i + returning *) +select * from t_repl +where t_repl.i in (select i from cte join t3 using (i) where t3.j = t_repl.i) +order by i; + +select count(*) from t1; + +drop table t3; +drop table t_repl; +drop table t2; +drop table t1; From 3c95046a62f754f3c3f8442d43f536499ee7850d Mon Sep 17 00:00:00 2001 From: Georgy Shelkovy Date: Wed, 28 Sep 2022 18:35:30 +0600 Subject: [PATCH 098/106] Fix plans for queries to replicated table with volatile function (#383) There are several cases in which planner produces bogus plan for queries to replicated tables with volatile functions, that may lead to wrong results or even segfault. 1. Volatile function in subplan Query with subplan containing volatile functions on distributed replicated tables may not make gather motion. Currently, gpdb replaces locuses for subplans subtrees to SingleQE in case of they have SegmentGeneral (i.e. replicated table scan, data is only on segments) locus and contains volatile functions. But it's incorrect because SingleQE locus assume that data is available on any segment instance including coordinator. As a result, there is no reason to add gather motion above such subtree and the resulting plan will be invalid. Solution is to make explicit gather motion in such case. 2. Volatile function in modify subplan targret list Query on distributed replicated tables may not make broadcast motion. Usually, insert query uses subquery. 
Volatile functions in such subqueries are caught in set_subqueryscan_pathlist which adds gather motion for that. But some subqueries are simplified on early planning stages and subquery subtree is substituted into main plan (see is_simple_subquery). In such case we should explicit catch volatile functions before adding ModifyTable node. To ensure rows are explicitly forwarded to all segments we should replace subplan locus (with volatile functions) with SingleQE before request motion append. It is necessary because planner considers pointless to send rows from replicated tables. Solution is set CdbLocusType_SingleQE in such cases, that later make broadcast motion. 3. Volatile function in deleting motion flow Query containing volatile functions on distributed replicated tables may not make broadcast motion. This produces wrong plan. This happens, because apply_motion_mutator delete pre-existing broadcast motion to recreate it later. But we should save motion request to create appropriate motion above the child node. Original flow for the child node will be restored after motion creation. Solution is to save such flow, that later make broadcast motion. 4. Volatile function in correlated subplan quals Query on distributed replicated tables may not make broadcast motion. This produces wrong plan. This happens, because broadcast motion does not made, when volatile function exists. Planner considers pointless to send rows from replicated tables. But if volatile function exist in quals we need broadcast motion. Solution is set CdbLocusType_SingleQE in such cases, that later make broadcast motion. Cherry-picked from: 7ef4218221babf046b63f748d3467b54ba5c9ca0 to append plan changes for 516bd3a9d4ff1c027eb7ac53dadbc24768d04165 Cherry-picked from: cc35273 --- src/backend/cdb/cdbllize.c | 8 + src/backend/cdb/cdbmutate.c | 8 +- src/backend/optimizer/plan/createplan.c | 13 +- src/backend/optimizer/plan/subselect.c | 12 +- src/test/regress/expected/rpt.out | 217 +++++++++++++++++++- src/test/regress/expected/rpt_optimizer.out | 217 +++++++++++++++++++- src/test/regress/sql/rpt.sql | 36 ++++ 7 files changed, 501 insertions(+), 10 deletions(-) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 9212152fe21e..73be0f430eb2 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -582,6 +582,14 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC if (ctx->movement == MOVEMENT_BROADCAST) { Assert (NULL != ctx->currentPlanFlow); + + if (scanPlan->flow->locustype == CdbLocusType_SegmentGeneral && + contain_volatile_functions((Node *) scanPlan->qual)) + { + scanPlan->flow->locustype = CdbLocusType_SingleQE; + scanPlan->flow->flotype = FLOW_SINGLETON; + } + broadcastPlan(scanPlan, false /* stable */ , false /* rescannable */, ctx->currentPlanFlow->numsegments /* numsegments */); } diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index bb679da3517b..fedee52c7f00 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -849,7 +849,13 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) if (IsA(newnode, Motion) &&flow->req_move != MOVEMENT_NONE) { plan = ((Motion *) newnode)->plan.lefttree; - flow = plan->flow; + + /* We'll recreate this motion later below. But we should save motion + * request to create appropriate motion above the child node. + * Original flow for the child node will be restored + * after motion creation. 
*/ + flow->flow_before_req_move = plan->flow; + plan->flow = flow; newnode = (Node *) plan; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 923046462c1e..fe03c8df7ce9 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -6588,12 +6588,15 @@ adjust_modifytable_flow(PlannerInfo *root, ModifyTable *node, List *is_split_upd * Obviously, tmp_tab in new segments can't get data if we don't * add a broadcast here. */ - if (optimizer_replicated_table_insert && - subplan->flow->flotype == FLOW_SINGLETON && - subplan->flow->locustype == CdbLocusType_SegmentGeneral && - !contain_volatile_functions((Node *)subplan->targetlist)) + if (subplan->flow->flotype == FLOW_SINGLETON && + subplan->flow->locustype == CdbLocusType_SegmentGeneral) { - if (subplan->flow->numsegments >= targetPolicy->numsegments) + if (contain_volatile_functions((Node *)subplan->targetlist)) + { + subplan->flow->locustype = CdbLocusType_SingleQE; + } + else if (optimizer_replicated_table_insert && + subplan->flow->numsegments >= targetPolicy->numsegments) { /* * A query to reach here: diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 15a585695a3b..ceb711d5e93e 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -41,9 +41,11 @@ #include "utils/syscache.h" #include "cdb/cdbmutate.h" +#include "cdb/cdbsetop.h" #include "cdb/cdbsubselect.h" #include "cdb/cdbvars.h" + typedef struct convert_testexpr_context { PlannerInfo *root; @@ -667,8 +669,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, &subroot, config); - if ((plan->flow->locustype == CdbLocusType_SegmentGeneral || - plan->flow->locustype == CdbLocusType_General) && + if (plan->flow->locustype == CdbLocusType_General && (contain_volatile_functions((Node *) plan->targetlist) || contain_volatile_functions(subquery->havingQual))) { @@ -676,6 +677,13 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, plan->flow->flotype = FLOW_SINGLETON; } + if (plan->flow->locustype == CdbLocusType_SegmentGeneral && + (contain_volatile_functions((Node *) plan->targetlist) || + contain_volatile_functions(subquery->havingQual))) + { + plan = (Plan *) make_motion_gather(subroot, plan, NIL, CdbLocusType_SingleQE); + } + /* Isolate the params needed by this specific subplan */ plan_params = root->plan_params; root->plan_params = NIL; diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index a38b804a1fc2..0a90105f08c0 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -875,7 +875,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c SubPlan 1 (slice2; segments: 3) -> Materialize - Output: random() + Output: (random()) -> Broadcast Motion 1:3 (slice1; segments: 1) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 @@ -985,6 +985,51 @@ explain (costs off) select a from t_replicate_volatile union all select * from n Optimizer: Postgres query optimizer (6 rows) +-- insert into table with serial column +create table t_replicate_dst(id serial, i integer) distributed replicated; +create table t_replicate_src(i integer) distributed replicated; +insert into t_replicate_src select i from generate_series(1, 5) i; +explain (costs off, verbose) insert 
into t_replicate_dst (i) select i from t_replicate_src; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Insert on rpt.t_replicate_dst + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), t_replicate_src.i + -> Seq Scan on rpt.t_replicate_src + Output: nextval('t_replicate_dst_id_seq'::regclass), t_replicate_src.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(7 rows) + +explain (costs off, verbose) with s as (select i from t_replicate_src group by i having random() > 0) insert into t_replicate_dst (i) select i from s; + QUERY PLAN +---------------------------------------------------------------------------------------- + Insert on rpt.t_replicate_dst + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), "*SELECT*".i + -> Subquery Scan on "*SELECT*" + Output: nextval('t_replicate_dst_id_seq'::regclass), "*SELECT*".i + -> HashAggregate + Output: t_replicate_src.i + Group Key: t_replicate_src.i + Filter: (random() > '0'::double precision) + -> Seq Scan on rpt.t_replicate_src + Output: t_replicate_src.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(13 rows) + +insert into t_replicate_dst (i) select i from t_replicate_src; +select distinct id from gp_dist_random('t_replicate_dst') order by id; + id +---- + 1 + 2 + 3 + 4 + 5 +(5 rows) + -- update & delete explain (costs off) update t_replicate_volatile set a = 1 where b > random(); ERROR: could not devise a plan (cdbpath.c:2074) @@ -1363,6 +1408,174 @@ select j, (select 5) AS "Uncorrelated Field" from t; 2 | 5 (1 row) +-- +-- Check sub-selects with distributed replicated tables and volatile functions +-- +drop table if exists t; +create table t (i int) distributed replicated; +create table t1 (a int) distributed by (a); +create table t2 (a int, b float) distributed replicated; +create or replace function f(i int) returns int language sql security definer as $$ select i; $$; +-- ensure we make gather motion when volatile functions in subplan +explain (costs off, verbose) select (select f(i) from t); + QUERY PLAN +----------------------------------------------------- + Result + Output: $0 + InitPlan 1 (returns $0) (slice2) + -> Gather Motion 1:1 (slice1; segments: 1) + Output: (f(i)) + -> Seq Scan on rpt.t + Output: f(i) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(9 rows) + +explain (costs off, verbose) select (select f(i) from t group by f(i)); + QUERY PLAN +----------------------------------------------------- + Result + Output: $0 + InitPlan 1 (returns $0) (slice2) + -> Gather Motion 1:1 (slice1; segments: 1) + Output: (f(i)) + -> HashAggregate + Output: (f(i)) + Group Key: f(t.i) + -> Seq Scan on rpt.t + Output: i, f(i) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(12 rows) + +explain (costs off, verbose) select (select i from t group by i having f(i) > 0); + QUERY PLAN +----------------------------------------------------- + Result + Output: $0 + InitPlan 1 (returns $0) (slice2) + -> Gather Motion 1:1 (slice1; segments: 1) + Output: i + -> HashAggregate + Output: i + Group Key: t.i + Filter: (f(t.i) > 0) + -> Seq Scan on rpt.t + Output: i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(13 rows) 
+ +-- ensure we do not make broadcast motion +explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a group by i); + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t1.a + -> Seq Scan on rpt.t1 + Output: t1.a + Filter: (SubPlan 1) + SubPlan 1 (slice1; segments: 1) + -> Result + Output: random(), t.i + Filter: (t.i = t1.a) + -> Materialize + Output: t.i, t.i + -> Seq Scan on rpt.t + Output: t.i, t.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(15 rows) + +explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a); + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t1.a + -> Seq Scan on rpt.t1 + Output: t1.a + Filter: (SubPlan 1) + SubPlan 1 (slice1; segments: 1) + -> Result + Output: random() + Filter: (t.i = t1.a) + -> Materialize + Output: t.i + -> Seq Scan on rpt.t + Output: t.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(15 rows) + +-- ensure we make broadcast motion when volatile function in deleting motion flow +explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; + QUERY PLAN +----------------------------------------------------- + Insert on rpt.t2 + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t.i, (random()) + -> Seq Scan on rpt.t + Output: t.i, random() + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(7 rows) + +-- ensure we make broadcast motion when volatile function in correlated subplan qual +explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: t1.a + -> Seq Scan on rpt.t1 + Output: t1.a + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Result + Output: f(t.i) + -> Result + Output: t.i + Filter: (t.i = t1.a) + -> Materialize + Output: t.i, t.i + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t.i, t.i + -> Seq Scan on rpt.t + Output: t.i, t.i + Filter: (f(t.i) > 0) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(20 rows) + +-- ensure we do not break broadcast motion +explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + Output: t1.a + -> Result + Output: t1.a + One-Time Filter: (SubPlan 1) + -> Seq Scan on rpt.t1 + Output: t1.a + SubPlan 1 (slice2; segments: 3) + -> Materialize + Output: t.i + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t.i + -> HashAggregate + Output: t.i + Group Key: t.i + Filter: (random() > '0'::double precision) + -> Seq Scan on rpt.t + Output: t.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(20 rows) + +drop table if exists t; +drop table if exists t1; +drop table if exists t2; +drop function if exists f(i int); -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 13 other objects @@ -1376,6 +1589,8 @@ drop cascades to table minmaxtest drop cascades to table t_hashdist drop cascades to table t_replicate_volatile drop cascades to sequence 
seq_for_insert_replicated_table +drop cascades to table t_replicate_dst +drop cascades to table t_replicate_src drop cascades to table rtbl drop cascades to table t1_13532 drop cascades to table t2_13532 diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index bb1d56f3068d..41848105624b 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -866,7 +866,7 @@ explain (costs off, verbose) select * from t_hashdist left join t_replicate_vola Output: t_replicate_volatile.a, t_replicate_volatile.b, t_replicate_volatile.c SubPlan 1 (slice2; segments: 3) -> Materialize - Output: random() + Output: (random()) -> Broadcast Motion 1:3 (slice1; segments: 1) Output: (random()) -> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1 @@ -976,6 +976,51 @@ explain (costs off) select a from t_replicate_volatile union all select * from n Optimizer: Postgres query optimizer (6 rows) +-- insert into table with serial column +create table t_replicate_dst(id serial, i integer) distributed replicated; +create table t_replicate_src(i integer) distributed replicated; +insert into t_replicate_src select i from generate_series(1, 5) i; +explain (costs off, verbose) insert into t_replicate_dst (i) select i from t_replicate_src; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Insert on rpt.t_replicate_dst + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), t_replicate_src.i + -> Seq Scan on rpt.t_replicate_src + Output: nextval('t_replicate_dst_id_seq'::regclass), t_replicate_src.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(7 rows) + +explain (costs off, verbose) with s as (select i from t_replicate_src group by i having random() > 0) insert into t_replicate_dst (i) select i from s; + QUERY PLAN +---------------------------------------------------------------------------------------- + Insert on rpt.t_replicate_dst + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: ((nextval('t_replicate_dst_id_seq'::regclass))::integer), "*SELECT*".i + -> Subquery Scan on "*SELECT*" + Output: nextval('t_replicate_dst_id_seq'::regclass), "*SELECT*".i + -> HashAggregate + Output: t_replicate_src.i + Group Key: t_replicate_src.i + Filter: (random() > '0'::double precision) + -> Seq Scan on rpt.t_replicate_src + Output: t_replicate_src.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off, optimizer=off +(13 rows) + +insert into t_replicate_dst (i) select i from t_replicate_src; +select distinct id from gp_dist_random('t_replicate_dst') order by id; + id +---- + 1 + 2 + 3 + 4 + 5 +(5 rows) + -- update & delete explain (costs off) update t_replicate_volatile set a = 1 where b > random(); ERROR: could not devise a plan (cdbpath.c:2089) @@ -1372,6 +1417,174 @@ select j, (select 5) AS "Uncorrelated Field" from t; 2 | 5 (1 row) +-- +-- Check sub-selects with distributed replicated tables and volatile functions +-- +drop table if exists t; +create table t (i int) distributed replicated; +create table t1 (a int) distributed by (a); +create table t2 (a int, b float) distributed replicated; +create or replace function f(i int) returns int language sql security definer as $$ select i; $$; +-- ensure we make gather motion when volatile functions in subplan +explain (costs off, verbose) select (select 
f(i) from t); + QUERY PLAN +----------------------------------------------------- + Result + Output: $0 + InitPlan 1 (returns $0) (slice2) + -> Gather Motion 1:1 (slice1; segments: 1) + Output: (f(i)) + -> Seq Scan on rpt.t + Output: f(i) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(9 rows) + +explain (costs off, verbose) select (select f(i) from t group by f(i)); + QUERY PLAN +----------------------------------------------------- + Result + Output: $0 + InitPlan 1 (returns $0) (slice2) + -> Gather Motion 1:1 (slice1; segments: 1) + Output: (f(i)) + -> HashAggregate + Output: (f(i)) + Group Key: f(t.i) + -> Seq Scan on rpt.t + Output: i, f(i) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(12 rows) + +explain (costs off, verbose) select (select i from t group by i having f(i) > 0); + QUERY PLAN +----------------------------------------------------- + Result + Output: $0 + InitPlan 1 (returns $0) (slice2) + -> Gather Motion 1:1 (slice1; segments: 1) + Output: i + -> HashAggregate + Output: i + Group Key: t.i + Filter: (f(t.i) > 0) + -> Seq Scan on rpt.t + Output: i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(13 rows) + +-- ensure we do not make broadcast motion +explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a group by i); + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t1.a + -> Seq Scan on rpt.t1 + Output: t1.a + Filter: (SubPlan 1) + SubPlan 1 (slice1; segments: 1) + -> Result + Output: random(), t.i + Filter: (t.i = t1.a) + -> Materialize + Output: t.i, t.i + -> Seq Scan on rpt.t + Output: t.i, t.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(15 rows) + +explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a); + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t1.a + -> Seq Scan on rpt.t1 + Output: t1.a + Filter: (SubPlan 1) + SubPlan 1 (slice1; segments: 1) + -> Result + Output: random() + Filter: (t.i = t1.a) + -> Materialize + Output: t.i + -> Seq Scan on rpt.t + Output: t.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(15 rows) + +-- ensure we make broadcast motion when volatile function in deleting motion flow +explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; + QUERY PLAN +----------------------------------------------------- + Insert on rpt.t2 + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t.i, (random()) + -> Seq Scan on rpt.t + Output: t.i, random() + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(7 rows) + +-- ensure we make broadcast motion when volatile function in correlated subplan qual +explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice2; segments: 3) + Output: t1.a + -> Seq Scan on rpt.t1 + Output: t1.a + Filter: (SubPlan 1) + SubPlan 1 (slice2; segments: 3) + -> Result + Output: f(t.i) + -> Result + Output: t.i + Filter: (t.i = t1.a) + -> Materialize + Output: t.i, t.i + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t.i, t.i + -> Seq Scan on 
rpt.t + Output: t.i, t.i + Filter: (f(t.i) > 0) + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(20 rows) + +-- ensure we do not break broadcast motion +explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice2; segments: 3) + Output: t1.a + -> Result + Output: t1.a + One-Time Filter: (SubPlan 1) + -> Seq Scan on rpt.t1 + Output: t1.a + SubPlan 1 (slice2; segments: 3) + -> Materialize + Output: t.i + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t.i + -> HashAggregate + Output: t.i + Group Key: t.i + Filter: (random() > '0'::double precision) + -> Seq Scan on rpt.t + Output: t.i + Optimizer: Postgres query optimizer + Settings: enable_bitmapscan=off, enable_seqscan=off +(20 rows) + +drop table if exists t; +drop table if exists t1; +drop table if exists t2; +drop function if exists f(i int); -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 13 other objects @@ -1385,6 +1598,8 @@ drop cascades to table minmaxtest drop cascades to table t_hashdist drop cascades to table t_replicate_volatile drop cascades to sequence seq_for_insert_replicated_table +drop cascades to table t_replicate_dst +drop cascades to table t_replicate_src drop cascades to table rtbl drop cascades to table t1_13532 drop cascades to table t2_13532 diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index d20bf1795e39..c198352f8068 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -424,6 +424,16 @@ explain (costs off) insert into t_replicate_volatile select random(), a, a from create sequence seq_for_insert_replicated_table; explain (costs off) insert into t_replicate_volatile select nextval('seq_for_insert_replicated_table'); explain (costs off) select a from t_replicate_volatile union all select * from nextval('seq_for_insert_replicated_table'); + +-- insert into table with serial column +create table t_replicate_dst(id serial, i integer) distributed replicated; +create table t_replicate_src(i integer) distributed replicated; +insert into t_replicate_src select i from generate_series(1, 5) i; +explain (costs off, verbose) insert into t_replicate_dst (i) select i from t_replicate_src; +explain (costs off, verbose) with s as (select i from t_replicate_src group by i having random() > 0) insert into t_replicate_dst (i) select i from s; +insert into t_replicate_dst (i) select i from t_replicate_src; +select distinct id from gp_dist_random('t_replicate_dst') order by id; + -- update & delete explain (costs off) update t_replicate_volatile set a = 1 where b > random(); explain (costs off) update t_replicate_volatile set a = 1 from t_replicate_volatile x where x.a + random() = t_replicate_volatile.b; @@ -549,6 +559,32 @@ select j, (select j) AS "Correlated Field" from t; explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t; select j, (select 5) AS "Uncorrelated Field" from t; +-- +-- Check sub-selects with distributed replicated tables and volatile functions +-- +drop table if exists t; +create table t (i int) distributed replicated; +create table t1 (a int) distributed by (a); +create table t2 (a int, b float) distributed replicated; +create or replace function f(i int) returns int language sql security definer as $$ select i; $$; +-- ensure we make gather motion when volatile functions in subplan +explain (costs off, 
verbose) select (select f(i) from t); +explain (costs off, verbose) select (select f(i) from t group by f(i)); +explain (costs off, verbose) select (select i from t group by i having f(i) > 0); +-- ensure we do not make broadcast motion +explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a group by i); +explain (costs off, verbose) select * from t1 where a in (select random() from t where i=a); +-- ensure we make broadcast motion when volatile function in deleting motion flow +explain (costs off, verbose) insert into t2 (a, b) select i, random() from t; +-- ensure we make broadcast motion when volatile function in correlated subplan qual +explain (costs off, verbose) select * from t1 where a in (select f(i) from t where i=a and f(i) > 0); +-- ensure we do not break broadcast motion +explain (costs off, verbose) select * from t1 where 1 <= ALL (select i from t group by i having random() > 0); +drop table if exists t; +drop table if exists t1; +drop table if exists t2; +drop function if exists f(i int); + -- start_ignore drop schema rpt cascade; -- end_ignore From fc2aade30fc115b08e50c8402c6d6e982a53bb7d Mon Sep 17 00:00:00 2001 From: Dennis Kovalenko Date: Wed, 8 Mar 2023 23:21:07 +0400 Subject: [PATCH 099/106] Add more locales to docker image and add tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geenplum python scrips try to parse system command’s STDOUT and STDERR in English and may fail if locale is different from en_US. This patch adds tests that cover this case Also, this patch reinstalls glibc-common in docker container. This is necessary to get langpacks in docker because docker images don't contain them. Cherry-picked-from: 6298e77 --- arenadata/Dockerfile | 6 ++++++ gpMgmt/test/behave/mgmt_utils/gpstop.feature | 6 ++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 20 ++++++++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arenadata/Dockerfile b/arenadata/Dockerfile index 2b885287efcc..ee0fb40d6a8c 100644 --- a/arenadata/Dockerfile +++ b/arenadata/Dockerfile @@ -3,6 +3,12 @@ FROM centos:centos7 as base ARG sigar=https://downloads.adsw.io/ADB/6.22.0_arenadata38/centos/7/community/x86_64/sigar-1.6.5-1056.git2932df5.el7.x86_64.rpm ARG sigar_headers=http://downloads.adsw.io/ADB/6.22.0_arenadata38/centos/7/community/x86_64/sigar-headers-1.6.5-1056.git2932df5.el7.x86_64.rpm +# Reinstall glibc-common. This is necessary to get langpacks in docker +# because docker images don't contain them. 
+RUN sed -i 's/\(override_install_langs*\)/# \1/' /etc/yum.conf && \ + yum -y reinstall glibc-common && \ + yum clean all + # Install some basic utilities and build tools RUN yum makecache && yum update -y ca-certificates && \ rpm --import https://mirror.yandex.ru/centos/RPM-GPG-KEY-CentOS-7 && \ diff --git a/gpMgmt/test/behave/mgmt_utils/gpstop.feature b/gpMgmt/test/behave/mgmt_utils/gpstop.feature index 626eb672eb18..3336fd5be3ed 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstop.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstop.feature @@ -209,3 +209,9 @@ Feature: gpstop behave tests And the user runs gpstop -a and selects f And gpstop should return a return code of 0 + @demo_cluster + Scenario: gpstop gpstop should not print "Failed to kill processes for segment" when locale is different from English + Given the database is running + And "LC_ALL" is different from English + When the user runs "gpstop -a" + Then gpstop should not print "Failed to kill processes for segment" diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index bc6fa3a1c204..1259f59c82c9 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -43,7 +43,7 @@ from gppylib.commands.base import Command, REMOTE from gppylib import pgconf - +default_locale = None master_data_dir = os.environ.get('MASTER_DATA_DIRECTORY') if master_data_dir is None: raise Exception('Please set MASTER_DATA_DIRECTORY in environment') @@ -4378,3 +4378,21 @@ def verify_elements_in_file(filename, elements): return True +@given('"LC_ALL" is different from English') +def step_impl(context): + default_locale = os.environ.get('LC_ALL') + + try: + os.system('sudo localedef -i ru_RU -f UTF-8 ru_RU.UTF-8 > /dev/null') + except FileNotFoundError: + raise Exception("Failed to generate Russian locale") + + os.environ['LC_ALL'] = 'ru_RU.utf8' + +@then('gpstop should not print "Failed to kill processes for segment"') +def impl(context): + check_string_not_present_stdout(context, 'Failed to kill processes for segment') + if default_locale is not None: + os.environ['LC_ALL'] = default_locale + else: + del os.environ['LC_ALL'] From fd5939d4f491238bbd28be88b98949cd0e2d6fe0 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 4 Dec 2023 22:13:36 +0300 Subject: [PATCH 100/106] Fix partition selection during DML execution when dropped columns are present (#634) Current partitioning design faced a problem, which is connected with the differences in physical attribute layouts of the child partition and the parent relation in case when the parent relation has dropped columns and the newly added child partition has not, and vice versa. When executing modifying DML operations on a leaf partition of a partitioned table, the executor anyway performs the partition selection in case of INSERT, UPDATE for legacy planner and in case of INSERT, UPDATE, DELETE for ORCA optimizer. This is done in selectPartition function, which is called from slot_get_partition function (in case of modifying DML commands). This procedure is done based on 1) attribute values of the tuple, which have been retrieved from the respective subplan of ModifyTable (DML) node and whose attribute structure corresponds the leaf partition's. 2) the root partition's attribute numbers and partition rules, which are based on the parent (root) relation's tuple descriptor. 
Thus, if the parent relation's tuple descriptor contained dropped columns and the leaf partition's did not (or vice versa, the leaf partition had dropped columns and the parent had not), partition selection could go wrong and lead to various execution errors.

Consider the following case. A partitioned table was created, and at some point several irrelevant columns were dropped from it. After that, new partitions were added or existing ones were exchanged with new relations. These newly added (exchanged) partitions do not have the dropped columns in their structure, but the original parent relation still carries the dropped attributes. Then an INSERT into the new leaf partition is executed. For both planners the ExecInsert function is called, which in turn calls slot_get_partition to validate that the tuple values correspond to the specified leaf partition. Here the tuple descriptor has no dropped attributes, but partition selection is performed from the parent's view, which does have them, and the parent's partitioning attribute numbers are used to validate the tuple. Because of that, the partition selection procedure could either select the wrong target partition (the wrong attribute was taken from the slot values to check the partition rule) or fail to find a partition at all, because none of the partition rules were satisfied (a short SQL sketch of this scenario is given below).

The same issue occurred when nothing was dropped from the parent relation, but a partition was exchanged with a relation that has dropped columns. When inserting into the exchanged leaf partition, partition selection could also go wrong for the same reasons (selection is performed from the parent's view). Similar issues occurred for both planners in other commands whenever partition selection was performed.

To prevent potentially wrong partition selection when modifying a leaf partition, this patch proposes the following solution. The legacy planner has the checkPartitionUpdate function, which is called in UPDATE cases and checks that the tuple does not switch its partition at update. Notably, this function already handles the case when the partition being modified has physically different attribute numbers from the root partition's due to dropped columns: it creates the ri_PartCheckMap map in the target (child) ResultRelInfo, which stores the correspondence between the target partition's attributes and the parent's. Because of that map, partition selection goes smoothly inside the function.

The proposed solution is to build ri_PartCheckMap for all the cases when a leaf partition is modified, the parent relation has dropped columns (or the child has and the parent does not), and partition selection is expected. The logic that checks whether the leaf's and parent's relation descriptors match and builds the ri_PartCheckMap is moved to a separate function, makePartitionCheckMap. This new function is called inside the ExecModifyTable and ExecDML functions under different conditions. For the legacy planner it is called for any command except DELETE, no matter which relation is initially specified as the target (it can be either root or leaf).
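The scenario can be illustrated with a short SQL sketch. The table and partition names below are hypothetical and the DDL assumes Greenplum 6 classic partitioning syntax; this is not the regression test shipped with the patch:

    -- The root keeps a slot for the dropped column; a leaf added
    -- afterwards does not, so their physical attribute numbers diverge.
    CREATE TABLE sales (id int, junk int, region int)
        DISTRIBUTED BY (id)
        PARTITION BY LIST (region)
        (PARTITION r1 VALUES (1),
         PARTITION r2 VALUES (2));

    ALTER TABLE sales DROP COLUMN junk;
    ALTER TABLE sales ADD PARTITION r3 VALUES (3);

    -- Inserting straight into the new leaf (name as typically
    -- auto-generated) makes the executor select a partition from the
    -- root's point of view; before the fix this could pick the wrong
    -- partition or find none at all.
    INSERT INTO sales_1_prt_r3 VALUES (10, 3);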
For example, when the legacy planner executes an UPDATE (either a split or a regular update) on the root relation, it builds the plan via inheritance_planner, and therefore each subplan of the ModifyTable node returns a tuple slot with the attribute structure of a specific partition (some subplans contain dropped attributes in their targetList, some do not). Here a call to makePartitionCheckMap is also required. For an INSERT command it is always called as well, because makePartitionCheckMap validates whether the OID of the result relation equals the parent partition's OID, and this check allows the function to be called on the root relation too (if the OIDs match, the map is not built, because it is safe to perform the operation through the root relation). For the ORCA optimizer, makePartitionCheckMap is called inside the ExecDML function only when a leaf partition is being modified. Otherwise, if the command is run on the root relation, the tuple always has the parent's structure, which does not break the partition selection procedure.

This patch also changes the slot_get_partition function, which performs the partition selection. The function is extended with an alternative way of extracting attribute values from the given slot: when makePartitionCheckMap has created the ri_PartCheckMap of the child ResultRelInfo, the attribute values array is built via that map (inside reconstructTupleValues), which allows partition selection to be performed from the parent's view.

Finally, this patch additionally solves one more issue that is not directly related to the partition selection problem described above. Consider the case when the parent relation has two partitions and one of them has an attribute structure incompatible with the parent's due to dropped columns. The issue occurred when a split UPDATE on the parent partition was executed and its plan was built by the legacy planner. That means the ModifyTable node contains several subplans and several result relations inside estate->es_result_relations (the inheritance planner creates them, and the update is executed for each of them). During execution of this plan, when the update was executed on the partition that has the same relation descriptor as the parent, invalid behaviour could occur while preparing the tuple for insertion (see the last test case in the tests section, sketched below). In this case partition selection at the insert stage works correctly, because the partition does not differ from the parent relation. However, inside the get_part function, after the partition was selected correctly by selectPartition, the targetid_get_partition function could wrongly form the final ResultRelInfo by creating an unnecessary ri_partInsertMap map, which is used in the reconstructMatchingTupleSlot function (inside ExecInsert) to adjust the tuple to the selected partition's descriptor if the tuple does not fit. targetid_get_partition decides whether to build that map by comparing two relation descriptors: the one considered to be the parent's (parentInfo = estate->es_result_relations) and the one corresponding to the selected child partition (childInfo). But when the UPDATE is formed by the inheritance planner, parentInfo does not correspond to the true parent relation; it corresponds to one of the other partitions.
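A sketch of the kind of case the last test exercises follows. Again, the names are hypothetical and the DDL assumes Greenplum 6 syntax; this is not the actual qp_dropped_cols test:

    -- Partition r1 keeps the dropped-column slot (same layout as the
    -- root); r2 is exchanged with a table that never had that column.
    CREATE TABLE orders (id int, junk int, region int)
        DISTRIBUTED BY (id)
        PARTITION BY LIST (region)
        (PARTITION r1 VALUES (1),
         PARTITION r2 VALUES (2));

    ALTER TABLE orders DROP COLUMN junk;

    CREATE TABLE spare (id int, region int) DISTRIBUTED BY (id);
    ALTER TABLE orders EXCHANGE PARTITION r2 WITH TABLE spare;

    -- A split update (distribution-key change) planned by the legacy
    -- planner creates one subplan and one result relation per
    -- partition; rows re-inserted into the layout-compatible leaf r1
    -- could be remapped needlessly before the fix.
    UPDATE orders SET id = id + 1;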
And in the case, when selected partition is valid (it has the same relation descriptor with parent's), and parentInfo corresponds to invalid partition (it does not match with parent descriptor), the ri_partInsertMap could be built by mistake, what led to unnecessary tuple reformatting and invalid results. This patch solves this last issue by adding a check inside the targetid_get_partition function, which ensures that the estate->es_result_relations corresponds to the true root relation. That would be true for all the cases when the DML is executed on parent relation, except the UPDATE by legacy planner. And if the check is passed, the ri_partInsertMap is built, otherwise it's not. Moreover, during such UPDATE there is completely no need to reconstruct the tuple, because each ModifyTable's subplan will give already valid tuple. --- src/backend/executor/execMain.c | 78 ++++- src/backend/executor/nodeDML.c | 32 ++ src/backend/executor/nodeModifyTable.c | 177 ++++++---- src/include/executor/execDML.h | 3 + src/include/nodes/execnodes.h | 6 +- src/test/regress/expected/qp_dropped_cols.out | 299 ++++++++++++++++ .../expected/qp_dropped_cols_optimizer.out | 326 ++++++++++++++++++ src/test/regress/sql/qp_dropped_cols.sql | 137 ++++++++ 8 files changed, 969 insertions(+), 89 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 3dcdbb6f9578..ea4b672e8352 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -4591,6 +4591,7 @@ targetid_get_partition(Oid targetid, EState *estate, bool openIndices) { int natts; Relation resultRelation; + Oid parentRelid = estate->es_result_partitions->part->parrelid; natts = parentInfo->ri_RelationDesc->rd_att->natts; /* in base relation */ @@ -4603,10 +4604,27 @@ targetid_get_partition(Oid targetid, EState *estate, bool openIndices) if (openIndices) ExecOpenIndices(childInfo); - map_part_attrs(parentInfo->ri_RelationDesc, - childInfo->ri_RelationDesc, - &(childInfo->ri_partInsertMap), - TRUE); /* throw on error, so result not needed */ + /* + * es_result_relations does not always represent the parent relation. + * E.g. planner's UPDATE command on parent partition leads to multiple + * subplans and result relations due to preceding inheritance planning. + * In this case es_result_relations points to one of the partitions, not + * to the parent. Thus, the descriptor mapping should be performed only + * for the case if es_result_relations really corresponds to the parent. + * Otherwise, there is a chance to reconstruct already valid tuple and + * get the wrong results (e.g. target partition relation descriptor is + * different from parentInfo's, but it's UPDATE (legacy planner) and + * parentInfo represents another partition, which is not the true + * parent). Moreover, if we are initially modify a leaf partition, + * i.e we called a DML command straight on child partition, or it's + * inheritance plan execution, the tuple descriptor already matches + * the partition's, and the extra mapping is unnecessary. 
+ */ + if (RelationGetRelid(parentInfo->ri_RelationDesc) == parentRelid) + map_part_attrs(parentInfo->ri_RelationDesc, + childInfo->ri_RelationDesc, + &(childInfo->ri_partInsertMap), + TRUE); /* throw on error, so result not needed */ } return childInfo; } @@ -4631,22 +4649,60 @@ values_get_partition(Datum *values, bool *nulls, TupleDesc tupdesc, ResultRelInfo * slot_get_partition(TupleTableSlot *slot, EState *estate) { - ResultRelInfo *resultRelInfo; - AttrNumber max_attr; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + TupleDesc tupdesc; Datum *values; bool *nulls; Assert(PointerIsValid(estate->es_result_partitions)); - max_attr = estate->es_partition_state->max_partition_attr; + /* + * If we previously found out that we need to map attribute numbers + * (in case if child part has physically-different attribute numbers from + * parent's), we must extract slot values according to that mapping. + */ + if (resultRelInfo->ri_PartCheckMap != NULL) + { + Datum *slot_values; + bool *slot_nulls; + Relation parentRel = resultRelInfo->ri_PartitionParent; + AttrMap *map; + + Assert(parentRel != NULL); + tupdesc = RelationGetDescr(parentRel); + + slot_getallattrs(slot); + slot_values = slot_get_values(slot); + slot_nulls = slot_get_isnull(slot); + values = palloc(tupdesc->natts * sizeof(Datum)); + nulls = palloc0(tupdesc->natts * sizeof(bool)); + + /* Now we have values/nulls in parent's view. */ + map = resultRelInfo->ri_PartCheckMap; + reconstructTupleValues(map, slot_values, slot_nulls, slot->tts_tupleDescriptor->natts, + values, nulls, tupdesc->natts); + } + else + { + AttrNumber max_attr = estate->es_partition_state->max_partition_attr; - slot_getsomeattrs(slot, max_attr); - values = slot_get_values(slot); - nulls = slot_get_isnull(slot); + slot_getsomeattrs(slot, max_attr); + /* values/nulls pointing to partslot's array. */ + values = slot_get_values(slot); + nulls = slot_get_isnull(slot); + tupdesc = slot->tts_tupleDescriptor; + } - resultRelInfo = get_part(estate, values, nulls, slot->tts_tupleDescriptor, + resultRelInfo = get_part(estate, values, nulls, tupdesc, true); + /* Free up if we allocated mapped attributes. */ + if (values != slot_get_values(slot)) + pfree(values); + + if (nulls != slot_get_isnull(slot)) + pfree(nulls); + return resultRelInfo; } diff --git a/src/backend/executor/nodeDML.c b/src/backend/executor/nodeDML.c index 33b2edf5387f..d5dc775273ad 100644 --- a/src/backend/executor/nodeDML.c +++ b/src/backend/executor/nodeDML.c @@ -85,6 +85,38 @@ ExecDML(DMLState *node) /* remove 'junk' columns from tuple */ node->cleanedUpSlot = ExecFilterJunk(node->junkfilter, projectedSlot); + /* + * If we are modifying a leaf partition we have to ensure that partition + * selection operation will consider leaf partition's attributes as + * coherent with root partition's attribute numbers, because partition + * selection is performed using root's attribute numbers (all partition + * rules are based on the parent relation's tuple descriptor). In case + * when child partition has different attribute numbers from root's due to + * dropped columns, the partition selection may go wrong without extra + * validation. + */ + if (node->ps.state->es_result_partitions) + { + ResultRelInfo *relInfo = node->ps.state->es_result_relations; + + /* + * The DML is done on a leaf partition. In order to reuse the map, + * it will be allocated at es_result_relations. 
+ */ + if (RelationGetRelid(relInfo->ri_RelationDesc) != + node->ps.state->es_result_partitions->part->parrelid) + makePartitionCheckMap(node->ps.state, relInfo); + + /* + * DML node always performs partition selection, and if we want to + * reuse the map built in makePartitionCheckMap, we are allowed to + * reassign es_result_relation_info, because ExecInsert, ExecDelete + * changes it with target partition anyway. Moreover, without + * inheritance plan (ORCA never builds such plans) the + * es_result_relations will contain the only relation. + */ + node->ps.state->es_result_relation_info = relInfo; + } /* GPDB_91_MERGE_FIXME: * This kind of node is used by ORCA only. If in the future ORCA still uses * DML node, canSetTag should be saved in DML plan node and init-ed by diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 3cba0ee49db4..8d8572fdea84 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1132,7 +1132,6 @@ checkPartitionUpdate(EState *estate, TupleTableSlot *partslot, Datum *values = NULL; bool *nulls = NULL; TupleDesc tupdesc = NULL; - Oid parentRelid; Oid targetid; Assert(estate->es_partition_state != NULL && @@ -1144,81 +1143,12 @@ checkPartitionUpdate(EState *estate, TupleTableSlot *partslot, Assert(PointerIsValid(estate->es_result_partitions)); /* - * As opposed to INSERT, resultRelation here is the same child part - * as scan origin. However, the partition selection is done with the - * parent partition's attribute numbers, so if this result (child) part - * has physically-different attribute numbers due to dropped columns, - * we should map the child attribute numbers to the parent's attribute - * numbers to perform the partition selection. - * EState doesn't have the parent relation information at the moment, - * so we have to do a hard job here by opening it and compare the - * tuple descriptors. If we find we need to map attribute numbers, - * max_partition_attr could also be bogus for this child part, - * so we end up materializing the whole columns using slot_getallattrs(). - * The purpose of this code is just to prevent the tuple from - * incorrectly staying in default partition that has no constraint - * (parts with constraint will throw an error if the tuple is changing - * partition keys to out of part value anyway.) It's a bit overkill - * to do this complicated logic just for this purpose, which is necessary - * with our current partitioning design, but I hope some day we can - * change this so that we disallow phyisically-different tuple descriptor - * across partition. - */ - parentRelid = estate->es_result_partitions->part->parrelid; - - /* - * I don't believe this is the case currently, but we check the parent relid - * in case the updating partition has changed since the last time we opened it. - */ - if (resultRelInfo->ri_PartitionParent && - parentRelid != RelationGetRelid(resultRelInfo->ri_PartitionParent)) - { - resultRelInfo->ri_PartCheckTupDescMatch = 0; - if (resultRelInfo->ri_PartCheckMap != NULL) - pfree(resultRelInfo->ri_PartCheckMap); - if (resultRelInfo->ri_PartitionParent) - relation_close(resultRelInfo->ri_PartitionParent, AccessShareLock); - } - - /* - * Check this at the first pass only to avoid repeated catalog access. 
+ * If we find we need to map attribute numbers (in case if child part has + * physically-different attribute numbers from parent's, the mapping is + * performed inside the makePartitionCheckMap function) + * max_partition_attr could also be bogus for this child part, so we end + * up materializing the whole columns using slot_getallattrs(). */ - if (resultRelInfo->ri_PartCheckTupDescMatch == 0 && - parentRelid != RelationGetRelid(resultRelInfo->ri_RelationDesc)) - { - Relation parentRel; - TupleDesc resultTupdesc, parentTupdesc; - - /* - * We are on a child part, let's see the tuple descriptor looks like - * the parent's one. Probably this won't cause deadlock because - * DML should have opened the parent table with appropriate lock. - */ - parentRel = relation_open(parentRelid, AccessShareLock); - resultTupdesc = RelationGetDescr(resultRelationDesc); - parentTupdesc = RelationGetDescr(parentRel); - if (!equalTupleDescs(resultTupdesc, parentTupdesc, false)) - { - AttrMap *map; - MemoryContext oldcontext; - - /* Tuple looks different. Construct attribute mapping. */ - oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); - map_part_attrs(resultRelationDesc, parentRel, &map, true); - MemoryContextSwitchTo(oldcontext); - - /* And save it for later use. */ - resultRelInfo->ri_PartCheckMap = map; - - resultRelInfo->ri_PartCheckTupDescMatch = -1; - } - else - resultRelInfo->ri_PartCheckTupDescMatch = 1; - - resultRelInfo->ri_PartitionParent = parentRel; - /* parentRel will be closed as part of ResultRelInfo cleanup */ - } - if (resultRelInfo->ri_PartCheckMap != NULL) { Datum *parent_values; @@ -1927,6 +1857,19 @@ ExecModifyTable(ModifyTableState *node) slot = ExecFilterJunk(junkfilter, slot); } + /* + * We have to ensure that partition selection in INSERT or UPDATE will + * consider leaf partition's attributes as coherent with root + * partition's attribute numbers, because partition selection is + * performed using root's attribute numbers (all partition rules are + * based on the parent relation's tuple descriptor). In case when + * child partition has different attribute numbers from parent's + * due to dropped columns, the partition selection may go wrong without + * extra validation. + */ + if (operation != CMD_DELETE && estate->es_result_partitions) + makePartitionCheckMap(estate, estate->es_result_relation_info); + switch (operation) { case CMD_INSERT: @@ -2506,3 +2449,87 @@ ExecSquelchModifyTable(ModifyTableState *node) break; } } + +/* + * Build a attribute mapping between child partition and the root partition in + * case if child partition has physically-different attribute numbers from + * root's due to dropped columns. + */ +void +makePartitionCheckMap(EState *estate, ResultRelInfo *resultRelInfo) +{ + Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; + Oid parentRelid; + + Assert(PointerIsValid(estate->es_result_partitions)); + + /* + * The partition selection operation is done with the parent partition's + * attribute numbers, so if child partition has physically-different + * attribute numbers due to dropped columns, we should map the child + * attribute numbers to the parent's attribute numbers to perform the + * partition selection. EState may not have the parent relation + * information at the moment, so we have to do a hard job here by opening + * it and compare the tuple descriptors. 
The purpose of this code is to + * prevent the tuple from being incorrectly interpreted during partition + * selection, that can be performed in ExecInsert, ExecDelete and + * checkPartitionUpdate functions when we work with the leaf partition as + * result relation. + */ + parentRelid = estate->es_result_partitions->part->parrelid; + + /* + * I don't believe this is the case currently, but we check the parent + * relid in case the updating partition has changed since the last time we + * opened it. + */ + if (resultRelInfo->ri_PartitionParent && + parentRelid != RelationGetRelid(resultRelInfo->ri_PartitionParent)) + { + resultRelInfo->ri_PartCheckTupDescMatch = 0; + if (resultRelInfo->ri_PartCheckMap != NULL) + pfree(resultRelInfo->ri_PartCheckMap); + if (resultRelInfo->ri_PartitionParent) + relation_close(resultRelInfo->ri_PartitionParent, AccessShareLock); + } + + /* + * Check this at the first pass only to avoid repeated catalog access. + */ + if (resultRelInfo->ri_PartCheckTupDescMatch == 0 && + parentRelid != RelationGetRelid(resultRelInfo->ri_RelationDesc)) + { + Relation parentRel; + TupleDesc resultTupdesc, + parentTupdesc; + + /* + * We are on a child part, let's see the tuple descriptor looks like + * the parent's one. Probably this won't cause deadlock because DML + * should have opened the parent table with appropriate lock. + */ + parentRel = relation_open(parentRelid, AccessShareLock); + resultTupdesc = RelationGetDescr(resultRelationDesc); + parentTupdesc = RelationGetDescr(parentRel); + if (!equalTupleDescs(resultTupdesc, parentTupdesc, false)) + { + AttrMap *map; + MemoryContext oldcontext; + + /* Tuple looks different. Construct attribute mapping. */ + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + map_part_attrs(resultRelationDesc, parentRel, &map, true); + MemoryContextSwitchTo(oldcontext); + + /* And save it for later use. */ + resultRelInfo->ri_PartCheckMap = map; + + resultRelInfo->ri_PartCheckTupDescMatch = -1; + } + else + resultRelInfo->ri_PartCheckTupDescMatch = 1; + + resultRelInfo->ri_PartitionParent = parentRel; + /* parentRel will be closed as part of ResultRelInfo cleanup */ + } +} diff --git a/src/include/executor/execDML.h b/src/include/executor/execDML.h index 2d0124897bab..ff66c9e4cc4a 100644 --- a/src/include/executor/execDML.h +++ b/src/include/executor/execDML.h @@ -24,6 +24,9 @@ reconstructTupleValues(AttrMap *map, extern TupleTableSlot * reconstructMatchingTupleSlot(TupleTableSlot *slot, ResultRelInfo *resultRelInfo); +extern void +makePartitionCheckMap(EState *estate, ResultRelInfo *resultRelInfo); + /* * In PostgreSQL, ExecInsert, ExecDelete and ExecUpdate are static in nodeModifyTable.c. * In GPDB, they're exported. 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 0c6692e9eac9..2ff11d3f587b 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -382,11 +382,11 @@ typedef struct ResultRelInfo uint64 ri_aoprocessed; /* tuples added/deleted for AO */ struct AttrMap *ri_partInsertMap; TupleTableSlot *ri_resultSlot; - /* Parent relation in checkPartitionUpdate */ + /* Parent relation in makePartitionCheckMap */ Relation ri_PartitionParent; - /* tupdesc_match for checkPartitionUpdate */ + /* tupdesc_match for makePartitionCheckMap */ int ri_PartCheckTupDescMatch; - /* Attribute map in checkPartitionUpdate */ + /* Attribute map in makePartitionCheckMap */ struct AttrMap *ri_PartCheckMap; /* diff --git a/src/test/regress/expected/qp_dropped_cols.out b/src/test/regress/expected/qp_dropped_cols.out index c0aa54e30f20..bdfc308ae2db 100644 --- a/src/test/regress/expected/qp_dropped_cols.out +++ b/src/test/regress/expected/qp_dropped_cols.out @@ -16360,3 +16360,302 @@ DELETE FROM dist_key_dropped_pt WHERE b=6; -- the tables, or the pg_upgrade test fails. set client_min_messages='warning'; drop schema qp_dropped_cols cascade; +-- Test modifying DML on leaf partition when parent has dropped columns and +-- the partition has not. Ensure that DML commands pass without execution +-- errors and produce valid results. +RESET search_path; +-- start_ignore +DROP TABLE IF EXISTS t_part_dropped; +-- end_ignore +CREATE TABLE t_part_dropped (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); +ALTER TABLE t_part_dropped DROP c2; +ALTER TABLE t_part_dropped ADD PARTITION p2 VALUES (2); +-- Partition selection should go smoothly when inserting into leaf +-- partition with different attribute structure. +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_dropped VALUES (1, 2, 4); + QUERY PLAN +---------------------------------------- + Insert on public.t_part_dropped + -> Result + Output: 1, NULL::integer, 2, 4 + Optimizer: Postgres query optimizer + Settings: optimizer=off +(5 rows) + +INSERT INTO t_part_dropped VALUES (1, 2, 4); +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 4); + QUERY PLAN +------------------------------------------ + Insert on public.t_part_dropped_1_prt_p2 + -> Result + Output: 1, 2, 4 + Optimizer: Postgres query optimizer + Settings: optimizer=off +(5 rows) + +INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 4); +INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 0); +-- Ensure that split update on leaf and root partitions does not +-- throw partition selection error in both planners. 
+EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped_1_prt_p2 SET c1 = 2; + QUERY PLAN +--------------------------------------------------------------------- + Update on public.t_part_dropped_1_prt_p2 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: 2, c3, c4, c1, ctid, gp_segment_id, (DMLAction) + Hash Key: c1 + -> Split + Output: 2, c3, c4, c1, ctid, gp_segment_id, DMLAction + -> Seq Scan on public.t_part_dropped_1_prt_p2 + Output: 2, c3, c4, c1, ctid, gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(10 rows) + +UPDATE t_part_dropped_1_prt_p2 SET c1 = 2; +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped SET c1 = 3; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Update on public.t_part_dropped_1_prt_p0 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: 3, NULL::integer, t_part_dropped_1_prt_p0.c3, t_part_dropped_1_prt_p0.c4, t_part_dropped_1_prt_p0.c1, t_part_dropped_1_prt_p0.ctid, t_part_dropped_1_prt_p0.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, NULL::integer, t_part_dropped_1_prt_p0.c3, t_part_dropped_1_prt_p0.c4, t_part_dropped_1_prt_p0.c1, t_part_dropped_1_prt_p0.ctid, t_part_dropped_1_prt_p0.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_dropped_1_prt_p0 + Output: 3, NULL::integer, t_part_dropped_1_prt_p0.c3, t_part_dropped_1_prt_p0.c4, t_part_dropped_1_prt_p0.c1, t_part_dropped_1_prt_p0.ctid, t_part_dropped_1_prt_p0.gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: 3, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_dropped_1_prt_p2 + Output: 3, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(17 rows) + +UPDATE t_part_dropped SET c1 = 3; +-- Ensure that split update on leaf partition does not throw constraint error +-- (executor does not choose the wrong partition at insert stage of update). +INSERT INTO t_part_dropped VALUES (1, 2, 0); +UPDATE t_part_dropped_1_prt_p2 SET c1 = 2 WHERE c4 = 0; +SELECT count(*) FROM t_part_dropped_1_prt_p2; + count +------- + 4 +(1 row) + +-- Split update on root relation should choose the correct partition +-- at insert (executor doesn't put the tuple to wrong partition for legacy +-- planner case). 
+EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped SET c1 = 3 WHERE c4 = 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Update on public.t_part_dropped_1_prt_p0 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: 3, NULL::integer, t_part_dropped_1_prt_p0.c3, t_part_dropped_1_prt_p0.c4, t_part_dropped_1_prt_p0.c1, t_part_dropped_1_prt_p0.ctid, t_part_dropped_1_prt_p0.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, NULL::integer, t_part_dropped_1_prt_p0.c3, t_part_dropped_1_prt_p0.c4, t_part_dropped_1_prt_p0.c1, t_part_dropped_1_prt_p0.ctid, t_part_dropped_1_prt_p0.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_dropped_1_prt_p0 + Output: 3, NULL::integer, t_part_dropped_1_prt_p0.c3, t_part_dropped_1_prt_p0.c4, t_part_dropped_1_prt_p0.c1, t_part_dropped_1_prt_p0.ctid, t_part_dropped_1_prt_p0.gp_segment_id + Filter: (t_part_dropped_1_prt_p0.c4 = 0) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: 3, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_dropped_1_prt_p2 + Output: 3, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id + Filter: (t_part_dropped_1_prt_p2.c4 = 0) + Optimizer: Postgres query optimizer + Settings: optimizer=off +(19 rows) + +UPDATE t_part_dropped SET c1 = 3 WHERE c4 = 0; +SELECT count(*) FROM t_part_dropped_1_prt_p2; + count +------- + 4 +(1 row) + +SELECT * FROM t_part_dropped_1_prt_p0; + c1 | c3 | c4 +----+----+---- +(0 rows) + +-- For ORCA the partition selection error should not occur. +EXPLAIN (COSTS OFF, VERBOSE) DELETE FROM t_part_dropped_1_prt_p2; + QUERY PLAN +-------------------------------------------------- + Delete on public.t_part_dropped_1_prt_p2 + -> Seq Scan on public.t_part_dropped_1_prt_p2 + Output: ctid, gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(5 rows) + +DELETE FROM t_part_dropped_1_prt_p2; +DROP TABLE t_part_dropped; +-- Test modifying DML on leaf partition after it was exchanged with a relation, +-- that contained dropped columns. Ensure that DML commands pass without +-- execution errors and produce valid results. 
+-- start_ignore +DROP TABLE IF EXISTS t_part; +DROP TABLE IF EXISTS t_new_part; +-- end_ignore +CREATE TABLE t_part (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); +ALTER TABLE t_part ADD PARTITION p2 VALUES (2); +CREATE TABLE t_new_part (c1 int, c11 int, c2 int, c3 int, c4 int); +ALTER TABLE t_new_part DROP c11; +ALTER TABLE t_part EXCHANGE PARTITION FOR (2) WITH TABLE t_new_part; +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part VALUES (1, 5, 2, 5); + QUERY PLAN +------------------------------------- + Insert on public.t_part + -> Result + Output: 1, 5, 2, 5 + Optimizer: Postgres query optimizer + Settings: optimizer=off +(5 rows) + +INSERT INTO t_part VALUES (1, 5, 2, 5); +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_1_prt_p2 VALUES (1, 5, 2, 5); + QUERY PLAN +------------------------------------------- + Insert on public.t_part_1_prt_p2 + -> Result + Output: 1, NULL::integer, 5, 2, 5 + Optimizer: Postgres query optimizer + Settings: optimizer=off +(5 rows) + +INSERT INTO t_part_1_prt_p2 VALUES (1, 5, 2, 5); +-- Ensure that split update on leaf and root partitions does not +-- throw partition selection error in both planners. +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_1_prt_p2 SET c1 = 2; + QUERY PLAN +---------------------------------------------------------------------------------------- + Update on public.t_part_1_prt_p2 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: 2, NULL::integer, c2, c3, c4, c1, ctid, gp_segment_id, (DMLAction) + Hash Key: c1 + -> Split + Output: 2, NULL::integer, c2, c3, c4, c1, ctid, gp_segment_id, DMLAction + -> Seq Scan on public.t_part_1_prt_p2 + Output: 2, NULL::integer, c2, c3, c4, c1, ctid, gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(10 rows) + +UPDATE t_part_1_prt_p2 SET c1 = 2; +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part SET c1 = 3; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Update on public.t_part_1_prt_p0 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: 3, t_part_1_prt_p0.c2, t_part_1_prt_p0.c3, t_part_1_prt_p0.c4, t_part_1_prt_p0.c1, t_part_1_prt_p0.ctid, t_part_1_prt_p0.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, t_part_1_prt_p0.c2, t_part_1_prt_p0.c3, t_part_1_prt_p0.c4, t_part_1_prt_p0.c1, t_part_1_prt_p0.ctid, t_part_1_prt_p0.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_1_prt_p0 + Output: 3, t_part_1_prt_p0.c2, t_part_1_prt_p0.c3, t_part_1_prt_p0.c4, t_part_1_prt_p0.c1, t_part_1_prt_p0.ctid, t_part_1_prt_p0.gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: 3, NULL::integer, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.c1, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, NULL::integer, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.c1, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_1_prt_p2 + Output: 3, NULL::integer, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.c1, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(17 rows) + +UPDATE t_part SET c1 = 3; +-- Ensure that split update on leaf partition does not throw constraint error +-- (executor 
does not choose the wrong partition at insert stage of update). +INSERT INTO t_part VALUES (1, 0, 2, 0); +UPDATE t_part_1_prt_p2 SET c1 = 2 WHERE c4 = 0; +SELECT count(*) FROM t_part_1_prt_p2; + count +------- + 3 +(1 row) + +-- For ORCA the partition selection error should not occur. +EXPLAIN (COSTS OFF, VERBOSE) DELETE FROM t_part_1_prt_p2; + QUERY PLAN +------------------------------------------ + Delete on public.t_part_1_prt_p2 + -> Seq Scan on public.t_part_1_prt_p2 + Output: ctid, gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(5 rows) + +DELETE FROM t_part_1_prt_p2; +DROP TABLE t_part; +DROP TABLE t_new_part; +-- Test split update execution of a plan from legacy planner in case +-- when parent relation has several partitions, and one of them has +-- physically-different attribute structure from parent's due to +-- dropped columns. Ensure that split update does not reconstruct tuple +-- of correct (without dropped attributes) partition. +CREATE TABLE t_part (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); +-- Legacy planner UPDATE's plan consists of several subplans (partitioned +-- relations are considered in inheritance planner), and their execution +-- order varies depending on the order the partitions have been added. +-- Therefore, we add each partition through EXCHANGE to get UPDATE's +-- test plan in a form such that the t_new_part0 update comes first, and the +-- t_new_part2 comes second. This aspect is crucial because executor's +-- partitions related logic depended on that fact, what led to the +-- issue this test demonstrates. +-- This paritition is not compatible with the parent due to dropped columns +CREATE TABLE t_new_part0 (c1 int, c11 int, c2 int, c3 int, c4 int); +ALTER TABLE t_new_part0 drop c11; +ALTER TABLE t_part EXCHANGE PARTITION FOR (0) WITH TABLE t_new_part0; +-- This partition is compatible with the parent. +ALTER TABLE t_part ADD PARTITION p2 VALUES (2); +CREATE TABLE t_new_part2 (c1 int, c2 int, c3 int, c4 int); +ALTER TABLE t_part EXCHANGE PARTITION FOR (2) WITH TABLE t_new_part2; +-- Insert into correct partition, and perform split update on root, +-- that will execute split update on each subplan in case of inheritance +-- plan (legacy planner). Ensure that split update does not reconstruct the +-- tuple at insert. 
+INSERT INTO t_part VALUES (1, 4, 2, 2); +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part SET c1 = 3; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Update on public.t_part_1_prt_p0 + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: 3, NULL::integer, t_part_1_prt_p0.c2, t_part_1_prt_p0.c3, t_part_1_prt_p0.c4, t_part_1_prt_p0.c1, t_part_1_prt_p0.ctid, t_part_1_prt_p0.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, NULL::integer, t_part_1_prt_p0.c2, t_part_1_prt_p0.c3, t_part_1_prt_p0.c4, t_part_1_prt_p0.c1, t_part_1_prt_p0.ctid, t_part_1_prt_p0.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_1_prt_p0 + Output: 3, NULL::integer, t_part_1_prt_p0.c2, t_part_1_prt_p0.c3, t_part_1_prt_p0.c4, t_part_1_prt_p0.c1, t_part_1_prt_p0.ctid, t_part_1_prt_p0.gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: 3, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.c1, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, (DMLAction) + Hash Key: "outer".c1 + -> Split + Output: 3, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.c1, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, DMLAction + -> Seq Scan on public.t_part_1_prt_p2 + Output: 3, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.c1, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id + Optimizer: Postgres query optimizer + Settings: optimizer=off +(17 rows) + +UPDATE t_part SET c1 = 3; +SELECT * FROM t_part_1_prt_p2; + c1 | c2 | c3 | c4 +----+----+----+---- + 3 | 4 | 2 | 2 +(1 row) + +DROP TABLE t_part; +DROP TABLE t_new_part0; +DROP TABLE t_new_part2; diff --git a/src/test/regress/expected/qp_dropped_cols_optimizer.out b/src/test/regress/expected/qp_dropped_cols_optimizer.out index 6ee68fc7f4a6..ff72f0edf2b6 100644 --- a/src/test/regress/expected/qp_dropped_cols_optimizer.out +++ b/src/test/regress/expected/qp_dropped_cols_optimizer.out @@ -16273,3 +16273,329 @@ DELETE FROM dist_key_dropped_pt WHERE b=6; -- the tables, or the pg_upgrade test fails. set client_min_messages='warning'; drop schema qp_dropped_cols cascade; +-- Test modifying DML on leaf partition when parent has dropped columns and +-- the partition has not. Ensure that DML commands pass without execution +-- errors and produce valid results. +RESET search_path; +-- start_ignore +DROP TABLE IF EXISTS t_part_dropped; +-- end_ignore +CREATE TABLE t_part_dropped (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); +ALTER TABLE t_part_dropped DROP c2; +ALTER TABLE t_part_dropped ADD PARTITION p2 VALUES (2); +-- Partition selection should go smoothly when inserting into leaf +-- partition with different attribute structure. 
+EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_dropped VALUES (1, 2, 4); + QUERY PLAN +-------------------------------------------------- + Insert + Output: c1, NULL::integer, c3, c4, ColRef_0004 + -> Result + Output: c1, c3, c4, 1 + -> Result + Output: c1, c3, c4 + -> Result + Output: 1, 2, 4 + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +INSERT INTO t_part_dropped VALUES (1, 2, 4); +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 4); + QUERY PLAN +---------------------------------------- + Insert + Output: c1, c3, c4, ColRef_0004 + -> Result + Output: c1, c3, c4, 1 + -> Result + Output: c1, c3, c4 + -> Result + Output: 1, 2, 4 + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 4); +INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 0); +-- Ensure that split update on leaf and root partitions does not +-- throw partition selection error in both planners. +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped_1_prt_p2 SET c1 = 2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Update + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, (DMLAction), t_part_dropped_1_prt_p2.ctid + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, (DMLAction) + Hash Key: t_part_dropped_1_prt_p2.c1 + -> Split + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, DMLAction + -> Result + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, 2, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id + -> Seq Scan on public.t_part_dropped_1_prt_p2 + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +UPDATE t_part_dropped_1_prt_p2 SET c1 = 2; +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped SET c1 = 3; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------- + Update + Output: t_part_dropped.c1, NULL::integer, t_part_dropped.c3, t_part_dropped.c4, (DMLAction), t_part_dropped.ctid + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id, (DMLAction) + Hash Key: t_part_dropped.c1 + -> Split + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id, DMLAction + -> Result + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, 3, t_part_dropped.ctid, t_part_dropped.gp_segment_id + -> Sequence + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id + -> Partition Selector for t_part_dropped (dynamic scan id: 1) + Partitions selected: 2 (out of 2) + -> Dynamic Seq Scan on public.t_part_dropped (dynamic scan id: 1) + Output: t_part_dropped.c1, t_part_dropped.c3, 
t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(16 rows) + +UPDATE t_part_dropped SET c1 = 3; +-- Ensure that split update on leaf partition does not throw constraint error +-- (executor does not choose the wrong partition at insert stage of update). +INSERT INTO t_part_dropped VALUES (1, 2, 0); +UPDATE t_part_dropped_1_prt_p2 SET c1 = 2 WHERE c4 = 0; +SELECT count(*) FROM t_part_dropped_1_prt_p2; + count +------- + 4 +(1 row) + +-- Split update on root relation should choose the correct partition +-- at insert (executor doesn't put the tuple to wrong partition for legacy +-- planner case). +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped SET c1 = 3 WHERE c4 = 0; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------- + Update + Output: t_part_dropped.c1, NULL::integer, t_part_dropped.c3, t_part_dropped.c4, (DMLAction), t_part_dropped.ctid + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id, (DMLAction) + Hash Key: t_part_dropped.c1 + -> Split + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id, DMLAction + -> Result + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, 3, t_part_dropped.ctid, t_part_dropped.gp_segment_id + -> Sequence + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id + -> Partition Selector for t_part_dropped (dynamic scan id: 1) + Partitions selected: 2 (out of 2) + -> Dynamic Seq Scan on public.t_part_dropped (dynamic scan id: 1) + Output: t_part_dropped.c1, t_part_dropped.c3, t_part_dropped.c4, t_part_dropped.ctid, t_part_dropped.gp_segment_id + Filter: (t_part_dropped.c4 = 0) + Optimizer: Pivotal Optimizer (GPORCA) +(17 rows) + +UPDATE t_part_dropped SET c1 = 3 WHERE c4 = 0; +SELECT count(*) FROM t_part_dropped_1_prt_p2; + count +------- + 4 +(1 row) + +SELECT * FROM t_part_dropped_1_prt_p0; + c1 | c3 | c4 +----+----+---- +(0 rows) + +-- For ORCA the partition selection error should not occur. +EXPLAIN (COSTS OFF, VERBOSE) DELETE FROM t_part_dropped_1_prt_p2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Delete + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, "outer".ColRef_0010, t_part_dropped_1_prt_p2.ctid + -> Result + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id, 0 + -> Seq Scan on public.t_part_dropped_1_prt_p2 + Output: t_part_dropped_1_prt_p2.c1, t_part_dropped_1_prt_p2.c3, t_part_dropped_1_prt_p2.c4, t_part_dropped_1_prt_p2.ctid, t_part_dropped_1_prt_p2.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +DELETE FROM t_part_dropped_1_prt_p2; +DROP TABLE t_part_dropped; +-- Test modifying DML on leaf partition after it was exchanged with a relation, +-- that contained dropped columns. Ensure that DML commands pass without +-- execution errors and produce valid results. 
+-- start_ignore +DROP TABLE IF EXISTS t_part; +DROP TABLE IF EXISTS t_new_part; +-- end_ignore +CREATE TABLE t_part (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); +ALTER TABLE t_part ADD PARTITION p2 VALUES (2); +CREATE TABLE t_new_part (c1 int, c11 int, c2 int, c3 int, c4 int); +ALTER TABLE t_new_part DROP c11; +ALTER TABLE t_part EXCHANGE PARTITION FOR (2) WITH TABLE t_new_part; +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part VALUES (1, 5, 2, 5); + QUERY PLAN +---------------------------------------- + Insert + Output: c1, c2, c3, c4, ColRef_0005 + -> Result + Output: c1, c2, c3, c4, 1 + -> Result + Output: c1, c2, c3, c4 + -> Result + Output: 1, 5, 2, 5 + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +INSERT INTO t_part VALUES (1, 5, 2, 5); +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_1_prt_p2 VALUES (1, 5, 2, 5); + QUERY PLAN +------------------------------------------------------ + Insert + Output: c1, NULL::integer, c2, c3, c4, ColRef_0005 + -> Result + Output: c1, c2, c3, c4, 1 + -> Result + Output: c1, c2, c3, c4 + -> Result + Output: 1, 5, 2, 5 + -> Result + Output: true + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +INSERT INTO t_part_1_prt_p2 VALUES (1, 5, 2, 5); +-- Ensure that split update on leaf and root partitions does not +-- throw partition selection error in both planners. +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_1_prt_p2 SET c1 = 2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Update + Output: t_part_1_prt_p2.c1, NULL::integer, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, (DMLAction), t_part_1_prt_p2.ctid + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t_part_1_prt_p2.c1, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, (DMLAction) + Hash Key: t_part_1_prt_p2.c1 + -> Split + Output: t_part_1_prt_p2.c1, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, DMLAction + -> Result + Output: t_part_1_prt_p2.c1, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, 2, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id + -> Seq Scan on public.t_part_1_prt_p2 + Output: t_part_1_prt_p2.c1, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +UPDATE t_part_1_prt_p2 SET c1 = 2; +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part SET c1 = 3; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Update + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, (DMLAction), t_part.ctid + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id, (DMLAction) + Hash Key: t_part.c1 + -> Split + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id, DMLAction + -> Result + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, 3, t_part.ctid, t_part.gp_segment_id + -> Sequence + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id + -> Partition Selector for t_part (dynamic scan id: 1) + Partitions selected: 2 (out of 2) + -> Dynamic Seq Scan on public.t_part (dynamic scan id: 
1) + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(16 rows) + +UPDATE t_part SET c1 = 3; +-- Ensure that split update on leaf partition does not throw constraint error +-- (executor does not choose the wrong partition at insert stage of update). +INSERT INTO t_part VALUES (1, 0, 2, 0); +UPDATE t_part_1_prt_p2 SET c1 = 2 WHERE c4 = 0; +SELECT count(*) FROM t_part_1_prt_p2; + count +------- + 3 +(1 row) + +-- For ORCA the partition selection error should not occur. +EXPLAIN (COSTS OFF, VERBOSE) DELETE FROM t_part_1_prt_p2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------- + Delete + Output: t_part_1_prt_p2.c1, NULL::integer, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, "outer".ColRef_0011, t_part_1_prt_p2.ctid + -> Result + Output: t_part_1_prt_p2.c1, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id, 0 + -> Seq Scan on public.t_part_1_prt_p2 + Output: t_part_1_prt_p2.c1, t_part_1_prt_p2.c2, t_part_1_prt_p2.c3, t_part_1_prt_p2.c4, t_part_1_prt_p2.ctid, t_part_1_prt_p2.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +DELETE FROM t_part_1_prt_p2; +DROP TABLE t_part; +DROP TABLE t_new_part; +-- Test split update execution of a plan from legacy planner in case +-- when parent relation has several partitions, and one of them has +-- physically-different attribute structure from parent's due to +-- dropped columns. Ensure that split update does not reconstruct tuple +-- of correct (without dropped attributes) partition. +CREATE TABLE t_part (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); +-- Legacy planner UPDATE's plan consists of several subplans (partitioned +-- relations are considered in inheritance planner), and their execution +-- order varies depending on the order the partitions have been added. +-- Therefore, we add each partition through EXCHANGE to get UPDATE's +-- test plan in a form such that the t_new_part0 update comes first, and the +-- t_new_part2 comes second. This aspect is crucial because executor's +-- partitions related logic depended on that fact, what led to the +-- issue this test demonstrates. +-- This paritition is not compatible with the parent due to dropped columns +CREATE TABLE t_new_part0 (c1 int, c11 int, c2 int, c3 int, c4 int); +ALTER TABLE t_new_part0 drop c11; +ALTER TABLE t_part EXCHANGE PARTITION FOR (0) WITH TABLE t_new_part0; +-- This partition is compatible with the parent. +ALTER TABLE t_part ADD PARTITION p2 VALUES (2); +CREATE TABLE t_new_part2 (c1 int, c2 int, c3 int, c4 int); +ALTER TABLE t_part EXCHANGE PARTITION FOR (2) WITH TABLE t_new_part2; +-- Insert into correct partition, and perform split update on root, +-- that will execute split update on each subplan in case of inheritance +-- plan (legacy planner). Ensure that split update does not reconstruct the +-- tuple at insert. 
+INSERT INTO t_part VALUES (1, 4, 2, 2); +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part SET c1 = 3; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Update + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, (DMLAction), t_part.ctid + -> Redistribute Motion 3:3 (slice1; segments: 3) + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id, (DMLAction) + Hash Key: t_part.c1 + -> Split + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id, DMLAction + -> Result + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, 3, t_part.ctid, t_part.gp_segment_id + -> Sequence + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id + -> Partition Selector for t_part (dynamic scan id: 1) + Partitions selected: 2 (out of 2) + -> Dynamic Seq Scan on public.t_part (dynamic scan id: 1) + Output: t_part.c1, t_part.c2, t_part.c3, t_part.c4, t_part.ctid, t_part.gp_segment_id + Optimizer: Pivotal Optimizer (GPORCA) +(16 rows) + +UPDATE t_part SET c1 = 3; +SELECT * FROM t_part_1_prt_p2; + c1 | c2 | c3 | c4 +----+----+----+---- + 3 | 4 | 2 | 2 +(1 row) + +DROP TABLE t_part; +DROP TABLE t_new_part0; +DROP TABLE t_new_part2; diff --git a/src/test/regress/sql/qp_dropped_cols.sql b/src/test/regress/sql/qp_dropped_cols.sql index 6823131c0993..5a93bd5aec72 100644 --- a/src/test/regress/sql/qp_dropped_cols.sql +++ b/src/test/regress/sql/qp_dropped_cols.sql @@ -8687,3 +8687,140 @@ DELETE FROM dist_key_dropped_pt WHERE b=6; -- the tables, or the pg_upgrade test fails. set client_min_messages='warning'; drop schema qp_dropped_cols cascade; + +-- Test modifying DML on leaf partition when parent has dropped columns and +-- the partition has not. Ensure that DML commands pass without execution +-- errors and produce valid results. +RESET search_path; +-- start_ignore +DROP TABLE IF EXISTS t_part_dropped; +-- end_ignore +CREATE TABLE t_part_dropped (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); + +ALTER TABLE t_part_dropped DROP c2; +ALTER TABLE t_part_dropped ADD PARTITION p2 VALUES (2); + +-- Partition selection should go smoothly when inserting into leaf +-- partition with different attribute structure. +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_dropped VALUES (1, 2, 4); +INSERT INTO t_part_dropped VALUES (1, 2, 4); + +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 4); +INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 4); + +INSERT INTO t_part_dropped_1_prt_p2 VALUES (1, 2, 0); + +-- Ensure that split update on leaf and root partitions does not +-- throw partition selection error in both planners. +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped_1_prt_p2 SET c1 = 2; +UPDATE t_part_dropped_1_prt_p2 SET c1 = 2; + +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped SET c1 = 3; +UPDATE t_part_dropped SET c1 = 3; + +-- Ensure that split update on leaf partition does not throw constraint error +-- (executor does not choose the wrong partition at insert stage of update). +INSERT INTO t_part_dropped VALUES (1, 2, 0); +UPDATE t_part_dropped_1_prt_p2 SET c1 = 2 WHERE c4 = 0; + +SELECT count(*) FROM t_part_dropped_1_prt_p2; + +-- Split update on root relation should choose the correct partition +-- at insert (executor doesn't put the tuple to wrong partition for legacy +-- planner case). 
+EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_dropped SET c1 = 3 WHERE c4 = 0; +UPDATE t_part_dropped SET c1 = 3 WHERE c4 = 0; + +SELECT count(*) FROM t_part_dropped_1_prt_p2; +SELECT * FROM t_part_dropped_1_prt_p0; + +-- For ORCA the partition selection error should not occur. +EXPLAIN (COSTS OFF, VERBOSE) DELETE FROM t_part_dropped_1_prt_p2; +DELETE FROM t_part_dropped_1_prt_p2; + +DROP TABLE t_part_dropped; + +-- Test modifying DML on leaf partition after it was exchanged with a relation, +-- that contained dropped columns. Ensure that DML commands pass without +-- execution errors and produce valid results. +-- start_ignore +DROP TABLE IF EXISTS t_part; +DROP TABLE IF EXISTS t_new_part; +-- end_ignore +CREATE TABLE t_part (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); + +ALTER TABLE t_part ADD PARTITION p2 VALUES (2); +CREATE TABLE t_new_part (c1 int, c11 int, c2 int, c3 int, c4 int); +ALTER TABLE t_new_part DROP c11; +ALTER TABLE t_part EXCHANGE PARTITION FOR (2) WITH TABLE t_new_part; + +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part VALUES (1, 5, 2, 5); +INSERT INTO t_part VALUES (1, 5, 2, 5); + +EXPLAIN (COSTS OFF, VERBOSE) INSERT INTO t_part_1_prt_p2 VALUES (1, 5, 2, 5); +INSERT INTO t_part_1_prt_p2 VALUES (1, 5, 2, 5); + +-- Ensure that split update on leaf and root partitions does not +-- throw partition selection error in both planners. +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part_1_prt_p2 SET c1 = 2; +UPDATE t_part_1_prt_p2 SET c1 = 2; + +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part SET c1 = 3; +UPDATE t_part SET c1 = 3; + +-- Ensure that split update on leaf partition does not throw constraint error +-- (executor does not choose the wrong partition at insert stage of update). +INSERT INTO t_part VALUES (1, 0, 2, 0); +UPDATE t_part_1_prt_p2 SET c1 = 2 WHERE c4 = 0; + +SELECT count(*) FROM t_part_1_prt_p2; + +-- For ORCA the partition selection error should not occur. +EXPLAIN (COSTS OFF, VERBOSE) DELETE FROM t_part_1_prt_p2; +DELETE FROM t_part_1_prt_p2; + +DROP TABLE t_part; +DROP TABLE t_new_part; + +-- Test split update execution of a plan from legacy planner in case +-- when parent relation has several partitions, and one of them has +-- physically-different attribute structure from parent's due to +-- dropped columns. Ensure that split update does not reconstruct tuple +-- of correct (without dropped attributes) partition. +CREATE TABLE t_part (c1 int, c2 int, c3 int, c4 int) DISTRIBUTED BY (c1) +PARTITION BY LIST (c3) (PARTITION p0 VALUES (0)); + +-- Legacy planner UPDATE's plan consists of several subplans (partitioned +-- relations are considered in inheritance planner), and their execution +-- order varies depending on the order the partitions have been added. +-- Therefore, we add each partition through EXCHANGE to get UPDATE's +-- test plan in a form such that the t_new_part0 update comes first, and the +-- t_new_part2 comes second. This aspect is crucial because executor's +-- partitions related logic depended on that fact, what led to the +-- issue this test demonstrates. +-- This paritition is not compatible with the parent due to dropped columns +CREATE TABLE t_new_part0 (c1 int, c11 int, c2 int, c3 int, c4 int); +ALTER TABLE t_new_part0 drop c11; +ALTER TABLE t_part EXCHANGE PARTITION FOR (0) WITH TABLE t_new_part0; + +-- This partition is compatible with the parent. 
+ALTER TABLE t_part ADD PARTITION p2 VALUES (2); +CREATE TABLE t_new_part2 (c1 int, c2 int, c3 int, c4 int); +ALTER TABLE t_part EXCHANGE PARTITION FOR (2) WITH TABLE t_new_part2; + +-- Insert into correct partition, and perform split update on root, +-- that will execute split update on each subplan in case of inheritance +-- plan (legacy planner). Ensure that split update does not reconstruct the +-- tuple at insert. +INSERT INTO t_part VALUES (1, 4, 2, 2); + +EXPLAIN (COSTS OFF, VERBOSE) UPDATE t_part SET c1 = 3; +UPDATE t_part SET c1 = 3; + +SELECT * FROM t_part_1_prt_p2; + +DROP TABLE t_part; +DROP TABLE t_new_part0; +DROP TABLE t_new_part2; From 885a55d3b6f4979180979b79cf03260be864954e Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Wed, 20 Sep 2023 12:12:10 -0700 Subject: [PATCH 101/106] Fix a flakiness with test gp_check_files This should have be done with #16428, but we need to disable autovacuum when running the gp_check_files regress test. Otherwise we might see errors like: ``` @@ -53,12 +53,8 @@ -- check orphaned files, note that this forces a checkpoint internally. set client_min_messages = ERROR; select gp_segment_id, filename from run_orphaned_files_view(); - gp_segment_id | filename ----------------+---------- - 1 | 987654 - 1 | 987654.3 -(2 rows) - +ERROR: failed to retrieve orphaned files after 10 minutes of retries. +CONTEXT: PL/pgSQL function run_orphaned_files_view() line 19 at RAISE reset client_min_messages; ``` In the log we have: ``` 2023-09-20 15:33:00.766420 UTC,"gpadmin","regression",p148081,th-589358976,"[local]",,2023-09-20 15:31:39 UTC,0,con19,cmd65,seg-1,,dx38585,,sx1,"LOG","00000","attempt failed 17 with error: There is a client session running on one or more segment. Aborting...",,,,,"PL/pgSQL function run_orphaned_files_view() line 11 at RAISE","select gp_segment_id, filename from run_orphaned_files_view();",0,,"pl_exec.c",3857, ``` It is possible that some background jobs have created some backends that we think we should avoid when taking the gp_check_orphaned_files view. As we have decided to make the view conservative (disallowing any backends that could cause false positive of the view results), fixing the test is what we need. In the test we have a safeguard which is to loop 10 minutes and take the view repeatedly (function run_orphaned_files_view()). But it didn't solve the issue because it saw only one snapshot of pg_stat_activity in the entire execution of the function. Now explicitly call pg_stat_clear_snapshot() to solve that issue. 
Co-authored-by: Ashwin Agrawal aashwin@vmware.com --- src/test/regress/input/gp_check_files.source | 2 ++ src/test/regress/output/gp_check_files.source | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/test/regress/input/gp_check_files.source b/src/test/regress/input/gp_check_files.source index 47d078bef04a..255c923d1dd9 100644 --- a/src/test/regress/input/gp_check_files.source +++ b/src/test/regress/input/gp_check_files.source @@ -33,6 +33,8 @@ BEGIN RAISE LOG 'attempt failed % with error: %', retry_counter + 1, SQLERRM; -- When an exception occurs, wait for 5 seconds and then retry PERFORM pg_sleep(5); + -- Refresh to get the latest pg_stat_activity + PERFORM pg_stat_clear_snapshot(); retry_counter := retry_counter + 1; END; END LOOP; diff --git a/src/test/regress/output/gp_check_files.source b/src/test/regress/output/gp_check_files.source index 105601328931..cff402742105 100644 --- a/src/test/regress/output/gp_check_files.source +++ b/src/test/regress/output/gp_check_files.source @@ -30,6 +30,8 @@ BEGIN RAISE LOG 'attempt failed % with error: %', retry_counter + 1, SQLERRM; -- When an exception occurs, wait for 5 seconds and then retry PERFORM pg_sleep(5); + -- Refresh to get the latest pg_stat_activity + PERFORM pg_stat_clear_snapshot(); retry_counter := retry_counter + 1; END; END LOOP; From 8d8007e98220d1dc9a5c331d6b862aa57b4495a7 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Mon, 4 Dec 2023 22:53:35 +0200 Subject: [PATCH 102/106] Revert "Remove `getaddrinfo` in `SendDummyPacket()` to address malloc deadlock" This commit causes issues on mackines with disabled ipv6 (e.g. in our CI environment). We should research it more attentively. This reverts commit 7f3c91f7bc6b4fc417b6a7775d5b67b85e2fec4e. --- src/backend/cdb/motion/ic_udpifc.c | 201 +++++------ src/backend/cdb/motion/test/Makefile | 9 - .../cdb/motion/test/cdbsenddummypacket_test.c | 331 ------------------ 3 files changed, 96 insertions(+), 445 deletions(-) delete mode 100644 src/backend/cdb/motion/test/Makefile delete mode 100644 src/backend/cdb/motion/test/cdbsenddummypacket_test.c diff --git a/src/backend/cdb/motion/ic_udpifc.c b/src/backend/cdb/motion/ic_udpifc.c index 07892938875f..9cce2e356a6b 100644 --- a/src/backend/cdb/motion/ic_udpifc.c +++ b/src/backend/cdb/motion/ic_udpifc.c @@ -632,9 +632,6 @@ typedef struct ICStatistics /* Statistics for UDP interconnect. 
*/ static ICStatistics ic_statistics; -/* Cached sockaddr of the listening udp socket */ -static struct sockaddr_storage udp_dummy_packet_sockaddr; - /*========================================================================= * STATIC FUNCTIONS declarations */ @@ -656,16 +653,10 @@ static void setRxThreadError(int eno); static void resetRxThreadError(void); static void SendDummyPacket(void); -static void ConvertToIPv4MappedAddr(struct sockaddr_storage *sockaddr, socklen_t *o_len); -#if defined(__darwin__) -#define s6_addr32 __u6_addr.__u6_addr32 -static void ConvertIPv6WildcardToLoopback(struct sockaddr_storage* dest); -#endif static void getSockAddr(struct sockaddr_storage *peer, socklen_t *peer_len, const char *listenerAddr, int listenerPort); static void setXmitSocketOptions(int txfd); static uint32 setSocketBufferSize(int fd, int type, int expectedSize, int leastSize); -static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, - int *txFamily, struct sockaddr_storage *listenerSockaddr); +static void setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily); static ChunkTransportStateEntry *startOutgoingUDPConnections(ChunkTransportState *transportStates, Slice *sendSlice, int *pOutgoingCount); @@ -1166,7 +1157,7 @@ resetRxThreadError() * Setup udp listening socket. */ static void -setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily, struct sockaddr_storage *listenerSockaddr) +setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFamily) { int errnoSave; int fd = -1; @@ -1333,13 +1324,6 @@ setupUDPListeningSocket(int *listenerSocketFd, uint16 *listenerPort, int *txFami else *listenerPort = ntohs(((struct sockaddr_in *) &our_addr)->sin_port); - /* - * cache the successful sockaddr of the listening socket, so - * we can use this information to connect to the listening socket. - */ - if (listenerSockaddr != NULL) - memcpy(listenerSockaddr, &our_addr, sizeof(struct sockaddr_storage)); - setXmitSocketOptions(fd); return; @@ -1456,8 +1440,8 @@ InitMotionUDPIFC(int *listenerSocketFd, uint16 *listenerPort) /* * setup listening socket and sending socket for Interconnect. */ - setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily, &udp_dummy_packet_sockaddr); - setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily, NULL); + setupUDPListeningSocket(listenerSocketFd, listenerPort, &txFamily); + setupUDPListeningSocket(&ICSenderSocket, &ICSenderPort, &ICSenderFamily); /* Initialize receive control data. */ resetMainThreadWaiting(&rx_control_info.mainWaitingState); @@ -1559,8 +1543,6 @@ CleanupMotionUDPIFC(void) ICSenderPort = 0; ICSenderFamily = 0; - memset(&udp_dummy_packet_sockaddr, 0, sizeof(udp_dummy_packet_sockaddr)); - #ifdef USE_ASSERT_CHECKING /* @@ -2870,8 +2852,30 @@ setupOutgoingUDPConnection(ChunkTransportState *transportStates, ChunkTransportS */ if (pEntry->txfd_family == AF_INET6) { + struct sockaddr_storage temp; + const struct sockaddr_in *in = (const struct sockaddr_in *) &conn->peer; + struct sockaddr_in6 *in6_new = (struct sockaddr_in6 *) &temp; + + memset(&temp, 0, sizeof(temp)); + elog(DEBUG1, "We are inet6, remote is inet. Converting to v4 mapped address."); - ConvertToIPv4MappedAddr(&conn->peer, &conn->peer_len); + + /* Construct a V4-to-6 mapped address. 
*/ + temp.ss_family = AF_INET6; + in6_new->sin6_family = AF_INET6; + in6_new->sin6_port = in->sin_port; + in6_new->sin6_flowinfo = 0; + + memset(&in6_new->sin6_addr, '\0', sizeof(in6_new->sin6_addr)); + /* in6_new->sin6_addr.s6_addr16[5] = 0xffff; */ + ((uint16 *) &in6_new->sin6_addr)[5] = 0xffff; + /* in6_new->sin6_addr.s6_addr32[3] = in->sin_addr.s_addr; */ + memcpy(((char *) &in6_new->sin6_addr) + 12, &(in->sin_addr), 4); + in6_new->sin6_scope_id = 0; + + /* copy it back */ + memcpy(&conn->peer, &temp, sizeof(struct sockaddr_in6)); + conn->peer_len = sizeof(struct sockaddr_in6); } else { @@ -6896,122 +6900,109 @@ WaitInterconnectQuitUDPIFC(void) } /* - * If the socket was created AF_INET6, but the address we want to - * send to is IPv4 (AF_INET), we need to change the address - * format. On Linux, this is not necessary: glibc automatically - * handles this. But on MAC OSX and Solaris, we need to convert - * the IPv4 address to IPv4-mapped IPv6 address in AF_INET6 format. - * - * The comment above relies on getaddrinfo() via function getSockAddr to get - * the correct V4-mapped address. We need to be careful here as we need to - * ensure that the platform we are using is POSIX 1003-2001 compliant. - * Just to be on the safeside, we'll be keeping this function for - * now to be used for all platforms and not rely on POSIX. - * - * Since this can be called in a signal handler, we avoid the use of - * async-signal unsafe functions such as memset/memcpy + * Send a dummy packet to interconnect thread to exit poll() immediately */ static void -ConvertToIPv4MappedAddr(struct sockaddr_storage *sockaddr, socklen_t *o_len) +SendDummyPacket(void) { - const struct sockaddr_in *in = (const struct sockaddr_in *) sockaddr; - struct sockaddr_storage temp = {0}; - struct sockaddr_in6 *in6_new = (struct sockaddr_in6 *) &temp; - - /* Construct a IPv4-to-IPv6 mapped address. 
*/ - temp.ss_family = AF_INET6; - in6_new->sin6_family = AF_INET6; - in6_new->sin6_port = in->sin_port; - in6_new->sin6_flowinfo = 0; - - ((uint16 *) &in6_new->sin6_addr)[5] = 0xffff; + int sockfd = -1; + int ret; + struct addrinfo *addrs = NULL; + struct addrinfo *rp; + struct addrinfo hint; + uint16 udp_listener; + char port_str[32] = {0}; + char *dummy_pkt = "stop it"; + int counter; - in6_new->sin6_addr.s6_addr32[3] = in->sin_addr.s_addr; - in6_new->sin6_scope_id = 0; + /* + * Get address info from interconnect udp listener port + */ + udp_listener = (Gp_listener_port >> 16) & 0x0ffff; + snprintf(port_str, sizeof(port_str), "%d", udp_listener); - /* copy it back */ - *sockaddr = temp; - *o_len = sizeof(struct sockaddr_in6); -} + MemSet(&hint, 0, sizeof(hint)); + hint.ai_socktype = SOCK_DGRAM; + hint.ai_family = AF_UNSPEC; /* Allow for IPv4 or IPv6 */ -#if defined(__darwin__) -/* macos does not accept :: as the destination, we will need to covert this to the IPv6 loopback */ -static void -ConvertIPv6WildcardToLoopback(struct sockaddr_storage* dest) -{ - char address[INET6_ADDRSTRLEN]; - /* we want to terminate our own process, so this should be local */ - const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; - inet_ntop(AF_INET6, &in6->sin6_addr, address, sizeof(address)); - if (strcmp("::", address) == 0) - ((struct sockaddr_in6 *)dest)->sin6_addr = in6addr_loopback; -} + /* Never do name resolution */ +#ifdef AI_NUMERICSERV + hint.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV; +#else + hint.ai_flags = AI_NUMERICHOST; #endif -/* - * Send a dummy packet to interconnect thread to exit poll() immediately - */ -static void -SendDummyPacket(void) -{ - int ret; - char *dummy_pkt = "stop it"; - int counter; - struct sockaddr_storage dest; - socklen_t dest_len; - - Assert(udp_dummy_packet_sockaddr.ss_family == AF_INET || udp_dummy_packet_sockaddr.ss_family == AF_INET6); - Assert(ICSenderFamily == AF_INET || ICSenderFamily == AF_INET6); - - dest = udp_dummy_packet_sockaddr; - dest_len = (ICSenderFamily == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); + ret = pg_getaddrinfo_all(interconnect_address, port_str, &hint, &addrs); + if (ret || !addrs) + { + elog(LOG, "send dummy packet failed, pg_getaddrinfo_all(): %m"); + goto send_error; + } -#if defined(__darwin__) - if (ICSenderFamily == AF_INET6) + for (rp = addrs; rp != NULL; rp = rp->ai_next) { -#if defined(__darwin__) - if (udp_dummy_packet_sockaddr.ss_family == AF_INET6) - ConvertIPv6WildcardToLoopback(&dest); -#endif - if (udp_dummy_packet_sockaddr.ss_family == AF_INET) - ConvertToIPv4MappedAddr(&dest, &dest_len); + /* Create socket according to pg_getaddrinfo_all() */ + sockfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); + if (sockfd < 0) + continue; + + if (!pg_set_noblock(sockfd)) + { + if (sockfd >= 0) + { + closesocket(sockfd); + sockfd = -1; + } + continue; + } + break; } -#endif - if (ICSenderFamily == AF_INET && udp_dummy_packet_sockaddr.ss_family == AF_INET6) + if (rp == NULL) { - /* the size of AF_INET6 is bigger than the side of IPv4, so - * converting from IPv6 to IPv4 may potentially not work. */ - ereport(LOG, (errmsg("sending dummy packet failed: cannot send from AF_INET to receiving on AF_INET6"))); - return; + elog(LOG, "send dummy packet failed, create socket failed: %m"); + goto send_error; } /* - * Send a dummy package to the interconnect listener, try 10 times. 
- * We don't want to close the socket at the end of this function, since - * the socket will eventually close during the motion layer cleanup. + * Send a dummy package to the interconnect listener, try 10 times */ + counter = 0; while (counter < 10) { counter++; - ret = sendto(ICSenderSocket, dummy_pkt, strlen(dummy_pkt), 0, (struct sockaddr *) &dest, dest_len); + ret = sendto(sockfd, dummy_pkt, strlen(dummy_pkt), 0, rp->ai_addr, rp->ai_addrlen); if (ret < 0) { if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) continue; else { - ereport(LOG, (errmsg("send dummy packet failed, sendto failed: %m"))); - return; + elog(LOG, "send dummy packet failed, sendto failed: %m"); + goto send_error; } } break; } if (counter >= 10) - ereport(LOG, (errmsg("send dummy packet failed, sendto failed with 10 times: %m"))); + { + elog(LOG, "send dummy packet failed, sendto failed: %m"); + goto send_error; + } + pg_freeaddrinfo_all(hint.ai_family, addrs); + closesocket(sockfd); + return; + +send_error: + + if (addrs) + pg_freeaddrinfo_all(hint.ai_family, addrs); + if (sockfd != -1) + closesocket(sockfd); + return; } uint32 diff --git a/src/backend/cdb/motion/test/Makefile b/src/backend/cdb/motion/test/Makefile deleted file mode 100644 index 878fde1fdd1e..000000000000 --- a/src/backend/cdb/motion/test/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -subdir=src/backend/cdb/motion -top_builddir=../../../../.. -include $(top_builddir)/src/Makefile.global - -TARGETS=cdbsenddummypacket - -include $(top_builddir)/src/backend/mock.mk - -cdbsenddummypacket.t: EXCL_OBJS += src/backend/cdb/motion/ic_udpifc.o diff --git a/src/backend/cdb/motion/test/cdbsenddummypacket_test.c b/src/backend/cdb/motion/test/cdbsenddummypacket_test.c deleted file mode 100644 index 9cde98fc5efd..000000000000 --- a/src/backend/cdb/motion/test/cdbsenddummypacket_test.c +++ /dev/null @@ -1,331 +0,0 @@ -#include -#include -#include -#include -#include "cmockery.h" - -#include "../../motion/ic_udpifc.c" - -bool break_loop = false; - -/* - * PROTOTYPES - */ - -extern ssize_t __real_sendto(int sockfd, const void *buf, size_t len, int flags, - const struct sockaddr *dest_addr, socklen_t addrlen); -int __wrap_errcode(int sqlerrcode); -int __wrap_errdetail(const char *fmt,...); -int __wrap_errmsg(const char *fmt,...); -ssize_t __wrap_sendto(int sockfd, const void *buf, size_t len, int flags, - const struct sockaddr *dest_addr, socklen_t addrlen); -void __wrap_elog_finish(int elevel, const char *fmt,...); -void __wrap_elog_start(const char *filename, int lineno, const char *funcname); -void __wrap_errfinish(int dummy __attribute__((unused)),...); -void __wrap_errstart(int elevel, const char *filename, int lineno, const char *funcname, const char *domain); -void __wrap_write_log(const char *fmt,...); - -/* - * WRAPPERS - */ - -int __wrap_errcode(int sqlerrcode) {return 0;} -int __wrap_errdetail(const char *fmt,...) { return 0; } -int __wrap_errmsg(const char *fmt,...) { return 0; } -void __wrap_elog_start(const char *filename, int lineno, const char *funcname) {} -void __wrap_errfinish(int dummy __attribute__((unused)),...) {} -void __wrap_errstart(int elevel, const char *filename, int lineno, const char *funcname, const char *domain){} - -void __wrap_write_log(const char *fmt,...) -{ - /* check if we actually receive the message that sends the error */ - if (strcmp("Interconnect error: short conn receive (\%d)", fmt) == 0) - break_loop = true; -} - -void __wrap_elog_finish(int elevel, const char *fmt,...) 
-{ - assert_true(elevel <= LOG); -} - -ssize_t __wrap_sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen) -{ - assert_true(sockfd != PGINVALID_SOCKET); -#if defined(__darwin__) - /* check to see if we converted the wildcard value to something routeable */ - if (udp_dummy_packet_sockaddr.ss_family == AF_INET6) - { - const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) dest_addr; - char address[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, &in6->sin6_addr, address, sizeof(address)); - /* '::' and '::1' should always be '::1' */ - assert_true(strcmp("::1", address) == 0); - } -#endif - - return __real_sendto(sockfd, buf, len, flags, dest_addr, addrlen); -} - -/* - * HELPER FUNCTIONS - */ - -static void wait_for_receiver(bool should_fail) -{ - int counter = 0; - /* break_loop should be reset at the beginning of each test - * The while loop will end early once __wrap_write_log is called; - * this should happen when the receiver polls the message that - * SendDummyPacket sends. - */ - while(!break_loop) - { - /* we are sleeping for a generous amount of time; we should never - * need this much time. There is something wrong if it takes this long. - * - * expect to fail if the communication is invalid, i.e,. IPv4 to IPv6 - */ - if (counter > 2) - break; - sleep(1); - counter++; - } - - if (should_fail) - assert_true(counter > 2); - else - assert_true(counter < 2); -} - -static void -start_receiver() -{ - pthread_attr_t t_atts; - sigset_t pthread_sigs; - int pthread_err; - - pthread_attr_init(&t_atts); - pthread_attr_setstacksize(&t_atts, Max(PTHREAD_STACK_MIN, (128 * 1024))); - ic_set_pthread_sigmasks(&pthread_sigs); - pthread_err = pthread_create(&ic_control_info.threadHandle, &t_atts, rxThreadFunc, NULL); - ic_reset_pthread_sigmasks(&pthread_sigs); - - pthread_attr_destroy(&t_atts); - if (pthread_err != 0) - { - ic_control_info.threadCreated = false; - printf("failed to create thread"); - fail(); - } - - ic_control_info.threadCreated = true; -} - -static sa_family_t -create_sender_socket(sa_family_t af) -{ - int sockfd = socket(af, - SOCK_DGRAM, - 0); - if (sockfd < 0) - { - printf("send dummy packet failed, create socket failed: %m\n"); - fail(); - return PGINVALID_SOCKET; - } - - if (!pg_set_noblock(sockfd)) - { - if (sockfd >= 0) - { - closesocket(sockfd); - } - printf("send dummy packet failed, setting socket with noblock failed: %m\n"); - fail(); - return PGINVALID_SOCKET; - } - - return sockfd; -} - -/* - * START UNIT TEST - */ - -static void -test_send_dummy_packet_ipv4_to_ipv4(void **state) -{ - break_loop = false; - int listenerSocketFd; - uint16 listenerPort; - int txFamily; - - interconnect_address = "0.0.0.0"; - setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); - - Gp_listener_port = (listenerPort << 16); - UDP_listenerFd = listenerSocketFd; - - ICSenderSocket = create_sender_socket(AF_INET); - ICSenderFamily = AF_INET; - - SendDummyPacket(); - - const struct sockaddr_in *in = (const struct sockaddr_in *) &udp_dummy_packet_sockaddr; - assert_true(txFamily == AF_INET); - assert_true(in->sin_family == AF_INET); - assert_true(listenerPort == ntohs(in->sin_port)); - assert_true(strcmp("0.0.0.0", inet_ntoa(in->sin_addr)) == 0); - - wait_for_receiver(false); -} - -/* Sending from IPv4 to receiving on IPv6 is currently not supported. - * The size of AF_INET6 is bigger than the side of IPv4, so converting from - * IPv6 to IPv4 may potentially not work. 
- */ -static void -test_send_dummy_packet_ipv4_to_ipv6_should_fail(void **state) -{ - break_loop = false; - int listenerSocketFd; - uint16 listenerPort; - int txFamily; - - interconnect_address = "::"; - setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); - - Gp_listener_port = (listenerPort << 16); - UDP_listenerFd = listenerSocketFd; - - ICSenderSocket = create_sender_socket(AF_INET); - ICSenderFamily = AF_INET; - - SendDummyPacket(); - - const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; - assert_true(txFamily == AF_INET6); - assert_true(in6->sin6_family == AF_INET6); - assert_true(listenerPort == ntohs(in6->sin6_port)); - - wait_for_receiver(true); -} - -static void -test_send_dummy_packet_ipv6_to_ipv6(void **state) -{ - break_loop = false; - int listenerSocketFd; - uint16 listenerPort; - int txFamily; - - interconnect_address = "::1"; - setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); - - Gp_listener_port = (listenerPort << 16); - UDP_listenerFd = listenerSocketFd; - - ICSenderSocket = create_sender_socket(AF_INET6); - ICSenderFamily = AF_INET6; - - SendDummyPacket(); - - const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; - assert_true(txFamily == AF_INET6); - assert_true(in6->sin6_family == AF_INET6); - assert_true(listenerPort == ntohs(in6->sin6_port)); - - wait_for_receiver(false); -} - -static void -test_send_dummy_packet_ipv6_to_ipv4(void **state) -{ - break_loop = false; - int listenerSocketFd; - uint16 listenerPort; - int txFamily; - - interconnect_address = "0.0.0.0"; - setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); - - Gp_listener_port = (listenerPort << 16); - UDP_listenerFd = listenerSocketFd; - - ICSenderSocket = create_sender_socket(AF_INET6); - ICSenderFamily = AF_INET6; - - SendDummyPacket(); - - const struct sockaddr_in *in = (const struct sockaddr_in *) &udp_dummy_packet_sockaddr; - assert_true(txFamily == AF_INET); - assert_true(in->sin_family == AF_INET); - assert_true(listenerPort == ntohs(in->sin_port)); - assert_true(strcmp("0.0.0.0", inet_ntoa(in->sin_addr)) == 0); - - wait_for_receiver(false); -} - - -static void -test_send_dummy_packet_ipv6_to_ipv6_wildcard(void **state) -{ - break_loop = false; - int listenerSocketFd; - uint16 listenerPort; - int txFamily; - - interconnect_address = "::"; - setupUDPListeningSocket(&listenerSocketFd, &listenerPort, &txFamily, &udp_dummy_packet_sockaddr); - - Gp_listener_port = (listenerPort << 16); - UDP_listenerFd = listenerSocketFd; - - ICSenderSocket = create_sender_socket(AF_INET6); - ICSenderFamily = AF_INET6; - - SendDummyPacket(); - - const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *) &udp_dummy_packet_sockaddr; - assert_true(txFamily == AF_INET6); - assert_true(in6->sin6_family == AF_INET6); - assert_true(listenerPort == ntohs(in6->sin6_port)); - - wait_for_receiver(false); -} - -int -main(int argc, char* argv[]) -{ - cmockery_parse_arguments(argc, argv); - - int is_ipv6_supported = true; - int sockfd = socket(AF_INET6, SOCK_DGRAM, 0); - if (sockfd < 0 && errno == EAFNOSUPPORT) - is_ipv6_supported = false; - - log_min_messages = DEBUG1; - - start_receiver(); - - if (is_ipv6_supported) - { - const UnitTest tests[] = { - unit_test(test_send_dummy_packet_ipv4_to_ipv4), - unit_test(test_send_dummy_packet_ipv4_to_ipv6_should_fail), - unit_test(test_send_dummy_packet_ipv6_to_ipv6), - 
unit_test(test_send_dummy_packet_ipv6_to_ipv4), - unit_test(test_send_dummy_packet_ipv6_to_ipv6_wildcard), - }; - return run_tests(tests); - } - else - { - printf("WARNING: IPv6 is not supported, skipping unittest\n"); - const UnitTest tests[] = { - unit_test(test_send_dummy_packet_ipv4_to_ipv4), - }; - return run_tests(tests); - } - return 0; -} From 6f87a842ca88b1c3e39639fbaa5ec2215a1d3b48 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Wed, 6 Dec 2023 17:06:17 +0200 Subject: [PATCH 103/106] alter precedence in favor of ipv4 during resolving (#657) gpdb test suite requires enabled ipv6 since 7f3c91f. But there is at least one src/test/ssl test that expects resolving to ipv4 address and fails in the ipv6 environment with: ``` psql: FATAL: no pg_hba.conf entry for host "2001:db8:1::242:ac11:2", user "ssltestuser", database "postgres", SSL on pg_regress: cannot read the result (using postmaster on 9f17c91f4c76, port 6000) ``` so we at least temporary try to change precedence of name resolution in favor of ipv4 --- arenadata/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arenadata/Dockerfile b/arenadata/Dockerfile index ee0fb40d6a8c..eab478296971 100644 --- a/arenadata/Dockerfile +++ b/arenadata/Dockerfile @@ -57,7 +57,8 @@ RUN yum -y install centos-release-scl && \ echo -e 'source /opt/rh/devtoolset-7/enable' >> /opt/gcc_env.sh && \ echo -e '#!/bin/sh' >> /etc/profile.d/jdk_home.sh && \ echo -e 'export JAVA_HOME=/etc/alternatives/java_sdk' >> /etc/profile.d/jdk_home.sh && \ - echo -e 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile.d/jdk_home.sh + echo -e 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile.d/jdk_home.sh && \ + echo -e 'precedence ::ffff:0:0/96 100' >> /etc/gai.conf RUN rpm -i $sigar && rpm -i $sigar_headers From 6cac355c7cfc4684018b210697cec37b0d7dd021 Mon Sep 17 00:00:00 2001 From: Vyacheslav Kompan Date: Mon, 11 Dec 2023 08:39:58 +0300 Subject: [PATCH 104/106] Fix deadlock by shared SegmentGeneral CTE & planning failure by sharing General CTE (#640) Depending on the usage of the shared CTE with General or SegmentGeneral subplan, the number of segments the CTE is executed on may be single for joins with Singleton nodes, and multiple for nodes with other types of locus. In the current implementation this may lead to deadlock if the CTE is used for both join targets: the Join with the Singleton node results in the Share Input Scan producer being executed with 1-gang, while the Join with the N-gang node creates Share Input Scan reader on multiple segments, so the plan execution hangs for the SegmentGeneral, and assertions fail inside shareinput_mutator_xslice_*() functions for the General. If we force execution of CTE on multiple segments, it will cause redundant motions in case of joining the CTE with another General or SegmentGeneral. At the moment of constructing and sharing the CTE we don't know the rest of the plan, so we can't predict the correct CTE locus. Because replicated tables are considered small, the most universal and optimal way to fix deadlock would be to inline CTE scans with General or SegmentGeneral locus. Inlining plan is implemented by making the CTE subplan once and then copying it instead of sharing. This solution, however, cannot push filters down, because this must be done before building the subplan, and the subplan locus can't be known before it's construction. 
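
A quick illustrative check of which relations end up planned with a SegmentGeneral subplan (assumes the replicated table `r` and the hash-distributed table `d` from the regression test below):

```
-- Replicated tables carry policytype 'r' and are planned with a
-- SegmentGeneral locus; hash-distributed tables carry policytype 'p'.
SELECT localoid::regclass AS relation, policytype
FROM gp_distribution_policy
WHERE localoid IN ('d'::regclass, 'r'::regclass);
```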
--- src/backend/optimizer/path/allpaths.c | 37 ++++++++++-- src/include/nodes/relation.h | 10 ++-- src/test/regress/expected/with.out | 85 +++++++++++++++++++++++++++ src/test/regress/sql/with.sql | 39 ++++++++++++ 4 files changed, 163 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index f43745159e48..ab8610d5fe6e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2089,7 +2089,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * subplan will not be used by InitPlans, so that they can be shared * if this CTE is referenced multiple times (excluding in InitPlans). */ - if (cteplaninfo->shared_plan == NULL) + if (cteplaninfo->subplan == NULL) { PlannerConfig *config = CopyPlannerConfig(root->config); @@ -2110,15 +2110,44 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) subplan = subquery_planner(cteroot->glob, subquery, cteroot, cte->cterecursive, tuple_fraction, &subroot, config); - cteplaninfo->shared_plan = prepare_plan_for_sharing(cteroot, subplan); + /* + * Sharing General and SegmentGeneral subplan may lead to deadlock + * when executed with 1-gang and joined with N-gang. + */ + if (CdbPathLocus_IsGeneral(*subplan->flow) || + CdbPathLocus_IsSegmentGeneral(*subplan->flow)) + { + cteplaninfo->subplan = subplan; + } + else + { + cteplaninfo->subplan = prepare_plan_for_sharing(cteroot, subplan); + } + cteplaninfo->subroot = subroot; } /* * Create another ShareInputScan to reference the already-created - * subplan. + * subplan if not avoiding sharing for General and SegmentGeneral + * subplans. */ - subplan = share_prepared_plan(cteroot, cteplaninfo->shared_plan); + if (CdbPathLocus_IsGeneral(*cteplaninfo->subplan->flow) || + CdbPathLocus_IsSegmentGeneral(*cteplaninfo->subplan->flow)) + { + /* + * If we are not sharing and subplan was created just now, use it. + * Otherwise, make a copy of it to avoid construction of DAG + * instead of a tree. + */ + if (subplan == NULL) + subplan = (Plan *) copyObject(cteplaninfo->subplan); + } + else + { + subplan = share_prepared_plan(cteroot, cteplaninfo->subplan); + } + subroot = cteplaninfo->subroot; } diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index b609b3406c25..d5c33a47edad 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -322,11 +322,13 @@ typedef struct PlannerInfo typedef struct CtePlanInfo { /* - * A subplan, prepared for sharing among many CTE references by - * prepare_plan_for_sharing(), that implements the CTE. NULL if the - * CTE is not shared among references. + * A subplan, that implements the CTE and which is prepared either for + * sharing among many CTE references by prepare_plan_for_sharing() or + * for inlining in cases, when sharing produces invalid plans. NULL if + * the CTE is not shared among references (gp_cte_sharing is off), or to + * be planned or inlined and has not been planned yet. */ - Plan *shared_plan; + Plan *subplan; /* * The subroot corresponding to the subplan. 
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 4f449766740e..2fe564e8cb96 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -2280,3 +2280,88 @@ WITH cte AS ( RESET optimizer; DROP TABLE d; +-- Test if sharing is disabled for a SegmentGeneral CTE to avoid deadlock if CTE is +-- executed with 1-gang and joined with n-gang +SET optimizer = off; +--start_ignore +DROP TABLE IF EXISTS d; +NOTICE: table "d" does not exist, skipping +DROP TABLE IF EXISTS r; +NOTICE: table "r" does not exist, skipping +--end_ignore +CREATE TABLE d (a int, b int) DISTRIBUTED BY (a); +INSERT INTO d VALUES ( 1, 2 ),( 2, 3 ); +CREATE TABLE r (a int, b int) DISTRIBUTED REPLICATED; +INSERT INTO r VALUES ( 1, 2 ),( 3, 4 ); +EXPLAIN (COSTS off) +WITH cte AS ( + SELECT count(*) a FROM r +) SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + QUERY PLAN +--------------------------------------------------------------- + Hash Join + Hash Cond: (d_join_cte.a = (count(*))) + -> Subquery Scan on d_join_cte + -> Limit + -> Gather Motion 3:1 (slice1; segments: 3) + -> Limit + -> Hash Join + Hash Cond: (d.a = (count(*))) + -> Seq Scan on d + -> Hash + -> Aggregate + -> Seq Scan on r + -> Hash + -> Gather Motion 1:1 (slice2; segments: 1) + -> Aggregate + -> Seq Scan on r r_1 + Optimizer: Postgres query optimizer +(17 rows) + +WITH cte AS ( + SELECT count(*) a FROM r +) SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + a | b +---+--- + 2 | 3 +(1 row) + +-- Test if sharing is disabled for a General CTE to avoid deadlock if CTE is +-- executed with coordinator gang and joined with n-gang +EXPLAIN (COSTS OFF) +WITH cte AS ( + SELECT count(*) a FROM (VALUES ( 1, 2 ),( 3, 4 )) v +) +SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + QUERY PLAN +--------------------------------------------------------------------------- + Hash Join + Hash Cond: (d_join_cte.a = (count(*))) + -> Subquery Scan on d_join_cte + -> Limit + -> Gather Motion 3:1 (slice1; segments: 3) + -> Limit + -> Hash Join + Hash Cond: (d.a = (count(*))) + -> Seq Scan on d + -> Hash + -> Aggregate + -> Values Scan on "*VALUES*" + -> Hash + -> Aggregate + -> Values Scan on "*VALUES*_1" + Optimizer: Postgres query optimizer +(16 rows) + +WITH cte AS ( + SELECT count(*) a FROM (VALUES ( 1, 2 ),( 3, 4 )) v +) +SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + a | b +---+--- + 2 | 3 +(1 row) + +RESET optimizer; +DROP TABLE d; +DROP TABLE r; diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql index 72b89a0f0c97..8d0b38522dbf 100644 --- a/src/test/regress/sql/with.sql +++ b/src/test/regress/sql/with.sql @@ -1083,3 +1083,42 @@ WITH cte AS ( RESET optimizer; DROP TABLE d; + +-- Test if sharing is disabled for a SegmentGeneral CTE to avoid deadlock if CTE is +-- executed with 1-gang and joined with n-gang +SET optimizer = off; +--start_ignore +DROP TABLE IF EXISTS d; +DROP TABLE IF EXISTS r; +--end_ignore + +CREATE TABLE d (a int, b int) DISTRIBUTED BY (a); +INSERT INTO d VALUES ( 1, 2 ),( 2, 3 ); +CREATE TABLE r (a int, b int) DISTRIBUTED REPLICATED; +INSERT INTO r VALUES ( 1, 2 ),( 3, 4 ); + +EXPLAIN (COSTS off) +WITH cte AS ( + SELECT count(*) a FROM r +) SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + +WITH cte AS ( + SELECT count(*) a FROM r +) SELECT * FROM cte JOIN 
(SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + +-- Test if sharing is disabled for a General CTE to avoid deadlock if CTE is +-- executed with coordinator gang and joined with n-gang +EXPLAIN (COSTS OFF) +WITH cte AS ( + SELECT count(*) a FROM (VALUES ( 1, 2 ),( 3, 4 )) v +) +SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + +WITH cte AS ( + SELECT count(*) a FROM (VALUES ( 1, 2 ),( 3, 4 )) v +) +SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte USING (a); + +RESET optimizer; +DROP TABLE d; +DROP TABLE r; From 5894018a785378680584751210540e94d07a32e3 Mon Sep 17 00:00:00 2001 From: Viktor Kurilko Date: Fri, 15 Dec 2023 08:46:59 +0300 Subject: [PATCH 105/106] Fix use by ORCA of a newer index with HOT-chain in an older transaction. (#619) When heap tuple is updated by legacy planner and the updated tuple is placed at the same page (heap-only tuple, HOT), an update chain is created. It's a chain of updated tuples, in which each tuple's ctid points to the next tuple in the chain. HOT chains allow to store only one index entry, which points to the first tuple in the chain. And during Index Scan we pass through the chain, and the first tuple visible for the current transaction is taken (for more information, see src/backend/access/heap/README.HOT). If we create a second index on column that has been updated, it will store the ctid of the beginning of the existing HOT chain. If a repeatable read transaction started before the transaction in which the second index was created, then this index could be used in the query plan. As a result of the search for this index, a tuple could be found that does not meet the search condition (by a new value that is not visible to the transaction) In the case of the legacy planner, this problem is solved the following way: "To address this issue, regular (non-concurrent) CREATE INDEX makes the new index usable only by new transactions and transactions that don't have snapshots older than the CREATE INDEX command. This prevents queries that can see the inconsistent HOT chains from trying to use the new index and getting incorrect results. Queries that can see the index can only see the rows that were visible after the index was created, hence the HOT chains are consistent for them." But ORCA does not handle this case and can use an index with a broken HOT-chain. This patch resolves the issue for ORCA in the same way as legacy planner. During planning we ignore newly created indexes based on their xmin. Additionally, ORCA faced another related problem. Since ORCA has its own cache (MD Cache) and can cache a relation object without an index that cannot be used in the current snapshot (because MDCacheSetTransientState function returns true), we won't be able to use the index after the problematic snapshot changes. Therefore, we need to reset the cache after the snapshot changes in order to use index. This patch solves the problem in the following way: during index filtering, if we encounter an index that we cannot use, we save TransactionXmin in the mdcache_transaction_xmin variable. In the next queries, we check the saved xmin, and if it is valid and differs from the current one, we reset the cache. The create_index_hot test has also been changed. Now optimizer is turned off before the update. Since ORCA always uses Split Update, in which case HOT chains are not created and the problem is not reproduced. And that's why ORCA wasn't actually tested before. 
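
A small illustrative query for the catalog flag the fix keys off (assumes the `hot` table from the isolation spec below):

```
-- After CREATE INDEX on a column with a broken HOT chain, the new index
-- is flagged with indcheckxmin so that older snapshots skip it.
SELECT indexrelid::regclass AS index_name, indcheckxmin
FROM pg_index
WHERE indrelid = 'hot'::regclass;
```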
--- src/backend/gpopt/gpdbwrappers.cpp | 53 ++++++++++++++++++- .../translate/CTranslatorDXLToPlStmt.cpp | 2 + .../translate/CTranslatorRelcacheToDXL.cpp | 12 ++++- src/backend/gpopt/utils/COptTasks.cpp | 2 + src/backend/optimizer/plan/orca.c | 1 - src/include/gpopt/gpdbwrappers.h | 13 ++++- .../isolation/expected/create_index_hot.out | 3 +- .../isolation/specs/create_index_hot.spec | 3 ++ 8 files changed, 82 insertions(+), 7 deletions(-) diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index 8f3ed749997b..f09c652aa8a5 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -31,6 +31,7 @@ extern "C" { #include "catalog/pg_collation.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" } #define GP_WRAP_START \ sigjmp_buf local_sigjmp_buf; \ @@ -2509,6 +2510,13 @@ static bool mdcache_invalidation_counter_registered = false; static int64 mdcache_invalidation_counter = 0; static int64 last_mdcache_invalidation_counter = 0; +// If we have cached a relation without an index, because that index cannot +// be used in the current snapshot (for more info see +// src/backend/access/heap/README.HOT), we save TransactionXmin. If +// TransactionXmin changes later, the cache will be reset and the relation will +// be reloaded with that index. +static TransactionId mdcache_transaction_xmin = InvalidTransactionId; + static void mdsyscache_invalidation_counter_callback(Datum arg, int cacheid, uint32 hashvalue) @@ -2590,7 +2598,8 @@ register_mdcache_invalidation_callbacks(void) (Datum) 0); } -// Has there been any catalog changes since last call? +// We reset the cache in case of a catalog change or if TransactionXmin changed +// from that we save in mdcache_transaction_xmin. bool gpdb::MDCacheNeedsReset(void) { @@ -2602,7 +2611,11 @@ gpdb::MDCacheNeedsReset(void) mdcache_invalidation_counter_registered = true; } if (last_mdcache_invalidation_counter == mdcache_invalidation_counter) - return false; + { + return TransactionIdIsValid(mdcache_transaction_xmin) && + !TransactionIdEquals(TransactionXmin, + mdcache_transaction_xmin); + } else { last_mdcache_invalidation_counter = mdcache_invalidation_counter; @@ -2614,6 +2627,42 @@ gpdb::MDCacheNeedsReset(void) return true; } +bool +gpdb::MDCacheSetTransientState(Relation index_rel) +{ + GP_WRAP_START; + { + bool result = + index_rel->rd_index->indcheckxmin && + !TransactionIdPrecedes( + HeapTupleHeaderGetXmin(index_rel->rd_indextuple->t_data), + TransactionXmin); + if (result) + mdcache_transaction_xmin = TransactionXmin; + return result; + } + GP_WRAP_END; + // ignore index if we can't check it visibility for some reason + return true; +} + +void +gpdb::MDCacheResetTransientState(void) +{ + mdcache_transaction_xmin = InvalidTransactionId; +} + +bool +gpdb::MDCacheInTransientState(void) +{ + GP_WRAP_START; + { + return TransactionIdIsValid(mdcache_transaction_xmin); + } + GP_WRAP_END; + return false; +} + // returns true if a query cancel is requested in GPDB bool gpdb::IsAbortRequested(void) diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 6d3ee370bcce..46d35e256c99 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -308,6 +308,8 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, } } + planned_stmt->transientPlan = gpdb::MDCacheInTransientState(); + return planned_stmt; } diff --git 
a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 9e8f7acaff93..3ef6c8ba5d61 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -308,7 +308,11 @@ CTranslatorRelcacheToDXL::RetrieveRelIndexInfoForPartTable(CMemoryPool *mp, GPOS_TRY { - if (IsIndexSupported(index_rel)) + // If the index is supported, but cannot yet be used, ignore it; but + // mark the plan we are generating and cache as transient. + // See src/backend/access/heap/README.HOT for discussion. + if (IsIndexSupported(index_rel) && + !gpdb::MDCacheSetTransientState(index_rel)) { CMDIdGPDB *mdid_index = GPOS_NEW(mp) CMDIdGPDB(IMDId::EmdidInd, index_oid); @@ -364,7 +368,11 @@ CTranslatorRelcacheToDXL::RetrieveRelIndexInfoForNonPartTable(CMemoryPool *mp, GPOS_TRY { - if (IsIndexSupported(index_rel)) + // If the index is supported, but cannot yet be used, ignore it; but + // mark the plan we are generating and cache as transient. + // See src/backend/access/heap/README.HOT for discussion. + if (IsIndexSupported(index_rel) && + !gpdb::MDCacheSetTransientState(index_rel)) { CMDIdGPDB *mdid_index = GPOS_NEW(mp) CMDIdGPDB(IMDId::EmdidInd, index_oid); diff --git a/src/backend/gpopt/utils/COptTasks.cpp b/src/backend/gpopt/utils/COptTasks.cpp index 2e4c060133fb..7b7564172d6c 100644 --- a/src/backend/gpopt/utils/COptTasks.cpp +++ b/src/backend/gpopt/utils/COptTasks.cpp @@ -498,11 +498,13 @@ COptTasks::OptimizeTask(void *ptr) { CMDCache::Init(); CMDCache::SetCacheQuota(optimizer_mdcache_size * 1024L); + gpdb::MDCacheResetTransientState(); } else if (reset_mdcache) { CMDCache::Reset(); CMDCache::SetCacheQuota(optimizer_mdcache_size * 1024L); + gpdb::MDCacheResetTransientState(); } else if (CMDCache::ULLGetCacheQuota() != (ULLONG) optimizer_mdcache_size * 1024L) diff --git a/src/backend/optimizer/plan/orca.c b/src/backend/optimizer/plan/orca.c index 057f192bc667..49cdcfe76805 100644 --- a/src/backend/optimizer/plan/orca.c +++ b/src/backend/optimizer/plan/orca.c @@ -241,7 +241,6 @@ optimize_query(Query *parse, ParamListInfo boundParams) result->relationOids = glob->relationOids; result->invalItems = glob->invalItems; result->oneoffPlan = glob->oneoffPlan; - result->transientPlan = glob->transientPlan; return result; } diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h index 12ab3a7b3ac8..4f8fa6b24635 100644 --- a/src/include/gpopt/gpdbwrappers.h +++ b/src/include/gpopt/gpdbwrappers.h @@ -683,9 +683,20 @@ FaultInjectorType_e InjectFaultInOptTasks(const char *fault_name); gpos::ULONG CountLeafPartTables(Oid oidRelation); // Does the metadata cache need to be reset (because of a catalog -// table has been changed?) +// table has been changed or TransactionXmin changed from that we saved)? bool MDCacheNeedsReset(void); +// Check that the index is usable in the current snapshot and if not, save the +// xmin of the current snapshot. Returns true if the index is not usable and +// should be skipped. 
+bool MDCacheSetTransientState(Relation index_rel); + +// reset TransactionXmin value that we saved +void MDCacheResetTransientState(void); + +// returns true if cache is in transient state +bool MDCacheInTransientState(void); + // returns true if a query cancel is requested in GPDB bool IsAbortRequested(void); diff --git a/src/test/isolation/expected/create_index_hot.out b/src/test/isolation/expected/create_index_hot.out index 519318e1d9f0..fb37c9a989d7 100644 --- a/src/test/isolation/expected/create_index_hot.out +++ b/src/test/isolation/expected/create_index_hot.out @@ -1,6 +1,6 @@ Parsed test spec with 2 sessions -starting permutation: s2begin s2select s1update s1createindexonc s2select s2forceindexscan s2select +starting permutation: s2begin s2select s1optimizeroff s1update s1createindexonc s2select s2forceindexscan s2select step s2begin: BEGIN ISOLATION LEVEL SERIALIZABLE; step s2select: select '#' as expected, c from hot where c = '#' union all @@ -8,6 +8,7 @@ step s2select: select '#' as expected, c from hot where c = '#' expected c # # +step s1optimizeroff: set optimizer = off; step s1update: update hot set c = '$' where c = '#'; step s1createindexonc: create index idx_c on hot (c); step s2select: select '#' as expected, c from hot where c = '#' diff --git a/src/test/isolation/specs/create_index_hot.spec b/src/test/isolation/specs/create_index_hot.spec index bb80d8e3cdec..fe224c4917d1 100644 --- a/src/test/isolation/specs/create_index_hot.spec +++ b/src/test/isolation/specs/create_index_hot.spec @@ -23,7 +23,9 @@ teardown # Update a row, and create an index on the updated column. This produces # a broken HOT chain. +#FIXME do not turn off the optimizer when ORCA stops always using Split Update. session "s1" +step "s1optimizeroff" { set optimizer = off; } step "s1update" { update hot set c = '$' where c = '#'; } step "s1createindexonc" { create index idx_c on hot (c); } @@ -39,6 +41,7 @@ permutation "s2begin" "s2select" + "s1optimizeroff" "s1update" "s1createindexonc" From e73000417c345a3e6ec913c17fb9eb04d8e6f8ee Mon Sep 17 00:00:00 2001 From: Evgeniy Ratkov Date: Mon, 18 Dec 2023 08:24:55 +0300 Subject: [PATCH 106/106] arenadata_toolkit: add TOAST's size to adb_relation_storage_size (#654) This patch adds TOAST table's size to summary size of relation at C function adb_relation_storage_size, which is used at SQL functions adb_relation_storage_size and adb_relation_storage_size_on_segments. The code is based on calculate_toast_table_size from dbsize.c, but without calculating size of TOAST's indexes. New test adb_relation_storage_size_test was added to check calculating sizes of different tables (heap, ao, with TOAST or without) at SQL functions adb_relation_storage_size and adb_relation_storage_size_on_segments. 
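
An illustrative way to locate the TOAST relation whose size is now included (assumes the `heap_table_with_toast` table from the new regression test):

```
-- The size added by adb_relation_storage_size is the TOAST heap itself,
-- without its index (compare with pg_total_relation_size, which adds both).
SELECT c.relname,
       t.relname                         AS toast_relation,
       pg_relation_size(c.reltoastrelid) AS toast_bytes
FROM pg_class c
JOIN pg_class t ON t.oid = c.reltoastrelid
WHERE c.relname = 'heap_table_with_toast';
```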
--- gpcontrib/arenadata_toolkit/Makefile | 2 +- .../arenadata_toolkit/arenadata_toolkit.c | 21 ++++ .../adb_relation_storage_size_test.out | 95 +++++++++++++++++++ .../sql/adb_relation_storage_size_test.sql | 66 +++++++++++++ 4 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 gpcontrib/arenadata_toolkit/expected/adb_relation_storage_size_test.out create mode 100644 gpcontrib/arenadata_toolkit/sql/adb_relation_storage_size_test.sql diff --git a/gpcontrib/arenadata_toolkit/Makefile b/gpcontrib/arenadata_toolkit/Makefile index 98698fd9f123..d5d74bddbdee 100644 --- a/gpcontrib/arenadata_toolkit/Makefile +++ b/gpcontrib/arenadata_toolkit/Makefile @@ -16,7 +16,7 @@ $(DATA_built): $(DATA) cat $(DATA) > $(DATA_built) REGRESS = arenadata_toolkit_test arenadata_toolkit_skew_test adb_get_relfilenodes_test \ - adb_collect_table_stats_test adb_vacuum_strategy_test + adb_collect_table_stats_test adb_vacuum_strategy_test adb_relation_storage_size_test REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/arenadata_toolkit.c index db99c45de79c..3625d7add4e5 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit.c @@ -39,6 +39,7 @@ static int64 get_ao_storage_total_bytes(Relation rel, char *relpath); static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, const char *relpath, int segNo); +static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum); /* * Structure used to accumulate the size of AO/CO relation from callback. @@ -85,6 +86,9 @@ adb_relation_storage_size(PG_FUNCTION_ARGS) size += get_size_from_segDBs(sql); } + if (OidIsValid(rel->rd_rel->reltoastrelid)) + size += calculate_toast_table_size(rel->rd_rel->reltoastrelid, forkNumber); + relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); @@ -147,6 +151,23 @@ calculate_ao_storage_perSegFile(const int segno, void *ctx) return true; } +/* + * Calculate total on-disk size of a TOAST relation. + * Must not be applied to non-TOAST relations. + * + * The code is based on calculate_toast_table_size from dbsize.c, but without + * calculating size of toast's indexes. + */ +static int64 +calculate_toast_table_size(Oid toastrelid, ForkNumber forknum) +{ + Relation toastRel = relation_open(toastrelid, AccessShareLock); + int64 size = calculate_relation_size(toastRel, forknum); + + relation_close(toastRel, AccessShareLock); + return size; +} + /* * Function calculates the size of heap tables. 
* diff --git a/gpcontrib/arenadata_toolkit/expected/adb_relation_storage_size_test.out b/gpcontrib/arenadata_toolkit/expected/adb_relation_storage_size_test.out new file mode 100644 index 000000000000..a464250ab4f1 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/adb_relation_storage_size_test.out @@ -0,0 +1,95 @@ +CREATE EXTENSION arenadata_toolkit; +CREATE TABLE heap_table_with_toast(a INT, b TEXT) +DISTRIBUTED BY (a); +CREATE TABLE heap_table_without_toast(a INT, b INT) +DISTRIBUTED BY (a); +CREATE TABLE ao_table_with_toast(a INT, b TEXT) +WITH (APPENDOPTIMIZED=true) +DISTRIBUTED BY (a); +CREATE TABLE ao_table_without_toast(a INT, b INT) +WITH (APPENDOPTIMIZED=true) +DISTRIBUTED BY (a); +-- Check that toast exists only for "with_toast" tables +SELECT relname, reltoastrelid != 0 with_toast +FROM pg_class +WHERE relname IN ('heap_table_with_toast', 'heap_table_without_toast', + 'ao_table_with_toast', 'ao_table_without_toast') +ORDER BY 1; + relname | with_toast +--------------------------+------------ + ao_table_with_toast | t + ao_table_without_toast | f + heap_table_with_toast | t + heap_table_without_toast | f +(4 rows) + +-- Insert initial data to tables +INSERT INTO heap_table_with_toast SELECT i, 'short_text' FROM generate_series(1,15) AS i; +INSERT INTO heap_table_without_toast SELECT i, i*10 FROM generate_series(1,15) AS i; +INSERT INTO ao_table_with_toast SELECT i, 'short_text' FROM generate_series(1,15) AS i; +INSERT INTO ao_table_without_toast SELECT i, i*10 FROM generate_series(1,15) AS i; +-- Check sizes on segments +SELECT relname, sizes.gp_segment_id, sizes.size +FROM pg_class, arenadata_toolkit.adb_relation_storage_size_on_segments(oid) sizes +WHERE relname IN ('heap_table_with_toast', 'heap_table_without_toast', + 'ao_table_with_toast', 'ao_table_without_toast') +ORDER BY 1, 2; + relname | gp_segment_id | size +--------------------------+---------------+------- + ao_table_with_toast | 0 | 168 + ao_table_with_toast | 1 | 112 + ao_table_with_toast | 2 | 216 + ao_table_without_toast | 0 | 128 + ao_table_without_toast | 1 | 88 + ao_table_without_toast | 2 | 160 + heap_table_with_toast | 0 | 32768 + heap_table_with_toast | 1 | 32768 + heap_table_with_toast | 2 | 32768 + heap_table_without_toast | 0 | 32768 + heap_table_without_toast | 1 | 32768 + heap_table_without_toast | 2 | 32768 +(12 rows) + +-- Add random large data to get non-zero toast table's size +UPDATE heap_table_with_toast SET b = ( + SELECT string_agg( chr(trunc(65+random()*26)::integer), '') + FROM generate_series(1,50000)) +WHERE a = 1; +UPDATE ao_table_with_toast SET b = ( + SELECT string_agg( chr(trunc(65+random()*26)::integer), '') + FROM generate_series(1,50000)) +WHERE a = 1; +SELECT relname, sizes.gp_segment_id, sizes.size +FROM pg_class, arenadata_toolkit.adb_relation_storage_size_on_segments(oid) sizes +WHERE relname IN ('heap_table_with_toast', 'ao_table_with_toast') +ORDER BY 1, 2; + relname | gp_segment_id | size +-----------------------+---------------+------- + ao_table_with_toast | 0 | 168 + ao_table_with_toast | 1 | 65704 + ao_table_with_toast | 2 | 216 + heap_table_with_toast | 0 | 32768 + heap_table_with_toast | 1 | 98304 + heap_table_with_toast | 2 | 32768 +(6 rows) + +-- Check summary size of tables +SELECT relname, adb_relation_storage_size size +FROM pg_class, arenadata_toolkit.adb_relation_storage_size(oid) +WHERE relname IN ('heap_table_with_toast', 'heap_table_without_toast', + 'ao_table_with_toast', 'ao_table_without_toast') +ORDER BY 1; + relname | size 
+--------------------------+-------- + ao_table_with_toast | 66088 + ao_table_without_toast | 376 + heap_table_with_toast | 163840 + heap_table_without_toast | 98304 +(4 rows) + +-- Cleanup +DROP TABLE heap_table_with_toast; +DROP TABLE heap_table_without_toast; +DROP TABLE ao_table_with_toast; +DROP TABLE ao_table_without_toast; +DROP EXTENSION arenadata_toolkit; diff --git a/gpcontrib/arenadata_toolkit/sql/adb_relation_storage_size_test.sql b/gpcontrib/arenadata_toolkit/sql/adb_relation_storage_size_test.sql new file mode 100644 index 000000000000..2a1d019a1b0d --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/adb_relation_storage_size_test.sql @@ -0,0 +1,66 @@ +CREATE EXTENSION arenadata_toolkit; + +CREATE TABLE heap_table_with_toast(a INT, b TEXT) +DISTRIBUTED BY (a); + +CREATE TABLE heap_table_without_toast(a INT, b INT) +DISTRIBUTED BY (a); + +CREATE TABLE ao_table_with_toast(a INT, b TEXT) +WITH (APPENDOPTIMIZED=true) +DISTRIBUTED BY (a); + +CREATE TABLE ao_table_without_toast(a INT, b INT) +WITH (APPENDOPTIMIZED=true) +DISTRIBUTED BY (a); + +-- Check that toast exists only for "with_toast" tables +SELECT relname, reltoastrelid != 0 with_toast +FROM pg_class +WHERE relname IN ('heap_table_with_toast', 'heap_table_without_toast', + 'ao_table_with_toast', 'ao_table_without_toast') +ORDER BY 1; + +-- Insert initial data to tables +INSERT INTO heap_table_with_toast SELECT i, 'short_text' FROM generate_series(1,15) AS i; +INSERT INTO heap_table_without_toast SELECT i, i*10 FROM generate_series(1,15) AS i; +INSERT INTO ao_table_with_toast SELECT i, 'short_text' FROM generate_series(1,15) AS i; +INSERT INTO ao_table_without_toast SELECT i, i*10 FROM generate_series(1,15) AS i; + +-- Check sizes on segments +SELECT relname, sizes.gp_segment_id, sizes.size +FROM pg_class, arenadata_toolkit.adb_relation_storage_size_on_segments(oid) sizes +WHERE relname IN ('heap_table_with_toast', 'heap_table_without_toast', + 'ao_table_with_toast', 'ao_table_without_toast') +ORDER BY 1, 2; + +-- Add random large data to get non-zero toast table's size +UPDATE heap_table_with_toast SET b = ( + SELECT string_agg( chr(trunc(65+random()*26)::integer), '') + FROM generate_series(1,50000)) +WHERE a = 1; + +UPDATE ao_table_with_toast SET b = ( + SELECT string_agg( chr(trunc(65+random()*26)::integer), '') + FROM generate_series(1,50000)) +WHERE a = 1; + +SELECT relname, sizes.gp_segment_id, sizes.size +FROM pg_class, arenadata_toolkit.adb_relation_storage_size_on_segments(oid) sizes +WHERE relname IN ('heap_table_with_toast', 'ao_table_with_toast') +ORDER BY 1, 2; + +-- Check summary size of tables +SELECT relname, adb_relation_storage_size size +FROM pg_class, arenadata_toolkit.adb_relation_storage_size(oid) +WHERE relname IN ('heap_table_with_toast', 'heap_table_without_toast', + 'ao_table_with_toast', 'ao_table_without_toast') +ORDER BY 1; + +-- Cleanup +DROP TABLE heap_table_with_toast; +DROP TABLE heap_table_without_toast; +DROP TABLE ao_table_with_toast; +DROP TABLE ao_table_without_toast; + +DROP EXTENSION arenadata_toolkit;