Skip to content

Commit

Permalink
Fix workfile duplication in gp_toolkit.gp_workfile_entries (#1053)
Browse files Browse the repository at this point in the history
If there are multiple postgres processes working simultaneously on a single
segment for a single session, at least one of them producing spill files,
gp_toolkit.gp_workfile_entries contained duplicate entries because of
incorrect JOIN condition.

This patch adds an additional output column to the
gp_workfile_mgr_cache_entries C function, containing PID of the process that
created the workfile set. PID was originally (incorrectly) retrieved from
gp_stat_activity, and is now recieved together with the other attributes (such
as slice number). gp_toolkit extension is updated to account for this change.
test/isolation2/workfile_mgr_test also had to be updated to account for the
change. A new isolation test named workfile_gp_toolkit was also added.

Ticket: ADBDEV-6288
  • Loading branch information
silent-observer authored Oct 2, 2024
1 parent 7a305bd commit c9ba080
Show file tree
Hide file tree
Showing 10 changed files with 175 additions and 5 deletions.
2 changes: 1 addition & 1 deletion gpcontrib/gp_toolkit/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
EXTENSION = gp_toolkit
DATA = gp_toolkit--1.1--1.2.sql gp_toolkit--1.0--1.1.sql gp_toolkit--1.0.sql \
gp_toolkit--1.2--1.3.sql gp_toolkit--1.3.sql gp_toolkit--1.3--1.4.sql \
gp_toolkit--1.4--1.5.sql
gp_toolkit--1.4--1.5.sql gp_toolkit--1.5--1.6.sql
MODULE_big = gp_toolkit
ifeq ($(shell uname -s), Linux)
OBJS = resgroup.o gp_partition_maint.o
Expand Down
61 changes: 61 additions & 0 deletions gpcontrib/gp_toolkit/gp_toolkit--1.5--1.6.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/* gpcontrib/gp_toolkit/gp_toolkit--1.5--1.6.sql */

-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION gp_toolkit UPDATE TO '1.6" to load this file. \quit

--
-- Upgrade gp_toolkit.gp_workfile_entries to use pids directly from C function.
--

---------------------------------------------------------------------------------
-- @view:
-- gp_toolkit.gp_workfile_entries
--
-- @doc:
-- List of all the workfile sets currently present on disk
--
--------------------------------------------------------------------------------

CREATE OR REPLACE VIEW gp_toolkit.gp_workfile_entries AS
WITH all_entries AS (
SELECT C.*
FROM gp_toolkit.__gp_workfile_entries_f_on_coordinator() AS C (
segid int,
prefix text,
size bigint,
optype text,
slice int,
sessionid int,
commandid int,
numfiles int,
pid int
)
UNION ALL
SELECT C.*
FROM gp_toolkit.__gp_workfile_entries_f_on_segments() AS C (
segid int,
prefix text,
size bigint,
optype text,
slice int,
sessionid int,
commandid int,
numfiles int,
pid int
))
SELECT S.datname,
S.pid,
C.sessionid as sess_id,
C.commandid as command_cnt,
S.usename,
S.query,
C.segid,
C.slice,
C.optype,
C.size,
C.numfiles,
C.prefix
FROM all_entries C LEFT OUTER JOIN gp_stat_activity S
ON C.sessionid = S.sess_id and C.pid = S.pid and C.segid=S.gp_segment_id;

GRANT SELECT ON gp_toolkit.gp_workfile_entries TO public;
2 changes: 1 addition & 1 deletion gpcontrib/gp_toolkit/gp_toolkit.control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# gp_toolkit extension

comment = 'various GPDB administrative views/functions'
default_version = '1.5'
default_version = '1.6'
schema = gp_toolkit
7 changes: 6 additions & 1 deletion src/backend/utils/workfile_manager/workfile_mgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,8 @@ workfile_mgr_create_set(const char *operator_name, const char *prefix, bool hold

LWLockRelease(WorkFileManagerLock);

SIMPLE_FAULT_INJECTOR("after_workfile_mgr_create_set");

return work_set;
}

Expand Down Expand Up @@ -639,6 +641,7 @@ workfile_mgr_create_set_internal(const char *operator_name, const char *prefix)
work_set->pinned = false;
work_set->compression_buf_total = 0;
work_set->num_files_compressed = 0;
work_set->pid = MyProcPid;

/* Track all workfile_sets created in current process */
if (!localCtl.initialized)
Expand Down Expand Up @@ -869,7 +872,7 @@ gp_workfile_mgr_cache_entries_internal(PG_FUNCTION_ARGS)
* The number and type of attributes have to match the definition of the
* view gp_workfile_mgr_cache_entries
*/
#define NUM_CACHE_ENTRIES_ELEM 8
#define NUM_CACHE_ENTRIES_ELEM 9
TupleDesc tupdesc = CreateTemplateTupleDesc(NUM_CACHE_ENTRIES_ELEM);

TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid", INT4OID, -1, 0);
Expand All @@ -880,6 +883,7 @@ gp_workfile_mgr_cache_entries_internal(PG_FUNCTION_ARGS)
TupleDescInitEntry(tupdesc, (AttrNumber) 6, "sessionid", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 7, "commandid", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 8, "numfiles", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 9, "pid", INT4OID, -1, 0);

funcctx->tuple_desc = BlessTupleDesc(tupdesc);

Expand Down Expand Up @@ -915,6 +919,7 @@ gp_workfile_mgr_cache_entries_internal(PG_FUNCTION_ARGS)
values[5] = UInt32GetDatum(work_set->session_id);
values[6] = UInt32GetDatum(work_set->command_count);
values[7] = UInt32GetDatum(work_set->num_files);
values[8] = UInt32GetDatum(work_set->pid);

cxt->index++;

Expand Down
3 changes: 3 additions & 0 deletions src/include/utils/workfile_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ typedef struct workfile_set

/* Number of compressed work files */
uint32 num_files_compressed;

/* PID of the process creating the workfile set */
int pid;
} workfile_set;

/* Workfile Set operations */
Expand Down
65 changes: 65 additions & 0 deletions src/test/isolation2/expected/workfile_gp_toolkit.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
-- This test checks for correct output of gp_toolkit.gp_workfile_entries view
-- It is placed in integration tests because testing for this requires checking
-- for workfiles while a query is still running.

-- check there are no workfiles
1: select segid, prefix, slice from gp_toolkit.gp_workfile_entries order by segid;
segid | prefix | slice
-------+--------+-------
(0 rows)

1: create table workfile_test(id serial, s text) distributed by (id);
CREATE TABLE
1: insert into workfile_test(s) select v::text from generate_series(1, 2000) v;
INSERT 0 2000

1: select gp_inject_fault('after_workfile_mgr_create_set', 'suspend', '', '', '', 2, 2, 0, dbid) from gp_segment_configuration where content > -1 and role = 'p';
gp_inject_fault
-----------------
Success:
Success:
Success:
(3 rows)

1&: select * from (select * from workfile_test t1, workfile_test t2 order by t1.id+t2.id limit 10000000) x, (select * from workfile_test t3, workfile_test t4 order by t3.id+t4.id limit 10000000) y, generate_series(1, 10) z; <waiting ...>
-- wait until 2 workfile is created on each segment
2: select gp_wait_until_triggered_fault('after_workfile_mgr_create_set', 1, dbid) from gp_segment_configuration where content > -1 and role = 'p';
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
(3 rows)
-- there should be exactly 6 workfiles, two for each segment (no duplication)
2: select count(*) from gp_toolkit.gp_workfile_entries group by segid;
count
-------
2
2
2
(3 rows)

-- interrupt the query
-- start_ignore

-- end_ignore
1<: <... completed>
ERROR: canceling statement due to user request
1q: ... <quitting>

2: select gp_inject_fault('after_workfile_mgr_create_set', 'reset', dbid) from gp_segment_configuration where content > -1 and role = 'p';
gp_inject_fault
-----------------
Success:
Success:
Success:
(3 rows)

-- check there are no workfiles left
2: select segid, prefix, slice from gp_toolkit.gp_workfile_entries order by segid;
segid | prefix | slice
-------+--------+-------
(0 rows)
2: drop table workfile_test;
DROP TABLE
2q: ... <quitting>
2 changes: 1 addition & 1 deletion src/test/isolation2/input/workfile_mgr_test.source
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ RETURNS setof void
LANGUAGE C VOLATILE EXECUTE ON ALL SEGMENTS AS '@abs_builddir@/isolation2_regress@DLSUFFIX@', 'gp_workfile_mgr_create_workset';

CREATE FUNCTION gp_workfile_mgr_cache_entries()
RETURNS TABLE(segid int4, prefix text, size int8, operation text, slice int4, sessionid int4, commandid int4, numfiles int4)
RETURNS TABLE(segid int4, prefix text, size int8, operation text, slice int4, sessionid int4, commandid int4, numfiles int4, pid int4)
AS '$libdir/gp_workfile_mgr', 'gp_workfile_mgr_cache_entries'
LANGUAGE C VOLATILE EXECUTE ON ALL SEGMENTS;

Expand Down
2 changes: 2 additions & 0 deletions src/test/isolation2/isolation2_schedule
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ test: add_column_after_vacuum_skip_drop_column
test: vacuum_after_vacuum_skip_drop_column
# test workfile_mgr
test: workfile_mgr_test
test: workfile_gp_toolkit

test: pg_basebackup
test: pg_basebackup_with_tablespaces
test: pg_basebackup_large_database_oid
Expand Down
2 changes: 1 addition & 1 deletion src/test/isolation2/output/workfile_mgr_test.source
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ CREATE FUNCTION
CREATE OR REPLACE FUNCTION gp_workfile_mgr_create_empty_workset(worksetname text) RETURNS setof void LANGUAGE C VOLATILE EXECUTE ON ALL SEGMENTS AS '@abs_builddir@/isolation2_regress@DLSUFFIX@', 'gp_workfile_mgr_create_workset';
CREATE FUNCTION

CREATE FUNCTION gp_workfile_mgr_cache_entries() RETURNS TABLE(segid int4, prefix text, size int8, operation text, slice int4, sessionid int4, commandid int4, numfiles int4) AS '$libdir/gp_workfile_mgr', 'gp_workfile_mgr_cache_entries' LANGUAGE C VOLATILE EXECUTE ON ALL SEGMENTS;
CREATE FUNCTION gp_workfile_mgr_cache_entries() RETURNS TABLE(segid int4, prefix text, size int8, operation text, slice int4, sessionid int4, commandid int4, numfiles int4, pid int4) AS '$libdir/gp_workfile_mgr', 'gp_workfile_mgr_cache_entries' LANGUAGE C VOLATILE EXECUTE ON ALL SEGMENTS;
CREATE FUNCTION

-- Wait for at the most 1 min for backends to remove transient
Expand Down
34 changes: 34 additions & 0 deletions src/test/isolation2/sql/workfile_gp_toolkit.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- This test checks for correct output of gp_toolkit.gp_workfile_entries view
-- It is placed in integration tests because testing for this requires checking
-- for workfiles while a query is still running.

-- check there are no workfiles
1: select segid, prefix, slice from gp_toolkit.gp_workfile_entries order by segid;

1: create table workfile_test(id serial, s text) distributed by (id);
1: insert into workfile_test(s) select v::text from generate_series(1, 2000) v;

1: select gp_inject_fault('after_workfile_mgr_create_set', 'suspend', '', '', '', 2, 2, 0, dbid) from gp_segment_configuration where content > -1 and role = 'p';

1&: select * from
(select * from workfile_test t1, workfile_test t2 order by t1.id+t2.id limit 10000000) x,
(select * from workfile_test t3, workfile_test t4 order by t3.id+t4.id limit 10000000) y,
generate_series(1, 10) z;
-- wait until 2 workfile is created on each segment
2: select gp_wait_until_triggered_fault('after_workfile_mgr_create_set', 1, dbid) from gp_segment_configuration where content > -1 and role = 'p';
-- there should be exactly 6 workfiles, two for each segment (no duplication)
2: select count(*) from gp_toolkit.gp_workfile_entries group by segid;

-- interrupt the query
-- start_ignore
2: select pg_cancel_backend(pid) from pg_stat_activity where query like '%workfile_test%' and pid != pg_backend_pid();
-- end_ignore
1<:
1q:

2: select gp_inject_fault('after_workfile_mgr_create_set', 'reset', dbid) from gp_segment_configuration where content > -1 and role = 'p';

-- check there are no workfiles left
2: select segid, prefix, slice from gp_toolkit.gp_workfile_entries order by segid;
2: drop table workfile_test;
2q:

0 comments on commit c9ba080

Please sign in to comment.