From 138f95a7d4c2104e49a4dab74e9eac61a5614701 Mon Sep 17 00:00:00 2001 From: Xiaoran Wang Date: Mon, 12 Dec 2022 21:14:12 +0800 Subject: [PATCH] Fix diskquota worker schedule bug (#280) When a database's diskquota bgworker is killed and the db is dropped, diskquota scheduler can not work properly. The cause is: if the scheduler failed to start a bgworker for a database, it will try it again and again forever. A different status code is returned when failing to start bg worker. And if it is failed due to the dropped database (or another other reasons causes db name cannot be retrieved from db id), just skip this bgwoker for now For other failure reasons, limit the times of starting a bgworker for a database to 3 times. If the limit is reached, skip it and pick the next one. --- diskquota.c | 79 ++++++++--- tests/regress/diskquota_schedule | 1 + .../regress/expected/test_worker_schedule.out | 2 + .../test_worker_schedule_exception.out | 123 ++++++++++++++++++ tests/regress/sql/test_worker_schedule.sql | 1 + .../sql/test_worker_schedule_exception.sql | 38 ++++++ 6 files changed, 225 insertions(+), 19 deletions(-) create mode 100644 tests/regress/expected/test_worker_schedule_exception.out create mode 100644 tests/regress/sql/test_worker_schedule_exception.sql diff --git a/diskquota.c b/diskquota.c index 2740f276..519da475 100644 --- a/diskquota.c +++ b/diskquota.c @@ -100,20 +100,27 @@ static DiskquotaLauncherShmemStruct *DiskquotaLauncherShmem; */ BackgroundWorkerHandle **bgworker_handles; +typedef enum +{ + SUCCESS, + INVALID_DB, + NO_FREE_WORKER, + UNKNOWN, +} StartWorkerState; /* functions of disk quota*/ void _PG_init(void); void _PG_fini(void); void disk_quota_worker_main(Datum); void disk_quota_launcher_main(Datum); -static void disk_quota_sigterm(SIGNAL_ARGS); -static void disk_quota_sighup(SIGNAL_ARGS); -static void define_guc_variables(void); -static bool start_worker(DiskquotaDBEntry *dbEntry); -static void create_monitor_db_table(void); -static void add_dbid_to_database_list(Oid dbid); -static void del_dbid_from_database_list(Oid dbid); -static void process_extension_ddl_message(void); +static void disk_quota_sigterm(SIGNAL_ARGS); +static void disk_quota_sighup(SIGNAL_ARGS); +static void define_guc_variables(void); +static StartWorkerState start_worker(DiskquotaDBEntry *dbEntry); +static void create_monitor_db_table(void); +static void add_dbid_to_database_list(Oid dbid); +static void del_dbid_from_database_list(Oid dbid); +static void process_extension_ddl_message(void); static void do_process_extension_ddl_message(MessageResult *code, ExtensionDDLMessage local_extension_ddl_message); static void terminate_all_workers(void); static void on_add_db(Oid dbid, MessageResult *code); @@ -594,6 +601,7 @@ disk_quota_launcher_main(Datum main_arg) Oid curDBId = 0; bool advance_one_db = true; bool timeout = false; + int try_times = 0; while (!got_sigterm) { int rc; @@ -601,8 +609,9 @@ disk_quota_launcher_main(Datum main_arg) /* pick a db to run */ if (advance_one_db) { - curDB = next_db(curDB); - timeout = false; + curDB = next_db(curDB); + timeout = false; + try_times = 0; if (curDB != NULL) { curDBId = curDB->dbid; @@ -728,10 +737,18 @@ disk_quota_launcher_main(Datum main_arg) */ if (TimestampDifferenceExceeds(curDB->next_run_time, GetCurrentTimestamp(), MIN_SLEEPTIME)) { - bool ret = start_worker(curDB); - advance_one_db = ret; - /* has exceeded the next_run_time of current db */ - timeout = true; + StartWorkerState ret = start_worker(curDB); + /* when start_worker successfully or db is invalid, pick up next db to run */ + advance_one_db = (ret == SUCCESS || ret == INVALID_DB) ? true : false; + if (!advance_one_db) + { + /* has exceeded the next_run_time of current db */ + timeout = true; + /* when start_worker return is not 2(no free worker), increase the try_times*/ + if (ret != NO_FREE_WORKER) try_times++; + /* only try to start bgworker for a database at most 3 times */ + if (try_times >= 3) advance_one_db = true; + } } else { @@ -1237,9 +1254,17 @@ terminate_all_workers(void) * Dynamically launch an disk quota worker process. * This function is called when launcher process * schedules a database's diskquota worker to run. + * + * return: + * SUCCESS means starting the bgworker sucessfully. + * INVALID_DB means the database is invalid + * NO_FREE_WORKER means there is no avaliable free workers + * UNKNOWN means registering or starting the bgworker + * failed, maybe there is no free bgworker, or + * forking a process failed and so on. */ -static bool +static StartWorkerState start_worker(DiskquotaDBEntry *dbEntry) { BackgroundWorker worker; @@ -1247,12 +1272,14 @@ start_worker(DiskquotaDBEntry *dbEntry) DiskQuotaWorkerEntry *dq_worker; MemoryContext old_ctx; char *dbname = NULL; + int result = SUCCESS; dq_worker = next_worker(); if (dq_worker == NULL) { elog(DEBUG1, "[diskquota] no free workers"); - return false; + result = NO_FREE_WORKER; + return result; } /* free the BackgroundWorkerHandle used by last database */ free_bgworker_handle(dq_worker->id); @@ -1279,7 +1306,11 @@ start_worker(DiskquotaDBEntry *dbEntry) sprintf(worker.bgw_library_name, DISKQUOTA_BINARY_NAME); sprintf(worker.bgw_function_name, "disk_quota_worker_main"); dbname = get_db_name(dbEntry->dbid); - if (dbname == NULL) goto Failed; + if (dbname == NULL) + { + result = INVALID_DB; + goto Failed; + } snprintf(worker.bgw_name, sizeof(worker.bgw_name), "%s", dbname); pfree(dbname); @@ -1293,6 +1324,7 @@ start_worker(DiskquotaDBEntry *dbEntry) if (!ret) { elog(WARNING, "Create bgworker failed"); + result = UNKNOWN; goto Failed; } BgwHandleStatus status; @@ -1302,6 +1334,7 @@ start_worker(DiskquotaDBEntry *dbEntry) { ereport(WARNING, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("could not start background process"), errhint("More details may be available in the server log."))); + result = UNKNOWN; goto Failed; } if (status == BGWH_POSTMASTER_DIED) @@ -1309,16 +1342,17 @@ start_worker(DiskquotaDBEntry *dbEntry) ereport(WARNING, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("cannot start background processes without postmaster"), errhint("Kill all remaining database processes and restart the database."))); + result = UNKNOWN; goto Failed; } Assert(status == BGWH_STARTED); - return true; + return result; Failed: elog(DEBUG1, "[diskquota] diskquota, starts diskquota failed"); FreeWorker(dq_worker); - return false; + return result; } /* @@ -1655,6 +1689,10 @@ next_db(DiskquotaDBEntry *curDB) nextSlot = curDB->id + 1; } + /* + * SearchSysCache should be run in a transaction + */ + StartTransactionCommand(); LWLockAcquire(diskquota_locks.dblist_lock, LW_SHARED); for (int i = 0; i < MAX_NUM_MONITORED_DB; i++) { @@ -1662,10 +1700,13 @@ next_db(DiskquotaDBEntry *curDB) DiskquotaDBEntry *dbEntry = &DiskquotaLauncherShmem->dbArray[nextSlot]; nextSlot++; if (!dbEntry->in_use || dbEntry->workerId != INVALID_WORKER_ID || dbEntry->dbid == InvalidOid) continue; + /* TODO: should release the invalid db related things */ + if (!is_valid_dbid(dbEntry->dbid)) continue; result = dbEntry; break; } LWLockRelease(diskquota_locks.dblist_lock); + CommitTransactionCommand(); return result; } diff --git a/tests/regress/diskquota_schedule b/tests/regress/diskquota_schedule index 3d34e02b..7722765d 100644 --- a/tests/regress/diskquota_schedule +++ b/tests/regress/diskquota_schedule @@ -40,5 +40,6 @@ test: test_ctas_tablespace_schema test: test_default_tablespace test: test_tablespace_diff_schema test: test_worker_schedule +test: test_worker_schedule_exception test: test_drop_extension test: reset_config diff --git a/tests/regress/expected/test_worker_schedule.out b/tests/regress/expected/test_worker_schedule.out index 8003a4e2..7c6fc7b8 100644 --- a/tests/regress/expected/test_worker_schedule.out +++ b/tests/regress/expected/test_worker_schedule.out @@ -2,6 +2,8 @@ \c DROP DATABASE IF EXISTS t1; NOTICE: database "t1" does not exist, skipping +DROP DATABASE IF EXISTS t2; +NOTICE: database "t2" does not exist, skipping DROP DATABASE IF EXISTS t3; NOTICE: database "t3" does not exist, skipping DROP DATABASE IF EXISTS t4; diff --git a/tests/regress/expected/test_worker_schedule_exception.out b/tests/regress/expected/test_worker_schedule_exception.out new file mode 100644 index 00000000..432e27f9 --- /dev/null +++ b/tests/regress/expected/test_worker_schedule_exception.out @@ -0,0 +1,123 @@ +-- start_ignore +\! gpconfig -c diskquota.max_workers -v 10; +20221209:16:01:17:089154 gpconfig:wxiaoranVKGWQ:wxiaoran-[INFO]:-completed successfully with parameters '-c diskquota.max_workers -v 10' +\! gpconfig -c diskquota.naptime -v 4; +20221209:16:01:19:089255 gpconfig:wxiaoranVKGWQ:wxiaoran-[INFO]:-completed successfully with parameters '-c diskquota.naptime -v 4' +\! gpstop -arf; +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Starting gpstop with args: -arf +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Gathering information and validating the environment... +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Obtaining Greenplum Master catalog information +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Obtaining Segment details from master... +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Greenplum Version: 'postgres (Greenplum Database) 6.22.1+dev.36.gedf0e003f8 build dev' +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Commencing Master instance shutdown with mode='fast' +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Master segment instance directory=/Users/wxiaoran/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Attempting forceful termination of any leftover master process +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Terminating processes for segment /Users/wxiaoran/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 +20221209:18:21:23:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Stopping master standby host wxiaoranVKGWQ.vmware.com mode=fast +20221209:18:21:24:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Successfully shutdown standby process on wxiaoranVKGWQ.vmware.com +20221209:18:21:24:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown +20221209:18:21:24:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Commencing parallel primary segment instance shutdown, please wait... +20221209:18:21:24:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-0.00% of jobs completed +20221209:18:21:25:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-100.00% of jobs completed +20221209:18:21:25:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait... +20221209:18:21:25:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-0.00% of jobs completed +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-100.00% of jobs completed +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:----------------------------------------------------- +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:- Segments stopped successfully = 6 +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:- Segments with errors during stop = 0 +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:----------------------------------------------------- +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Successfully shutdown 6 of 6 segment instances +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Database successfully shutdown with no errors reported +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Cleaning up leftover gpmmon process +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-No leftover gpmmon process found +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Cleaning up leftover gpsmon processes +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-No leftover gpsmon processes on some hosts. not attempting forceful termination on these hosts +20221209:18:21:26:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Cleaning up leftover shared memory +20221209:18:21:27:045673 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Restarting System... +\c +DROP DATABASE IF EXISTS t1; +NOTICE: database "t1" does not exist, skipping +DROP DATABASE IF EXISTS t2; +NOTICE: database "t2" does not exist, skipping +--end_ignore +CREATE DATABASE t1; +CREATE DATABASE t2; +\c t1 +CREATE EXTENSION diskquota; +SELECT diskquota.wait_for_worker_new_epoch(); + wait_for_worker_new_epoch +--------------------------- + t +(1 row) + +\! pgrep -f "[p]ostgres.*bgworker.*t1" | xargs kill; +\! sleep 0.5 ; ps -ef | grep postgres | grep "\[diskquota]" | grep -v grep | wc -l +2 +-- start_ignore +\! ps -ef | grep postgres | grep "\[diskquota]" | grep -v grep + 503 89701 89678 0 4:01PM ?? 0:00.17 postgres: 6000, bgworker: [diskquota] - launcher + 503 89743 89678 0 4:01PM ?? 0:00.03 postgres: 6000, bgworker: [diskquota] contrib_regression cmd1 +--end_ignore +\c contrib_regression +DROP DATABASE t1; +\c t2 +CREATE EXTENSION diskquota; +SELECT diskquota.wait_for_worker_new_epoch(); + wait_for_worker_new_epoch +--------------------------- + t +(1 row) + +\c t2 +SELECT diskquota.pause(); + pause +------- + +(1 row) + +SELECT diskquota.wait_for_worker_new_epoch(); + wait_for_worker_new_epoch +--------------------------- + t +(1 row) + +DROP EXTENSION diskquota; +\c contrib_regression +DROP DATABASE t2; +--start_ignore +\! gpconfig -r diskquota.naptime; +20221209:16:02:10:089976 gpconfig:wxiaoranVKGWQ:wxiaoran-[INFO]:-completed successfully with parameters '-r diskquota.naptime' +\! gpconfig -r diskquota.max_workers; +20221209:16:02:12:090078 gpconfig:wxiaoranVKGWQ:wxiaoran-[INFO]:-completed successfully with parameters '-r diskquota.max_workers' +\! gpstop -arf; +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Starting gpstop with args: -arf +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Gathering information and validating the environment... +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Obtaining Greenplum Master catalog information +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Obtaining Segment details from master... +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Greenplum Version: 'postgres (Greenplum Database) 6.22.1+dev.36.gedf0e003f8 build dev' +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Commencing Master instance shutdown with mode='fast' +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Master segment instance directory=/Users/wxiaoran/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Attempting forceful termination of any leftover master process +20221209:16:02:12:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Terminating processes for segment /Users/wxiaoran/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 +20221209:16:02:13:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Stopping master standby host wxiaoranVKGWQ.vmware.com mode=fast +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Successfully shutdown standby process on wxiaoranVKGWQ.vmware.com +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Commencing parallel primary segment instance shutdown, please wait... +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-0.00% of jobs completed +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-100.00% of jobs completed +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait... +20221209:16:02:14:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-0.00% of jobs completed +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-100.00% of jobs completed +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:----------------------------------------------------- +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:- Segments stopped successfully = 6 +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:- Segments with errors during stop = 0 +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:----------------------------------------------------- +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Successfully shutdown 6 of 6 segment instances +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Database successfully shutdown with no errors reported +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Cleaning up leftover gpmmon process +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-No leftover gpmmon process found +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Cleaning up leftover gpsmon processes +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-No leftover gpsmon processes on some hosts. not attempting forceful termination on these hosts +20221209:16:02:15:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Cleaning up leftover shared memory +20221209:16:02:17:090179 gpstop:wxiaoranVKGWQ:wxiaoran-[INFO]:-Restarting System... +--end_ignore diff --git a/tests/regress/sql/test_worker_schedule.sql b/tests/regress/sql/test_worker_schedule.sql index f63e02f4..94d27e93 100644 --- a/tests/regress/sql/test_worker_schedule.sql +++ b/tests/regress/sql/test_worker_schedule.sql @@ -2,6 +2,7 @@ \c DROP DATABASE IF EXISTS t1; +DROP DATABASE IF EXISTS t2; DROP DATABASE IF EXISTS t3; DROP DATABASE IF EXISTS t4; DROP DATABASE IF EXISTS t5; diff --git a/tests/regress/sql/test_worker_schedule_exception.sql b/tests/regress/sql/test_worker_schedule_exception.sql new file mode 100644 index 00000000..83fe7faf --- /dev/null +++ b/tests/regress/sql/test_worker_schedule_exception.sql @@ -0,0 +1,38 @@ +-- start_ignore +\! gpconfig -c diskquota.max_workers -v 10; +\! gpconfig -c diskquota.naptime -v 4; +\! gpstop -arf; +\c +DROP DATABASE IF EXISTS t1; +DROP DATABASE IF EXISTS t2; +--end_ignore + +CREATE DATABASE t1; +CREATE DATABASE t2; +\c t1 +CREATE EXTENSION diskquota; +SELECT diskquota.wait_for_worker_new_epoch(); + +\! pgrep -f "[p]ostgres.*bgworker.*t1" | xargs kill; +\! sleep 0.5 ; ps -ef | grep postgres | grep "\[diskquota]" | grep -v grep | wc -l +-- start_ignore +\! ps -ef | grep postgres | grep "\[diskquota]" | grep -v grep +--end_ignore +\c contrib_regression +DROP DATABASE t1; +\c t2 +CREATE EXTENSION diskquota; +SELECT diskquota.wait_for_worker_new_epoch(); + +\c t2 +SELECT diskquota.pause(); +SELECT diskquota.wait_for_worker_new_epoch(); +DROP EXTENSION diskquota; + +\c contrib_regression +DROP DATABASE t2; +--start_ignore +\! gpconfig -r diskquota.naptime; +\! gpconfig -r diskquota.max_workers; +\! gpstop -arf; +--end_ignore