Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADBDEV-4015 2.2.2 sync #23

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d4ecc28
Fix compiler warnings. (#339)
higuoxing May 25, 2023
1111f71
Fix bug: diskquota.status() (#344)
zhrt123 Jun 26, 2023
58bb9e2
fix pipeline (#345)
zhrt123 Jun 26, 2023
c2686c9
Reduce the number of the log in bgworker. (#346)
zhrt123 Jun 27, 2023
0a837c8
Fix bug: lose monitored_dbid_cache after switching mirror (#342)
zhrt123 Jun 27, 2023
05da9d4
Enable continuous upgrade. (#340)
zhrt123 Jun 27, 2023
c81e696
Fix pipeline. (#349)
zhrt123 Jun 28, 2023
e3622e1
Fix upgrade version check (#347)
beeender Jun 28, 2023
974876d
Remove gp7 from pipeline (#350)
zhrt123 Jun 28, 2023
6001a05
Add alter extension upgrade test (#348)
beeender Jun 28, 2023
2aefa80
Add a sleep in alter_test.sh (#351)
zhrt123 Jun 29, 2023
86ff586
Update to 2.2.2 (#352)
zhrt123 Jun 29, 2023
f1ca0c5
Reduce the remain logs in bgworker. (#354)
zhrt123 Jul 3, 2023
2c086fc
Fix bug: bgworkers only print log once. (#356)
zhrt123 Jul 4, 2023
22c35b6
Update resources by using gp-extensions-ci subtree
liuxueyang Jul 4, 2023
dbd044f
Squashed 'concourse/lib/' content from commit d51adf5
liuxueyang Jul 5, 2023
6327f5e
Merge commit 'dbd044f46d7672331e19cbd6b95a6f8df67511af' as 'concourse…
liuxueyang Jul 5, 2023
bb840f5
Update resources by using gp-extensions-ci subtree (#357)
liuxueyang Jul 5, 2023
d6cae3c
Revert "ADBDEV-3685 Error handling for disqkuota worker startup stage…
Stolb27 Jul 25, 2023
624e3e7
Merge branch '2.2.2-conflicts' into 2.2.2-sync
Stolb27 Jul 25, 2023
f280a88
ADBDEV-3685 Error handling for disqkuota worker startup stage (#20)
bimboterminator1 Jun 29, 2023
16fecbb
mute alter extension tests
Stolb27 Jul 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions src/diskquota.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ static void vacuum_db_entry(DiskquotaDBEntry *db);
static void init_bgworker_handles(void);
static BackgroundWorkerHandle *get_bgworker_handle(uint32 worker_id);
static void free_bgworker_handle(uint32 worker_id);
static void resetBackgroundWorkerCorruption(void);
#if GP_VERSION_NUM < 70000
/* WaitForBackgroundWorkerShutdown is copied from gpdb7 */
static BgwHandleStatus WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle);
Expand Down Expand Up @@ -575,7 +576,7 @@ disk_quota_worker_main(Datum main_arg)
if (!diskquota_is_paused())
{
/* Refresh quota model with init mode */
refresh_disk_quota_model(!MyWorkerInfo->dbEntry->inited);
refresh_disk_quota_model(MyWorkerInfo->dbEntry);
MyWorkerInfo->dbEntry->inited = true;
is_gang_destroyed = false;
}
Expand Down Expand Up @@ -812,6 +813,7 @@ disk_quota_launcher_main(Datum main_arg)
{
elog(DEBUG1, "[diskquota] got sighup");
got_sighup = false;
resetBackgroundWorkerCorruption();
ProcessConfigFile(PGC_SIGHUP);
}

Expand All @@ -837,11 +839,12 @@ disk_quota_launcher_main(Datum main_arg)
* When curDB->in_use is false, it means the dbEntry has been removed.
* When curDB->dbid doesn't equal curDBId, it means the slot has
* been used by another db
*
* When curDB->corrupted is true means worker couldn't initialize
* the extension in the first run.
* For the above conditions, we just skip this loop and try to fetch
* next db to run.
*/
if (curDB == NULL || !curDB->in_use || curDB->dbid != curDBId)
if (curDB == NULL || !curDB->in_use || curDB->dbid != curDBId || curDB->corrupted)
{
advance_one_db = true;
continue;
Expand Down Expand Up @@ -1847,7 +1850,9 @@ next_db(DiskquotaDBEntry *curDB)
if (nextSlot >= MAX_NUM_MONITORED_DB) nextSlot = 0;
DiskquotaDBEntry *dbEntry = &DiskquotaLauncherShmem->dbArray[nextSlot];
nextSlot++;
if (!dbEntry->in_use || dbEntry->workerId != INVALID_WORKER_ID || dbEntry->dbid == InvalidOid) continue;
if (!dbEntry->in_use || dbEntry->workerId != INVALID_WORKER_ID || dbEntry->dbid == InvalidOid ||
dbEntry->corrupted)
continue;
/* TODO: should release the invalid db related things */
if (!is_valid_dbid(dbEntry->dbid)) continue;
result = dbEntry;
Expand Down Expand Up @@ -1911,10 +1916,11 @@ static void
vacuum_db_entry(DiskquotaDBEntry *db)
{
	/*
	 * Reset a monitored-database slot to its unused state: no database id,
	 * not initialized, no worker attached, not in use and not corrupted.
	 *
	 * db: the slot to reset; a NULL pointer is accepted and ignored.
	 *
	 * Each field is written exactly once (the duplicated assignments of
	 * dbid/inited/workerId/in_use were redundant).
	 */
	if (db == NULL) return;

	db->dbid      = InvalidOid;
	db->inited    = false;
	db->workerId  = INVALID_WORKER_ID;
	db->in_use    = false;
	/* Forget any earlier initialization failure recorded for this slot. */
	db->corrupted = false;
}

static void
Expand Down Expand Up @@ -1949,6 +1955,18 @@ free_bgworker_handle(uint32 worker_id)
}
}

/*
 * Clear the "corrupted" flag on every slot of DiskquotaLauncherShmem->dbArray.
 *
 * The whole scan runs while holding diskquota_locks.dblist_lock in exclusive
 * mode. Slots that are not flagged are left untouched.
 */
static void
resetBackgroundWorkerCorruption(void)
{
	int slot = 0;

	LWLockAcquire(diskquota_locks.dblist_lock, LW_EXCLUSIVE);
	while (slot < MAX_NUM_MONITORED_DB)
	{
		DiskquotaDBEntry *entry = &DiskquotaLauncherShmem->dbArray[slot++];

		/* Only write to slots that are actually flagged. */
		if (!entry->corrupted) continue;
		entry->corrupted = false;
	}
	LWLockRelease(diskquota_locks.dblist_lock);
}

#if GP_VERSION_NUM < 70000
static BgwHandleStatus
WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
Expand Down
3 changes: 2 additions & 1 deletion src/diskquota.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ struct DiskquotaDBEntry

bool inited; // this entry is inited; it is set to true after the worker finishes the first run.
bool in_use; // this slot is in using. AKA dbid != 0
bool corrupted; // consider this entry as invalid to start the worker on
bimboterminator1 marked this conversation as resolved.
Show resolved Hide resolved

TimestampTz last_log_time; // the last time log current database info.
};
Expand Down Expand Up @@ -251,7 +252,7 @@ extern void invalidate_database_rejectmap(Oid dbid);
/* quota model interface*/
extern void init_disk_quota_shmem(void);
extern void init_disk_quota_model(uint32 id);
extern void refresh_disk_quota_model(bool force);
extern void refresh_disk_quota_model(DiskquotaDBEntry *dbEntry);
extern bool check_diskquota_state_is_ready(void);
extern bool quota_check_common(Oid reloid, RelFileNode *relfilenode);

Expand Down
1 change: 1 addition & 0 deletions src/gp_activetable.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ gp_fetch_active_tables(bool is_init)

if (is_init)
{
SIMPLE_FAULT_INJECTOR("diskquota_worker_initialization");
load_table_size(local_table_stats_map);
}
else
Expand Down
19 changes: 15 additions & 4 deletions src/quotamodel.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ static void clear_all_quota_maps(void);
static void transfer_table_for_quota(int64 totalsize, QuotaType type, Oid *old_keys, Oid *new_keys, int16 segid);

/* functions to refresh disk quota model*/
static void refresh_disk_quota_usage(bool is_init);
static void refresh_disk_quota_usage(DiskquotaDBEntry *dbEntry);
static void calculate_table_disk_usage(bool is_init, HTAB *local_active_table_stat_map);
static void flush_to_table_size(void);
static bool flush_local_reject_map(void);
Expand Down Expand Up @@ -761,8 +761,10 @@ do_check_diskquota_state_is_ready(void)
* recalculate the changed disk usage.
*/
void
refresh_disk_quota_model(bool is_init)
refresh_disk_quota_model(DiskquotaDBEntry *dbEntry)
{
bool is_init = !dbEntry->inited;

SEGCOUNT = getgpsegmentCount();
if (SEGCOUNT <= 0)
{
Expand All @@ -773,7 +775,7 @@ refresh_disk_quota_model(bool is_init)
/* skip refresh model when load_quotas failed */
if (load_quotas())
{
refresh_disk_quota_usage(is_init);
refresh_disk_quota_usage(dbEntry);
}
if (is_init) ereport(LOG, (errmsg("[diskquota] initialize quota model finished")));
}
Expand All @@ -785,11 +787,12 @@ refresh_disk_quota_model(bool is_init)
* process is constructing quota model.
*/
static void
refresh_disk_quota_usage(bool is_init)
refresh_disk_quota_usage(DiskquotaDBEntry *dbEntry)
{
bool connected = false;
bool pushed_active_snap = false;
bool ret = true;
bool is_init = !dbEntry->inited;
HTAB *local_active_table_stat_map = NULL;

StartTransactionCommand();
Expand Down Expand Up @@ -841,6 +844,14 @@ refresh_disk_quota_usage(bool is_init)
}
PG_CATCH();
{
/* Initialization failed. */
if (is_init)
{
LWLockAcquire(diskquota_locks.dblist_lock, LW_EXCLUSIVE);
dbEntry->corrupted = true;
LWLockRelease(diskquota_locks.dblist_lock);
PG_RE_THROW();
}
/* Prevents interrupts while cleaning up */
HOLD_INTERRUPTS();
EmitErrorReport();
Expand Down
46 changes: 46 additions & 0 deletions tests/isolation2/expected/test_worker_init_failure.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
--
-- Tests for error handling when the worker catches the error during
-- its first run.
--

-- Function checking whether worker on given db is up
CREATE or REPLACE LANGUAGE plpython2u;
CREATE
CREATE or REPLACE FUNCTION check_worker_presence(dbname text, wait_time int) RETURNS boolean AS $$ import psutil import time worker_name = 'bgworker: [diskquota] ' + dbname time.sleep(wait_time) for proc in psutil.process_iter(): try: if 'postgres' in proc.name().lower(): for val in proc.cmdline(): if worker_name in val: return True except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): pass return False $$ LANGUAGE plpython2u EXECUTE ON MASTER;
CREATE

-- Test diskquota behavior when an error occurs during the worker's first run.
-- The error leads to process termination. And launcher won't start it again
-- until extension reload or SIGHUP signal.
CREATE EXTENSION diskquota;
CREATE
SELECT check_worker_presence(current_database(), 0);
check_worker_presence
-----------------------
t
(1 row)
SELECT gp_inject_fault('diskquota_worker_initialization', 'error', dbid) FROM gp_segment_configuration WHERE role='p' AND content=-1;
gp_inject_fault
-----------------
Success:
(1 row)
SELECT diskquota.init_table_size_table();
init_table_size_table
-----------------------

(1 row)
SELECT check_worker_presence(current_database(), current_setting('diskquota.worker_timeout')::int / 2);
check_worker_presence
-----------------------
f
(1 row)
-- Reload configuration and check that worker is up again
!\retcode gpstop -u;
(exited with code 0)
SELECT check_worker_presence(current_database(), current_setting('diskquota.worker_timeout')::int / 2);
check_worker_presence
-----------------------
t
(1 row)
DROP EXTENSION diskquota;
DROP
1 change: 1 addition & 0 deletions tests/isolation2/isolation2_schedule
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ test: test_per_segment_config
test: test_relation_cache
test: test_ereport_from_seg
test: test_drop_extension
test: test_worker_init_failure
test: reset_config
40 changes: 40 additions & 0 deletions tests/isolation2/sql/test_worker_init_failure.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
--
-- Tests for error handling when the worker catches the error during
-- its first run.
--

-- Function checking whether worker on given db is up
CREATE or REPLACE LANGUAGE plpython2u;
CREATE or REPLACE FUNCTION check_worker_presence(dbname text, wait_time int)
RETURNS boolean
AS $$
import psutil
import time
worker_name = 'bgworker: [diskquota] ' + dbname
time.sleep(wait_time)
for proc in psutil.process_iter():
try:
if 'postgres' in proc.name().lower():
for val in proc.cmdline():
if worker_name in val:
return True
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
return False
$$ LANGUAGE plpython2u EXECUTE ON MASTER;

-- Test diskquota behavior when an error occurs during the worker's first run.
-- The error leads to process termination. And launcher won't start it again
-- until extension reload or SIGHUP signal.
CREATE EXTENSION diskquota;
SELECT check_worker_presence(current_database(), 0);
SELECT gp_inject_fault('diskquota_worker_initialization', 'error', dbid)
FROM gp_segment_configuration WHERE role='p' AND content=-1;
SELECT diskquota.init_table_size_table();
SELECT check_worker_presence(current_database(),
current_setting('diskquota.worker_timeout')::int / 2);
-- Reload configuration and check that worker is up again
!\retcode gpstop -u;
SELECT check_worker_presence(current_database(),
current_setting('diskquota.worker_timeout')::int / 2);
DROP EXTENSION diskquota;