Skip to content

Commit

Permalink
Limit diskquota hash table's size according to initial request (#28)
Browse files Browse the repository at this point in the history
Diskquota did not control the size of its hash tables in shared memory and could
have consumed shared memory not intended for it, potentially impacting the other
database subsystems. Hash tables can also grow indefinitely if the background
process on the coordinator has not started, which can happen for a number of
reasons: it went down with an error, was paused, or was never started. In this case, data is not
collected from segments and some hash tables (active_tables_map, relation_cache,
relid_cache) would not be cleared and would overflow.

This patch adds a limit on the size of all hash tables in shared memory by
adding a function that checks whether the hash table is full. The function
returns HASH_FIND if the map is full and HASH_ENTER otherwise. It also reports a
warning if the table is full. Implemented a GUC that controls how frequently the
warning will be reported, as it could be reported too frequently. Also added a
GUC to control the size of the local reject map. The size of the global reject map
is set to diskquota_max_local_reject_entries * diskquota_max_monitored_databases.

The test_active_table_limit test has been changed. Firstly, the value of
max_active_tables was changed from 2 to 5, since tables from all databases
processed by diskquota are placed in active_tables_map and with a limit of 2
tables overflow occurs even when the extension is created. Secondly, now a
table with 10 partitions is created to overflow active_tables_map, after which
another table is created into which data is inserted that should exhaust the
quota, but since this table is not inserted into active_tables_map, its size
is not taken into account and we can insert into the table after that. At the
end, vacuum full is done to achieve the overflow of altered_reloid_cache.
  • Loading branch information
KnightMurloc authored Dec 29, 2023
1 parent e30e01c commit 2862042
Show file tree
Hide file tree
Showing 12 changed files with 415 additions and 183 deletions.
39 changes: 16 additions & 23 deletions src/diskquota.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,16 @@ static volatile sig_atomic_t got_sigusr1 = false;
static volatile sig_atomic_t got_sigusr2 = false;

/* GUC variables */
int diskquota_naptime = 0;
int diskquota_max_active_tables = 0;
int diskquota_worker_timeout = 60; /* default timeout is 60 seconds */
bool diskquota_hardlimit = false;
int diskquota_max_workers = 10;
int diskquota_max_table_segments = 0;
int diskquota_max_monitored_databases = 0;
int diskquota_max_quota_probes = 0;
int diskquota_naptime = 0;
int diskquota_max_active_tables = 0;
int diskquota_worker_timeout = 60; /* default timeout is 60 seconds */
bool diskquota_hardlimit = false;
int diskquota_max_workers = 10;
int diskquota_max_table_segments = 0;
int diskquota_max_monitored_databases = 0;
int diskquota_max_quota_probes = 0;
int diskquota_max_local_reject_entries = 0;
int diskquota_hashmap_overflow_report_timeout = 0;

DiskQuotaLocks diskquota_locks;
ExtensionDDLMessage *extension_ddl_message = NULL;
Expand All @@ -89,12 +91,6 @@ static DiskQuotaWorkerEntry *volatile MyWorkerInfo = NULL;
// how many database diskquota are monitoring on
static int num_db = 0;

/* how many TableSizeEntry are maintained in all the table_size_map in shared memory*/
pg_atomic_uint32 *diskquota_table_size_entry_num;

/* how many QuotaInfoEntry are maintained in all the quota_info_map in shared memory*/
pg_atomic_uint32 *diskquota_quota_info_entry_num;

static DiskquotaLauncherShmemStruct *DiskquotaLauncherShmem;

#define MIN_SLEEPTIME 100 /* milliseconds */
Expand Down Expand Up @@ -414,6 +410,12 @@ define_guc_variables(void)
DefineCustomIntVariable("diskquota.max_quota_probes", "Max number of quotas on the cluster.", NULL,
&diskquota_max_quota_probes, 1024 * 1024, 1024 * INIT_QUOTA_MAP_ENTRIES, INT_MAX,
PGC_POSTMASTER, 0, NULL, NULL, NULL);
DefineCustomIntVariable("diskquota.max_reject_entries", "Max number of reject entries per database.", NULL,
&diskquota_max_local_reject_entries, 8192, 1, INT_MAX, PGC_POSTMASTER, 0, NULL, NULL, NULL);
DefineCustomIntVariable("diskquota.hashmap_overflow_report_timeout",
"The duration between each warning report about the shared hashmap overflow (in seconds).",
NULL, &diskquota_hashmap_overflow_report_timeout, 60, 0, INT_MAX / 1000, PGC_SUSET, 0, NULL,
NULL, NULL);
}

/* ---- Functions for disk quota worker process ---- */
Expand Down Expand Up @@ -1802,15 +1804,6 @@ init_launcher_shmem()
DiskquotaLauncherShmem->dbArray[i].workerId = INVALID_WORKER_ID;
}
}
/* init TableSizeEntry counter */
diskquota_table_size_entry_num =
ShmemInitStruct("diskquota TableSizeEntry counter", sizeof(pg_atomic_uint32), &found);
if (!found) pg_atomic_init_u32(diskquota_table_size_entry_num, 0);

/* init QuotaInfoEntry counter */
diskquota_quota_info_entry_num =
ShmemInitStruct("diskquota QuotaInfoEntry counter", sizeof(pg_atomic_uint32), &found);
if (!found) pg_atomic_init_u32(diskquota_quota_info_entry_num, 0);
}

/*
Expand Down
3 changes: 3 additions & 0 deletions src/diskquota.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ extern Datum diskquota_fetch_table_stat(PG_FUNCTION_ARGS);
extern int diskquota_naptime;
extern int diskquota_max_active_tables;
extern bool diskquota_hardlimit;
extern int diskquota_hashmap_overflow_report_timeout;

extern int SEGCOUNT;
extern int worker_spi_get_extension_version(int *major, int *minor);
Expand Down Expand Up @@ -316,4 +317,6 @@ extern HTAB *diskquota_hash_create(const char *tabname, long nelem, HASHC
extern HTAB *DiskquotaShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags,
DiskquotaHashFunction hash_function);
extern void refresh_monitored_dbid_cache(void);
extern HASHACTION check_hash_fullness(HTAB *hashp, int max_size, const char *warning_message,
TimestampTz *last_overflow_report);
#endif
26 changes: 26 additions & 0 deletions src/diskquota_utility.c
Original file line number Diff line number Diff line change
Expand Up @@ -1683,3 +1683,29 @@ DiskquotaShmemInitHash(const char *name, /* table string name fo
return ShmemInitHash(name, init_size, max_size, infoP, hash_flags | HASH_BLOBS);
#endif /* GP_VERSION_NUM */
}

/*
 * Choose the hash_search() action for a size-limited hash table.
 *
 * hashp                - table whose current entry count is inspected
 * max_size             - entry limit for the table
 * warning_message      - text appended to "[diskquota] " in the warning
 * last_overflow_report - in/out: time of the last warning (0 means "never
 *                        reported"); updated whenever a warning is emitted
 *
 * Returns HASH_ENTER while the table holds fewer than max_size entries and
 * HASH_FIND otherwise, so that callers do not add new entries to a full
 * table.
 *
 * A WARNING is emitted only when the entry count is exactly max_size, and
 * only if none was reported yet or more than
 * diskquota_hashmap_overflow_report_timeout seconds have passed since the
 * previous one.
 *
 * It can be used only under lock.
 */
HASHACTION
check_hash_fullness(HTAB *hashp, int max_size, const char *warning_message, TimestampTz *last_overflow_report)
{
	long entry_count = hash_get_num_entries(hashp);

	/* There is still room: the caller is allowed to insert. */
	if (entry_count < max_size) return HASH_ENTER;

	/* Beyond the limit nothing is reported, lookups only. */
	if (entry_count != max_size) return HASH_FIND;

	TimestampTz now = GetCurrentTimestamp();

	/* Rate-limit the warning: skip it if the previous one is recent enough. */
	if (*last_overflow_report != 0 &&
	    !TimestampDifferenceExceeds(*last_overflow_report, now, diskquota_hashmap_overflow_report_timeout * 1000))
		return HASH_FIND;

	ereport(WARNING, (errmsg("[diskquota] %s", warning_message)));
	*last_overflow_report = now;

	return HASH_FIND;
}
39 changes: 24 additions & 15 deletions src/gp_activetable.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,12 @@ typedef struct DiskQuotaSetOFCache
HASH_SEQ_STATUS pos;
} DiskQuotaSetOFCache;

HTAB *active_tables_map = NULL; // Set<DiskQuotaActiveTableFileEntry>
static HTAB *active_tables_map = NULL; // Set<DiskQuotaActiveTableFileEntry>
TimestampTz active_tables_map_last_overflow_report = 0;

static const char *active_tables_map_warning =
"the number of active tables reached the limit, please increase "
"the GUC value for diskquota.max_active_tables.";

/*
* monitored_dbid_cache is a allow list for diskquota
Expand All @@ -60,7 +65,12 @@ HTAB *active_tables_map = NULL; // Set<DiskQuotaActiveTableFileEntry>
* dbid will be added to it when creating diskquota extension
* dbid will be removed from it when dropping diskquota extension
*/
HTAB *altered_reloid_cache = NULL; // Set<Oid>
static HTAB *altered_reloid_cache = NULL; // Set<Oid>
static TimestampTz altered_reloid_cache_last_overflow_report = 0;

static const char *altered_reloid_cache_warning =
"the number of altered reloid cache entries reached the limit, please increase "
"the GUC value for diskquota.max_active_tables.";

/* active table hooks which detect the disk file size change. */
static file_create_hook_type prev_file_create_hook = NULL;
Expand Down Expand Up @@ -236,7 +246,9 @@ report_altered_reloid(Oid reloid)
if (IsRoleMirror() || IS_QUERY_DISPATCHER()) return;

LWLockAcquire(diskquota_locks.altered_reloid_cache_lock, LW_EXCLUSIVE);
hash_search(altered_reloid_cache, &reloid, HASH_ENTER, NULL);
HASHACTION action = check_hash_fullness(altered_reloid_cache, diskquota_max_active_tables,
altered_reloid_cache_warning, &altered_reloid_cache_last_overflow_report);
hash_search(altered_reloid_cache, &reloid, action, NULL);
LWLockRelease(diskquota_locks.altered_reloid_cache_lock);
}

Expand Down Expand Up @@ -318,17 +330,11 @@ report_active_table_helper(const RelFileNodeBackend *relFileNode)
item.tablespaceoid = relFileNode->node.spcNode;

LWLockAcquire(diskquota_locks.active_table_lock, LW_EXCLUSIVE);
entry = hash_search(active_tables_map, &item, HASH_ENTER_NULL, &found);
HASHACTION action = check_hash_fullness(active_tables_map, diskquota_max_active_tables, active_tables_map_warning,
&active_tables_map_last_overflow_report);
entry = hash_search(active_tables_map, &item, action, &found);
if (entry && !found) *entry = item;

if (!found && entry == NULL)
{
/*
* We may miss the file size change of this relation at current
* refresh interval.
*/
ereport(WARNING, (errmsg("Share memory is not enough for active tables.")));
}
LWLockRelease(diskquota_locks.active_table_lock);
}

Expand Down Expand Up @@ -856,8 +862,9 @@ get_active_tables_oid(void)
hash_seq_init(&iter, local_active_table_file_map);
while ((active_table_file_entry = (DiskQuotaActiveTableFileEntry *)hash_seq_search(&iter)) != NULL)
{
/* TODO: handle possible ERROR here so that the bgworker will not go down. */
hash_search(active_tables_map, active_table_file_entry, HASH_ENTER, NULL);
HASHACTION action = check_hash_fullness(active_tables_map, diskquota_max_active_tables,
active_tables_map_warning, &active_tables_map_last_overflow_report);
hash_search(active_tables_map, active_table_file_entry, action, NULL);
}
/* TODO: hash_seq_term(&iter); */
LWLockRelease(diskquota_locks.active_table_lock);
Expand Down Expand Up @@ -919,7 +926,9 @@ get_active_tables_oid(void)
LWLockAcquire(diskquota_locks.active_table_lock, LW_EXCLUSIVE);
while ((active_table_file_entry = (DiskQuotaActiveTableFileEntry *)hash_seq_search(&iter)) != NULL)
{
entry = hash_search(active_tables_map, active_table_file_entry, HASH_ENTER_NULL, &found);
HASHACTION action = check_hash_fullness(active_tables_map, diskquota_max_active_tables,
active_tables_map_warning, &active_tables_map_last_overflow_report);
entry = hash_search(active_tables_map, active_table_file_entry, action, &found);
if (entry) *entry = *active_table_file_entry;
}
LWLockRelease(diskquota_locks.active_table_lock);
Expand Down
2 changes: 0 additions & 2 deletions src/gp_activetable.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ extern void init_active_table_hook(void);
extern void init_shm_worker_active_tables(void);
extern void init_lock_active_tables(void);

extern HTAB *active_tables_map;
extern HTAB *monitored_dbid_cache;
extern HTAB *altered_reloid_cache;

#ifndef atooid
#define atooid(x) ((Oid)strtoul((x), NULL, 10))
Expand Down
Loading

0 comments on commit 2862042

Please sign in to comment.