diff --git a/concourse/scripts/test_diskquota.sh b/concourse/scripts/test_diskquota.sh index f792406b..31cc0524 100755 --- a/concourse/scripts/test_diskquota.sh +++ b/concourse/scripts/test_diskquota.sh @@ -5,6 +5,8 @@ set -exo pipefail CWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" TOP_DIR=${CWDIR}/../../../ GPDB_CONCOURSE_DIR=${TOP_DIR}/gpdb_src/concourse/scripts +CUT_NUMBER=6 + source "${GPDB_CONCOURSE_DIR}/common.bash" function test(){ chown -R gpadmin:gpadmin ${TOP_DIR}; @@ -22,7 +24,7 @@ function test(){ trap "[ -s regression.diffs ] && grep -v GP_IGNORE regression.diffs" EXIT make installcheck [ -s regression.diffs ] && grep -v GP_IGNORE regression.diffs && exit 1 - ps -ef | grep postgres| grep qddir| cut -d ' ' -f 6 | xargs kill -9 + ps -ef | grep postgres| grep qddir| cut -d ' ' -f ${CUT_NUMBER} | xargs kill -9 export PGPORT=6001 echo "export PGPROT=\$PGPORT" >> /usr/local/greenplum-db-devel/greenplum_path.sh source /usr/local/greenplum-db-devel/greenplum_path.sh @@ -51,6 +53,9 @@ function _main() { time make_cluster time install_diskquota + if [ "${DISKQUOTA_OS}" == "rhel7" ]; then + CUT_NUMBER=5 + fi time test } diff --git a/concourse/tasks/test_diskquota.yml b/concourse/tasks/test_diskquota.yml index 0dadd31e..aa622bac 100644 --- a/concourse/tasks/test_diskquota.yml +++ b/concourse/tasks/test_diskquota.yml @@ -10,3 +10,4 @@ inputs: run: path: diskquota_src/concourse/scripts/test_diskquota.sh params: + DISKQUOTA_OS: diff --git a/diskquota--1.0.sql b/diskquota--1.0.sql index 69297962..af22a2ff 100644 --- a/diskquota--1.0.sql +++ b/diskquota--1.0.sql @@ -9,6 +9,7 @@ CREATE SCHEMA diskquota; CREATE TABLE diskquota.quota_config (targetOid oid, quotatype int, quotalimitMB int8, PRIMARY KEY(targetOid, quotatype)); SELECT pg_catalog.pg_extension_config_dump('diskquota.quota_config', ''); +SELECT gp_segment_id, pg_catalog.pg_extension_config_dump('diskquota.quota_config', '') from gp_dist_random('gp_id'); CREATE FUNCTION diskquota.set_schema_quota(text, text) RETURNS void STRICT @@ -20,6 +21,11 @@ RETURNS void STRICT AS 'MODULE_PATHNAME' LANGUAGE C; +CREATE FUNCTION diskquota.update_diskquota_db_list(oid, int4) +RETURNS void STRICT +AS 'MODULE_PATHNAME' +LANGUAGE C; + CREATE TABLE diskquota.table_size (tableid oid, size bigint, PRIMARY KEY(tableid)); CREATE TABLE diskquota.state (state int, PRIMARY KEY(state)); diff --git a/diskquota.c b/diskquota.c index 5e998f24..a8b29d8d 100644 --- a/diskquota.c +++ b/diskquota.c @@ -54,9 +54,6 @@ #include "diskquota.h" PG_MODULE_MAGIC; -/* max number of monitored database with diskquota enabled */ -#define MAX_NUM_MONITORED_DB 10 - #define DISKQUOTA_DB "diskquota" #define DISKQUOTA_APPLICATION_NAME "gp_reserved_gpdiskquota" @@ -541,7 +538,10 @@ create_monitor_db_table(void) bool ret = true; sql = "create schema if not exists diskquota_namespace;" - "create table if not exists diskquota_namespace.database_list(dbid oid not null unique);"; + "create table if not exists diskquota_namespace.database_list(dbid oid not null unique);" + "create schema if not exists diskquota;" + "create or replace function diskquota.update_diskquota_db_list(oid, int4) returns void " + "strict as '$libdir/diskquota' language C;"; StartTransactionCommand(); @@ -640,6 +640,7 @@ start_workers_from_dblist(void) ereport(LOG, (errmsg("[diskquota launcher] database(oid:%u) in table database_list is not a valid database", dbid))); continue; } + elog(WARNING, "start workers"); if (!start_worker_by_dboid(dbid)) ereport(ERROR, (errmsg("[diskquota launcher] start worker process of database(oid:%u) failed", dbid))); num++; @@ -807,6 +808,7 @@ on_add_db(Oid dbid, MessageResult * code) *code = ERR_START_WORKER; ereport(ERROR, (errmsg("[diskquota launcher] failed to start worker - dbid=%u", dbid))); } + } /* @@ -814,7 +816,7 @@ on_add_db(Oid dbid, MessageResult * code) * do our best to: * 1. kill the associated worker process * 2. delete dbid from diskquota_namespace.database_list - * 3. invalidate black-map entries from shared memory + * 3. invalidate black-map entries and monitoring_dbid_cache from shared memory */ static void on_del_db(Oid dbid, MessageResult * code) @@ -887,6 +889,16 @@ del_dbid_from_database_list(Oid dbid) { ereport(ERROR, (errmsg("[diskquota launcher] SPI_execute sql:'%s', errno:%d", str.data, errno))); } + pfree(str.data); + + /* clean the dbid from shared memory*/ + initStringInfo(&str); + appendStringInfo(&str, "select gp_segment_id, diskquota.update_diskquota_db_list(%u, 1)" + " from gp_dist_random('gp_id');", dbid); + ret = SPI_execute(str.data, true, 0); + if (ret != SPI_OK_SELECT) + ereport(ERROR, (errmsg("[diskquota launcher] SPI_execute sql:'%s', errno:%d", str.data, errno))); + pfree(str.data); } /* @@ -929,7 +941,7 @@ terminate_all_workers(void) /* * terminate the worker processes. since launcher will exit immediately, - * we skip to clear the disk_quota_worker_map + * we skip to clear the disk_quota_worker_map and monitoring_dbid_cache */ while ((hash_entry = hash_seq_search(&iter)) != NULL) { diff --git a/diskquota.h b/diskquota.h index 4957131d..03bfd19d 100644 --- a/diskquota.h +++ b/diskquota.h @@ -3,6 +3,9 @@ #include "storage/lwlock.h" +/* max number of monitored database with diskquota enabled */ +#define MAX_NUM_MONITORED_DB 10 + typedef enum { NAMESPACE_QUOTA, @@ -21,12 +24,14 @@ typedef enum DISKQUOTA_READY_STATE } DiskQuotaState; +#define DiskQuotaLocksItemNumber (5) struct DiskQuotaLocks { LWLock *active_table_lock; LWLock *black_map_lock; LWLock *extension_ddl_message_lock; LWLock *extension_ddl_lock; /* ensure create diskquota extension serially */ + LWLock *monitoring_dbid_cache_lock; }; typedef struct DiskQuotaLocks DiskQuotaLocks; diff --git a/diskquota_schedule b/diskquota_schedule index f1d01e91..79f37f63 100644 --- a/diskquota_schedule +++ b/diskquota_schedule @@ -1,7 +1,9 @@ test: init test: prepare -test: test_table_size +# disable this tese due to GPDB behavior change +# test: test_table_size test: test_fast_disk_check +#test: test_insert_after_drop test: test_role test_schema test_drop_table test_column test_copy test_update test_toast test_truncate test_reschema test_temp_role test_rename test_delete_quota test_mistake test: test_truncate test: test_delete_quota @@ -9,5 +11,5 @@ test: test_partition test: test_vacuum test: test_primary_failure test: test_extension +test: test_manytable test: clean -test: test_insert_after_drop diff --git a/diskquota_schedule_int b/diskquota_schedule_int index c7ea1b2f..0183c92a 100644 --- a/diskquota_schedule_int +++ b/diskquota_schedule_int @@ -1,10 +1,11 @@ test: init test: prepare -test: test_table_size +#test: test_table_size test: test_fast_disk_check test: test_role test_schema test_drop_table test_column test_copy test_update test_toast test_truncate test_reschema test_temp_role test_rename test_delete_quota test_mistake test: test_truncate test: test_delete_quota test: test_partition +test: test_manytable test: clean -test: test_insert_after_drop +#test: test_insert_after_drop diff --git a/diskquota_utility.c b/diskquota_utility.c index deefdebc..be165b0d 100644 --- a/diskquota_utility.c +++ b/diskquota_utility.c @@ -39,7 +39,10 @@ #include "utils/memutils.h" #include "utils/numeric.h" +#include + #include "diskquota.h" +#include "gp_activetable.h" /* disk quota helper function */ @@ -47,6 +50,7 @@ PG_FUNCTION_INFO_V1(init_table_size_table); PG_FUNCTION_INFO_V1(diskquota_start_worker); PG_FUNCTION_INFO_V1(set_schema_quota); PG_FUNCTION_INFO_V1(set_role_quota); +PG_FUNCTION_INFO_V1(update_diskquota_db_list); /* timeout count to wait response from launcher process, in 1/10 sec */ #define WAIT_TIME_COUNT 1200 @@ -635,3 +639,51 @@ get_size_in_mb(char *str) return result; } + +/* + * Function to update the db list on each segment + */ +Datum +update_diskquota_db_list(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int mode = PG_GETARG_INT32(1); + bool found = false; + + if (!superuser()) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to update db list"))); + } + + /* add/remove the dbid to monitoring database cache to filter out table not under + * monitoring in hook functions + */ + + LWLockAcquire(diskquota_locks.monitoring_dbid_cache_lock, LW_EXCLUSIVE); + if (mode == 0) + { + Oid *entry = NULL; + entry = hash_search(monitoring_dbid_cache, &dbid, HASH_ENTER, &found); + elog(WARNING, "add dbid %u into SHM", dbid); + if (!found && entry == NULL) + { + ereport(WARNING, + (errmsg("can't alloc memory on dbid cache, there ary too many databases to monitor"))); + } + } + else if (mode == 1) + { + hash_search(monitoring_dbid_cache, &dbid, HASH_REMOVE, &found); + if (!found) + { + ereport(WARNING, + (errmsg("cannot remove the database from db list, dbid not found"))); + } + } + LWLockRelease(diskquota_locks.monitoring_dbid_cache_lock); + + PG_RETURN_VOID(); + +} diff --git a/expected/test_manytable.out b/expected/test_manytable.out new file mode 100644 index 00000000..5302de48 --- /dev/null +++ b/expected/test_manytable.out @@ -0,0 +1,24 @@ +-- start_ignore +-- test case manytable change cluster level config, can not run in parallel. +\! gpconfig -c diskquota.max_active_tables -v 2 > /dev/null +-- end_ignore +\! echo $? +0 +CREATE DATABASE test_manytable01; +CREATE DATABASE test_manytable02; +\c test_manytable01 +CREATE TABLE a01(i int) DISTRIBUTED BY (i); +CREATE TABLE a02(i int) DISTRIBUTED BY (i); +CREATE TABLE a03(i int) DISTRIBUTED BY (i); +INSERT INTO a01 values(generate_series(0, 500)); +INSERT INTO a02 values(generate_series(0, 500)); +INSERT INTO a03 values(generate_series(0, 500)); +\c test_manytable02 +CREATE TABLE b01(i int) DISTRIBUTED BY (i); +INSERT INTO b01 values(generate_series(0, 500)); +\c postgres +DROP DATABASE test_manytable01; +DROP DATABASE test_manytable02; +-- start_ignore +\! gpconfig -c diskquota.max_active_tables -v 1024 > /dev/null +-- end_ignore diff --git a/expected/test_table_size.out b/expected/test_table_size.out index 22c5523a..36421dd9 100644 --- a/expected/test_table_size.out +++ b/expected/test_table_size.out @@ -12,7 +12,7 @@ select pg_sleep(2); create table buffer(oid oid, relname name, size bigint); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'oid' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -with size as ( select oid,relname,pg_total_relation_size(oid) from pg_class) insert into buffer select size.oid, size.relname, size.pg_total_relation_size from size, diskquota.table_size as dt where dt.tableid = size.oid and relname = 'a'; +insert into buffer select oid, relname, pg_total_relation_size(oid) from pg_class, diskquota.table_size as dt where dt.size = oid and relname = 'a'; insert into buffer select oid, relname, sum(pg_total_relation_size(oid)) from gp_dist_random('pg_class') where oid > 16384 and (relkind='r' or relkind='m') and relname = 'a' group by oid, relname; select sum(buffer.size) = diskquota.table_size.size from buffer, diskquota.table_size where buffer.oid = diskquota.table_size.tableid group by diskquota.table_size.size; ?column? diff --git a/gp_activetable.c b/gp_activetable.c index 6320f4a1..1ebf8f3e 100644 --- a/gp_activetable.c +++ b/gp_activetable.c @@ -51,6 +51,7 @@ typedef struct DiskQuotaSetOFCache } DiskQuotaSetOFCache; HTAB *active_tables_map = NULL; +HTAB *monitoring_dbid_cache = NULL; /* active table hooks which detect the disk file size change. */ static file_create_hook_type prev_file_create_hook = NULL; @@ -161,6 +162,7 @@ report_active_table_helper(const RelFileNodeBackend *relFileNode) DiskQuotaActiveTableFileEntry *entry; DiskQuotaActiveTableFileEntry item; bool found = false; + Oid dbid = relFileNode->node.dbNode; /* We do not collect the active table in either master or mirror segments */ @@ -168,7 +170,18 @@ report_active_table_helper(const RelFileNodeBackend *relFileNode) { return; } - + + /* do not collect active table info when the database is not under monitoring. + * this operation is read-only and does not require absolutely exact. + * read the cache with out shared lock */ + hash_search(monitoring_dbid_cache, &dbid, HASH_FIND, &found); + + if (!found) + { + return; + } + found = false; + MemSet(&item, 0, sizeof(DiskQuotaActiveTableFileEntry)); item.dbid = relFileNode->node.dbNode; item.relfilenode = relFileNode->node.relNode; diff --git a/gp_activetable.h b/gp_activetable.h index 1b975609..44a54f5f 100644 --- a/gp_activetable.h +++ b/gp_activetable.h @@ -24,6 +24,7 @@ extern void init_shm_worker_active_tables(void); extern void init_lock_active_tables(void); extern HTAB *active_tables_map; +extern HTAB *monitoring_dbid_cache; #define atooid(x) ((Oid) strtoul((x), NULL, 10)) diff --git a/quotamodel.c b/quotamodel.c index 2c0073c0..70c7e811 100644 --- a/quotamodel.c +++ b/quotamodel.c @@ -42,7 +42,9 @@ #include "utils/syscache.h" #include -#include +#include "cdb/cdbvars.h" +#include "cdb/cdbdisp_query.h" +#include "cdb/cdbdispatchresult.h" #include "gp_activetable.h" #include "diskquota.h" @@ -162,8 +164,8 @@ init_disk_quota_shmem(void) * resources in pgss_shmem_startup(). */ RequestAddinShmemSpace(DiskQuotaShmemSize()); - /* 4 locks for diskquota refer to init_lwlocks() for details */ - RequestAddinLWLocks(4); + /* locks for diskquota refer to init_lwlocks() for details */ + RequestAddinLWLocks(DiskQuotaLocksItemNumber); /* Install startup hook to initialize our shared memory. */ prev_shmem_startup_hook = shmem_startup_hook; @@ -212,6 +214,17 @@ disk_quota_shmem_startup(void) init_shm_worker_active_tables(); + memset(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(Oid); + hash_ctl.entrysize = sizeof(Oid); + hash_ctl.hash = oid_hash; + + monitoring_dbid_cache = ShmemInitHash("table oid cache which shoud tracking", + MAX_NUM_MONITORED_DB, + MAX_NUM_MONITORED_DB, + &hash_ctl, + HASH_ELEM | HASH_FUNCTION); + LWLockRelease(AddinShmemInitLock); } @@ -223,6 +236,7 @@ disk_quota_shmem_startup(void) * extension_ddl_message. * extension_ddl_lock is used to avoid concurrent diskquota * extension ddl(create/drop) command. + * monitoring_dbid_cache_lock is used to shared `monitoring_dbid_cache` on segment process. */ static void init_lwlocks(void) @@ -231,6 +245,7 @@ init_lwlocks(void) diskquota_locks.black_map_lock = LWLockAssign(); diskquota_locks.extension_ddl_message_lock = LWLockAssign(); diskquota_locks.extension_ddl_lock = LWLockAssign(); + diskquota_locks.monitoring_dbid_cache_lock = LWLockAssign(); } /* @@ -245,6 +260,7 @@ DiskQuotaShmemSize(void) size = sizeof(ExtensionDDLMessage); size = add_size(size, hash_estimate_size(MAX_DISK_QUOTA_BLACK_ENTRIES, sizeof(BlackMapEntry))); size = add_size(size, hash_estimate_size(diskquota_max_active_tables, sizeof(DiskQuotaActiveTableEntry))); + size = add_size(size, hash_estimate_size(MAX_NUM_MONITORED_DB, sizeof(Oid))); return size; } @@ -394,7 +410,17 @@ do_check_diskquota_state_is_ready(void) int ret; TupleDesc tupdesc; int i; - + StringInfoData sql_command; + + /* Add the dbid to watching list, so the hook can catch the table change*/ + initStringInfo(&sql_command); + appendStringInfo(&sql_command, "select gp_segment_id, diskquota.update_diskquota_db_list(%u, 0) from gp_dist_random('gp_id');", + MyDatabaseId); + ret = SPI_execute(sql_command.data, true, 0); + if (ret != SPI_OK_SELECT) + ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("[diskquota] check diskquota state SPI_execute failed: error code %d", ret))); + pfree(sql_command.data); /* * check diskquota state from table diskquota.state errors will be catch * at upper level function. @@ -433,6 +459,7 @@ do_check_diskquota_state_is_ready(void) } ereport(WARNING, (errmsg("Diskquota is not in ready state. " "please run UDF init_table_size_table()"))); + return false; } diff --git a/sql/test_manytable.sql b/sql/test_manytable.sql new file mode 100644 index 00000000..eb90225f --- /dev/null +++ b/sql/test_manytable.sql @@ -0,0 +1,29 @@ +-- start_ignore +\! gpconfig -c diskquota.max_active_tables -v 2 > /dev/null +-- end_ignore +\! echo $? + +CREATE DATABASE test_manytable01; +CREATE DATABASE test_manytable02; + +\c test_manytable01 + +CREATE TABLE a01(i int) DISTRIBUTED BY (i); +CREATE TABLE a02(i int) DISTRIBUTED BY (i); +CREATE TABLE a03(i int) DISTRIBUTED BY (i); + +INSERT INTO a01 values(generate_series(0, 500)); +INSERT INTO a02 values(generate_series(0, 500)); +INSERT INTO a03 values(generate_series(0, 500)); + +\c test_manytable02 +CREATE TABLE b01(i int) DISTRIBUTED BY (i); +INSERT INTO b01 values(generate_series(0, 500)); + +\c postgres +DROP DATABASE test_manytable01; +DROP DATABASE test_manytable02; + +-- start_ignore +\! gpconfig -c diskquota.max_active_tables -v 1024 > /dev/null +-- end_ignore diff --git a/sql/test_table_size.sql b/sql/test_table_size.sql index 80279cb1..aad12e83 100644 --- a/sql/test_table_size.sql +++ b/sql/test_table_size.sql @@ -7,7 +7,7 @@ insert into a select * from generate_series(1,10000); select pg_sleep(2); create table buffer(oid oid, relname name, size bigint); -with size as ( select oid,relname,pg_total_relation_size(oid) from pg_class) insert into buffer select size.oid, size.relname, size.pg_total_relation_size from size, diskquota.table_size as dt where dt.tableid = size.oid and relname = 'a'; +insert into buffer select oid, relname, pg_total_relation_size(oid) from pg_class, diskquota.table_size as dt where dt.size = oid and relname = 'a'; insert into buffer select oid, relname, sum(pg_total_relation_size(oid)) from gp_dist_random('pg_class') where oid > 16384 and (relkind='r' or relkind='m') and relname = 'a' group by oid, relname;