Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PG -156: replace query placeholders with actual arguments for prepared statements #481

Merged
merged 10 commits into from
Nov 1, 2024
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))

TAP_TESTS = 1
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_monitor/pg_stat_monitor.conf --inputdir=regression
REGRESS = basic version guc pgsm_query_id functions counters relations database error_insert application_name application_name_unique top_query different_parent_queries cmd_type error rows tags user level_tracking
REGRESS = basic version guc pgsm_query_id functions counters relations database error_insert application_name application_name_unique top_query different_parent_queries cmd_type error rows tags user level_tracking denorm_prepared_statements

# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
Expand Down Expand Up @@ -40,4 +40,4 @@ update-typedefs:
indent:
pgindent --typedefs=typedefs-full.list .

.PHONY: update-typedefs indent
.PHONY: update-typedefs indent
169 changes: 163 additions & 6 deletions pg_stat_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
#include "nodes/pg_list.h"
#include "utils/guc.h"
#include <regex.h>
#include <stddef.h>
#include "pgstat.h"
#include "commands/dbcommands.h"
#include "commands/explain.h"
#include "lib/stringinfo.h"
#include "pg_stat_monitor.h"

/*
Expand Down Expand Up @@ -206,6 +208,10 @@ static void pgsm_add_to_list(pgsmEntry *entry, char *query_text, int query_len);
static pgsmEntry *pgsm_get_entry_for_query(uint64 queryid, PlanInfo *plan_info, const char *query_text, int query_len, bool create);
static uint64 get_pgsm_query_id_hash(const char *norm_query, int len);

static void get_param_value(const ParamListInfo plist, int idx, StringInfoData *buffer);

static StringInfoData get_denormalized_query(const ParamListInfo paramlist, const char *query_text);

static void pgsm_cleanup_callback(void *arg);
static void pgsm_store_error(const char *query, ErrorData *edata);

Expand All @@ -232,7 +238,14 @@ static void pgsm_update_entry(pgsmEntry *entry,
const struct JitInstrumentation *jitusage,
bool reset,
pgsmStoreKind kind);
static void pgsm_store(pgsmEntry *entry);
static void pgsm_store_ex(pgsmEntry *entry, ParamListInfo params);

/* Stores query entry in normalized form */
static inline void
pgsm_store(pgsmEntry *entry)
{
pgsm_store_ex(entry, NULL);
}

static void pg_stat_monitor_internal(FunctionCallInfo fcinfo,
pgsmVersion api_version,
Expand Down Expand Up @@ -692,12 +705,12 @@ pgsm_ExecutorEnd(QueryDesc *queryDesc)
PlanInfo plan_info;
PlanInfo *plan_ptr = NULL;
pgsmEntry *entry = NULL;
MemoryContext oldctx;

/* Extract the plan information in case of SELECT statement */
if (queryDesc->operation == CMD_SELECT && pgsm_enable_query_plan)
{
int rv;
MemoryContext oldctx;

/*
* Making sure it is a per query context so that there's no memory
Expand Down Expand Up @@ -775,7 +788,7 @@ pgsm_ExecutorEnd(QueryDesc *queryDesc)
false, /* reset */
PGSM_EXEC); /* kind */

pgsm_store(entry);
pgsm_store_ex(entry, queryDesc->params);
}

if (prev_ExecutorEnd)
Expand Down Expand Up @@ -1861,7 +1874,6 @@ pgsm_create_hash_entry(uint64 bucket_id, uint64 queryid, PlanInfo *plan_info)
return entry;
}


/*
* Store some statistics for a statement.
*
Expand All @@ -1871,9 +1883,15 @@ pgsm_create_hash_entry(uint64 bucket_id, uint64 queryid, PlanInfo *plan_info)
* If jstate is not NULL then we're trying to create an entry for which
* we have no statistics as yet; we just want to record the normalized
* query string. total_time, rows, bufusage are ignored in this case.
*
* If params argument is not null and pgsm_normalized_query is off then we
* denormalize the query using it's actual arguments found in params.
* The denormalization is done during the first time the query is
* inserted or if the time to execute the query exceeds the average
* time computed for the same query.
*/
static void
pgsm_store(pgsmEntry *entry)
void
pgsm_store_ex(pgsmEntry *entry, ParamListInfo params)
{
pgsmEntry *shared_hash_entry;
pgsmSharedState *pgsm;
Expand All @@ -1888,6 +1906,7 @@ pgsm_store(pgsmEntry *entry)
JitInstrumentation jitusage;
char comments[COMMENTS_LEN] = {0};
int comments_len;
StringInfoData query_info;

/* Safety check... */
if (!IsSystemInitialized())
Expand Down Expand Up @@ -1976,6 +1995,14 @@ pgsm_store(pgsmEntry *entry)
dsa_area *query_dsa_area;
char *query_buff;

/* Denormalize the query if normalization is off */
if (!pgsm_normalized_query && params != NULL)
{
query_info = get_denormalized_query(params, query);
query = query_info.data;
query_len = query_info.len;
}

/* New query, truncate length if necessary. */
if (query_len > pgsm_query_max_len)
query_len = pgsm_query_max_len;
Expand Down Expand Up @@ -2065,6 +2092,50 @@ pgsm_store(pgsmEntry *entry)
snprintf(shared_hash_entry->username, sizeof(shared_hash_entry->username), "%s", entry->username);
}

/*
* Entry already exists, if query normalization is disabled and the query
* execution time exceeds the mean time for this query, then we
* denormalize the query so users can inspect which arguments caused the
* query to take more time to execute
*/
else if (
!pgsm_normalized_query &&
params != NULL &&
entry->counters.time.total_time > shared_hash_entry->counters.time.mean_time
)
{
dsa_pointer dsa_query_pointer;
dsa_area *query_dsa_area;
char *query_buff;

query_info = get_denormalized_query(params, query);
query = query_info.data;
query_len = query_info.len;

/* truncate length if necessary. */
if (query_len > pgsm_query_max_len)
query_len = pgsm_query_max_len;

/* Save the query text in raw dsa area */
query_dsa_area = get_dsa_area_for_query_text();
dsa_query_pointer = dsa_allocate_extended(query_dsa_area, query_len + 1, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
if (DsaPointerIsValid(dsa_query_pointer))
{
/*
* Get the memory address from DSA pointer and copy the query text
* to it.
*/
query_buff = dsa_get_address(query_dsa_area, dsa_query_pointer);
memcpy(query_buff, query, query_len);

/* release previous query from shared memory */
if (DsaPointerIsValid(shared_hash_entry->query_text.query_pos))
dsa_free(query_dsa_area, shared_hash_entry->query_text.query_pos);

shared_hash_entry->query_text.query_pos = dsa_query_pointer;
}
}

pgsm_update_entry(shared_hash_entry, /* entry */
query, /* query */
comments, /* comments */
Expand Down Expand Up @@ -4018,3 +4089,89 @@ get_query_id(JumbleState *jstate, Query *query)
return queryid;
}
#endif

/*
* extract parameter value (Datum) from plist->params[idx], cast it to string then
* append the resulting string to the buffer.
*/
void
get_param_value(const ParamListInfo plist, int idx, StringInfoData *buffer)
{
Oid typoutput;
bool typisvarlena;
char *pstring;
ParamExternData *param;

Assert(idx < plist->numParams);

param = &plist->params[idx];

if (param->isnull || !OidIsValid(param->ptype))
{
appendStringInfoString(buffer, "NULL");
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
return;
}

getTypeOutputInfo(param->ptype, &typoutput, &typisvarlena);
pstring = OidOutputFunctionCall(typoutput, param->value);
appendStringInfo(buffer, "%s", pstring);
}

/* denormalize the query, replace placeholders with actual values */
StringInfoData
get_denormalized_query(const ParamListInfo paramlist, const char *query_text)
{
int current_param;
const char *cursor_ori;
const char *cursor_curr;
StringInfoData result_buf;
ptrdiff_t len;

current_param = 0;
cursor_ori = query_text;
cursor_curr = cursor_ori;

initStringInfo(&result_buf);

do
{
/* advance cursor until detecting a placeholder '$' start. */
while (*cursor_ori && *cursor_ori != '$')
++cursor_ori;

/* calculate length of query text before placeholder. */
len = cursor_ori - cursor_curr;

/* check if end of string is reached */
if (!*cursor_ori)
{
/* there may have remaining query data to append */
if (len > 0)
appendBinaryStringInfo(&result_buf, cursor_curr, len);

break;
}

/* append query text before the '$' sign found. */
if (len > 0)
appendBinaryStringInfo(&result_buf, cursor_curr, len);

/* skip '$' */
++cursor_ori;

/* skip the placeholder */
while (*cursor_ori && *cursor_ori >= '0' && *cursor_ori <= '9')
cursor_ori++;

/* advance current cursor */
cursor_curr = cursor_ori;

/* replace the placeholder with actual value */
get_param_value(paramlist, current_param, &result_buf);

++current_param;
} while (*cursor_ori != '\0');
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved


return result_buf;
}
63 changes: 63 additions & 0 deletions regression/expected/denorm_prepared_statements.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
CREATE EXTENSION pg_stat_monitor;
Set pg_stat_monitor.pgsm_normalized_query='off';
CREATE TABLE t1 (a TEXT, b TEXT, c TEXT);
SELECT pg_stat_monitor_reset();
pg_stat_monitor_reset
-----------------------

(1 row)

-- First test, execute cheap query then heavy query.
-- Ensure denormalized heavy query replaces the cheaper one.
PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES($1, $2, $3);
EXECUTE prepstmt('A', 'B', 'C');
SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C";
substring | calls
--------------------------------------------------------------------------------+-------
PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(A, B, C); | 1
SELECT pg_stat_monitor_reset() | 1
(2 rows)

EXECUTE prepstmt(REPEAT('XYZ', 8192), md5(random()::text), REPEAT('RANDOM', 4096));
SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C";
substring | calls
---------------------------------------------------------------------------------------------------------------------------------+-------
PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(XYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZX | 2
SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C" | 1
SELECT pg_stat_monitor_reset() | 1
(3 rows)

TRUNCATE TABLE t1;
SELECT pg_stat_monitor_reset();
pg_stat_monitor_reset
-----------------------

(1 row)

-- Second test, execute heavy query then cheap query.
-- Ensure denormalized heavy query is not replaced by the cheaper one.
EXECUTE prepstmt(REPEAT('XYZ', 8192), md5(random()::text), REPEAT('RANDOM', 4096));
SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C";
substring | calls
---------------------------------------------------------------------------------------------------------------------------------+-------
PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(XYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZX | 1
SELECT pg_stat_monitor_reset() | 1
(2 rows)

EXECUTE prepstmt('A', 'B', 'C');
SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C";
substring | calls
---------------------------------------------------------------------------------------------------------------------------------+-------
PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(XYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZX | 2
SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C" | 1
SELECT pg_stat_monitor_reset() | 1
(3 rows)

DROP TABLE t1;
SELECT pg_stat_monitor_reset();
pg_stat_monitor_reset
-----------------------

(1 row)

DROP EXTENSION pg_stat_monitor;
12 changes: 6 additions & 6 deletions regression/expected/pgsm_query_id.out
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ SELECT * FROM t1;
---
(0 rows)

SELECT *, ADD(1, 2) FROM t1;
SELECT *, ADD(1234, 1000) FROM t1;
a | add
---+-----
(0 rows)
Expand Down Expand Up @@ -62,7 +62,7 @@ SELECT * FROM t1;
---
(0 rows)

SELECT *, ADD(1, 2) FROM t1;
SELECT *, ADD(1234, 1000) FROM t1;
a | add
---+-----
(0 rows)
Expand All @@ -87,12 +87,12 @@ SELECT datname, pgsm_query_id, query, calls FROM pg_stat_monitor ORDER BY pgsm_q
db1 | 1897482803466821995 | SELECT * FROM t2 | 3
db1 | 1988437669671417938 | SELECT * FROM t1 | 1
db2 | 1988437669671417938 | SELECT * FROM t1 | 1
db1 | 2864453209316739369 | select $1 + $2 | 1
db2 | 2864453209316739369 | select $1 + $2 | 1
db1 | 2864453209316739369 | select 1234 + 1000 | 1
db2 | 2864453209316739369 | select 1234 + 1000 | 1
db2 | 6220142855706866455 | set pg_stat_monitor.pgsm_enable_pgsm_query_id = on | 1
db2 | 6633979598391393345 | SELECT * FROM t3 where c = 20 | 1
db1 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1
db2 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1
db1 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1
db2 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1
db2 | | SELECT * FROM t3 | 1
db2 | | set pg_stat_monitor.pgsm_enable_pgsm_query_id = off | 1
(12 rows)
Expand Down
8 changes: 4 additions & 4 deletions regression/expected/pgsm_query_id_1.out
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ SELECT * FROM t1;
---
(0 rows)

SELECT *, ADD(1, 2) FROM t1;
SELECT *, ADD(1234, 1000) FROM t1;
a | add
---+-----
(0 rows)
Expand Down Expand Up @@ -62,7 +62,7 @@ SELECT * FROM t1;
---
(0 rows)

SELECT *, ADD(1, 2) FROM t1;
SELECT *, ADD(1234, 1000) FROM t1;
a | add
---+-----
(0 rows)
Expand All @@ -89,8 +89,8 @@ SELECT datname, pgsm_query_id, query, calls FROM pg_stat_monitor ORDER BY pgsm_q
db2 | 1988437669671417938 | SELECT * FROM t1 | 1
db2 | 6220142855706866455 | set pg_stat_monitor.pgsm_enable_pgsm_query_id = on | 1
db2 | 6633979598391393345 | SELECT * FROM t3 where c = 20 | 1
db1 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1
db2 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1
db1 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1
db2 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1
db2 | | SELECT * FROM t3 | 1
db2 | | set pg_stat_monitor.pgsm_enable_pgsm_query_id = off | 1
(10 rows)
Expand Down
Loading
Loading