From d8769e02eb989111eea8ab5e12f339570ea3237e Mon Sep 17 00:00:00 2001 From: Rodrigo Bernardi Date: Fri, 19 Jan 2024 01:51:37 -0300 Subject: [PATCH] feat: add bloat metrics by statistic --- pkg/db/bloat_queries.go | 197 ++++++++++++++++++++++++++++++++++++++++ pkg/db/index.go | 37 ++------ pkg/db/stats.go | 40 +++----- pkg/params/params.go | 13 ++- 4 files changed, 231 insertions(+), 56 deletions(-) create mode 100644 pkg/db/bloat_queries.go diff --git a/pkg/db/bloat_queries.go b/pkg/db/bloat_queries.go new file mode 100644 index 0000000..3a6f90f --- /dev/null +++ b/pkg/db/bloat_queries.go @@ -0,0 +1,197 @@ +package db + +const ( + qryTableBloatPgstattuple = ` + select + ceil((size - free_space - dead_tuple_len) * 100 / fillfactor / bs) as effective_page_count, + greatest(round( + (100 * (1 - (100 - free_percent - dead_tuple_percent) / fillfactor))::numeric, 2 + ),0) as free_percent, + greatest(ceil(size - (size - free_space - dead_tuple_len) * 100 / fillfactor), 0) as free_space + from ( + select + current_setting('block_size')::integer as bs, + pg_catalog.pg_relation_size(pg_catalog.pg_class.oid) as size, + coalesce( + ( + select ( + regexp_matches( + reloptions::text, e'.*fillfactor=(\\\\d+).*'))[1]), + '100')::real as fillfactor, + pgst.* + from pg_catalog.pg_class + cross join + %q.pgstattuple( + (quote_ident($1) || '.' || quote_ident($2))) as pgst + where pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass + ) as sq limit 1;` + + qryTableBloatStatistical = ` +SELECT CASE WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages-est_tblpages_ff)*bs + ELSE 0 + END AS free_space, + CASE WHEN tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float + ELSE 0 + END AS free_percent +FROM ( + SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages, + ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) AS est_tblpages_ff, + tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na + -- , tpl_hdr_size, tpl_data_size, pgstattuple(tblid) AS pst -- (DEBUG INFO) + FROM ( + SELECT + ( 4 + tpl_hdr_size + tpl_data_size + (2*ma) + - CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END + - CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END + ) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages, + toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na + -- , tpl_hdr_size, tpl_data_size + FROM ( + SELECT + tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples, + tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma, + 24 AS page_hdr, + 23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN ( 7 + count(s.attname) ) / 8 ELSE 0::int END + + CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size, + sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR sum(CASE WHEN att.attnum > 0 THEN 1 ELSE 0 END) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname + AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE NOT att.attisdropped + AND tbl.relkind in ('r','m') + AND ns.nspname = $1 + AND tbl.relname = $2 + GROUP BY 1,2,3,4,5,6,7,8,9,10 + ORDER BY 2,3 + ) AS s + ) AS s2 +) AS s3 +ORDER BY ((tblpages-est_tblpages)*bs)::bigint desc, schemaname, tblname; +` + + qryIndexBloatPgstattuple = ` + SELECT + CASE + WHEN avg_leaf_density = 'NaN' THEN 0 + ELSE + round( + (100 * (1 - avg_leaf_density / fillfactor))::numeric, 2 + ) + END AS free_percent, + CASE + WHEN avg_leaf_density = 'NaN' THEN 0 + ELSE + ceil( + index_size * (1 - avg_leaf_density / fillfactor) + ) + END AS free_space + FROM ( + SELECT + coalesce( + ( + SELECT ( + regexp_matches( + reloptions::text, E'.*fillfactor=(\\\\d+).*'))[1]), + '90')::real AS fillfactor, + pgsi.* + FROM pg_catalog.pg_class + CROSS JOIN %q.pgstatindex( + quote_ident($1) || '.' || quote_ident($2)) AS pgsi + WHERE pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass + ) AS oq` + + qryIndexBloatStatistical = `SELECT CASE WHEN relpages > est_pages_ff + THEN bs*(relpages-est_pages_ff)::bigint + ELSE 0::bigint + END AS free_space, + ROUND((100 * (relpages-est_pages_ff)::float / relpages)::numeric , 2) AS free_percent +FROM ( + SELECT coalesce(1 + + ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth) + ) AS est_pages, + coalesce(1 + + ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0 + ) AS est_pages_ff, + bs, nspname, tblname, idxname, relpages, fillfactor, is_na + -- , pgstatindex(idxoid) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO) + FROM ( + SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor, + ( index_tuple_hdr_bm + + maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN + WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign + ELSE index_tuple_hdr_bm%maxalign + END + + nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN + WHEN nulldatawidth = 0 THEN 0 + WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign + ELSE nulldatawidth::integer%maxalign + END + )::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na + -- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO) + FROM ( + SELECT n.nspname, ct.relname AS tblname, i.idxname, i.reltuples, i.relpages, + i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs, + CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?) + WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8 + ELSE 4 + END AS maxalign, + /* per page header, fixed size: 20 for 7.X, 24 for others */ + 24 AS pagehdr, + /* per page btree opaque data */ + 16 AS pageopqdata, + /* per tuple header: add IndexAttributeBitMapData if some cols are null-able */ + CASE WHEN max(coalesce(s.stanullfrac,0)) = 0 + THEN 2 -- IndexTupleData size + ELSE 2 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8) + END AS index_tuple_hdr_bm, + /* data len: we remove null values save space using it fractionnal part from stats */ + sum( (1-coalesce(s.stanullfrac, 0)) * coalesce(s.stawidth, 1024)) AS nulldatawidth, + max( CASE WHEN a.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na + FROM ( + SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, + CASE WHEN indkey[i]=0 THEN idxoid ELSE tbloid END AS att_rel, + CASE WHEN indkey[i]=0 THEN i ELSE indkey[i] END AS att_pos + FROM ( + SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, generate_series(1,indnatts) AS i + FROM ( + SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid, + i.indexrelid AS idxoid, + coalesce(substring( + array_to_string(ci.reloptions, ' ') + from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor, + i.indnatts, + string_to_array(textin(int2vectorout(i.indkey)),' ')::int[] AS indkey + FROM pg_index i + JOIN pg_class ci ON ci.oid=i.indexrelid + WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree') + AND ci.relpages > 0 + AND ci.relnamespace = ($1)::regnamespace + AND ci.relname = $2 + ) AS idx_data + ) AS idx_data_cross + ) i + JOIN pg_attribute a ON a.attrelid = i.att_rel + AND a.attnum = i.att_pos + JOIN pg_statistic s ON s.starelid = i.att_rel + AND s.staattnum = i.att_pos + JOIN pg_class ct ON ct.oid = i.tbloid + JOIN pg_namespace n ON ct.relnamespace = n.oid + GROUP BY 1,2,3,4,5,6,7,8,9,10 + ) AS rows_data_stats + ) AS rows_hdr_pdg_stats +) AS relation_stats; + ` +) diff --git a/pkg/db/index.go b/pkg/db/index.go index 1e33c8a..8572338 100644 --- a/pkg/db/index.go +++ b/pkg/db/index.go @@ -460,36 +460,13 @@ func (pg *PgConnection) getIndexBloatStats(ctx context.Context, schema, index st if pgstatstupleSchema, err := pg.GetPgStatTupleSchema(ctx); err != nil { return nil, fmt.Errorf("error getting pgstattuple schema: %w", err) } else { - qry := fmt.Sprintf(` - SELECT - CASE - WHEN avg_leaf_density = 'NaN' THEN 0 - ELSE - round( - (100 * (1 - avg_leaf_density / fillfactor))::numeric, 2 - ) - END AS free_percent, - CASE - WHEN avg_leaf_density = 'NaN' THEN 0 - ELSE - ceil( - index_size * (1 - avg_leaf_density / fillfactor) - ) - END AS free_space - FROM ( - SELECT - coalesce( - ( - SELECT ( - regexp_matches( - reloptions::text, E'.*fillfactor=(\\\\d+).*'))[1]), - '90')::real AS fillfactor, - pgsi.* - FROM pg_catalog.pg_class - CROSS JOIN %q.pgstatindex( - quote_ident($1) || '.' || quote_ident($2)) AS pgsi - WHERE pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass - ) AS oq`, pgstatstupleSchema) + var qry string + switch params.BLOAT_METRIC_SOURCE { + case params.BLOAT_METRIC_STATISTICAL: + qry = qryIndexBloatStatistical + default: + qry = fmt.Sprintf(qryIndexBloatPgstattuple, pgstatstupleSchema) + } var result IndexBloatStats err := pgxscan.Get(ctx, pg.Conn, &result, qry, schema, index) diff --git a/pkg/db/stats.go b/pkg/db/stats.go index b633d20..6117518 100644 --- a/pkg/db/stats.go +++ b/pkg/db/stats.go @@ -4,6 +4,7 @@ import ( "context" "fmt" + "github.com/electric-saw/pg-defrag/pkg/params" "github.com/georgysavva/scany/v2/pgxscan" ) @@ -45,34 +46,23 @@ func (pg *PgConnection) GetBloatStats(ctx context.Context, schema, table string) if err != nil { return nil, fmt.Errorf("Failed to get pg_stat_tuple schema: %v", err) } - qry := fmt.Sprintf(` - select - ceil((size - free_space - dead_tuple_len) * 100 / fillfactor / bs) as effective_page_count, - greatest(round( - (100 * (1 - (100 - free_percent - dead_tuple_percent) / fillfactor))::numeric, 2 - ),0) as free_percent, - greatest(ceil(size - (size - free_space - dead_tuple_len) * 100 / fillfactor), 0) as free_space - from ( - select - current_setting('block_size')::integer as bs, - pg_catalog.pg_relation_size(pg_catalog.pg_class.oid) as size, - coalesce( - ( - select ( - regexp_matches( - reloptions::text, e'.*fillfactor=(\\\\d+).*'))[1]), - '100')::real as fillfactor, - pgst.* - from pg_catalog.pg_class - cross join - %q.pgstattuple( - (quote_ident($1) || '.' || quote_ident($2))) as pgst - where pg_catalog.pg_class.oid = (quote_ident($1) || '.' || quote_ident($2))::regclass - ) as sq limit 1;`, pgStatTupleSchema) + + qry := fmt.Sprintf(qryTableBloatPgstattuple, pgStatTupleSchema) var stats PgBloatStats err = pgxscan.Get(ctx, pg.Conn, &stats, qry, schema, table) - return &stats, err + if err != nil { + return nil, fmt.Errorf("Failed to get bloat stats: %v", err) + } + + if params.BLOAT_METRIC_SOURCE == params.BLOAT_METRIC_STATISTICAL { + qry = qryTableBloatStatistical + err = pgxscan.Get(ctx, pg.Conn, &stats, qry, schema, table) + if err != nil { + return nil, fmt.Errorf("Failed to get bloat stats: %v", err) + } + } + return &stats, nil } func (s *PgSizeStats) Copy() *PgSizeStats { diff --git a/pkg/params/params.go b/pkg/params/params.go index 48c2ed6..a1e39a1 100644 --- a/pkg/params/params.go +++ b/pkg/params/params.go @@ -8,7 +8,17 @@ import ( "github.com/sirupsen/logrus" ) -type maxPagesPerRoundFunc func(int64) int64 +type ( + maxPagesPerRoundFunc func(int64) int64 + BloatMetricType int +) + +const ( + BLOAT_METRIC_PGSTATTUPLE BloatMetricType = iota + BLOAT_METRIC_STATISTICAL + + BLOAT_METRIC_DEFAULT = BLOAT_METRIC_PGSTATTUPLE +) var ( MINIMAL_COMPACT_PAGES int64 = envOrDefaultInt64("MINIMAL_COMPACT_PAGES", 10) @@ -22,6 +32,7 @@ var ( PROGRESS_REPORT_PERIOD time.Duration = 60 * time.Second DELAY_RATIO float64 = envOrDefaultfloat64("DELAY_RATIO", 2) MAX_DELAY time.Duration = time.Second + BLOAT_METRIC_SOURCE = BLOAT_METRIC_DEFAULT MAX_RETRY_COUNT int = 10 )