Skip to content

Commit fb015c1

Browse files
authored
Merge pull request #3300 from Zak-Kent/pdb-4832-report-parition-index-on-id
(PDB-4832) Add report id index in partitions
2 parents 5a01bdf + 0b11700 commit fb015c1

File tree

8 files changed

+211
-38
lines changed

8 files changed

+211
-38
lines changed

ext/test/upgrade-and-exit

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,6 @@ psql -U puppetdb puppetdb -c 'select max(version) from schema_migrations;' \
6969
> "$tmpdir/out"
7070
cat "$tmpdir/out"
7171
# This must be updated every time we add a new migration
72-
grep -qE ' 75$' "$tmpdir/out"
72+
grep -qE ' 76$' "$tmpdir/out"
7373

7474
test ! -e "$PDBBOX"/var/mq-migrated

resources/ext/cli/delete-reports.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ chown "$pg_user:$pg_user" "$tmp_dir"
8787

8888
# Verify that the PuppetDB schema version is the expected value
8989
# so that we do not incorrectly delete the report data.
90-
expected_schema_ver=75
90+
expected_schema_ver=76
9191
su - "$pg_user" -s /bin/sh -c "$psql_cmd -p $pg_port -d $pdb_db_name -c 'COPY ( SELECT max(version) FROM schema_migrations ) TO STDOUT;' > $tmp_dir/schema_ver"
9292
actual_schema_ver="$(cat "$tmp_dir/schema_ver")"
9393
if test "$actual_schema_ver" -ne $expected_schema_ver; then

src/puppetlabs/puppetdb/scf/migrate.clj

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
[clojure.set :as set]
6969
[clojure.string :as str]
7070
[puppetlabs.puppetdb.scf.storage :as scf]
71-
[puppetlabs.puppetdb.scf.partitioning :as partitioning])
71+
[puppetlabs.puppetdb.scf.partitioning :as partitioning
72+
:refer [get-temporal-partitions]])
7273
(:import [org.postgresql.util PGobject]
7374
[java.time LocalDate ZonedDateTime ZoneId OffsetDateTime]
7475
(java.sql Timestamp)
@@ -1954,6 +1955,15 @@
19541955
(jdbc/do-commands
19551956
"ALTER TABLE reports ADD COLUMN report_type text DEFAULT 'agent' NOT NULL"))
19561957

1958+
(defn add-report-partition-indexes-on-id
  "Migration 76: for every existing reports partition returned by
  get-temporal-partitions, creates a unique btree index on the id
  column named idx_reports_id_<partition-key>.  The statement uses
  `if not exists`, so a partition that already has the index is left
  unchanged."
  []
  (doseq [{:keys [table part]} (get-temporal-partitions "reports")
          :let [idx-name (str "idx_reports_id_" part)]]
    (jdbc/do-commands
     ;; Both identifiers are double-quoted before being spliced into the DDL.
     (format "create unique index if not exists %s on %s using btree (id)"
             (jdbc/double-quote idx-name)
             (jdbc/double-quote table)))))
1966+
19571967
(def migrations
19581968
"The available migrations, as a map from migration version to migration function."
19591969
{00 require-schema-migrations-table
@@ -2015,7 +2025,8 @@
20152025
; or resource events, you also update the delete-reports
20162026
; cli command.
20172027
74 reports-partitioning
2018-
75 add-report-type-to-reports})
2028+
75 add-report-type-to-reports
2029+
76 add-report-partition-indexes-on-id})
20192030

20202031
(defn desired-schema-version []
20212032
"The newest migration this PuppetDB instance knows about. Anything

src/puppetlabs/puppetdb/scf/partitioning.clj

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@
1010
(java.time.temporal ChronoUnit)
1111
(java.time.format DateTimeFormatter)))
1212

13+
(defn get-temporal-partitions
  "Queries pg_tables for tables whose name is name-prefix, an
  underscore, eight digits and a trailing z, and returns a vector with
  one map per match: :table holds the full table name and :part holds
  everything after the prefix and its underscore.  For \"reports\", a
  table named reports_20200802z yields
  {:table \"reports_20200802z\" :part \"20200802z\"}."
  [name-prefix]
  ;; FIXME: use this in other relevant places.
  ;; FIXME: restrict to our schema.
  (let [name-pattern (str "^" name-prefix "_[0-9]{8}z$")
        sql (str "select tablename from pg_tables where tablename ~ "
                 (jdbc/single-quote name-pattern))
        row->partition (fn [row]
                         (let [full-name (:tablename row)]
                           {:table full-name
                            ;; Skip the prefix plus the separating underscore.
                            :part (subs full-name (inc (count name-prefix)))}))]
    (into [] (map row->partition) (jdbc/query-to-vec sql))))
28+
1329
(defn date-suffix
1430
[date]
1531
(let [formatter (.withZone (DateTimeFormatter/BASIC_ISO_DATE) (ZoneId/of "UTC"))]
@@ -95,6 +111,12 @@
95111
(format "CREATE UNIQUE INDEX IF NOT EXISTS resource_events_hash_%s ON %s (event_hash)"
96112
iso-week-year full-table-name)])))
97113

114+
;; This var is used in testing to simulate migration 74 being applied without
115+
;; adding the idx_reports_id index to partitions. Changing this behavior in
116+
;; migration 74 should be safe because the index creation is guarded by
117+
;; 'if not exists' in both the changed migration 74 and in the newer 76.
118+
;; Dynamic flag read by create-reports-partition when it assembles the
;; per-partition index statements: when true the idx_reports_id_<suffix>
;; unique index is included.  Tests rebind it to false with `binding`.
(def ^:dynamic add-report-id-idx? true)
119+
98120
(defn create-reports-partition
99121
"Creates a partition in the reports table"
100122
[date]
@@ -115,33 +137,38 @@
115137
" FOREIGN KEY (status_id) REFERENCES report_statuses(id) ON DELETE CASCADE")
116138
iso-week-year)])
117139
(fn [full-table-name iso-week-year]
118-
[(format "CREATE INDEX IF NOT EXISTS idx_reports_compound_id_%s ON %s USING btree (producer_timestamp, certname, hash) WHERE (start_time IS NOT NULL)"
119-
iso-week-year full-table-name)
120-
(format "CREATE INDEX IF NOT EXISTS idx_reports_noop_pending_%s ON %s USING btree (noop_pending) WHERE (noop_pending = true)"
121-
iso-week-year full-table-name)
122-
(format "CREATE INDEX IF NOT EXISTS idx_reports_prod_%s ON %s USING btree (producer_id)"
123-
iso-week-year full-table-name)
124-
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_%s ON %s USING btree (producer_timestamp)"
125-
iso-week-year full-table-name)
126-
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_by_hour_certname_%s ON %s USING btree (date_trunc('hour'::text, timezone('UTC'::text, producer_timestamp)), producer_timestamp, certname)"
127-
iso-week-year full-table-name)
128-
(format "CREATE INDEX IF NOT EXISTS reports_cached_catalog_status_on_fail_%s ON %s USING btree (cached_catalog_status) WHERE (cached_catalog_status = 'on_failure'::text)"
129-
iso-week-year full-table-name)
130-
(format "CREATE INDEX IF NOT EXISTS reports_catalog_uuid_idx_%s ON %s USING btree (catalog_uuid)"
131-
iso-week-year full-table-name)
132-
(format "CREATE INDEX IF NOT EXISTS reports_certname_idx_%s ON %s USING btree (certname)"
133-
iso-week-year full-table-name)
134-
(format "CREATE INDEX IF NOT EXISTS reports_end_time_idx_%s ON %s USING btree (end_time)"
135-
iso-week-year full-table-name)
136-
(format "CREATE INDEX IF NOT EXISTS reports_environment_id_idx_%s ON %s USING btree (environment_id)"
137-
iso-week-year full-table-name)
138-
(format "CREATE UNIQUE INDEX IF NOT EXISTS reports_hash_expr_idx_%s ON %s USING btree (encode(hash, 'hex'::text))"
139-
iso-week-year full-table-name)
140-
(format "CREATE INDEX IF NOT EXISTS reports_job_id_idx_%s ON %s USING btree (job_id) WHERE (job_id IS NOT NULL)"
141-
iso-week-year full-table-name)
142-
(format "CREATE INDEX IF NOT EXISTS reports_noop_idx_%s ON %s USING btree (noop) WHERE (noop = true)"
143-
iso-week-year full-table-name)
144-
(format "CREATE INDEX IF NOT EXISTS reports_status_id_idx_%s ON %s USING btree (status_id)"
145-
iso-week-year full-table-name)
146-
(format "CREATE INDEX IF NOT EXISTS reports_tx_uuid_expr_idx_%s ON %s USING btree (((transaction_uuid)::text))"
147-
iso-week-year full-table-name)])))
140+
(let [indexes
141+
[(format "CREATE INDEX IF NOT EXISTS idx_reports_compound_id_%s ON %s USING btree (producer_timestamp, certname, hash) WHERE (start_time IS NOT NULL)"
142+
iso-week-year full-table-name)
143+
(format "CREATE INDEX IF NOT EXISTS idx_reports_noop_pending_%s ON %s USING btree (noop_pending) WHERE (noop_pending = true)"
144+
iso-week-year full-table-name)
145+
(format "CREATE INDEX IF NOT EXISTS idx_reports_prod_%s ON %s USING btree (producer_id)"
146+
iso-week-year full-table-name)
147+
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_%s ON %s USING btree (producer_timestamp)"
148+
iso-week-year full-table-name)
149+
(format "CREATE INDEX IF NOT EXISTS idx_reports_producer_timestamp_by_hour_certname_%s ON %s USING btree (date_trunc('hour'::text, timezone('UTC'::text, producer_timestamp)), producer_timestamp, certname)"
150+
iso-week-year full-table-name)
151+
(format "CREATE INDEX IF NOT EXISTS reports_cached_catalog_status_on_fail_%s ON %s USING btree (cached_catalog_status) WHERE (cached_catalog_status = 'on_failure'::text)"
152+
iso-week-year full-table-name)
153+
(format "CREATE INDEX IF NOT EXISTS reports_catalog_uuid_idx_%s ON %s USING btree (catalog_uuid)"
154+
iso-week-year full-table-name)
155+
(format "CREATE INDEX IF NOT EXISTS reports_certname_idx_%s ON %s USING btree (certname)"
156+
iso-week-year full-table-name)
157+
(format "CREATE INDEX IF NOT EXISTS reports_end_time_idx_%s ON %s USING btree (end_time)"
158+
iso-week-year full-table-name)
159+
(format "CREATE INDEX IF NOT EXISTS reports_environment_id_idx_%s ON %s USING btree (environment_id)"
160+
iso-week-year full-table-name)
161+
(format "CREATE UNIQUE INDEX IF NOT EXISTS reports_hash_expr_idx_%s ON %s USING btree (encode(hash, 'hex'::text))"
162+
iso-week-year full-table-name)
163+
(format "CREATE INDEX IF NOT EXISTS reports_job_id_idx_%s ON %s USING btree (job_id) WHERE (job_id IS NOT NULL)"
164+
iso-week-year full-table-name)
165+
(format "CREATE INDEX IF NOT EXISTS reports_noop_idx_%s ON %s USING btree (noop) WHERE (noop = true)"
166+
iso-week-year full-table-name)
167+
(format "CREATE INDEX IF NOT EXISTS reports_status_id_idx_%s ON %s USING btree (status_id)"
168+
iso-week-year full-table-name)
169+
(format "CREATE INDEX IF NOT EXISTS reports_tx_uuid_expr_idx_%s ON %s USING btree (((transaction_uuid)::text))"
170+
iso-week-year full-table-name)]]
171+
(if add-report-id-idx?
172+
(conj indexes (format "CREATE UNIQUE INDEX IF NOT EXISTS idx_reports_id_%s ON %s USING btree (id)"
173+
iso-week-year full-table-name))
174+
indexes)))))

test/puppetlabs/puppetdb/scf/migrate_partitioning_test.clj

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,19 @@
693693
:primary? false
694694
:user "pdb_test"}
695695
:same nil}
696+
{:left-only nil
697+
:right-only
698+
{:schema "public"
699+
:table table-name
700+
:index (str "idx_reports_id_" part-name)
701+
:index_keys ["id"]
702+
:type "btree"
703+
:unique? true
704+
:functional? false
705+
:is_partial false
706+
:primary? false
707+
:user "pdb_test"}
708+
:same nil}
696709
{:left-only nil
697710
:right-only {:schema "public"
698711
:table table-name
@@ -1263,4 +1276,41 @@
12631276
:deferrable? "NO"}
12641277
:same nil}]))
12651278
dates))}
1266-
(diff-schema-maps before-migration (schema-info-map *db*))))))
1279+
(diff-schema-maps before-migration (schema-info-map *db*))))))
1280+
1281+
(deftest migration-76-schema-diff
  (clear-db-for-testing!)
  ;; Fast forward with the flag rebound to false so that migration 74
  ;; does not create the idx_reports_id indexes itself.
  (binding [partitioning/add-report-id-idx? false]
    (fast-forward-to-migration! 75))

  (let [schema-before (schema-info-map *db*)
        utc-now (ZonedDateTime/now (ZoneId/of "UTC"))
        ;; Partition suffixes for the days from four before today up to
        ;; three after today.
        suffixes (for [day-offset (range -4 4)]
                   (str/lower-case
                    (partitioning/date-suffix (.plusDays utc-now day-offset))))
        ;; The single index entry the migration should add for a partition.
        expected-entry (fn [suffix]
                         {:left-only nil
                          :right-only {:schema "public"
                                       :table (str "reports_" suffix)
                                       :index (str "idx_reports_id_" suffix)
                                       :index_keys ["id"]
                                       :type "btree"
                                       :unique? true
                                       :functional? false
                                       :is_partial false
                                       :primary? false
                                       :user "pdb_test"}
                          :same nil})]
    (apply-migration-for-testing! 76)

    ;; Only the new per-partition indexes may differ; tables and
    ;; constraints must be untouched.
    (is (= {:index-diff (mapv expected-entry suffixes)
            :table-diff nil
            :constraint-diff nil}
           (diff-schema-maps schema-before (schema-info-map *db*))))))

test/puppetlabs/puppetdb/scf/migrate_test.clj

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[puppetlabs.kitchensink.core :as kitchensink]
77
[puppetlabs.puppetdb.scf.storage-utils :as sutils
88
:refer [db-serialize]]
9+
[puppetlabs.puppetdb.testutils :as utils]
910
[cheshire.core :as json]
1011
[clojure.java.jdbc :as sql]
1112
[puppetlabs.puppetdb.scf.migrate :refer :all]
@@ -18,7 +19,9 @@
1819
[puppetlabs.kitchensink.core :as ks]
1920
[puppetlabs.puppetdb.testutils.db :refer [*db* with-test-db]]
2021
[puppetlabs.puppetdb.scf.hash :as shash]
21-
[puppetlabs.puppetdb.time :refer [ago days now to-timestamp]])
22+
[puppetlabs.puppetdb.time :refer [ago days now to-timestamp]]
23+
[puppetlabs.puppetdb.scf.partitioning :as part]
24+
[clojure.string :as str])
2225
(:import (java.time ZoneId ZonedDateTime)
2326
(java.sql Timestamp)))
2427

@@ -1414,7 +1417,15 @@
14141417
(is (= 1
14151418
(count hashes)))
14161419
(is (= expected
1417-
(first hashes)))))))
1420+
(first hashes)))
1421+
1422+
(testing "idx_reports_id index present in all partitions"
1423+
(let [assert-index-exists (fn [index indexes]
1424+
(is (true? (some #(str/includes? % index) indexes))))]
1425+
;; check that idx_reports_id is present in all partitions
1426+
(dorun (->> (utils/partition-names "reports")
1427+
(map utils/table-indexes)
1428+
(map (partial assert-index-exists "idx_reports_id"))))))))))
14181429

14191430
(deftest migration-75-add-report-type-column-with-default
14201431
(testing "reports should get default value of 'agent' for report_type"
@@ -1444,3 +1455,44 @@
14441455
(apply-migration-for-testing! 75)
14451456
(is (= "agent" (-> (query-to-vec "select * from reports")
14461457
first :report_type)))))))
1458+
1459+
(deftest migration-76-is-a-no-op-if-74-already-added-idx-reports-id
  (testing "Index created with new version of migration 74"
    (jdbc/with-db-connection *db*
      (clear-db-for-testing!)
      (let [;; Asserts that every reports partition has an index whose
            ;; definition mentions idx_reports_id.
            check-idx-reports-id
            (fn []
              (let [partitions (utils/partition-names "reports")]
                ;; Without this guard the per-partition checks below would
                ;; pass vacuously if no partitions were found.
                (is (seq partitions) "expected at least one reports partition")
                (doseq [table partitions]
                  (is (true? (some #(str/includes? % "idx_reports_id")
                                   (utils/table-indexes table)))))))]
        (fast-forward-to-migration! 75)
        ;; migration 74 should have added the partition indexes
        (check-idx-reports-id)

        (apply-migration-for-testing! 76)
        ;; migration 76 should be a no-op
        (check-idx-reports-id)))))
1478+
1479+
(deftest migration-76-adds-report-id-idx-when-not-added-by-migration-74
  (testing "All report paritions have idx_reports_id index when old version of 74 applied"
    (jdbc/with-db-connection *db*
      (clear-db-for-testing!)
      ;; don't add the idx_reports_id index when fast forwarding past migration 74
      (binding [part/add-report-id-idx? false]
        (fast-forward-to-migration! 75))
      (let [;; True when any index definition on the table mentions
            ;; idx_reports_id.
            has-id-index? (fn [table]
                            (boolean (some #(str/includes? % "idx_reports_id")
                                           (utils/table-indexes table))))
            partitions-before (utils/partition-names "reports")]
        ;; Without this guard both loops below would pass vacuously if no
        ;; partitions were found.
        (is (seq partitions-before) "expected at least one reports partition")
        ;; check that idx_reports_id wasn't added by migration 74
        (doseq [table partitions-before]
          (is (not (has-id-index? table))))

        (apply-migration-for-testing! 76)

        ;; check that idx_reports_id is now present in all partitions
        (let [partitions-after (utils/partition-names "reports")]
          (is (seq partitions-after) "expected at least one reports partition")
          (doseq [table partitions-after]
            (is (has-id-index? table))))))))

test/puppetlabs/puppetdb/scf/storage_test.clj

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,18 @@
17491749
"COMMIT TRANSACTION")
17501750
(store-example-report! report timestamp)
17511751
(is (= [{:certname certname}]
1752-
(query-to-vec ["SELECT certname FROM reports"]))))
1752+
(query-to-vec ["SELECT certname FROM reports"])))
1753+
1754+
(testing "Index is created in on demand partitions"
1755+
(let [assert-index-exists (fn [index indexes]
1756+
(is (true? (some #(str/includes? % index) indexes))))
1757+
1758+
partition (tu/partition-names "reports")]
1759+
;; check that idx_reports_id index is present in on demand partitions
1760+
(is (= 1 (count partition)))
1761+
(dorun (->> partition
1762+
(map tu/table-indexes)
1763+
(map (partial assert-index-exists "idx_reports_id_")))))))
17531764

17541765
(deftest-db report-with-event-timestamp
17551766
(let [z-report (update-event-timestamps report "2011-01-01T12:00:01Z")

test/puppetlabs/puppetdb/testutils.clj

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,25 @@
428428

429429
(def default-timeout-ms
430430
(* 1000 60 5))
431+
432+
(defn partition-names
  "Looks up, via pg_catalog.pg_inherits, every table that inherits from
  the given parent table in the public schema and returns their names
  as a sequence of strings."
  [table]
  (let [parent (str "public." table)
        rows (jdbc/query-to-vec
              ["SELECT inhrelid::regclass AS child
                FROM pg_catalog.pg_inherits
                WHERE inhparent = ?::regclass;"
               parent])]
    ;; The child column is turned into a plain string with .toString.
    (for [row rows]
      (.toString (:child row)))))
443+
444+
(defn table-indexes
445+
"Return the index definitions for the given table name"
446+
[table]
447+
(->> ["SELECT tablename, indexdef
448+
FROM pg_indexes
449+
WHERE schemaname = 'public' AND tablename = ?;"
450+
table]
451+
jdbc/query-to-vec
452+
(map :indexdef)))

0 commit comments

Comments
 (0)