diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 980acc33dc..ceb7b603b0 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@3e71e7135de310b70bc22dccb4d275acde8e055a # v2 + uses: taiki-e/install-action@37461a1de4134bec919a737ee9ba018e72011b7c # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date diff --git a/Cargo.lock b/Cargo.lock index e84129087e..c140347448 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,7 +166,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -273,7 +273,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -295,7 +295,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -306,7 +306,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -359,7 +359,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -517,7 +517,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.71", + "syn 2.0.72", "which", ] @@ -550,7 +550,7 @@ checksum = "1657dce144574f921af10a92876a96f0ca05dd830900598d21d91c8e4cf78f74" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -698,11 +698,27 @@ dependencies = [ "zeroize", ] +[[package]] +name = "bootstrap-agent-api" +version = "0.1.0" +dependencies = [ + "dropshot", + "nexus-client", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", + "sled-agent-types", + 
"sled-hardware-types", +] + [[package]] name = "bootstrap-agent-client" version = "0.1.0" dependencies = [ "omicron-common", + "omicron-uuid-kinds", "omicron-workspace-hack", "oxnet", "progenitor", @@ -853,12 +869,12 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.20.3" +version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4895c018bb228aa6b3ba1a0285543fcb4b704734c3fb1f72afaa75aa769500c1" +checksum = "ad639525b1c67b6a298f378417b060fbc04618bea559482a8484381cce27d965" dependencies = [ "serde", - "toml 0.8.14", + "toml 0.8.15", ] [[package]] @@ -1048,7 +1064,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1072,6 +1088,19 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "cockroach-admin-api" +version = "0.1.0" +dependencies = [ + "cockroach-admin-types", + "dropshot", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", +] + [[package]] name = "cockroach-admin-client" version = "0.1.0" @@ -1086,6 +1115,20 @@ dependencies = [ "slog", ] +[[package]] +name = "cockroach-admin-types" +version = "0.1.0" +dependencies = [ + "chrono", + "csv", + "omicron-common", + "omicron-workspace-hack", + "proptest", + "schemars", + "serde", + "test-strategy", +] + [[package]] name = "colorchoice" version = "1.0.1" @@ -1533,7 +1576,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1557,7 +1600,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1568,7 +1611,7 @@ checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" dependencies = [ "darling_core", "quote", - "syn 2.0.71", + "syn 2.0.72", ] 
[[package]] @@ -1602,7 +1645,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1646,7 +1689,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1679,7 +1722,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1700,7 +1743,7 @@ checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1721,7 +1764,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1731,7 +1774,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1744,7 +1787,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version 0.4.0", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1808,7 +1851,7 @@ dependencies = [ "diesel_table_macro_syntax", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1817,7 +1860,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" dependencies = [ - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1945,7 +1988,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "trust-dns-client", "trust-dns-proto", "trust-dns-resolver", @@ -2027,14 +2070,14 @@ dependencies = [ "serde", "serde_json", "slog", - "toml 0.8.14", + "toml 0.8.15", "uuid", ] [[package]] name = "dropshot" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#9fef3961c0b89aa8ab8e186dc0c89f8f4f811eea" +source = 
"git+https://github.com/oxidecomputer/dropshot?branch=main#7b594d01f47ca783c5d4a25ca2b256602580fe92" dependencies = [ "async-stream", "async-trait", @@ -2070,7 +2113,7 @@ dependencies = [ "slog-term", "tokio", "tokio-rustls 0.25.0", - "toml 0.8.14", + "toml 0.8.15", "usdt", "uuid", "version_check", @@ -2080,14 +2123,14 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#9fef3961c0b89aa8ab8e186dc0c89f8f4f811eea" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#7b594d01f47ca783c5d4a25ca2b256602580fe92" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", "serde", "serde_tokenstream", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2231,9 +2274,10 @@ dependencies = [ "russh-keys", "serde", "serde_json", + "sled-agent-types", "socket2 0.5.7", "tokio", - "toml 0.8.14", + "toml 0.8.15", "trust-dns-resolver", "uuid", ] @@ -2493,7 +2537,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2605,7 +2649,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3492,7 +3536,7 @@ dependencies = [ "smf", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "uuid", "whoami", "zone 0.3.0", @@ -3876,7 +3920,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" dependencies = [ "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4002,7 +4046,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -4369,7 +4413,7 @@ 
dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4542,7 +4586,7 @@ dependencies = [ "serde_json", "serde_with", "tokio-postgres", - "toml 0.8.14", + "toml 0.8.15", "uuid", ] @@ -4746,7 +4790,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4972,7 +5016,7 @@ version = "0.1.0" dependencies = [ "omicron-workspace-hack", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5143,7 +5187,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5307,6 +5351,8 @@ dependencies = [ "camino", "chrono", "clap", + "cockroach-admin-api", + "cockroach-admin-types", "csv", "dropshot", "expectorate", @@ -5335,7 +5381,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.8.14", + "toml 0.8.15", "url", ] @@ -5383,7 +5429,7 @@ dependencies = [ "test-strategy", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "uuid", ] @@ -5408,7 +5454,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "uuid", ] @@ -5443,7 +5489,7 @@ dependencies = [ "subprocess", "tokio", "tokio-postgres", - "toml 0.8.14", + "toml 0.8.15", ] [[package]] @@ -5485,7 +5531,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-tungstenite 0.20.1", - "toml 0.8.14", + "toml 0.8.15", "uuid", ] @@ -5712,7 +5758,7 @@ dependencies = [ "tar", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "walkdir", ] @@ -5760,7 +5806,7 @@ dependencies = [ "slog-term", "tar", "tokio", - "toml 0.8.14", + "toml 0.8.15", "tufaceous-lib", ] @@ -5780,6 +5826,7 @@ dependencies = [ "async-trait", "base64 0.22.1", "bootstore", + "bootstrap-agent-api", "bootstrap-agent-client", "bytes", "camino", @@ -5844,6 +5891,7 @@ dependencies = [ "serde_json", "sha3", "sled-agent-client", + "sled-agent-types", "sled-hardware", "sled-hardware-types", 
"sled-storage", @@ -5863,7 +5911,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml 0.8.14", + "toml 0.8.15", "usdt", "uuid", "zeroize", @@ -6012,7 +6060,7 @@ dependencies = [ "string_cache", "subtle", "syn 1.0.109", - "syn 2.0.71", + "syn 2.0.72", "time", "time-macros", "tokio", @@ -6022,7 +6070,7 @@ dependencies = [ "toml 0.7.8", "toml_datetime", "toml_edit 0.19.15", - "toml_edit 0.22.14", + "toml_edit 0.22.16", "tracing", "trust-dns-proto", "unicode-bidi", @@ -6104,8 +6152,10 @@ version = "0.1.0" dependencies = [ "anyhow", "atomicwrites", + "bootstrap-agent-api", "camino", "clap", + "cockroach-admin-api", "dns-server-api", "dropshot", "fs-err", @@ -6135,9 +6185,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" dependencies = [ "bitflags 2.5.0", "cfg-if", @@ -6156,7 +6206,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6167,9 +6217,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", @@ -6307,8 +6357,8 @@ dependencies = [ "oximeter-macro-impl", "oximeter-timeseries-macro", "prettyplease", - "syn 2.0.71", - "toml 0.8.14", + "syn 2.0.72", + "toml 0.8.15", "uuid", ] @@ -6363,7 +6413,7 @@ dependencies = [ "subprocess", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "uuid", ] @@ -6440,9 +6490,9 @@ 
dependencies = [ "serde_json", "slog-error-chain", "strum", - "syn 2.0.71", + "syn 2.0.72", "thiserror", - "toml 0.8.14", + "toml 0.8.15", "trybuild", "uuid", ] @@ -6479,7 +6529,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6515,7 +6565,7 @@ dependencies = [ "oximeter-impl", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6671,7 +6721,7 @@ dependencies = [ "regex", "regex-syntax 0.8.3", "structmeta 0.3.0", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6758,9 +6808,9 @@ dependencies = [ [[package]] name = "peg" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a625d12ad770914cbf7eff6f9314c3ef803bfe364a1b20bc36ddf56673e71e5" +checksum = "295283b02df346d1ef66052a757869b2876ac29a6bb0ac3f5f7cd44aebe40e8f" dependencies = [ "peg-macros", "peg-runtime", @@ -6768,9 +6818,9 @@ dependencies = [ [[package]] name = "peg-macros" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f241d42067ed3ab6a4fece1db720838e1418f36d868585a27931f95d6bc03582" +checksum = "bdad6a1d9cf116a059582ce415d5f5566aabcd4008646779dab7fdc2a9a9d426" dependencies = [ "peg-runtime", "proc-macro2", @@ -6839,7 +6889,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6909,7 +6959,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -7173,7 +7223,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -7269,7 +7319,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "syn 2.0.71", + "syn 2.0.72", "thiserror", "typify", 
"unicode-ident", @@ -7289,7 +7339,7 @@ dependencies = [ "serde_json", "serde_tokenstream", "serde_yaml", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -7780,7 +7830,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8034,7 +8084,7 @@ dependencies = [ "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.71", + "syn 2.0.72", "unicode-ident", ] @@ -8182,7 +8232,7 @@ dependencies = [ "serde", "tempfile", "thiserror", - "toml 0.8.14", + "toml 0.8.15", "toolchain_find", ] @@ -8426,7 +8476,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8452,7 +8502,7 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8572,7 +8622,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8583,7 +8633,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8633,7 +8683,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8654,7 +8704,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8696,7 +8746,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -8889,6 +8939,30 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-agent-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "bootstore", + "camino", + "camino-tempfile", + "nexus-client", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + 
"omicron-workspace-hack", + "oxnet", + "rcgen", + "schemars", + "serde", + "serde_json", + "sled-hardware-types", + "slog", + "thiserror", + "toml 0.8.15", +] + [[package]] name = "sled-hardware" version = "0.1.0" @@ -9038,7 +9112,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6 dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9165,7 +9239,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9207,7 +9281,7 @@ dependencies = [ "slog-dtrace", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", ] [[package]] @@ -9263,7 +9337,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9273,7 +9347,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ff9eaf853dec4c8802325d8b6d3dffa86cc707fd7a1a4cdbf416e13b061787a" dependencies = [ "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9359,7 +9433,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.2.0", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9371,7 +9445,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.3.0", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9382,7 +9456,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9393,7 +9467,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9428,7 +9502,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9441,7 +9515,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9488,9 +9562,9 @@ 
dependencies = [ [[package]] name = "syn" -version = "2.0.71" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -9664,7 +9738,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta 0.2.0", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9695,7 +9769,7 @@ checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9832,7 +9906,7 @@ checksum = "8d9ef545650e79f30233c0003bcc2504d7efac6dad25fca40744de773fe2049c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -9878,9 +9952,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.38.0" +version = "1.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" dependencies = [ "backtrace", "bytes", @@ -9903,7 +9977,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -10034,14 +10108,14 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" +checksum = "ac2caab0bf757388c6c0ae23b3293fdb463fee59434529014f85e3263b995c28" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.14", + "toml_edit 0.22.16", ] [[package]] @@ -10068,9 +10142,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.14" +version = "0.22.16" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38" +checksum = "278f3d518e152219c994ce877758516bca5e118eaed6996192a774fb9fbf0788" dependencies = [ "indexmap 2.2.6", "serde", @@ -10180,7 +10254,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -10287,16 +10361,16 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "trybuild" -version = "1.0.96" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33a5f13f11071020bb12de7a16b925d2d58636175c20c11dc5f96cb64bb6c9b3" +checksum = "5b1e5645f2ee8025c2f1d75e1138f2dd034d74e6ba54620f3c569ba2a2a1ea06" dependencies = [ "glob", "serde", "serde_derive", "serde_json", "termcolor", - "toml 0.8.14", + "toml 0.8.15", ] [[package]] @@ -10356,7 +10430,7 @@ dependencies = [ "slog", "tar", "tokio", - "toml 0.8.14", + "toml 0.8.15", "tough", "url", "zip", @@ -10457,7 +10531,7 @@ dependencies = [ "semver 1.0.23", "serde", "serde_json", - "syn 2.0.71", + "syn 2.0.72", "thiserror", "unicode-ident", ] @@ -10474,7 +10548,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.71", + "syn 2.0.72", "typify-impl", ] @@ -10656,9 +10730,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna 0.5.0", @@ -10691,7 +10765,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.71", + "syn 2.0.72", "usdt-impl", ] @@ -10709,7 +10783,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.71", + "syn 2.0.72", "thiserror", 
"thread-id", "version_check", @@ -10725,7 +10799,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.71", + "syn 2.0.72", "usdt-impl", ] @@ -10743,9 +10817,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom 0.2.14", "serde", @@ -10904,7 +10978,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -10938,7 +11012,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -11041,8 +11115,8 @@ dependencies = [ "textwrap", "tokio", "tokio-util", - "toml 0.8.14", - "toml_edit 0.22.14", + "toml 0.8.15", + "toml_edit 0.22.16", "tui-tree-widget", "unicode-width", "update-engine", @@ -11072,7 +11146,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "toml 0.8.14", + "toml 0.8.15", "update-engine", ] @@ -11140,6 +11214,7 @@ dependencies = [ "omicron-ddm-admin-client", "omicron-passwords", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "once_cell", "openapi-lint", @@ -11160,7 +11235,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml 0.8.14", + "toml 0.8.15", "tough", "trust-dns-resolver", "tufaceous", @@ -11183,6 +11258,7 @@ dependencies = [ "gateway-client", "omicron-common", "omicron-passwords", + "omicron-uuid-kinds", "omicron-workspace-hack", "schemars", "serde", @@ -11198,6 +11274,7 @@ dependencies = [ "chrono", "installinator-common", "omicron-common", + "omicron-uuid-kinds", "omicron-workspace-hack", "progenitor", "regress", @@ -11492,7 +11569,7 
@@ dependencies = [ "tabled", "tar", "tokio", - "toml 0.8.14", + "toml 0.8.15", "usdt", ] @@ -11541,7 +11618,7 @@ checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -11552,7 +11629,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -11572,7 +11649,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -11619,7 +11696,10 @@ dependencies = [ "dropshot", "illumos-utils", "omicron-common", + "omicron-sled-agent", "omicron-workspace-hack", + "serde_json", + "sled-hardware-types", "slog", "tokio", "uzers", diff --git a/Cargo.toml b/Cargo.toml index e46cdeb972..1e9cc6ba0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ members = [ "clients/sled-agent-client", "clients/wicketd-client", "cockroach-admin", + "cockroach-admin/api", + "cockroach-admin/types", "common", "dev-tools/crdb-seed", "dev-tools/omdb", @@ -73,6 +75,8 @@ members = [ "passwords", "rpaths", "sled-agent", + "sled-agent/bootstrap-agent-api", + "sled-agent/types", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -110,6 +114,8 @@ default-members = [ "clients/sled-agent-client", "clients/wicketd-client", "cockroach-admin", + "cockroach-admin/api", + "cockroach-admin/types", "common", "dev-tools/crdb-seed", "dev-tools/omdb", @@ -170,6 +176,8 @@ default-members = [ "passwords", "rpaths", "sled-agent", + "sled-agent/bootstrap-agent-api", + "sled-agent/types", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -250,6 +258,7 @@ bb8 = "0.8.5" bcs = "0.1.6" bincode = "1.3.3" bootstore = { path = "bootstore" } +bootstrap-agent-api = { path = "sled-agent/bootstrap-agent-api" } bootstrap-agent-client = { path = 
"clients/bootstrap-agent-client" } buf-list = { version = "1.0.3", features = ["tokio1"] } byteorder = "1.5.0" @@ -263,7 +272,9 @@ ciborium = "0.2.2" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } +cockroach-admin-types = { path = "cockroach-admin/types" } colored = "2.1" const_format = "0.2.32" cookie = "0.18" @@ -411,7 +422,7 @@ partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } parse-size = "1.0.0" paste = "1.0.15" percent-encoding = "2.3.1" -peg = "0.8.3" +peg = "0.8.4" pem = "3.0" petgraph = "0.6.5" postgres-protocol = "0.6.6" @@ -472,6 +483,7 @@ similar-asserts = "1.5.0" # server zones. sled = "=0.34.7" sled-agent-client = { path = "clients/sled-agent-client" } +sled-agent-types = { path = "sled-agent/types" } sled-hardware = { path = "sled-hardware" } sled-hardware-types = { path = "sled-hardware/types" } sled-storage = { path = "sled-storage" } @@ -507,29 +519,29 @@ textwrap = "0.16.1" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "https://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.37.0" +tokio = "1.38.1" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.15" tokio-tungstenite = "0.20" -tokio-util = { version = "0.7.10", features = ["io", "io-util"] } -toml = "0.8.12" -toml_edit = "0.22.12" +tokio-util = { version = "0.7.11", features = ["io", "io-util"] } +toml = "0.8.15" +toml_edit = "0.22.16" tough = { version = "0.17.1", features = [ "http" ] } trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" trust-dns-server = "0.22" -trybuild = "1.0.91" +trybuild = "1.0.97" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } tui-tree-widget = "0.21.0" typed-rng = { path = "typed-rng" } 
-unicode-width = "0.1.11" +unicode-width = "0.1.13" update-common = { path = "update-common" } update-engine = { path = "update-engine" } -url = "2.5.0" +url = "2.5.2" usdt = "0.5.0" -uuid = { version = "1.8.0", features = ["serde", "v4"] } +uuid = { version = "1.10.0", features = ["serde", "v4"] } uzers = "0.11" walkdir = "2.5" whoami = "1.5" diff --git a/clients/bootstrap-agent-client/Cargo.toml b/clients/bootstrap-agent-client/Cargo.toml index 0b1d2fab4b..e152e31966 100644 --- a/clients/bootstrap-agent-client/Cargo.toml +++ b/clients/bootstrap-agent-client/Cargo.toml @@ -18,5 +18,6 @@ serde_json.workspace = true sled-hardware-types.workspace = true slog.workspace = true uuid.workspace = true +omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true oxnet.workspace = true diff --git a/clients/bootstrap-agent-client/src/lib.rs b/clients/bootstrap-agent-client/src/lib.rs index b29f4e69f4..c737283d84 100644 --- a/clients/bootstrap-agent-client/src/lib.rs +++ b/clients/bootstrap-agent-client/src/lib.rs @@ -24,6 +24,8 @@ progenitor::generate_api!( replace = { AllowedSourceIps = omicron_common::api::external::AllowedSourceIps, ImportExportPolicy = omicron_common::api::external::ImportExportPolicy, + TypedUuidForRackInitKind = omicron_uuid_kinds::RackInitUuid, + TypedUuidForRackResetKind = omicron_uuid_kinds::RackResetUuid, } ); diff --git a/clients/wicketd-client/Cargo.toml b/clients/wicketd-client/Cargo.toml index 8e50964e59..0e55acd8bb 100644 --- a/clients/wicketd-client/Cargo.toml +++ b/clients/wicketd-client/Cargo.toml @@ -11,6 +11,7 @@ workspace = true chrono.workspace = true installinator-common.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true progenitor.workspace = true regress.workspace = true diff --git a/clients/wicketd-client/src/lib.rs b/clients/wicketd-client/src/lib.rs index bb377de31e..7a07ecd6a5 100644 --- a/clients/wicketd-client/src/lib.rs +++ 
b/clients/wicketd-client/src/lib.rs @@ -81,6 +81,8 @@ progenitor::generate_api!( StepEventForInstallinatorSpec = installinator_common::StepEvent, StepEventForWicketdEngineSpec = wicket_common::update_events::StepEvent, SwitchLocation = omicron_common::api::internal::shared::SwitchLocation, + TypedUuidForRackInitKind = omicron_uuid_kinds::RackInitUuid, + TypedUuidForRackResetKind = omicron_uuid_kinds::RackResetUuid, UpdateSimulatedResult = wicket_common::rack_update::UpdateSimulatedResult, UpdateTestError = wicket_common::rack_update::UpdateTestError, UplinkPreflightStepId = wicket_common::preflight_check::UplinkPreflightStepId, diff --git a/cockroach-admin/Cargo.toml b/cockroach-admin/Cargo.toml index 07f9807463..1738fd98e5 100644 --- a/cockroach-admin/Cargo.toml +++ b/cockroach-admin/Cargo.toml @@ -12,6 +12,8 @@ anyhow.workspace = true camino.workspace = true chrono.workspace = true clap.workspace = true +cockroach-admin-api.workspace = true +cockroach-admin-types.workspace = true csv.workspace = true dropshot.workspace = true http.workspace = true diff --git a/cockroach-admin/api/Cargo.toml b/cockroach-admin/api/Cargo.toml new file mode 100644 index 0000000000..f0434856d2 --- /dev/null +++ b/cockroach-admin/api/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "cockroach-admin-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +cockroach-admin-types.workspace = true +dropshot.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true diff --git a/cockroach-admin/api/src/lib.rs b/cockroach-admin/api/src/lib.rs new file mode 100644 index 0000000000..192ff56f04 --- /dev/null +++ b/cockroach-admin/api/src/lib.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use cockroach_admin_types::{NodeDecommission, NodeStatus}; +use dropshot::{HttpError, HttpResponseOk, RequestContext, TypedBody}; +use omicron_uuid_kinds::OmicronZoneUuid; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[dropshot::api_description { + module = "cockroach_admin_api_mod", +}] +pub trait CockroachAdminApi { + type Context; + + /// Get the status of all nodes in the CRDB cluster. + #[endpoint { + method = GET, + path = "/node/status", + }] + async fn node_status( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Get the CockroachDB node ID of the local cockroach instance. + #[endpoint { + method = GET, + path = "/node/id", + }] + async fn local_node_id( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Decommission a node from the CRDB cluster. + #[endpoint { + method = POST, + path = "/node/decommission", + }] + async fn node_decommission( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct ClusterNodeStatus { + pub all_nodes: Vec, +} + +/// CockroachDB Node ID +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct LocalNodeId { + /// The ID of this Omicron zone. + /// + /// This is included to ensure correctness even if a socket address on a + /// sled is reused for a different zone; if our caller is trying to + /// determine the node ID for a particular Omicron CockroachDB zone, they'll + /// contact us by socket address. We include our zone ID in the response for + /// their confirmation that we are the zone they intended to contact. 
+ pub zone_id: OmicronZoneUuid, + // CockroachDB node IDs are integers, in practice, but our use of them is as + // input and output to the `cockroach` CLI. We use a string which is a bit + // more natural (no need to parse CLI output or stringify an ID to send it + // as input) and leaves open the door for the format to change in the + // future. + pub node_id: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeId { + pub node_id: String, +} diff --git a/cockroach-admin/src/bin/cockroach-admin.rs b/cockroach-admin/src/bin/cockroach-admin.rs index 0399c8bbb0..ee6d8f4aa9 100644 --- a/cockroach-admin/src/bin/cockroach-admin.rs +++ b/cockroach-admin/src/bin/cockroach-admin.rs @@ -19,9 +19,6 @@ use std::net::SocketAddrV6; #[derive(Debug, Parser)] #[clap(name = "cockroach-admin", about = "Omicron CRDB cluster admin server")] enum Args { - /// Print the OpenAPI Spec document and exit - Openapi, - /// Start the CRDB admin server Run { /// Path to the `cockroach` CLI @@ -57,8 +54,6 @@ async fn main_impl() -> Result<(), CmdError> { let args = Args::parse(); match args { - Args::Openapi => omicron_cockroach_admin::run_openapi() - .map_err(|e| CmdError::Failure(anyhow!(e))), Args::Run { path_to_cockroach_binary, cockroach_address, diff --git a/cockroach-admin/src/cockroach_cli.rs b/cockroach-admin/src/cockroach_cli.rs index 1951866ce7..b812cf9749 100644 --- a/cockroach-admin/src/cockroach_cli.rs +++ b/cockroach-admin/src/cockroach_cli.rs @@ -3,20 +3,14 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use camino::Utf8PathBuf; -use chrono::DateTime; -use chrono::NaiveDateTime; -use chrono::Utc; +use cockroach_admin_types::NodeDecommission; +use cockroach_admin_types::NodeStatus; use dropshot::HttpError; use illumos_utils::output_to_exec_error; use illumos_utils::ExecutionError; -use schemars::JsonSchema; -use serde::de; -use serde::Deserialize; -use serde::Serialize; use slog_error_chain::InlineErrorChain; use slog_error_chain::SlogInlineError; use std::io; -use std::net::SocketAddr; use std::net::SocketAddrV6; use tokio::process::Command; @@ -139,463 +133,16 @@ impl CockroachCli { } } -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct NodeStatus { - pub node_id: String, - pub address: SocketAddr, - pub sql_address: SocketAddr, - pub build: String, - pub started_at: DateTime, - pub updated_at: DateTime, - pub locality: String, - pub is_available: bool, - pub is_live: bool, -} - -// Slightly different `NodeStatus` that matches what we get from `cockroach`: -// timestamps are a fixed format with no timezone (but are actually UTC), so we -// have a custom deserializer, and the ID column is `id` instead of `node_id`. 
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -struct CliNodeStatus { - id: String, - address: SocketAddr, - sql_address: SocketAddr, - build: String, - #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] - started_at: DateTime, - #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] - updated_at: DateTime, - locality: String, - is_available: bool, - is_live: bool, -} - -impl From for NodeStatus { - fn from(cli: CliNodeStatus) -> Self { - Self { - node_id: cli.id, - address: cli.address, - sql_address: cli.sql_address, - build: cli.build, - started_at: cli.started_at, - updated_at: cli.updated_at, - locality: cli.locality, - is_available: cli.is_available, - is_live: cli.is_live, - } - } -} - -fn parse_cockroach_cli_timestamp<'de, D>( - d: D, -) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - struct CockroachTimestampVisitor; - impl<'de> de::Visitor<'de> for CockroachTimestampVisitor { - type Value = DateTime; - - fn expecting( - &self, - formatter: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - formatter.write_str("a Cockroach CLI timestamp") - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - let dt = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.f") - .map_err(E::custom)?; - Ok(DateTime::from_naive_utc_and_offset(dt, Utc)) - } - } - - d.deserialize_str(CockroachTimestampVisitor) -} - -impl NodeStatus { - pub fn parse_from_csv(data: &[u8]) -> Result, csv::Error> { - let mut statuses = Vec::new(); - let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); - for result in reader.deserialize() { - let record: CliNodeStatus = result?; - statuses.push(record.into()); - } - Ok(statuses) - } -} - -// The cockroach CLI and `crdb_internal.gossip_liveness` table use a string for -// node membership, but there are only three meaningful values per -// https://github.com/cockroachdb/cockroach/blob/0c92c710d2baadfdc5475be8d2238cf26cb152ca/pkg/kv/kvserver/liveness/livenesspb/liveness.go#L96, 
-// so we'll convert into a Rust enum and leave the "unknown" case for future -// changes that expand or reword these values. -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(tag = "state", rename_all = "lowercase")] -pub enum NodeMembership { - Active, - Decommissioning, - Decommissioned, - Unknown { value: String }, -} - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct NodeDecommission { - pub node_id: String, - pub is_live: bool, - pub replicas: i64, - pub is_decommissioning: bool, - pub membership: NodeMembership, - pub is_draining: bool, - pub notes: Vec, -} - -// Slightly different `NodeDecommission` that matches what we get from -// `cockroach`: this omites `notes`, which isn't really a CSV field at all, but -// is instead where we collect the non-CSV string output from the CLI, uses -// a custom deserializer for `membership` to handle unknown variants, and the ID -// column is `id` instead of `node_id`. 
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -struct CliNodeDecommission { - pub id: String, - pub is_live: bool, - pub replicas: i64, - pub is_decommissioning: bool, - #[serde(deserialize_with = "parse_node_membership")] - pub membership: NodeMembership, - pub is_draining: bool, -} - -impl From<(CliNodeDecommission, Vec)> for NodeDecommission { - fn from((cli, notes): (CliNodeDecommission, Vec)) -> Self { - Self { - node_id: cli.id, - is_live: cli.is_live, - replicas: cli.replicas, - is_decommissioning: cli.is_decommissioning, - membership: cli.membership, - is_draining: cli.is_draining, - notes, - } - } -} - -fn parse_node_membership<'de, D>(d: D) -> Result -where - D: serde::Deserializer<'de>, -{ - struct CockroachNodeMembershipVisitor; - - impl<'de> de::Visitor<'de> for CockroachNodeMembershipVisitor { - type Value = NodeMembership; - - fn expecting( - &self, - formatter: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - formatter.write_str("a Cockroach node membership string") - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - let membership = match v { - "active" => NodeMembership::Active, - "decommissioning" => NodeMembership::Decommissioning, - "decommissioned" => NodeMembership::Decommissioned, - _ => NodeMembership::Unknown { value: v.to_string() }, - }; - Ok(membership) - } - } - - d.deserialize_str(CockroachNodeMembershipVisitor) -} - -impl NodeDecommission { - pub fn parse_from_csv(data: &[u8]) -> Result { - // Reading the node decommission output is awkward because it isn't - // fully CSV. We expect a CSV header, then a row for each node being - // decommissioned, then (maybe) a blank line followed by a note that is - // just a string, not related to the initial CSV data. Even though the - // CLI supports decommissioning more than one node in one invocation, we - // only provide an API to decommission a single node, so we expect: - // - // 1. The CSV header line - // 2. The one row of CSV data - // 3. 
Trailing notes - // - // We'll collect the notes as a separate field and return them to our - // caller. - - // First we'll run the data through a csv::Reader; this will pull out - // the header row and the one row of data. - let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); - let record: CliNodeDecommission = - reader.deserialize().next().ok_or_else(|| { - io::Error::other("fewer than two lines of output") - })??; - - // Get the position where the reader ended after that one row; we'll - // collect any remaining nonempty lines as `notes`. - let extra_data = &data[reader.position().byte() as usize..]; - let mut notes = Vec::new(); - for line in String::from_utf8_lossy(extra_data).lines() { - let line = line.trim(); - if !line.is_empty() { - notes.push(line.to_string()); - } - } - - Ok(Self::from((record, notes))) - } -} - #[cfg(test)] mod tests { + use std::net::SocketAddr; + use super::*; - use chrono::NaiveDate; + use cockroach_admin_types::NodeMembership; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; - use test_strategy::proptest; use url::Url; - #[test] - fn test_node_status_parse_single_line_from_csv() { - let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live -1,[::1]:42021,[::1]:42021,v22.1.9,2024-05-21 15:19:50.523796,2024-05-21 16:31:28.050069,,true,true"#; - let expected = NodeStatus { - node_id: "1".to_string(), - address: "[::1]:42021".parse().unwrap(), - sql_address: "[::1]:42021".parse().unwrap(), - build: "v22.1.9".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(15, 19, 50, 523796) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(16, 31, 28, 50069) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - }; - - let statuses = 
NodeStatus::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, vec![expected]); - } - - #[test] - fn test_node_status_parse_multiple_lines_from_csv() { - let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live -1,[fd00:1122:3344:109::3]:32221,[fd00:1122:3344:109::3]:32221,v22.1.9-dirty,2024-05-18 19:18:00.597145,2024-05-21 15:22:34.290434,,true,true -2,[fd00:1122:3344:105::3]:32221,[fd00:1122:3344:105::3]:32221,v22.1.9-dirty,2024-05-18 19:17:01.796714,2024-05-21 15:22:34.901268,,true,true -3,[fd00:1122:3344:10b::3]:32221,[fd00:1122:3344:10b::3]:32221,v22.1.9-dirty,2024-05-18 19:18:52.37564,2024-05-21 15:22:36.341146,,true,true -4,[fd00:1122:3344:107::3]:32221,[fd00:1122:3344:107::3]:32221,v22.1.9-dirty,2024-05-18 19:16:22.788276,2024-05-21 15:22:34.897047,,true,true -5,[fd00:1122:3344:108::3]:32221,[fd00:1122:3344:108::3]:32221,v22.1.9-dirty,2024-05-18 19:18:09.196634,2024-05-21 15:22:35.168738,,true,true"#; - let expected = vec![ - NodeStatus { - node_id: "1".to_string(), - address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(), - build: "v22.1.9-dirty".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 18) - .unwrap() - .and_hms_micro_opt(19, 18, 0, 597145) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(15, 22, 34, 290434) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - }, - NodeStatus { - node_id: "2".to_string(), - address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(), - build: "v22.1.9-dirty".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 18) - .unwrap() - .and_hms_micro_opt(19, 17, 1, 796714) - .unwrap(), - Utc, - ), - 
updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(15, 22, 34, 901268) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - }, - NodeStatus { - node_id: "3".to_string(), - address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(), - build: "v22.1.9-dirty".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 18) - .unwrap() - .and_hms_micro_opt(19, 18, 52, 375640) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(15, 22, 36, 341146) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - }, - NodeStatus { - node_id: "4".to_string(), - address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(), - build: "v22.1.9-dirty".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 18) - .unwrap() - .and_hms_micro_opt(19, 16, 22, 788276) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(15, 22, 34, 897047) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - }, - NodeStatus { - node_id: "5".to_string(), - address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(), - build: "v22.1.9-dirty".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 18) - .unwrap() - .and_hms_micro_opt(19, 18, 9, 196634) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2024, 5, 21) - .unwrap() - .and_hms_micro_opt(15, 22, 35, 168738) - .unwrap(), - 
Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - }, - ]; - - let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses.len(), expected.len()); - for (status, expected) in statuses.iter().zip(&expected) { - assert_eq!(status, expected); - } - } - - #[test] - fn test_node_decommission_parse_with_no_trailing_notes() { - let input = - br#"id,is_live,replicas,is_decommissioning,membership,is_draining -6,true,24,true,decommissioning,false"#; - let expected = NodeDecommission { - node_id: "6".to_string(), - is_live: true, - replicas: 24, - is_decommissioning: true, - membership: NodeMembership::Decommissioning, - is_draining: false, - notes: vec![], - }; - - let statuses = - NodeDecommission::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, expected); - } - - #[test] - fn test_node_decommission_parse_with_trailing_notes() { - let input = - br#"id,is_live,replicas,is_decommissioning,membership,is_draining -6,false,0,true,decommissioned,false - -No more data reported on target nodes. Please verify cluster health before removing the nodes. -"#; - let expected = NodeDecommission { - node_id: "6".to_string(), - is_live: false, - replicas: 0, - is_decommissioning: true, - membership: NodeMembership::Decommissioned, - is_draining: false, - notes: vec!["No more data reported on target nodes. \ - Please verify cluster health before removing the nodes." 
- .to_string()], - }; - - let statuses = - NodeDecommission::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, expected); - } - - #[test] - fn test_node_decommission_parse_with_unexpected_membership_value() { - let input = - br#"id,is_live,replicas,is_decommissioning,membership,is_draining -6,false,0,true,foobar,false"#; - let expected = NodeDecommission { - node_id: "6".to_string(), - is_live: false, - replicas: 0, - is_decommissioning: true, - membership: NodeMembership::Unknown { value: "foobar".to_string() }, - is_draining: false, - notes: vec![], - }; - - let statuses = - NodeDecommission::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, expected); - } - // Ensure that if `cockroach node status` changes in a future CRDB version // bump, we have a test that will fail to force us to check whether our // current parsing is still valid. @@ -721,14 +268,4 @@ No more data reported on target nodes. Please verify cluster health before remov db.cleanup().await.unwrap(); logctx.cleanup_successful(); } - - #[proptest] - fn node_status_parse_doesnt_panic_on_arbitrary_input(input: Vec) { - _ = NodeStatus::parse_from_csv(&input); - } - - #[proptest] - fn node_decommission_parse_doesnt_panic_on_arbitrary_input(input: Vec) { - _ = NodeDecommission::parse_from_csv(&input); - } } diff --git a/cockroach-admin/src/http_entrypoints.rs b/cockroach-admin/src/http_entrypoints.rs index 45957df0df..77eaf7e02b 100644 --- a/cockroach-admin/src/http_entrypoints.rs +++ b/cockroach-admin/src/http_entrypoints.rs @@ -2,112 +2,53 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use crate::cockroach_cli::NodeDecommission; -use crate::cockroach_cli::NodeStatus; use crate::context::ServerContext; -use dropshot::endpoint; -use dropshot::ApiDescriptionRegisterError; +use cockroach_admin_api::*; +use cockroach_admin_types::NodeDecommission; use dropshot::HttpError; use dropshot::HttpResponseOk; use dropshot::RequestContext; use dropshot::TypedBody; -use omicron_uuid_kinds::OmicronZoneUuid; -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; use std::sync::Arc; type CrdbApiDescription = dropshot::ApiDescription>; pub fn api() -> CrdbApiDescription { - fn register_endpoints( - api: &mut CrdbApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(local_node_id)?; - api.register(node_status)?; - api.register(node_decommission)?; - Ok(()) - } - - let mut api = CrdbApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api + cockroach_admin_api_mod::api_description::() + .expect("registered entrypoints") } -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct ClusterNodeStatus { - pub all_nodes: Vec, -} - -/// Get the status of all nodes in the CRDB cluster -#[endpoint { - method = GET, - path = "/node/status", -}] -async fn node_status( - rqctx: RequestContext>, -) -> Result, HttpError> { - let ctx = rqctx.context(); - let all_nodes = - ctx.cockroach_cli().node_status().await.map_err(HttpError::from)?; - Ok(HttpResponseOk(ClusterNodeStatus { all_nodes })) -} +enum CockroachAdminImpl {} -/// CockroachDB Node ID -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct LocalNodeId { - /// The ID of this Omicron zone. 
- /// - /// This is included to ensure correctness even if a socket address on a - /// sled is reused for a different zone; if our caller is trying to - /// determine the node ID for a particular Omicron CockroachDB zone, they'll - /// contact us by socket address. We include our zone ID in the response for - /// their confirmation that we are the zone they intended to contact. - pub zone_id: OmicronZoneUuid, - // CockroachDB node IDs are integers, in practice, but our use of them is as - // input and output to the `cockroach` CLI. We use a string which is a bit - // more natural (no need to parse CLI output or stringify an ID to send it - // as input) and leaves open the door for the format to change in the - // future. - pub node_id: String, -} +impl CockroachAdminApi for CockroachAdminImpl { + type Context = Arc; -/// Get the CockroachDB node ID of the local cockroach instance. -#[endpoint { - method = GET, - path = "/node/id", -}] -async fn local_node_id( - rqctx: RequestContext>, -) -> Result, HttpError> { - let ctx = rqctx.context(); - let node_id = ctx.node_id().await?.to_string(); - let zone_id = ctx.zone_id(); - Ok(HttpResponseOk(LocalNodeId { zone_id, node_id })) -} + async fn node_status( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let all_nodes = + ctx.cockroach_cli().node_status().await.map_err(HttpError::from)?; + Ok(HttpResponseOk(ClusterNodeStatus { all_nodes })) + } -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct NodeId { - pub node_id: String, -} + async fn local_node_id( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let node_id = ctx.node_id().await?.to_string(); + let zone_id = ctx.zone_id(); + Ok(HttpResponseOk(LocalNodeId { zone_id, node_id })) + } -/// Decommission a node from the CRDB cluster -#[endpoint { - method = POST, - path = "/node/decommission", -}] -async fn node_decommission( - 
rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let ctx = rqctx.context(); - let NodeId { node_id } = body.into_inner(); - let decommission_status = - ctx.cockroach_cli().node_decommission(&node_id).await?; - Ok(HttpResponseOk(decommission_status)) + async fn node_decommission( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let NodeId { node_id } = body.into_inner(); + let decommission_status = + ctx.cockroach_cli().node_decommission(&node_id).await?; + Ok(HttpResponseOk(decommission_status)) + } } diff --git a/cockroach-admin/src/lib.rs b/cockroach-admin/src/lib.rs index f4a32cb6c0..1057344297 100644 --- a/cockroach-admin/src/lib.rs +++ b/cockroach-admin/src/lib.rs @@ -23,21 +23,6 @@ pub use cockroach_cli::CockroachCli; pub use cockroach_cli::CockroachCliError; pub use config::Config; -/// Run the OpenAPI generator for the API; this emits the OpenAPI spec to -/// stdout. -pub fn run_openapi() -> Result<(), String> { - http_entrypoints::api() - .openapi("Oxide CockroachDb Cluster Admin API", "0.0.1") - .description( - "API for interacting with the Oxide \ - control plane's CockroachDb cluster", - ) - .contact_url("https://oxide.computer") - .contact_email("api@oxide.computer") - .write(&mut std::io::stdout()) - .map_err(|e| e.to_string()) -} - #[derive(Debug, thiserror::Error, SlogInlineError)] pub enum StartError { #[error("failed to initialize logger")] diff --git a/cockroach-admin/tests/integration_tests/commands.rs b/cockroach-admin/tests/integration_tests/commands.rs deleted file mode 100644 index 875427d948..0000000000 --- a/cockroach-admin/tests/integration_tests/commands.rs +++ /dev/null @@ -1,43 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Tests for the executable commands in this repo. 
- -use expectorate::assert_contents; -use omicron_test_utils::dev::test_cmds::{ - assert_exit_code, path_to_executable, run_command, EXIT_SUCCESS, -}; -use openapiv3::OpenAPI; -use std::path::PathBuf; -use subprocess::Exec; - -// path to executable -const CMD_COCKROACH_ADMIN: &str = env!("CARGO_BIN_EXE_cockroach-admin"); - -fn path_to_cockroach_admin() -> PathBuf { - path_to_executable(CMD_COCKROACH_ADMIN) -} - -#[test] -fn test_cockroach_admin_openapi() { - let exec = Exec::cmd(path_to_cockroach_admin()).arg("openapi"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - assert_contents( - "tests/output/cmd-cockroach-admin-openapi-stderr", - &stderr_text, - ); - - let spec: OpenAPI = serde_json::from_str(&stdout_text) - .expect("stdout was not valid OpenAPI"); - - // Check for lint errors. - let errors = openapi_lint::validate(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - // Confirm that the output hasn't changed. It's expected that we'll change - // this file as the API evolves, but pay attention to the diffs to ensure - // that the changes match your expectations. 
- assert_contents("../openapi/cockroach-admin.json", &stdout_text); -} diff --git a/cockroach-admin/tests/output/cmd-cockroach-admin-openapi-stderr b/cockroach-admin/tests/output/cmd-cockroach-admin-openapi-stderr deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/cockroach-admin/types/Cargo.toml b/cockroach-admin/types/Cargo.toml new file mode 100644 index 0000000000..870d1c55c2 --- /dev/null +++ b/cockroach-admin/types/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "cockroach-admin-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +csv.workspace = true +omicron-common.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true + +[dev-dependencies] +proptest.workspace = true +test-strategy.workspace = true diff --git a/cockroach-admin/types/src/lib.rs b/cockroach-admin/types/src/lib.rs new file mode 100644 index 0000000000..3653cc616b --- /dev/null +++ b/cockroach-admin/types/src/lib.rs @@ -0,0 +1,477 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::{io, net::SocketAddr}; + +use chrono::{DateTime, NaiveDateTime, Utc}; +use schemars::JsonSchema; +use serde::{de, Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeStatus { + pub node_id: String, + pub address: SocketAddr, + pub sql_address: SocketAddr, + pub build: String, + pub started_at: DateTime, + pub updated_at: DateTime, + pub locality: String, + pub is_available: bool, + pub is_live: bool, +} + +impl NodeStatus { + pub fn parse_from_csv(data: &[u8]) -> Result, csv::Error> { + let mut statuses = Vec::new(); + let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); + for result in reader.deserialize() { + let record: CliNodeStatus = result?; + statuses.push(record.into()); + } + Ok(statuses) + } +} + +// Slightly different `NodeStatus` that matches what we get from `cockroach`: +// timestamps are a fixed format with no timezone (but are actually UTC), so we +// have a custom deserializer, and the ID column is `id` instead of `node_id`. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +struct CliNodeStatus { + id: String, + address: SocketAddr, + sql_address: SocketAddr, + build: String, + #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] + started_at: DateTime, + #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] + updated_at: DateTime, + locality: String, + is_available: bool, + is_live: bool, +} + +impl From for NodeStatus { + fn from(cli: CliNodeStatus) -> Self { + Self { + node_id: cli.id, + address: cli.address, + sql_address: cli.sql_address, + build: cli.build, + started_at: cli.started_at, + updated_at: cli.updated_at, + locality: cli.locality, + is_available: cli.is_available, + is_live: cli.is_live, + } + } +} + +fn parse_cockroach_cli_timestamp<'de, D>( + d: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + struct CockroachTimestampVisitor; + impl<'de> de::Visitor<'de> for CockroachTimestampVisitor { + type Value = DateTime; + + fn expecting( + &self, + formatter: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + formatter.write_str("a Cockroach CLI timestamp") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let dt = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.f") + .map_err(E::custom)?; + Ok(DateTime::from_naive_utc_and_offset(dt, Utc)) + } + } + + d.deserialize_str(CockroachTimestampVisitor) +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeDecommission { + pub node_id: String, + pub is_live: bool, + pub replicas: i64, + pub is_decommissioning: bool, + pub membership: NodeMembership, + pub is_draining: bool, + pub notes: Vec, +} + +impl NodeDecommission { + pub fn parse_from_csv(data: &[u8]) -> Result { + // Reading the node decommission output is awkward because it isn't + // fully CSV. 
We expect a CSV header, then a row for each node being
+        // decommissioned, then (maybe) a blank line followed by a note that is
+        // just a string, not related to the initial CSV data. Even though the
+        // CLI supports decommissioning more than one node in one invocation, we
+        // only provide an API to decommission a single node, so we expect:
+        //
+        // 1. The CSV header line
+        // 2. The one row of CSV data
+        // 3. Trailing notes
+        //
+        // We'll collect the notes as a separate field and return them to our
+        // caller.
+
+        // First we'll run the data through a csv::Reader; this will pull out
+        // the header row and the one row of data.
+        let mut reader = csv::Reader::from_reader(io::Cursor::new(data));
+        let record: CliNodeDecommission =
+            reader.deserialize().next().ok_or_else(|| {
+                io::Error::other("fewer than two lines of output")
+            })??;
+
+        // Get the position where the reader ended after that one row; we'll
+        // collect any remaining nonempty lines as `notes`.
+        let extra_data = &data[reader.position().byte() as usize..];
+        let mut notes = Vec::new();
+        for line in String::from_utf8_lossy(extra_data).lines() {
+            let line = line.trim();
+            if !line.is_empty() {
+                notes.push(line.to_string());
+            }
+        }
+
+        Ok(Self::from((record, notes)))
+    }
+}
+
+// Slightly different `NodeDecommission` that matches what we get from
+// `cockroach`: this omits `notes`, which isn't really a CSV field at all, but
+// is instead where we collect the non-CSV string output from the CLI, uses
+// a custom deserializer for `membership` to handle unknown variants, and the ID
+// column is `id` instead of `node_id`. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +struct CliNodeDecommission { + pub id: String, + pub is_live: bool, + pub replicas: i64, + pub is_decommissioning: bool, + #[serde(deserialize_with = "parse_node_membership")] + pub membership: NodeMembership, + pub is_draining: bool, +} + +impl From<(CliNodeDecommission, Vec)> for NodeDecommission { + fn from((cli, notes): (CliNodeDecommission, Vec)) -> Self { + Self { + node_id: cli.id, + is_live: cli.is_live, + replicas: cli.replicas, + is_decommissioning: cli.is_decommissioning, + membership: cli.membership, + is_draining: cli.is_draining, + notes, + } + } +} + +fn parse_node_membership<'de, D>(d: D) -> Result +where + D: serde::Deserializer<'de>, +{ + struct CockroachNodeMembershipVisitor; + + impl<'de> de::Visitor<'de> for CockroachNodeMembershipVisitor { + type Value = NodeMembership; + + fn expecting( + &self, + formatter: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + formatter.write_str("a Cockroach node membership string") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let membership = match v { + "active" => NodeMembership::Active, + "decommissioning" => NodeMembership::Decommissioning, + "decommissioned" => NodeMembership::Decommissioned, + _ => NodeMembership::Unknown { value: v.to_string() }, + }; + Ok(membership) + } + } + + d.deserialize_str(CockroachNodeMembershipVisitor) +} + +// The cockroach CLI and `crdb_internal.gossip_liveness` table use a string for +// node membership, but there are only three meaningful values per +// https://github.com/cockroachdb/cockroach/blob/0c92c710d2baadfdc5475be8d2238cf26cb152ca/pkg/kv/kvserver/liveness/livenesspb/liveness.go#L96, +// so we'll convert into a Rust enum and leave the "unknown" case for future +// changes that expand or reword these values. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "state", rename_all = "lowercase")] +pub enum NodeMembership { + Active, + Decommissioning, + Decommissioned, + Unknown { value: String }, +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::NaiveDate; + use test_strategy::proptest; + + #[test] + fn test_node_status_parse_single_line_from_csv() { + let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live +1,[::1]:42021,[::1]:42021,v22.1.9,2024-05-21 15:19:50.523796,2024-05-21 16:31:28.050069,,true,true"#; + let expected = NodeStatus { + node_id: "1".to_string(), + address: "[::1]:42021".parse().unwrap(), + sql_address: "[::1]:42021".parse().unwrap(), + build: "v22.1.9".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 19, 50, 523796) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(16, 31, 28, 50069) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }; + + let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, vec![expected]); + } + + #[test] + fn test_node_status_parse_multiple_lines_from_csv() { + let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live +1,[fd00:1122:3344:109::3]:32221,[fd00:1122:3344:109::3]:32221,v22.1.9-dirty,2024-05-18 19:18:00.597145,2024-05-21 15:22:34.290434,,true,true +2,[fd00:1122:3344:105::3]:32221,[fd00:1122:3344:105::3]:32221,v22.1.9-dirty,2024-05-18 19:17:01.796714,2024-05-21 15:22:34.901268,,true,true +3,[fd00:1122:3344:10b::3]:32221,[fd00:1122:3344:10b::3]:32221,v22.1.9-dirty,2024-05-18 19:18:52.37564,2024-05-21 15:22:36.341146,,true,true +4,[fd00:1122:3344:107::3]:32221,[fd00:1122:3344:107::3]:32221,v22.1.9-dirty,2024-05-18 
19:16:22.788276,2024-05-21 15:22:34.897047,,true,true +5,[fd00:1122:3344:108::3]:32221,[fd00:1122:3344:108::3]:32221,v22.1.9-dirty,2024-05-18 19:18:09.196634,2024-05-21 15:22:35.168738,,true,true"#; + let expected = vec![ + NodeStatus { + node_id: "1".to_string(), + address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 18, 0, 597145) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 34, 290434) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "2".to_string(), + address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 17, 1, 796714) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 34, 901268) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "3".to_string(), + address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 18, 52, 375640) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 36, 341146) + .unwrap(), + Utc, + ), + 
locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "4".to_string(), + address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 16, 22, 788276) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 34, 897047) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "5".to_string(), + address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 18, 9, 196634) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 35, 168738) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + ]; + + let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses.len(), expected.len()); + for (status, expected) in statuses.iter().zip(&expected) { + assert_eq!(status, expected); + } + } + + #[test] + fn test_node_decommission_parse_with_no_trailing_notes() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,true,24,true,decommissioning,false"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: true, + replicas: 24, + is_decommissioning: true, + membership: NodeMembership::Decommissioning, + is_draining: false, + notes: vec![], + }; + + let statuses = + 
NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + #[test] + fn test_node_decommission_parse_with_trailing_notes() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,false,0,true,decommissioned,false + +No more data reported on target nodes. Please verify cluster health before removing the nodes. +"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: false, + replicas: 0, + is_decommissioning: true, + membership: NodeMembership::Decommissioned, + is_draining: false, + notes: vec!["No more data reported on target nodes. \ + Please verify cluster health before removing the nodes." + .to_string()], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + #[test] + fn test_node_decommission_parse_with_unexpected_membership_value() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,false,0,true,foobar,false"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: false, + replicas: 0, + is_decommissioning: true, + membership: NodeMembership::Unknown { value: "foobar".to_string() }, + is_draining: false, + notes: vec![], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + // TODO: the proptests below should probably be fuzz targets instead to + // allow for guided fuzzing. 
+ + #[proptest] + fn node_status_parse_doesnt_panic_on_arbitrary_input(input: Vec) { + _ = NodeStatus::parse_from_csv(&input); + } + + #[proptest] + fn node_decommission_parse_doesnt_panic_on_arbitrary_input(input: Vec) { + _ = NodeDecommission::parse_from_csv(&input); + } +} diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 44b34b0220..98669ddc06 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2879,7 +2879,12 @@ async fn cmd_db_validate_region_snapshots( use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; - let url = format!("http://{}", dataset.address()); + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); let actual_region_snapshots = client @@ -2940,7 +2945,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: region_snapshot.dataset_id, region_id: region_snapshot.region_id, snapshot_id: region_snapshot.snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "region snapshot was deleted, please remove its record", ), @@ -2955,7 +2960,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: region_snapshot.dataset_id, region_id: region_snapshot.region_id, snapshot_id: region_snapshot.snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "NEXUS BUG: region snapshot was deleted, but the higher level snapshot was not!", ), @@ -2984,7 +2989,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: region_snapshot.dataset_id, region_id: region_snapshot.region_id, snapshot_id: region_snapshot.snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: format!( "AGENT BUG: region snapshot was deleted but has a running snapshot in state {:?}!", running_snapshot.state, @@ -3034,7 +3039,12 @@ 
async fn cmd_db_validate_region_snapshots( use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; - let url = format!("http://{}", dataset.address()); + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); let actual_region_snapshots = client @@ -3052,7 +3062,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: dataset.id(), region_id: region.id(), snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "Nexus does not know about this snapshot!", ), @@ -3077,7 +3087,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: dataset.id(), region_id: region.id(), snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "Nexus does not know about this running snapshot!" ), diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index aa0cfacfd5..dc07dae0e2 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -10,7 +10,9 @@ workspace = true [dependencies] anyhow.workspace = true atomicwrites.workspace = true +bootstrap-agent-api.workspace = true camino.workspace = true +cockroach-admin-api.workspace = true clap.workspace = true dns-server-api.workspace = true dropshot.workspace = true diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 83f0f4dd57..e8ebc23550 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -14,6 +14,27 @@ use openapiv3::OpenAPI; /// All APIs managed by openapi-manager. 
pub fn all_apis() -> Vec { vec![ + ApiSpec { + title: "CockroachDB Cluster Admin API", + version: "0.0.1", + description: "API for interacting with the Oxide control plane's \ + CockroachDB cluster", + boundary: ApiBoundary::Internal, + api_description: + cockroach_admin_api::cockroach_admin_api_mod::stub_api_description, + filename: "cockroach-admin.json", + extra_validation: None, + }, + ApiSpec { + title: "Bootstrap Agent API", + version: "0.0.1", + description: "Per-sled API for setup and teardown", + boundary: ApiBoundary::Internal, + api_description: + bootstrap_agent_api::bootstrap_agent_api_mod::stub_api_description, + filename: "bootstrap-agent.json", + extra_validation: None, + }, ApiSpec { title: "Internal DNS", version: "0.0.1", diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index a2b1fda0d7..424159e41d 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -138,6 +138,7 @@ impl TransientServer { bind_address: "[::1]:0".parse().unwrap(), request_body_max_bytes: 4 * 1024 * 1024, default_handler_task_mode: dropshot::HandlerTaskMode::Detached, + log_headers: vec![], }, ) .await?; diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index 19666e82c1..b3b7f37378 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -419,6 +419,7 @@ fn test_config( bind_address: "[::1]:0".to_string().parse().unwrap(), request_body_max_bytes: 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }; Ok((tmp_dir, config_storage, config_dropshot, logctx)) diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 1102094b61..157317cdad 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -23,6 +23,7 @@ russh = "0.43.0" russh-keys = "0.43.0" serde.workspace = true serde_json.workspace = true +sled-agent-types.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true 
trust-dns-resolver.workspace = true diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index e4bf61356c..76b759608c 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,7 +1,6 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; use chrono::Utc; -use omicron_sled_agent::rack_setup::config::SetupServiceConfig; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; @@ -9,6 +8,7 @@ use oxide_client::{Client, ClientImagesExt, ClientProjectsExt, ClientVpcsExt}; use reqwest::dns::Resolve; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::Url; +use sled_agent_types::rack_init::RackInitializeRequest; use std::net::IpAddr; use std::net::SocketAddr; use std::sync::Arc; @@ -73,7 +73,7 @@ impl Context { } } -fn rss_config() -> Result { +fn rss_config() -> Result { let path = "/opt/oxide/sled-agent/pkg/config-rss.toml"; let content = std::fs::read_to_string(&path).unwrap_or(RSS_CONFIG_STR.to_string()); @@ -81,7 +81,7 @@ fn rss_config() -> Result { .with_context(|| "parsing config-rss as TOML".to_string()) } -fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { +fn nexus_external_dns_name(config: &RackInitializeRequest) -> String { format!( "{}.sys.{}", config.recovery_silo.silo_name.as_str(), @@ -89,7 +89,7 @@ fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { ) } -fn external_dns_addr(config: &SetupServiceConfig) -> Result { +fn external_dns_addr(config: &RackInitializeRequest) -> Result { // From the RSS config, grab the first address from the configured services // IP pool as the DNS server's IP address. 
let dns_ip = config @@ -138,7 +138,7 @@ pub async fn nexus_addr() -> Result { } pub struct ClientParams { - rss_config: SetupServiceConfig, + rss_config: RackInitializeRequest, nexus_dns_name: String, resolver: Arc, proto: &'static str, diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 1354f30a0a..be8c84d7db 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -98,6 +98,7 @@ fn start_dropshot_server( bind_address: SocketAddr::V6(addr), request_body_max_bytes, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }; let http_server_starter = dropshot::HttpServerStarter::new( &dropshot, diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index d041c866b0..03b4bfb5a7 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -36,8 +36,8 @@ pub const PFEXEC: &str = "/usr/bin/pfexec"; pub struct CommandFailureInfo { command: String, status: std::process::ExitStatus, - stdout: String, - stderr: String, + pub stdout: String, + pub stderr: String, } impl std::fmt::Display for CommandFailureInfo { diff --git a/illumos-utils/src/route.rs b/illumos-utils/src/route.rs index ceff2b3d9e..12f74bfd78 100644 --- a/illumos-utils/src/route.rs +++ b/illumos-utils/src/route.rs @@ -107,4 +107,23 @@ impl Route { }; Ok(()) } + + pub fn add_bootstrap_route( + bootstrap_prefix: u16, + gz_bootstrap_addr: Ipv6Addr, + zone_vnic_name: &str, + ) -> Result<(), ExecutionError> { + let mut cmd = std::process::Command::new(PFEXEC); + let cmd = cmd.args(&[ + ROUTE, + "add", + "-inet6", + &format!("{bootstrap_prefix:x}::/16"), + &gz_bootstrap_addr.to_string(), + "-ifp", + zone_vnic_name, + ]); + execute(cmd)?; + Ok(()) + } } diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 64bbb91cbd..ea24a6f502 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -46,7 +46,7 @@ pub enum ServiceError { pub struct RunCommandError { zone: String, #[source] - err: 
crate::ExecutionError, + pub err: crate::ExecutionError, } /// Errors returned from [`RunningZone::boot`]. @@ -462,7 +462,7 @@ impl RunningZone { /// Note that the zone must already be configured to be booted. pub async fn boot(zone: InstalledZone) -> Result { // Boot the zone. - info!(zone.log, "Zone booting"); + info!(zone.log, "Booting {} zone", zone.name); Zones::boot(&zone.name).await?; @@ -480,6 +480,9 @@ impl RunningZone { zone: zone.name.to_string(), })?; + // TODO https://github.com/oxidecomputer/omicron/issues/1898: + // Remove all non-self assembling code + // If the zone is self-assembling, then SMF service(s) inside the zone // will be creating the listen address for the zone's service(s), // setting the appropriate ifprop MTU, and so on. The idea behind @@ -575,7 +578,6 @@ impl RunningZone { &self, address: Ipv6Addr, ) -> Result<(), EnsureAddressError> { - info!(self.inner.log, "Adding bootstrap address"); let vnic = self.inner.bootstrap_vnic.as_ref().ok_or_else(|| { EnsureAddressError::MissingBootstrapVnic { address: address.to_string(), @@ -735,7 +737,7 @@ impl RunningZone { gz_bootstrap_addr: Ipv6Addr, zone_vnic_name: &str, ) -> Result<(), RunCommandError> { - self.run_cmd([ + let args = [ "/usr/sbin/route", "add", "-inet6", @@ -743,7 +745,8 @@ impl RunningZone { &gz_bootstrap_addr.to_string(), "-ifp", zone_vnic_name, - ])?; + ]; + self.run_cmd(args)?; Ok(()) } @@ -775,7 +778,7 @@ impl RunningZone { /// Return a reference to the links for this zone. pub fn links(&self) -> &Vec { - &self.inner.links + &self.inner.links() } /// Return a mutable reference to the links for this zone. @@ -1010,6 +1013,11 @@ impl InstalledZone { pub fn root(&self) -> Utf8PathBuf { self.zonepath.path.join(Self::ROOT_FS_PATH) } + + /// Return a reference to the links for this zone. 
+ pub fn links(&self) -> &Vec { + &self.links + } } #[derive(Clone)] diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 7ba40af043..47cc84dce6 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -640,7 +640,7 @@ impl Zones { // // Does NOT check if the address already exists. #[allow(clippy::needless_lifetimes)] - fn create_address_internal<'a>( + pub fn create_address_internal<'a>( zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, diff --git a/installinator-api/src/lib.rs b/installinator-api/src/lib.rs index cd87643a66..3ff9acffd2 100644 --- a/installinator-api/src/lib.rs +++ b/installinator-api/src/lib.rs @@ -131,6 +131,7 @@ pub fn default_config(bind_address: std::net::SocketAddr) -> ConfigDropshot { // available in omicron. request_body_max_bytes: 4 * 1024 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], } } @@ -153,6 +154,7 @@ pub fn make_server_starter( // available in omicron. request_body_max_bytes: 4 * 1024 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }; let api = crate::installinator_api::api_description::()?; diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index cf5def01c5..fdd5dce428 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -434,6 +434,7 @@ mod test { bind_address: "[::1]:0".parse().unwrap(), request_body_max_bytes: 8 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }, ) .await diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs index 65c0070509..a9dee990b9 100644 --- a/nexus/db-model/src/dataset.rs +++ b/nexus/db-model/src/dataset.rs @@ -36,8 +36,8 @@ pub struct Dataset { pub pool_id: Uuid, - ip: ipv6::Ipv6Addr, - port: SqlU16, + ip: Option, + port: Option, pub kind: DatasetKind, pub size_used: Option, @@ -47,7 +47,7 @@ impl Dataset { pub fn new( id: Uuid, pool_id: Uuid, - addr: SocketAddrV6, 
+ addr: Option, kind: DatasetKind, ) -> Self { let size_used = match kind { @@ -59,19 +59,19 @@ impl Dataset { time_deleted: None, rcgen: Generation::new(), pool_id, - ip: addr.ip().into(), - port: addr.port().into(), + ip: addr.map(|addr| addr.ip().into()), + port: addr.map(|addr| addr.port().into()), kind, size_used, } } - pub fn address(&self) -> SocketAddrV6 { - self.address_with_port(self.port.into()) + pub fn address(&self) -> Option { + self.address_with_port(self.port?.into()) } - pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { - SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + pub fn address_with_port(&self, port: u16) -> Option { + Some(SocketAddrV6::new(Ipv6Addr::from(self.ip?), port, 0, 0)) } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 89ae6c18c5..dc57de9263 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1022,8 +1022,8 @@ table! { pool_id -> Uuid, - ip -> Inet, - port -> Int4, + ip -> Nullable, + port -> Nullable, kind -> crate::DatasetKindEnum, size_used -> Nullable, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 3e740590c5..cc34a3581c 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(82, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(83, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(83, "dataset-address-optional"), KnownVersion::new(82, "region-port"), KnownVersion::new(81, "add-nullable-filesystem-pool"), KnownVersion::new(80, "add-instance-id-to-migrations"), diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index 3f1df24e45..a08e346fe8 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -290,7 +290,7 @@ mod test { .dataset_insert_if_not_exists(Dataset::new( Uuid::new_v4(), zpool_id, - "[::1]:0".parse().unwrap(), + Some("[::1]:0".parse().unwrap()), DatasetKind::Crucible, )) .await @@ -323,7 +323,7 @@ mod test { .dataset_insert_if_not_exists(Dataset::new( dataset1.id(), zpool_id, - "[::1]:12345".parse().unwrap(), + Some("[::1]:12345".parse().unwrap()), DatasetKind::Cockroach, )) .await @@ -339,7 +339,7 @@ mod test { .dataset_upsert(Dataset::new( Uuid::new_v4(), zpool_id, - "[::1]:0".parse().unwrap(), + Some("[::1]:0".parse().unwrap()), DatasetKind::Cockroach, )) .await @@ -371,7 +371,7 @@ mod test { .dataset_insert_if_not_exists(Dataset::new( dataset1.id(), zpool_id, - "[::1]:12345".parse().unwrap(), + Some("[::1]:12345".parse().unwrap()), DatasetKind::Cockroach, )) .await diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 461e71d88a..2540790477 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -394,9 +394,9 @@ mod test { BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, Project, Rack, Region, SiloUser, SledBaseboard, SledSystemHardware, - SledUpdate, SshKey, VpcSubnet, Zpool, + SledUpdate, SshKey, Zpool, }; - use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; + use crate::db::queries::vpc_subnet::InsertVpcSubnetQuery; use 
chrono::{Duration, Utc}; use futures::stream; use futures::StreamExt; @@ -892,7 +892,8 @@ mod test { .collect() .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); let datasets = stream::iter(zpools) .map(|zpool| { @@ -1266,7 +1267,8 @@ mod test { .collect() .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); // 1 dataset per zpool stream::iter(zpool_ids.clone()) @@ -1365,7 +1367,8 @@ mod test { .collect() .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); // 1 dataset per zpool stream::iter(zpool_ids) @@ -1444,7 +1447,8 @@ mod test { physical_disk_id, ) .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); let dataset = Dataset::new( Uuid::new_v4(), zpool_id, @@ -1599,11 +1603,7 @@ mod test { "172.30.0.0/22".parse().unwrap(), "fd00::/64".parse().unwrap(), ); - let values = FilterConflictingVpcSubnetRangesQuery::new(subnet); - let query = - diesel::insert_into(db::schema::vpc_subnet::dsl::vpc_subnet) - .values(values) - .returning(VpcSubnet::as_returning()); + let query = InsertVpcSubnetQuery::new(subnet); println!("{}", diesel::debug_query(&query)); let explanation = query.explain_async(&conn).await.unwrap(); assert!( diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 6832665944..3b1c20c1df 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -496,7 +496,13 @@ impl DataStore { let dataset = self.dataset_get(region.dataset_id()).await?; - Ok(Some(SocketAddrV6::new(*dataset.address().ip(), port, 0, 0))) + let Some(address) = 
dataset.address() else { + return Err(Error::internal_error( + "Dataset for Crucible region does not know IP address", + )); + }; + + Ok(Some(SocketAddrV6::new(*address.ip(), port, 0, 0))) } pub async fn regions_missing_ports( diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index e632bce881..939929e665 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -47,6 +47,15 @@ impl DataStore { // owning this saga. diesel::insert_into(dsl::saga_node_event) .values(event.clone()) + // (saga_id, node_id, event_type) is the primary key, and this is + // expected to be idempotent. + // + // Consider the situation where a saga event gets recorded and + // committed, but there's a network reset which makes the client + // (us) believe that the event wasn't recorded. If we retry the + // event, we want to not fail with a conflict. + .on_conflict((dsl::saga_id, dsl::node_id, dsl::event_type)) + .do_nothing() .execute_async(&*self.pool_connection_unauthorized().await?) .await .map_err(|e| { @@ -58,6 +67,28 @@ impl DataStore { Ok(()) } + /// Update the state of a saga in the database. + /// + /// This function is meant to be called in a loop, so that in the event of + /// network flakiness, the operation is retried until successful. + /// + /// ## About conflicts + /// + /// Currently, if the value of `saga_state` in the database is the same as + /// the value we're trying to set it to, the update will be a no-op. That + /// is okay, because at any time only one SEC will update the saga. (For + /// now, we're implementing saga adoption only in cases where the original + /// SEC/Nexus has been expunged.) + /// + /// However, in the future, it may be possible for multiple SECs to try and + /// update the same saga, and overwrite each other's state. For example, + /// one SEC might try and update the state to Running while the other one + /// updates it to Done. 
That case would have to be carefully considered and + /// tested here, probably using the (currently unused) + /// `current_adopt_generation` field to enable optimistic concurrency. + /// + /// To reiterate, we are *not* considering the case where several SECs try + /// to update the same saga. That will be a future enhancement. pub async fn saga_update_state( &self, saga_id: steno::SagaId, @@ -182,6 +213,7 @@ impl DataStore { mod test { use super::*; use crate::db::datastore::test_utils::datastore_test; + use nexus_db_model::{SagaNodeEvent, SecId}; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; use rand::seq::SliceRandom; @@ -195,20 +227,8 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; let sec_id = db::SecId(uuid::Uuid::new_v4()); - - // Create a couple batches of sagas. - let new_running_db_saga = || { - let params = steno::SagaCreateParams { - id: steno::SagaId(Uuid::new_v4()), - name: steno::SagaName::new("test saga"), - dag: serde_json::value::Value::Null, - state: steno::SagaCachedState::Running, - }; - - db::model::saga_types::Saga::new(sec_id, params) - }; let mut inserted_sagas = (0..SQL_BATCH_SIZE.get() * 2) - .map(|_| new_running_db_saga()) + .map(|_| SagaTestContext::new(sec_id).new_running_db_saga()) .collect::>(); // Shuffle these sagas into a random order to check that the pagination @@ -263,20 +283,9 @@ mod test { let logctx = dev::test_setup_log("test_list_unfinished_nodes"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sec_id = db::SecId(uuid::Uuid::new_v4()); - let saga_id = steno::SagaId(Uuid::new_v4()); + let node_cx = SagaTestContext::new(SecId(Uuid::new_v4())); // Create a couple batches of saga events - let new_db_saga_nodes = - |node_id: u32, event_type: steno::SagaNodeEventType| { - let event = steno::SagaNodeEvent { - saga_id, - node_id: 
steno::SagaNodeId::from(node_id), - event_type, - }; - - db::model::saga_types::SagaNodeEvent::new(event, sec_id) - }; let mut inserted_nodes = (0..SQL_BATCH_SIZE.get() * 2) .flat_map(|i| { // This isn't an exhaustive list of event types, but gives us a @@ -284,9 +293,9 @@ mod test { // it's important to include a variety here. use steno::SagaNodeEventType::*; [ - new_db_saga_nodes(i, Started), - new_db_saga_nodes(i, UndoStarted), - new_db_saga_nodes(i, UndoFinished), + node_cx.new_db_event(i, Started), + node_cx.new_db_event(i, UndoStarted), + node_cx.new_db_event(i, UndoFinished), ] }) .collect::>(); @@ -311,7 +320,7 @@ mod test { let observed_nodes = datastore .saga_fetch_log_batched( &opctx, - nexus_db_model::saga_types::SagaId::from(saga_id), + nexus_db_model::saga_types::SagaId::from(node_cx.saga_id), ) .await .expect("Failed to list nodes of unfinished saga"); @@ -366,4 +375,138 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_create_event_idempotent() { + // Test setup + let logctx = dev::test_setup_log("test_create_event_idempotent"); + let mut db = test_setup_database(&logctx.log).await; + let (_, datastore) = datastore_test(&logctx, &db).await; + let node_cx = SagaTestContext::new(SecId(Uuid::new_v4())); + + // Generate a bunch of events. + let inserted_nodes = (0..2) + .flat_map(|i| { + use steno::SagaNodeEventType::*; + [ + node_cx.new_db_event(i, Started), + node_cx.new_db_event(i, UndoStarted), + node_cx.new_db_event(i, UndoFinished), + ] + }) + .collect::>(); + + // Insert the events into the database. + for node in &inserted_nodes { + datastore + .saga_create_event(node) + .await + .expect("inserting first node events"); + } + + // Insert the events again into the database and ensure that we don't + // get a conflict. 
+ for node in &inserted_nodes { + datastore + .saga_create_event(node) + .await + .expect("inserting duplicate node events"); + } + + // Test cleanup + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_update_state_idempotent() { + // Test setup + let logctx = dev::test_setup_log("test_update_state_idempotent"); + let mut db = test_setup_database(&logctx.log).await; + let (_, datastore) = datastore_test(&logctx, &db).await; + let node_cx = SagaTestContext::new(SecId(Uuid::new_v4())); + + // Create a saga in the running state. + let params = node_cx.new_running_db_saga(); + datastore + .saga_create(&params) + .await + .expect("creating saga in Running state"); + + // Attempt to update its state to Running, which is a no-op -- this + // should be idempotent, so expect success. + datastore + .saga_update_state( + node_cx.saga_id, + steno::SagaCachedState::Running, + node_cx.sec_id, + db::model::Generation::new(), + ) + .await + .expect("updating state to Running again"); + + // Update the state to Done. + datastore + .saga_update_state( + node_cx.saga_id, + steno::SagaCachedState::Done, + node_cx.sec_id, + db::model::Generation::new(), + ) + .await + .expect("updating state to Done"); + + // Attempt to update its state to Done again, which is a no-op -- this + // should be idempotent, so expect success. + datastore + .saga_update_state( + node_cx.saga_id, + steno::SagaCachedState::Done, + node_cx.sec_id, + db::model::Generation::new(), + ) + .await + .expect("updating state to Done again"); + + // Test cleanup + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + /// Helpers to create sagas. 
+ struct SagaTestContext { + saga_id: steno::SagaId, + sec_id: SecId, + } + + impl SagaTestContext { + fn new(sec_id: SecId) -> Self { + Self { saga_id: steno::SagaId(Uuid::new_v4()), sec_id } + } + + fn new_running_db_saga(&self) -> db::model::saga_types::Saga { + let params = steno::SagaCreateParams { + id: self.saga_id, + name: steno::SagaName::new("test saga"), + dag: serde_json::value::Value::Null, + state: steno::SagaCachedState::Running, + }; + + db::model::saga_types::Saga::new(self.sec_id, params) + } + + fn new_db_event( + &self, + node_id: u32, + event_type: steno::SagaNodeEventType, + ) -> SagaNodeEvent { + let event = steno::SagaNodeEvent { + saga_id: self.saga_id, + node_id: steno::SagaNodeId::from(node_id), + event_type, + }; + + SagaNodeEvent::new(event, self.sec_id) + } + } } diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 84f8e211a8..b13006aa95 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -1164,12 +1164,14 @@ impl DataStore { let mut targets: Vec = vec![]; - find_matching_rw_regions_in_volume( - &vcr, - dataset.address().ip(), - &mut targets, - ) - .map_err(|e| Error::internal_error(&e.to_string()))?; + let Some(address) = dataset.address() else { + return Err(Error::internal_error( + "Crucible Dataset missing IP address", + )); + }; + + find_matching_rw_regions_in_volume(&vcr, address.ip(), &mut targets) + .map_err(|e| Error::internal_error(&e.to_string()))?; Ok(targets) } diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index fdb9c82fb5..615ecdac93 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -40,8 +40,8 @@ use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::queries::vpc::InsertVpcQuery; use crate::db::queries::vpc::VniSearchIter; -use 
crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; -use crate::db::queries::vpc_subnet::SubnetError; +use crate::db::queries::vpc_subnet::InsertVpcSubnetError; +use crate::db::queries::vpc_subnet::InsertVpcSubnetQuery; use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; @@ -288,7 +288,7 @@ impl DataStore { self.vpc_create_subnet(opctx, &authz_vpc, vpc_subnet.clone()) .await .map(|_| ()) - .map_err(SubnetError::into_external) + .map_err(InsertVpcSubnetError::into_external) .or_else(|e| match e { Error::ObjectAlreadyExists { .. } => Ok(()), _ => Err(e), @@ -809,17 +809,17 @@ impl DataStore { opctx: &OpContext, authz_vpc: &authz::Vpc, subnet: VpcSubnet, - ) -> Result<(authz::VpcSubnet, VpcSubnet), SubnetError> { + ) -> Result<(authz::VpcSubnet, VpcSubnet), InsertVpcSubnetError> { opctx .authorize(authz::Action::CreateChild, authz_vpc) .await - .map_err(SubnetError::External)?; + .map_err(InsertVpcSubnetError::External)?; assert_eq!(authz_vpc.id(), subnet.vpc_id); let db_subnet = self.vpc_create_subnet_raw(subnet).await?; self.vpc_system_router_ensure_subnet_routes(opctx, authz_vpc.id()) .await - .map_err(SubnetError::External)?; + .map_err(InsertVpcSubnetError::External)?; Ok(( authz::VpcSubnet::new( authz_vpc.clone(), @@ -833,20 +833,16 @@ impl DataStore { pub(crate) async fn vpc_create_subnet_raw( &self, subnet: VpcSubnet, - ) -> Result { - use db::schema::vpc_subnet::dsl; - let values = FilterConflictingVpcSubnetRangesQuery::new(subnet.clone()); + ) -> Result { let conn = self .pool_connection_unauthorized() .await - .map_err(SubnetError::External)?; - - diesel::insert_into(dsl::vpc_subnet) - .values(values) - .returning(VpcSubnet::as_returning()) + .map_err(InsertVpcSubnetError::External)?; + let query = InsertVpcSubnetQuery::new(subnet.clone()); + query .get_result_async(&*conn) .await - .map_err(|e| SubnetError::from_diesel(e, &subnet)) + .map_err(|e| InsertVpcSubnetError::from_diesel(e, 
&subnet)) } pub async fn vpc_delete_subnet( diff --git a/nexus/db-queries/src/db/queries/vpc_subnet.rs b/nexus/db-queries/src/db/queries/vpc_subnet.rs index 72f2771a1e..8cbf4495ca 100644 --- a/nexus/db-queries/src/db/queries/vpc_subnet.rs +++ b/nexus/db-queries/src/db/queries/vpc_subnet.rs @@ -7,407 +7,322 @@ use crate::db; use crate::db::identity::Resource; use crate::db::model::VpcSubnet; -use chrono::{DateTime, Utc}; +use crate::db::schema::vpc_subnet::dsl; +use crate::db::DbConnection; use diesel::pg::Pg; use diesel::prelude::*; use diesel::query_builder::*; use diesel::result::Error as DieselError; use diesel::sql_types; +use ipnetwork::IpNetwork; use omicron_common::api::external; use ref_cast::RefCast; use uuid::Uuid; -/// Errors related to allocating VPC Subnets. -#[derive(Debug, PartialEq)] -pub enum SubnetError { - /// An IPv4 or IPv6 subnet overlaps with an existing VPC Subnet - OverlappingIpRange(ipnetwork::IpNetwork), - /// An other error - External(external::Error), -} - -impl SubnetError { - /// Construct a `SubnetError` from a Diesel error, catching the desired - /// cases and building useful errors. 
- pub fn from_diesel(e: DieselError, subnet: &VpcSubnet) -> Self { - use crate::db::error; - use diesel::result::DatabaseErrorKind; - const IPV4_OVERLAP_ERROR_MESSAGE: &str = - r#"null value in column "ipv4_block" violates not-null constraint"#; - const IPV6_OVERLAP_ERROR_MESSAGE: &str = - r#"null value in column "ipv6_block" violates not-null constraint"#; - const NAME_CONFLICT_CONSTRAINT: &str = "vpc_subnet_vpc_id_name_key"; - match e { - // Attempt to insert overlapping IPv4 subnet - DieselError::DatabaseError( - DatabaseErrorKind::NotNullViolation, - ref info, - ) if info.message() == IPV4_OVERLAP_ERROR_MESSAGE => { - SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::V4( - subnet.ipv4_block.0.into(), - )) - } - - // Attempt to insert overlapping IPv6 subnet - DieselError::DatabaseError( - DatabaseErrorKind::NotNullViolation, - ref info, - ) if info.message() == IPV6_OVERLAP_ERROR_MESSAGE => { - SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::V6( - subnet.ipv6_block.0.into(), - )) - } - - // Conflicting name for the subnet within a VPC - DieselError::DatabaseError( - DatabaseErrorKind::UniqueViolation, - ref info, - ) if info.constraint_name() == Some(NAME_CONFLICT_CONSTRAINT) => { - SubnetError::External(error::public_error_from_diesel( - e, - error::ErrorHandler::Conflict( - external::ResourceType::VpcSubnet, - subnet.identity().name.as_str(), - ), - )) - } - - // Any other error at all is a bug - _ => SubnetError::External(error::public_error_from_diesel( - e, - error::ErrorHandler::Server, - )), - } - } - - /// Convert into a public error - pub fn into_external(self) -> external::Error { - match self { - SubnetError::OverlappingIpRange(ip) => { - external::Error::invalid_request( - format!("IP address range '{}' conflicts with an existing subnet", ip).as_str() - ) - }, - SubnetError::External(e) => e, - } - } -} - -/// Generate a subquery that selects any overlapping address ranges of the same -/// type as the input IP subnet. 
+/// Query used to insert VPC Subnets. /// -/// This generates a query that, in full, looks like: +/// This query is used to idempotently insert a VPC Subnet. The query also looks +/// for any other subnets in the same VPC whose IP address blocks overlap. All +/// Subnets are required to have non-overlapping IP blocks. /// -/// ```sql -/// SELECT -/// -/// FROM -/// vpc_subnet -/// WHERE -/// vpc_id = AND -/// time_deleted IS NULL AND -/// inet_contains_or_equals(ipv*_block, ) -/// LIMIT 1 -/// ``` -/// -/// The input may be either an IPv4 or IPv6 subnet, and the corresponding column -/// is compared against. Note that the exact input IP range is returned on -/// purpose. -fn push_select_overlapping_ip_range<'a>( - mut out: AstPass<'_, 'a, Pg>, - vpc_id: &'a Uuid, - ip: &'a ipnetwork::IpNetwork, -) -> diesel::QueryResult<()> { - use crate::db::schema::vpc_subnet::dsl; - out.push_sql("SELECT "); - out.push_bind_param::(ip)?; - out.push_sql(" FROM "); - VPC_SUBNET_FROM_CLAUSE.walk_ast(out.reborrow())?; - out.push_sql(" WHERE "); - out.push_identifier(dsl::vpc_id::NAME)?; - out.push_sql(" = "); - out.push_bind_param::(vpc_id)?; - out.push_sql(" AND "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(" IS NULL AND inet_contains_or_equals("); - if ip.is_ipv4() { - out.push_identifier(dsl::ipv4_block::NAME)?; - } else { - out.push_identifier(dsl::ipv6_block::NAME)?; - } - out.push_sql(", "); - out.push_bind_param::(ip)?; - out.push_sql(")"); - Ok(()) -} - -/// Generate a subquery that returns NULL if there is an overlapping IP address -/// range of any type. +/// Note that this query is idempotent. If a record with the provided primary +/// key already exists, that record is returned exactly from the DB, without any +/// other modification or alteration. If callers care, they can inspect the +/// record to make sure it's what they expected, though that's usually a fraught +/// endeavor. 
/// -/// This specifically generates a query that looks like: +/// Here is the entire query: /// /// ```sql -/// SELECT NULLIF( -/// , -/// push_select_overlapping_ip_range(, ) -/// ) -/// ``` -/// -/// The `NULLIF` function returns NULL if those two expressions are equal, and -/// the first expression otherwise. That is, this returns NULL if there exists -/// an overlapping IP range already in the VPC Subnet table, and the requested -/// IP range if not. -fn push_null_if_overlapping_ip_range<'a>( - mut out: AstPass<'_, 'a, Pg>, - vpc_id: &'a Uuid, - ip: &'a ipnetwork::IpNetwork, -) -> diesel::QueryResult<()> { - out.push_sql("SELECT NULLIF("); - out.push_bind_param::(ip)?; - out.push_sql(", ("); - push_select_overlapping_ip_range(out.reborrow(), vpc_id, ip)?; - out.push_sql("))"); - Ok(()) -} - -/// Generate a CTE that can be used to insert a VPC Subnet, only if the IP -/// address ranges of that subnet don't overlap with existing Subnets in the -/// same VPC. -/// -/// In particular, this generates a CTE like so: -/// -/// ```sql -/// WITH candidate( -/// id, -/// name, -/// description, -/// time_created, -/// time_modified, -/// time_deleted, -/// vpc_id, -/// rcgen -/// ) AS (VALUES ( -/// , -/// , -/// , -/// , -/// , -/// NULL::TIMESTAMPTZ, -/// , -/// 0 -/// )), -/// candidate_ipv4(ipv4_block) AS ( -/// SELECT( -/// NULLIF( -/// , -/// ( -/// SELECT -/// ipv4_block -/// FROM -/// vpc_subnet -/// WHERE -/// vpc_id = AND -/// time_deleted IS NULL AND -/// inet_contains_or_equals(, ipv4_block) -/// LIMIT 1 +/// WITH +/// -- This CTE generates a casting error if any live records, other than _this_ +/// -- record, have overlapping IP blocks of either family. +/// overlap AS MATERIALIZED ( +/// SELECT +/// -- NOTE: This cast always fails, we just use _how_ it fails to +/// -- learn which IP block overlaps. The filter `id != ` below +/// -- means we're explicitly ignoring an existing, identical record. 
+/// -- So this cast is only run if there is another record in the same +/// -- VPC with an overlapping subnet, which is exactly the error case +/// -- we're trying to catch. +/// CAST( +/// IF( +/// inet_contains_or_equals(ipv4_block, ), +/// 'ipv4', +/// 'ipv6' /// ) -/// ) -/// ) -/// ), -/// candidate_ipv6(ipv6_block) AS ( -/// +/// AS BOOL +/// ) +/// FROM +/// vpc_subnet +/// WHERE +/// vpc_id = AND +/// time_deleted IS NULL AND +/// id != AND +/// ( +/// inet_contains_or_equals(ipv4_block, ) OR +/// inet_contains_or_equals(ipv6_block, ) +/// ) +/// ) +/// INSERT INTO +/// vpc_subnet +/// VALUES ( +/// /// ) -/// SELECT * -/// FROM candidate, candidate_ipv4, candidate_ipv6 +/// ON CONFLICT (id) +/// -- We use this "no-op" update to allow us to return the actual row from the +/// -- DB, either the existing or inserted one. +/// DO UPDATE SET id = id +/// RETURNING *; /// ``` -pub struct FilterConflictingVpcSubnetRangesQuery { - // TODO: update with random one if the insertion fails. +#[derive(Clone, Debug)] +pub struct InsertVpcSubnetQuery { + /// The subnet to insert subnet: VpcSubnet, - - // The following fields are derived from the previous field. This begs the - // question: "Why bother storing them at all?" - // - // Diesel's [`diesel::query_builder::ast_pass::AstPass:push_bind_param`] method - // requires that the provided value now live as long as the entire AstPass - // type. By storing these values in the struct, they'll live at least as - // long as the entire call to [`QueryFragment::walk_ast`]. - ipv4_block: ipnetwork::IpNetwork, - ipv6_block: ipnetwork::IpNetwork, + /// Owned values of the IP blocks to check, for inserting in internal pieces + /// of the query. + ipv4_block: IpNetwork, + ipv6_block: IpNetwork, } -impl FilterConflictingVpcSubnetRangesQuery { +impl InsertVpcSubnetQuery { + /// Construct a new query to insert the provided subnet. 
pub fn new(subnet: VpcSubnet) -> Self { - let ipv4_block = - ipnetwork::Ipv4Network::from(subnet.ipv4_block.0).into(); - let ipv6_block = - ipnetwork::Ipv6Network::from(subnet.ipv6_block.0).into(); + let ipv4_block = IpNetwork::V4(subnet.ipv4_block.0.into()); + let ipv6_block = IpNetwork::V6(subnet.ipv6_block.0.into()); Self { subnet, ipv4_block, ipv6_block } } } -impl QueryId for FilterConflictingVpcSubnetRangesQuery { +impl QueryId for InsertVpcSubnetQuery { type QueryId = (); const HAS_STATIC_QUERY_ID: bool = false; } -impl QueryFragment for FilterConflictingVpcSubnetRangesQuery { +impl QueryFragment for InsertVpcSubnetQuery { fn walk_ast<'a>( &'a self, mut out: AstPass<'_, 'a, Pg>, ) -> diesel::QueryResult<()> { - use db::schema::vpc_subnet::dsl; - - // Create the base `candidate` from values provided that need no - // verificiation. - out.push_sql("SELECT * FROM (WITH candidate("); - out.push_identifier(dsl::id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::name::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::description::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_created::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_modified::NAME)?; + out.push_sql("WITH overlap AS MATERIALIZED (SELECT CAST(IF(inet_contains_or_equals("); + out.push_identifier(dsl::ipv4_block::NAME)?; out.push_sql(", "); - out.push_identifier(dsl::time_deleted::NAME)?; + out.push_bind_param::(&self.ipv4_block)?; + out.push_sql("), "); + out.push_bind_param::( + InsertVpcSubnetError::OVERLAPPING_IPV4_BLOCK_SENTINEL, + )?; out.push_sql(", "); + out.push_bind_param::( + InsertVpcSubnetError::OVERLAPPING_IPV6_BLOCK_SENTINEL, + )?; + out.push_sql(") AS BOOL) FROM "); + VPC_SUBNET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); out.push_identifier(dsl::vpc_id::NAME)?; - out.push_sql(","); - out.push_identifier(dsl::rcgen::NAME)?; - out.push_sql(") AS (VALUES ("); + out.push_sql(" = "); + 
out.push_bind_param::(&self.subnet.vpc_id)?; + out.push_sql(" AND "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(" IS NULL AND "); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(" != "); out.push_bind_param::(&self.subnet.identity.id)?; + out.push_sql(" AND (inet_contains_or_equals("); + out.push_identifier(dsl::ipv4_block::NAME)?; out.push_sql(", "); - out.push_bind_param::( - db::model::Name::ref_cast(self.subnet.name()), - )?; + out.push_bind_param::(&self.ipv4_block)?; + out.push_sql(") OR inet_contains_or_equals("); + out.push_identifier(dsl::ipv6_block::NAME)?; out.push_sql(", "); - out.push_bind_param::( + out.push_bind_param::(&self.ipv6_block)?; + + out.push_sql("))) INSERT INTO "); + VPC_SUBNET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql("VALUES ("); + out.push_bind_param::(&self.subnet.identity.id)?; + out.push_sql(", "); + out.push_bind_param::(db::model::Name::ref_cast( + self.subnet.name(), + ))?; + out.push_sql(", "); + out.push_bind_param::( &self.subnet.identity.description, )?; out.push_sql(", "); - out.push_bind_param::>( + out.push_bind_param::( &self.subnet.identity.time_created, )?; out.push_sql(", "); - out.push_bind_param::>( + out.push_bind_param::( &self.subnet.identity.time_modified, )?; out.push_sql(", "); - out.push_sql("NULL::TIMESTAMPTZ, "); - out.push_bind_param::(&self.subnet.vpc_id)?; - out.push_sql(", 0)), "); - - // Push the candidate IPv4 and IPv6 selection subqueries, which return - // NULL if the corresponding address range overlaps. 
- out.push_sql("candidate_ipv4("); - out.push_identifier(dsl::ipv4_block::NAME)?; - out.push_sql(") AS ("); - push_null_if_overlapping_ip_range( - out.reborrow(), - &self.subnet.vpc_id, - &self.ipv4_block, + out.push_bind_param::, _>( + &self.subnet.identity.time_deleted, )?; - - out.push_sql("), candidate_ipv6("); - out.push_identifier(dsl::ipv6_block::NAME)?; - out.push_sql(") AS ("); - push_null_if_overlapping_ip_range( - out.reborrow(), - &self.subnet.vpc_id, - &self.ipv6_block, + out.push_sql(", "); + out.push_bind_param::(&self.subnet.vpc_id)?; + out.push_sql(", "); + out.push_bind_param::(&self.subnet.rcgen)?; + out.push_sql(", "); + out.push_bind_param::(&self.ipv4_block)?; + out.push_sql(", "); + out.push_bind_param::(&self.ipv6_block)?; + out.push_sql(", "); + out.push_bind_param::, _>( + &self.subnet.custom_router_id, )?; - out.push_sql(") "); + out.push_sql(") ON CONFLICT ("); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(") DO UPDATE SET "); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.subnet.identity.id)?; + out.push_sql(" RETURNING *"); - // Select the entire set of candidate columns. - out.push_sql( - "SELECT * FROM candidate, candidate_ipv4, candidate_ipv6)", - ); Ok(()) } } -impl Insertable - for FilterConflictingVpcSubnetRangesQuery -{ - type Values = FilterConflictingVpcSubnetRangesQueryValues; +type FromClause = + diesel::internal::table_macro::StaticQueryFragmentInstance; +type VpcSubnetFromClause = FromClause; +const VPC_SUBNET_FROM_CLAUSE: VpcSubnetFromClause = VpcSubnetFromClause::new(); - fn values(self) -> Self::Values { - FilterConflictingVpcSubnetRangesQueryValues(self) - } +impl RunQueryDsl for InsertVpcSubnetQuery {} +impl Query for InsertVpcSubnetQuery { + type SqlType = <>::SelectExpression as diesel::Expression>::SqlType; } -/// Used to allow inserting the result of the -/// `FilterConflictingVpcSubnetRangesQuery`, as in -/// `diesel::insert_into(foo).values(_). 
Should not be used directly. -pub struct FilterConflictingVpcSubnetRangesQueryValues( - pub FilterConflictingVpcSubnetRangesQuery, -); - -impl QueryId for FilterConflictingVpcSubnetRangesQueryValues { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; +/// Errors related to inserting VPC Subnets. +#[derive(Debug, PartialEq)] +pub enum InsertVpcSubnetError { + /// The IPv4 or IPv6 subnet overlaps with an existing VPC Subnet + OverlappingIpRange(oxnet::IpNet), + /// Any other error + External(external::Error), } -impl diesel::insertable::CanInsertInSingleQuery - for FilterConflictingVpcSubnetRangesQueryValues -{ - fn rows_to_insert(&self) -> Option { - Some(1) +impl InsertVpcSubnetError { + const OVERLAPPING_IPV4_BLOCK_SENTINEL: &'static str = "ipv4"; + const OVERLAPPING_IPV4_BLOCK_ERROR_MESSAGE: &'static str = + r#"could not parse "ipv4" as type bool: invalid bool value"#; + const OVERLAPPING_IPV6_BLOCK_SENTINEL: &'static str = "ipv6"; + const OVERLAPPING_IPV6_BLOCK_ERROR_MESSAGE: &'static str = + r#"could not parse "ipv6" as type bool: invalid bool value"#; + const NAME_CONFLICT_CONSTRAINT: &'static str = "vpc_subnet_vpc_id_name_key"; + + /// Construct an `InsertVpcSubnetError` from a Diesel error, catching the desired + /// cases and building useful errors. 
+ pub fn from_diesel(e: DieselError, subnet: &VpcSubnet) -> Self { + use crate::db::error; + use diesel::result::DatabaseErrorKind; + match e { + // Attempt to insert an overlapping IPv4 subnet + DieselError::DatabaseError( + DatabaseErrorKind::Unknown, + ref info, + ) if info.message() + == Self::OVERLAPPING_IPV4_BLOCK_ERROR_MESSAGE => + { + InsertVpcSubnetError::OverlappingIpRange( + subnet.ipv4_block.0.into(), + ) + } + + // Attempt to insert an overlapping IPv6 subnet + DieselError::DatabaseError( + DatabaseErrorKind::Unknown, + ref info, + ) if info.message() + == Self::OVERLAPPING_IPV6_BLOCK_ERROR_MESSAGE => + { + InsertVpcSubnetError::OverlappingIpRange( + subnet.ipv6_block.0.into(), + ) + } + + // Conflicting name for the subnet within a VPC + DieselError::DatabaseError( + DatabaseErrorKind::UniqueViolation, + ref info, + ) if info.constraint_name() + == Some(Self::NAME_CONFLICT_CONSTRAINT) => + { + InsertVpcSubnetError::External(error::public_error_from_diesel( + e, + error::ErrorHandler::Conflict( + external::ResourceType::VpcSubnet, + subnet.identity().name.as_str(), + ), + )) + } + + // Any other error at all is a bug + _ => InsertVpcSubnetError::External( + error::public_error_from_diesel(e, error::ErrorHandler::Server), + ), + } } -} -impl QueryFragment for FilterConflictingVpcSubnetRangesQueryValues { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - use db::schema::vpc_subnet::dsl; - out.push_sql("("); - out.push_identifier(dsl::id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::name::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::description::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_created::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_modified::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::vpc_id::NAME)?; - out.push_sql(", "); - 
out.push_identifier(dsl::rcgen::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::ipv4_block::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::ipv6_block::NAME)?; - out.push_sql(") "); - self.0.walk_ast(out) + /// Convert into a public error + pub fn into_external(self) -> external::Error { + match self { + InsertVpcSubnetError::OverlappingIpRange(ip) => { + external::Error::invalid_request( + format!( + "IP address range '{}' \ + conflicts with an existing subnet", + ip, + ) + .as_str(), + ) + } + InsertVpcSubnetError::External(e) => e, + } } } -type FromClause = - diesel::internal::table_macro::StaticQueryFragmentInstance; -type VpcSubnetFromClause = FromClause; -const VPC_SUBNET_FROM_CLAUSE: VpcSubnetFromClause = VpcSubnetFromClause::new(); - #[cfg(test)] mod test { - use super::SubnetError; + use super::InsertVpcSubnetError; + use super::InsertVpcSubnetQuery; + use crate::db::explain::ExplainableAsync as _; use crate::db::model::VpcSubnet; - use ipnetwork::IpNetwork; use nexus_test_utils::db::test_setup_database; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Name; use omicron_test_utils::dev; use std::convert::TryInto; use std::sync::Arc; - use uuid::Uuid; #[tokio::test] - async fn test_filter_conflicting_vpc_subnet_ranges_query() { + async fn explain_insert_query() { + let ipv4_block = "172.30.0.0/24".parse().unwrap(); + let ipv6_block = "fd12:3456:7890::/64".parse().unwrap(); + let name = "a-name".to_string().try_into().unwrap(); + let description = "some description".to_string(); + let identity = IdentityMetadataCreateParams { name, description }; + let vpc_id = "d402369d-c9ec-c5ad-9138-9fbee732d53e".parse().unwrap(); + let subnet_id = "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); + let row = + VpcSubnet::new(subnet_id, vpc_id, identity, ipv4_block, ipv6_block); + let query = InsertVpcSubnetQuery::new(row); + let logctx = dev::test_setup_log("explain_insert_query"); + let log = 
logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); + let conn = pool.pool().get().await.unwrap(); + let explain = query.explain_async(&conn).await.unwrap(); + println!("{explain}"); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_insert_vpc_subnet_query() { let make_id = |name: &Name, description: &str| IdentityMetadataCreateParams { name: name.clone(), @@ -427,12 +342,13 @@ mod test { let subnet_id = "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); let other_subnet_id = "695debcc-e197-447d-ffb2-976150a7b7cf".parse().unwrap(); + let other_other_subnet_id = + "ddbdc2b7-d22f-40d9-98df-fef5da151e0d".parse().unwrap(); let row = VpcSubnet::new(subnet_id, vpc_id, identity, ipv4_block, ipv6_block); // Setup the test database - let logctx = - dev::test_setup_log("test_filter_conflicting_vpc_subnet_ranges"); + let logctx = dev::test_setup_log("test_insert_vpc_subnet_query"); let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; @@ -445,7 +361,10 @@ mod test { // We should be able to insert anything into an empty table. assert!( - matches!(db_datastore.vpc_create_subnet_raw(row).await, Ok(_)), + matches!( + db_datastore.vpc_create_subnet_raw(row.clone()).await, + Ok(_) + ), "Should be able to insert VPC subnet into empty table" ); @@ -460,10 +379,13 @@ mod test { ); assert!( matches!( - db_datastore.vpc_create_subnet_raw(new_row).await, - Err(SubnetError::OverlappingIpRange(IpNetwork::V4(_))) + db_datastore.vpc_create_subnet_raw(new_row.clone()).await, + Err(InsertVpcSubnetError::OverlappingIpRange { .. 
}), ), - "Should not be able to insert new VPC subnet with the same IPv4 and IPv6 ranges" + "Should not be able to insert new VPC subnet with the \ + same IPv4 and IPv6 ranges,\n\ + first row: {row:?}\n\ + new row: {new_row:?}", ); // We should be able to insert data with the same ranges, if we change @@ -483,7 +405,7 @@ mod test { // We shouldn't be able to insert a subnet if we change only the // IPv4 or IPv6 block. They must _both_ be non-overlapping. let new_row = VpcSubnet::new( - other_subnet_id, + other_other_subnet_id, vpc_id, make_id(&other_name, &description), other_ipv4_block, @@ -495,11 +417,11 @@ mod test { .expect_err("Should not be able to insert VPC Subnet with overlapping IPv6 range"); assert_eq!( err, - SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::from(oxnet::IpNet::from(ipv6_block))), - "SubnetError variant should include the exact IP range that overlaps" + InsertVpcSubnetError::OverlappingIpRange(ipv6_block.into()), + "InsertError variant should indicate an IP block overlaps" ); let new_row = VpcSubnet::new( - other_subnet_id, + other_other_subnet_id, vpc_id, make_id(&other_name, &description), ipv4_block, @@ -511,14 +433,14 @@ mod test { .expect_err("Should not be able to insert VPC Subnet with overlapping IPv4 range"); assert_eq!( err, - SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::from(oxnet::IpNet::from(ipv4_block))), - "SubnetError variant should include the exact IP range that overlaps" + InsertVpcSubnetError::OverlappingIpRange(ipv4_block.into()), + "InsertError variant should indicate an IP block overlaps" ); // We should get an _external error_ if the IP address ranges are OK, // but the name conflicts. 
let new_row = VpcSubnet::new( - other_subnet_id, + other_other_subnet_id, vpc_id, make_id(&name, &description), other_ipv4_block, @@ -527,7 +449,7 @@ mod test { assert!( matches!( db_datastore.vpc_create_subnet_raw(new_row).await, - Err(SubnetError::External(_)) + Err(InsertVpcSubnetError::External(_)) ), "Should get an error inserting a VPC Subnet with unique IP ranges, but the same name" ); @@ -535,7 +457,7 @@ mod test { // We should be able to insert the row if _both ranges_ are different, // and the name is unique as well. let new_row = VpcSubnet::new( - Uuid::new_v4(), + other_other_subnet_id, vpc_id, make_id(&other_name, &description), other_ipv4_block, @@ -549,4 +471,104 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + // Helper to verify equality of rows, handling timestamp precision. + fn assert_rows_eq(left: &VpcSubnet, right: &VpcSubnet) { + assert_eq!( + left.identity.id, right.identity.id, + "VPC Subnet rows should be equal" + ); + assert_eq!( + left.identity.name, right.identity.name, + "VPC Subnet rows should be equal" + ); + assert_eq!( + left.identity.description, right.identity.description, + "VPC Subnet rows should be equal" + ); + // Timestamps in CRDB have microsecond precision, so ensure we're + // within 1000 nanos. 
+ assert!( + (left.identity.time_modified - right.identity.time_modified) + .num_nanoseconds() + .unwrap() + < 1_000, + "VPC Subnet rows should be equal", + ); + assert!( + (left.identity.time_created - right.identity.time_created) + .num_nanoseconds() + .unwrap() + < 1_000, + "VPC Subnet rows should be equal", + ); + assert_eq!( + left.identity.time_deleted, right.identity.time_deleted, + "VPC Subnet rows should be equal", + ); + assert_eq!( + left.vpc_id, right.vpc_id, + "VPC Subnet rows should be equal" + ); + assert_eq!(left.rcgen, right.rcgen, "VPC Subnet rows should be equal"); + assert_eq!( + left.ipv4_block, right.ipv4_block, + "VPC Subnet rows should be equal" + ); + assert_eq!( + left.ipv6_block, right.ipv6_block, + "VPC Subnet rows should be equal" + ); + assert_eq!( + left.custom_router_id, right.custom_router_id, + "VPC Subnet rows should be equal" + ); + } + + // Regression test for https://github.com/oxidecomputer/omicron/issues/6069. + #[tokio::test] + async fn test_insert_vpc_subnet_query_is_idempotent() { + let ipv4_block = "172.30.0.0/24".parse().unwrap(); + let ipv6_block = "fd12:3456:7890::/64".parse().unwrap(); + let name = "a-name".to_string().try_into().unwrap(); + let description = "some description".to_string(); + let identity = IdentityMetadataCreateParams { name, description }; + let vpc_id = "d402369d-c9ec-c5ad-9138-9fbee732d53e".parse().unwrap(); + let subnet_id = "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); + let row = + VpcSubnet::new(subnet_id, vpc_id, identity, ipv4_block, ipv6_block); + + // Setup the test database + let logctx = + dev::test_setup_log("test_insert_vpc_subnet_query_is_idempotent"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); + let db_datastore = Arc::new( + crate::db::DataStore::new(&log, Arc::clone(&pool), None) + .await + .unwrap(), + ); + 
+ // We should be able to insert anything into an empty table. + let inserted = db_datastore + .vpc_create_subnet_raw(row.clone()) + .await + .expect("Should be able to insert VPC subnet into empty table"); + assert_rows_eq(&inserted, &row); + + // We should be able to insert the exact same row again. The IP ranges + // overlap, but the ID is also identical, which should not be an error. + // This is important for saga idempotency. + let inserted = db_datastore + .vpc_create_subnet_raw(row.clone()) + .await + .expect( + "Must be able to insert the exact same VPC subnet more than once", + ); + assert_rows_eq(&inserted, &row); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/sec_store.rs b/nexus/db-queries/src/db/sec_store.rs index 72de02ff54..0dcc3aa717 100644 --- a/nexus/db-queries/src/db/sec_store.rs +++ b/nexus/db-queries/src/db/sec_store.rs @@ -8,7 +8,8 @@ use crate::db::{self, model::Generation}; use anyhow::Context; use async_trait::async_trait; use dropshot::HttpError; -use futures::TryFutureExt; +use futures::{Future, TryFutureExt}; +use omicron_common::api::external; use omicron_common::backoff; use slog::Logger; use std::fmt; @@ -66,78 +67,114 @@ impl steno::SecStore for CockroachDbSecStore { debug!(&log, "recording saga event"); let our_event = db::saga_types::SagaNodeEvent::new(event, self.sec_id); - backoff::retry_notify_ext( - // This is an internal service query to CockroachDB. - backoff::retry_policy_internal_service(), + // Add retries for this operation. saga_create_event is internally + // idempotent, so we can retry indefinitely until the event has been + // durably recorded. + backoff_saga_operation( + &log, || { - // In general, there are some kinds of database errors that are - // temporary/server errors (e.g. network failures), and some - // that are permanent/client errors (e.g. conflict during - // insertion). The permanent ones would require operator - // intervention to fix. 
- // - // However, there is no way to bubble up errors here, and for - // good reason: it is inherent to the nature of sagas that - // progress is durably recorded. So within *this* code there is - // no option but to retry forever. (Below, however, we do mark - // errors that likely require operator intervention.) - // - // At a higher level, callers should plan for the fact that - // record_event (and, so, saga execution) could potentially loop - // indefinitely while the datastore (or other dependent - // services) are down. self.datastore .saga_create_event(&our_event) .map_err(backoff::BackoffError::transient) }, - move |error, call_count, total_duration| { - let http_error = HttpError::from(error.clone()); - if http_error.status_code.is_client_error() { - error!( - &log, - "client error while recording saga event (likely \ - requires operator intervention), retrying anyway"; - "error" => &error, - "call_count" => call_count, - "total_duration" => ?total_duration, - ); - } else if total_duration > Duration::from_secs(20) { - warn!( - &log, - "server error while recording saga event, retrying"; - "error" => &error, - "call_count" => call_count, - "total_duration" => ?total_duration, - ); - } else { - info!( - &log, - "server error while recording saga event, retrying"; - "error" => &error, - "call_count" => call_count, - "total_duration" => ?total_duration, - ); - } - }, + "recording saga event", ) .await - .expect("the above backoff retries forever") } async fn saga_update(&self, id: SagaId, update: steno::SagaCachedState) { // TODO-robustness We should track the current generation of the saga // and use it. We'll know this either from when it was created or when // it was recovered. 
- info!(&self.log, "updating state"; + + let log = self.log.new(o!( "saga_id" => id.to_string(), - "new_state" => update.to_string() - ); + "new_state" => update.to_string(), + )); - // TODO-robustness This should be wrapped with a retry loop rather than - // unwrapping the result. See omicron#2416. - self.datastore - .saga_update_state(id, update, self.sec_id, Generation::new()) - .await - .unwrap(); + info!(&log, "updating state"); + + // Add retries for this operation. saga_update_state is internally + // idempotent, so we can retry indefinitely until the event has been + // durably recorded. (But see the note in saga_update_state about how + // idempotence is enough for now, but may not be in the future.) + backoff_saga_operation( + &log, + || { + self.datastore + .saga_update_state( + id, + update, + self.sec_id, + Generation::new(), + ) + .map_err(backoff::BackoffError::transient) + }, + "updating saga state", + ) + .await } } + +/// Implements backoff retry logic for saga operations. +/// +/// In general, there are some kinds of database errors that are +/// temporary/server errors (e.g. network failures), and some that are +/// permanent/client errors (e.g. conflict during insertion). The permanent +/// ones would require operator intervention to fix. +/// +/// However, there is no way to bubble up errors from the SEC store, and for +/// good reason: it is inherent to the nature of sagas that progress is durably +/// recorded. So inside this code there is no option but to retry forever. +/// (Below, however, we do mark errors that likely require operator +/// intervention.) +/// +/// At a higher level, callers should plan for the fact saga execution could +/// potentially loop indefinitely while the datastore (or other dependent +/// services) are down. +async fn backoff_saga_operation(log: &Logger, op: F, description: &str) +where + F: Fn() -> Fut, + Fut: Future>>, +{ + backoff::retry_notify_ext( + // This is an internal service query to CockroachDB. 
+ backoff::retry_policy_internal_service(), + op, + move |error, call_count, total_duration| { + let http_error = HttpError::from(error.clone()); + if http_error.status_code.is_client_error() { + error!( + &log, + "client error while {description} (likely \ + requires operator intervention), retrying anyway"; + "error" => &error, + "call_count" => call_count, + "total_duration" => ?total_duration, + ); + } else if total_duration > WARN_DURATION { + warn!( + &log, + "server error while {description}, retrying"; + "error" => &error, + "call_count" => call_count, + "total_duration" => ?total_duration, + ); + } else { + info!( + &log, + "server error while {description}, retrying"; + "error" => &error, + "call_count" => call_count, + "total_duration" => ?total_duration, + ); + } + }, + ) + .await + .expect("the above backoff retries forever") +} + +/// Threshold at which logs about server errors during retries switch from INFO +/// to WARN. +const WARN_DURATION: Duration = Duration::from_secs(20); diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 51ac45c9df..139c94c53f 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -66,7 +66,7 @@ pub(crate) async fn ensure_dataset_records_exist( let dataset = Dataset::new( id.into_untyped_uuid(), pool_id.into_untyped_uuid(), - address, + Some(address), kind.into(), ); let maybe_inserted = datastore diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 3e79c42978..4a5d792c80 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -928,6 +928,7 @@ pub mod test { bind_address: "[::1]:0".parse().unwrap(), request_body_max_bytes: 8 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }, ) .await diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs 
index b0f13ac986..fbfc5c5af2 100644 --- a/nexus/src/app/background/tasks/lookup_region_port.rs +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -91,26 +91,33 @@ impl BackgroundTask for LookupRegionPort { } }; - let returned_region = match get_region_from_agent( - &dataset.address(), - region.id(), - ) - .await - { - Ok(returned_region) => returned_region, + let Some(dataset_addr) = dataset.address() else { + let s = format!( + "Missing dataset address for dataset: {dataset_id}" + ); + error!(log, "{s}"); + status.errors.push(s); + continue; + }; - Err(e) => { - let s = format!( - "could not get region {} from agent: {e}", - region.id(), - ); + let returned_region = + match get_region_from_agent(&dataset_addr, region.id()) + .await + { + Ok(returned_region) => returned_region, - error!(log, "{s}"); - status.errors.push(s); + Err(e) => { + let s = format!( + "could not get region {} from agent: {e}", + region.id(), + ); - continue; - } - }; + error!(log, "{s}"); + status.errors.push(s); + + continue; + } + }; match self .datastore diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index caa65255e5..72a5c80baf 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -69,11 +69,17 @@ impl super::Nexus { fn crucible_agent_client_for_dataset( &self, dataset: &db::model::Dataset, - ) -> CrucibleAgentClient { - CrucibleAgentClient::new_with_client( - &format!("http://{}", dataset.address()), + ) -> Result { + let Some(addr) = dataset.address() else { + return Err(Error::internal_error( + "Missing crucible dataset address", + )); + }; + + Ok(CrucibleAgentClient::new_with_client( + &format!("http://{}", addr), self.reqwest_client.clone(), - ) + )) } /// Return if the Crucible agent is expected to be there and answer Nexus: @@ -147,7 +153,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region: &db::model::Region, ) -> Result { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = 
self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let Ok(extent_count) = u32::try_from(region.extent_count()) else { @@ -261,7 +267,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region_id: Uuid, ) -> Result, Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -303,7 +309,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region_id: Uuid, ) -> Result { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -343,7 +349,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region_id: Uuid, ) -> Result<(), Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -386,7 +392,7 @@ impl super::Nexus { region_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -435,7 +441,7 @@ impl super::Nexus { region_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 422540c0b8..13b30fd47a 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -145,7 +145,7 @@ impl super::Nexus { db::model::Dataset::new( dataset.dataset_id, dataset.zpool_id, - dataset.request.address, + 
Some(dataset.request.address), dataset.request.kind.into(), ) }) diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index bdccd7f79b..c350534617 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -498,9 +498,17 @@ async fn sdc_regions_ensure( .map(|(dataset, region)| { dataset .address_with_port(region.port_number) - .to_string() + .ok_or_else(|| { + ActionError::action_failed( + Error::internal_error(&format!( + "missing IP address for dataset {}", + dataset.id(), + )), + ) + }) + .map(|addr| addr.to_string()) }) - .collect(), + .collect::, ActionError>>()?, lossy: false, flush_timeout: None, diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index a4ba10775a..1297158b24 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -534,12 +534,13 @@ async fn srrs_replace_region_in_volume( "ensured_dataset_and_region", )?; - let new_region_address = SocketAddrV6::new( - *new_dataset.address().ip(), - ensured_region.port_number, - 0, - 0, - ); + let Some(new_address) = new_dataset.address() else { + return Err(ActionError::action_failed(Error::internal_error( + "Dataset missing IP address", + ))); + }; + let new_region_address = + SocketAddrV6::new(*new_address.ip(), ensured_region.port_number, 0, 0); // If this node is rerun, the forward action will have overwritten // db_region's volume id, so get the cached copy. 
@@ -611,12 +612,11 @@ async fn srrs_replace_region_in_volume_undo( "ensured_dataset_and_region", )?; - let new_region_address = SocketAddrV6::new( - *new_dataset.address().ip(), - ensured_region.port_number, - 0, - 0, - ); + let Some(new_address) = new_dataset.address() else { + anyhow::bail!("Dataset missing IP address"); + }; + let new_region_address = + SocketAddrV6::new(*new_address.ip(), ensured_region.port_number, 0, 0); // The forward action will have overwritten db_region's volume id, so get // the cached copy. @@ -894,25 +894,25 @@ pub(crate) mod test { Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:101::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:101::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:102::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:102::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:103::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:103::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:104::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:104::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), ]; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 9e665a1de1..5a8313229a 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -411,9 +411,17 @@ async fn ssc_regions_ensure( .map(|(dataset, region)| { dataset .address_with_port(region.port_number) - .to_string() + .ok_or_else(|| { + ActionError::action_failed( + Error::internal_error(&format!( + "missing IP address for dataset {}", + dataset.id(), + )), + ) + }) + .map(|addr| addr.to_string()) }) - .collect(), + .collect::, ActionError>>()?, lossy: false, flush_timeout: None, @@ -1232,8 +1240,14 @@ async fn ssc_start_running_snapshot( let mut map: 
BTreeMap = BTreeMap::new(); for (dataset, region) in datasets_and_regions { + let Some(dataset_addr) = dataset.address() else { + return Err(ActionError::action_failed(Error::internal_error( + &format!("Missing IP address for dataset {}", dataset.id(),), + ))); + }; + // Create a Crucible agent client - let url = format!("http://{}", dataset.address()); + let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); info!( @@ -1299,11 +1313,21 @@ async fn ssc_start_running_snapshot( // Map from the region to the snapshot let region_addr = format!( "{}", - dataset.address_with_port(crucible_region.port_number) + SocketAddrV6::new( + *dataset_addr.ip(), + crucible_region.port_number, + 0, + 0 + ) ); let snapshot_addr = format!( "{}", - dataset.address_with_port(crucible_running_snapshot.port_number) + SocketAddrV6::new( + *dataset_addr.ip(), + crucible_running_snapshot.port_number, + 0, + 0 + ) ); info!(log, "map {} to {}", region_addr, snapshot_addr); map.insert(region_addr, snapshot_addr.clone()); diff --git a/nexus/src/app/sagas/vpc_create.rs b/nexus/src/app/sagas/vpc_create.rs index a34b25ceb7..832ca64ace 100644 --- a/nexus/src/app/sagas/vpc_create.rs +++ b/nexus/src/app/sagas/vpc_create.rs @@ -8,7 +8,7 @@ use super::NexusSaga; use super::ACTION_GENERATE_ID; use crate::app::sagas::declare_saga_actions; use crate::external_api::params; -use nexus_db_queries::db::queries::vpc_subnet::SubnetError; +use nexus_db_queries::db::queries::vpc_subnet::InsertVpcSubnetError; use nexus_db_queries::{authn, authz, db}; use nexus_defaults as defaults; use omicron_common::api::external; @@ -368,7 +368,7 @@ async fn svc_create_subnet( .vpc_create_subnet(&opctx, &authz_vpc, subnet) .await .map_err(|err| match err { - SubnetError::OverlappingIpRange(ip) => { + InsertVpcSubnetError::OverlappingIpRange(ip) => { let ipv4_block = &defaults::DEFAULT_VPC_SUBNET_IPV4_BLOCK; let log = sagactx.user_data().log(); error!( @@ -388,7 +388,7 @@ async fn 
svc_create_subnet( found overlapping IP address ranges", ) } - SubnetError::External(e) => e, + InsertVpcSubnetError::External(e) => e, }) .map_err(ActionError::action_failed) } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0165b2d261..6e21470368 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -306,7 +306,8 @@ impl super::Nexus { "dataset_id" => id.to_string(), "address" => address.to_string() ); - let dataset = db::model::Dataset::new(id, zpool_id, address, kind); + let dataset = + db::model::Dataset::new(id, zpool_id, Some(address), kind); self.db_datastore.dataset_upsert(dataset).await?; Ok(()) } diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index ce0cd423f4..39b9844799 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -13,7 +13,7 @@ use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::VpcSubnet; -use nexus_db_queries::db::queries::vpc_subnet::SubnetError; +use nexus_db_queries::db::queries::vpc_subnet::InsertVpcSubnetError; use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -24,6 +24,7 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; +use oxnet::IpNet; use uuid::Uuid; impl super::Nexus { @@ -141,9 +142,9 @@ impl super::Nexus { // Note that we only catch IPv6 overlaps. The client // always specifies the IPv4 range, so we fail the // request if that overlaps with an existing range. 
- Err(SubnetError::OverlappingIpRange(ip)) - if retry <= NUM_RETRIES && ip.is_ipv6() => - { + Err(InsertVpcSubnetError::OverlappingIpRange( + IpNet::V6(_), + )) if retry <= NUM_RETRIES => { debug!( self.log, "autogenerated random IPv6 range overlap"; @@ -157,9 +158,9 @@ impl super::Nexus { } }; match result { - Err(SubnetError::OverlappingIpRange(ip)) - if ip.is_ipv6() => - { + Err(InsertVpcSubnetError::OverlappingIpRange( + IpNet::V6(_), + )) => { // TODO-monitoring TODO-debugging // // We should maintain a counter for this occurrence, and @@ -181,11 +182,11 @@ impl super::Nexus { for VPC Subnet", )) } - Err(SubnetError::OverlappingIpRange(_)) => { + Err(InsertVpcSubnetError::OverlappingIpRange(_)) => { // Overlapping IPv4 ranges, which is always a client error. Err(result.unwrap_err().into_external()) } - Err(SubnetError::External(e)) => Err(e), + Err(InsertVpcSubnetError::External(e)) => Err(e), Ok((.., subnet)) => Ok(subnet), } } @@ -210,7 +211,7 @@ impl super::Nexus { .vpc_create_subnet(opctx, &authz_vpc, subnet) .await .map(|(.., subnet)| subnet) - .map_err(SubnetError::into_external) + .map_err(InsertVpcSubnetError::into_external) } }?; diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 18efe40e27..960ded50d5 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -1575,6 +1575,7 @@ pub async fn start_dns_server( bind_address: "[::1]:0".parse().unwrap(), request_body_max_bytes: 8 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }, ) .await diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 879e8cdc3f..f268662ca9 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -1,8 +1,8 @@ { "openapi": "3.0.3", "info": { - "title": "Oxide Bootstrap Agent API", - "description": "API for interacting with individual sleds", + "title": "Bootstrap Agent API", + "description": "Per-sled API for setup and teardown", "contact": { "url": 
"https://oxide.computer", "email": "api@oxide.computer" @@ -36,7 +36,7 @@ }, "/components": { "get": { - "summary": "Provides a list of components known to the bootstrap agent.", + "summary": "Provide a list of components known to the bootstrap agent.", "description": "This API is intended to allow early boot services (such as Wicket) to query the underlying component versions installed on a sled.", "operationId": "components_get", "responses": { @@ -87,7 +87,7 @@ } }, "post": { - "summary": "Initializes the rack with the provided configuration.", + "summary": "Initialize the rack with the provided configuration.", "operationId": "rack_initialize", "requestBody": { "content": { @@ -105,7 +105,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" } } } @@ -119,7 +119,7 @@ } }, "delete": { - "summary": "Resets the rack to an unconfigured state.", + "summary": "Reset the rack to an unconfigured state.", "operationId": "rack_reset", "responses": { "200": { @@ -127,7 +127,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" } } } @@ -143,7 +143,7 @@ }, "/sled-initialize": { "delete": { - "summary": "Resets this particular sled to an unconfigured state.", + "summary": "Reset this particular sled to an unconfigured state.", "operationId": "sled_reset", "responses": { "204": { @@ -838,10 +838,6 @@ "speed400_g" ] }, - "RackInitId": { - "type": "string", - "format": "uuid" - }, "RackInitializeRequest": { "description": "Configuration for the \"rack setup service\".\n\nThe Rack Setup Service should be responsible for one-time setup actions, such as CockroachDB placement and initialization. 
Without operator intervention, however, these actions need a way to be automated in our deployment.", "type": "object", @@ -998,7 +994,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" }, "status": { "type": "string", @@ -1020,7 +1016,7 @@ "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" } ] }, @@ -1039,7 +1035,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" }, "message": { "type": "string" @@ -1061,7 +1057,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" }, "status": { "type": "string", @@ -1079,7 +1075,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" }, "status": { "type": "string", @@ -1101,7 +1097,7 @@ "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" } ] }, @@ -1120,7 +1116,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" }, "message": { "type": "string" @@ -1142,7 +1138,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" }, "status": { "type": "string", @@ -1158,10 +1154,6 @@ } ] }, - "RackResetId": { - "type": "string", - "format": "uuid" - }, "RecoverySiloConfig": { "description": "RecoverySiloConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"silo_name\", \"user_name\", \"user_password_hash\" ], \"properties\": { \"silo_name\": { \"$ref\": \"#/components/schemas/Name\" }, \"user_name\": { \"$ref\": \"#/components/schemas/UserId\" }, \"user_password_hash\": { \"$ref\": \"#/components/schemas/NewPasswordHash\" } } } ```
", "type": "object", @@ -1235,6 +1227,14 @@ } ] }, + "TypedUuidForRackInitKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForRackResetKind": { + "type": "string", + "format": "uuid" + }, "UplinkAddressConfig": { "type": "object", "properties": { diff --git a/openapi/cockroach-admin.json b/openapi/cockroach-admin.json index 3b03475ec5..76c0bea09b 100644 --- a/openapi/cockroach-admin.json +++ b/openapi/cockroach-admin.json @@ -1,8 +1,8 @@ { "openapi": "3.0.3", "info": { - "title": "Oxide CockroachDb Cluster Admin API", - "description": "API for interacting with the Oxide control plane's CockroachDb cluster", + "title": "CockroachDB Cluster Admin API", + "description": "API for interacting with the Oxide control plane's CockroachDB cluster", "contact": { "url": "https://oxide.computer", "email": "api@oxide.computer" @@ -12,7 +12,7 @@ "paths": { "/node/decommission": { "post": { - "summary": "Decommission a node from the CRDB cluster", + "summary": "Decommission a node from the CRDB cluster.", "operationId": "node_decommission", "requestBody": { "content": { @@ -70,7 +70,7 @@ }, "/node/status": { "get": { - "summary": "Get the status of all nodes in the CRDB cluster", + "summary": "Get the status of all nodes in the CRDB cluster.", "operationId": "node_status", "responses": { "200": { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 27cfe576b7..1323769da2 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2658,7 +2658,7 @@ ] }, "EarlyNetworkConfig": { - "description": "Network configuration required to bring up the control plane\n\nThe fields in this structure are those from [`super::params::RackInitializeRequest`] necessary for use beyond RSS. This is just for the initial rack configuration and cold boot purposes. 
Updates come from Nexus.", + "description": "Network configuration required to bring up the control plane\n\nThe fields in this structure are those from [`crate::rack_init::RackInitializeRequest`] necessary for use beyond RSS. This is just for the initial rack configuration and cold boot purposes. Updates come from Nexus.", "type": "object", "properties": { "body": { diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 34e7eadb54..48e5d290a3 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -355,7 +355,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" } } } @@ -377,7 +377,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" } } } @@ -2618,19 +2618,14 @@ "rack_network_config" ] }, - "RackInitId": { - "description": "RackInitId\n\n
JSON schema\n\n```json { \"type\": \"string\", \"format\": \"uuid\" } ```
", - "type": "string", - "format": "uuid" - }, "RackOperationStatus": { - "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\n
JSON schema\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackInitId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackResetId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": 
\"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ```
", + "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\n
JSON schema\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": 
\"#/components/schemas/TypedUuidForRackResetKind\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ```
", "oneOf": [ { "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" }, "status": { "type": "string", @@ -2652,7 +2647,7 @@ "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" } ] }, @@ -2671,7 +2666,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" }, "message": { "type": "string" @@ -2693,7 +2688,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackInitId" + "$ref": "#/components/schemas/TypedUuidForRackInitKind" }, "status": { "type": "string", @@ -2711,7 +2706,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" }, "status": { "type": "string", @@ -2733,7 +2728,7 @@ "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" } ] }, @@ -2752,7 +2747,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" }, "message": { "type": "string" @@ -2774,7 +2769,7 @@ "type": "object", "properties": { "id": { - "$ref": "#/components/schemas/RackResetId" + "$ref": "#/components/schemas/TypedUuidForRackResetKind" }, "status": { "type": "string", @@ -2790,11 +2785,6 @@ } ] }, - "RackResetId": { - "description": "RackResetId\n\n
JSON schema\n\n```json { \"type\": \"string\", \"format\": \"uuid\" } ```
", - "type": "string", - "format": "uuid" - }, "RackV1Inventory": { "description": "The current state of the v1 Rack as known to wicketd", "type": "object", @@ -5682,6 +5672,14 @@ } ] }, + "TypedUuidForRackInitKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForRackResetKind": { + "type": "string", + "format": "uuid" + }, "UpdateComponent": { "oneOf": [ { diff --git a/oximeter/oximeter/schema/virtual-machine.toml b/oximeter/oximeter/schema/virtual-machine.toml index 3ef0da4615..520a97bba3 100644 --- a/oximeter/oximeter/schema/virtual-machine.toml +++ b/oximeter/oximeter/schema/virtual-machine.toml @@ -5,7 +5,7 @@ name = "virtual_machine" description = "A guest virtual machine instance" authz_scope = "project" versions = [ - { version = 1, fields = [ "instance_id", "project_id", "silo_id" ] }, + { version = 1, fields = [ "instance_id", "project_id", "silo_id", "sled_id", "sled_model", "sled_revision", "sled_serial" ] }, ] [[metrics]] @@ -56,6 +56,22 @@ description = "ID of the virtual machine instance's project" type = "uuid" description = "ID of the virtual machine instance's silo" +[fields.sled_id] +type = "uuid" +description = "ID of the sled hosting the instance" + +[fields.sled_model] +type = "string" +description = "Model number of the sled hosting the instance" + +[fields.sled_revision] +type = "u32" +description = "Revision number of the sled hosting the instance" + +[fields.sled_serial] +type = "string" +description = "Serial number of the sled hosting the instance" + [fields.state] type = "string" description = "The state of the vCPU" diff --git a/oximeter/producer/src/lib.rs b/oximeter/producer/src/lib.rs index 36b05d7bb1..e9223b62f3 100644 --- a/oximeter/producer/src/lib.rs +++ b/oximeter/producer/src/lib.rs @@ -222,6 +222,7 @@ impl Server { bind_address: server_info.address, request_body_max_bytes, default_handler_task_mode: dropshot::HandlerTaskMode::Detached, + log_headers: vec![], }; let server = Self::build_dropshot_server(&log, 
®istry, &dropshot)?; diff --git a/package-manifest.toml b/package-manifest.toml index 29fb7c5da8..098e15d3b8 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -439,7 +439,6 @@ service_name = "switch_zone_setup" source.type = "local" source.paths = [ { from = "smf/switch_zone_setup/manifest.xml", to = "/var/svc/manifest/site/switch_zone_setup/manifest.xml" }, - { from = "smf/switch_zone_setup/switch_zone_setup", to = "/opt/oxide/bin/switch_zone_setup" }, { from = "smf/switch_zone_setup/support_authorized_keys", to = "/opt/oxide/support/authorized_keys" }, { from = "/opt/ooce/pgsql-13/lib/amd64", to = "/opt/ooce/pgsql-13/lib/amd64" }, ] @@ -645,8 +644,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "e83f4f164fd3dbb2100989a399a4fa087232ac36" -source.sha256 = "b28247df4d301540b0a46e4d9fdf410ee6fbdb23d18c80acbd36c016a084e30e" +source.commit = "fb571dc6512b24a777c5a9b2927a50501f6be297" +source.sha256 = "c7971efca6500cee8edf2696ec6b38014af82bacfe88a0e583bb9bb3a591bc8d" output.type = "zone" output.intermediate_only = true @@ -672,8 +671,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "e83f4f164fd3dbb2100989a399a4fa087232ac36" -source.sha256 = "caf988e39d800bdccb1b9423568a19ba10a79aa2b07f74bf7eb65589fd81f8b1" +source.commit = "fb571dc6512b24a777c5a9b2927a50501f6be297" +source.sha256 = "0a96670ce203bce7bed6a0e40842d319c2b4b8ee1a2e9210d3713423f8bd00b1" output.type = "zone" output.intermediate_only = true @@ -692,8 +691,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "e83f4f164fd3dbb2100989a399a4fa087232ac36" -source.sha256 = "378a2f32c1850a5a62fa9b320813e342a647647d2f014ab5eced7c2d1d4f9c95" +source.commit = "fb571dc6512b24a777c5a9b2927a50501f6be297" +source.sha256 = "a5bda6b899bff23fccd4dd74224fd1bc44703741054b50552921efa7470cb11a" output.type = "zone" output.intermediate_only = true @@ -740,6 +739,8 @@ source.packages = [ "switch_zone_setup.tar.gz", "xcvradm.tar.gz", "omicron-omdb.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -764,6 +765,8 @@ source.packages = [ "switch_zone_setup.tar.gz", "sp-sim-stub.tar.gz", "omicron-omdb.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -788,6 +791,8 @@ source.packages = [ "switch_zone_setup.tar.gz", "sp-sim-softnpu.tar.gz", "omicron-omdb.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" diff --git a/schema/crdb/dataset-address-optional/up01.sql b/schema/crdb/dataset-address-optional/up01.sql new file mode 100644 index 0000000000..e29215251d --- /dev/null +++ b/schema/crdb/dataset-address-optional/up01.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ALTER COLUMN ip DROP NOT NULL; diff --git a/schema/crdb/dataset-address-optional/up02.sql b/schema/crdb/dataset-address-optional/up02.sql new file mode 100644 index 0000000000..997294fa12 --- /dev/null +++ b/schema/crdb/dataset-address-optional/up02.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ALTER COLUMN port DROP NOT NULL; diff --git a/schema/crdb/dataset-address-optional/up03.sql b/schema/crdb/dataset-address-optional/up03.sql new file mode 100644 index 0000000000..0af212e320 --- /dev/null +++ b/schema/crdb/dataset-address-optional/up03.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.dataset ADD CONSTRAINT IF NOT EXISTS ip_and_port_set_for_crucible CHECK ( + (kind != 'crucible') OR + (kind = 'crucible' AND ip IS NOT NULL and port IS NOT 
NULL) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 7d93a5d5bd..7fc83ad5d0 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -525,8 +525,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( pool_id UUID NOT NULL, /* Contact information for the dataset */ - ip INET NOT NULL, - port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + ip INET, + port INT4 CHECK (port BETWEEN 0 AND 65535), kind omicron.public.dataset_kind NOT NULL, @@ -537,6 +537,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( CONSTRAINT size_used_column_set_for_crucible CHECK ( (kind != 'crucible') OR (kind = 'crucible' AND size_used IS NOT NULL) + ), + + CONSTRAINT ip_and_port_set_for_crucible CHECK ( + (kind != 'crucible') OR + (kind = 'crucible' AND ip IS NOT NULL and port IS NOT NULL) ) ); @@ -1430,7 +1435,8 @@ CREATE TYPE IF NOT EXISTS omicron.public.network_interface_kind AS ENUM ( 'instance', /* An interface attached to a service. */ - 'service' + 'service', + 'probe' ); CREATE TABLE IF NOT EXISTS omicron.public.network_interface ( @@ -1871,6 +1877,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( */ state omicron.public.ip_attach_state NOT NULL, + is_probe BOOL NOT NULL DEFAULT false, + /* The name must be non-NULL iff this is a floating IP. 
*/ CONSTRAINT null_fip_name CHECK ( (kind != 'floating' AND name IS NULL) OR @@ -2618,11 +2626,32 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_port_config ( geometry omicron.public.switch_port_geometry ); +CREATE TYPE IF NOT EXISTS omicron.public.switch_link_fec AS ENUM ( + 'Firecode', + 'None', + 'Rs' +); + +CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM ( + '0G', + '1G', + '10G', + '25G', + '40G', + '50G', + '100G', + '200G', + '400G' +); + CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_link_config ( port_settings_id UUID, lldp_service_config_id UUID NOT NULL, link_name TEXT, mtu INT4, + fec omicron.public.switch_link_fec, + speed omicron.public.switch_link_speed, + autoneg BOOL NOT NULL DEFAULT false, PRIMARY KEY (port_settings_id, link_name) ); @@ -3599,27 +3628,6 @@ FROM WHERE instance.time_deleted IS NULL AND vmm.time_deleted IS NULL; -CREATE TYPE IF NOT EXISTS omicron.public.switch_link_fec AS ENUM ( - 'Firecode', - 'None', - 'Rs' -); - -CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM ( - '0G', - '1G', - '10G', - '25G', - '40G', - '50G', - '100G', - '200G', - '400G' -); - -ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS fec omicron.public.switch_link_fec; -ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS speed omicron.public.switch_link_speed; - CREATE SEQUENCE IF NOT EXISTS omicron.public.ipv4_nat_version START 1 INCREMENT 1; CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( @@ -3696,8 +3704,6 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_bfd_session ON omicron.public.bfd_sessi switch ) WHERE time_deleted IS NULL; -ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; - CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( vni ) @@ -3790,10 +3796,6 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_probe_by_name 
ON omicron.public.probe ( ) WHERE time_deleted IS NULL; -ALTER TABLE omicron.public.external_ip ADD COLUMN IF NOT EXISTS is_probe BOOL NOT NULL DEFAULT false; - -ALTER TYPE omicron.public.network_interface_kind ADD VALUE IF NOT EXISTS 'probe'; - CREATE TYPE IF NOT EXISTS omicron.public.upstairs_repair_notification_type AS ENUM ( 'started', 'succeeded', @@ -4143,7 +4145,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '82.0.0', NULL) + (TRUE, NOW(), NOW(), '83.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index b798ba783d..7747bb768e 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -13,6 +13,7 @@ anyhow.workspace = true async-trait.workspace = true base64.workspace = true bootstore.workspace = true +bootstrap-agent-api.workspace = true bootstrap-agent-client.workspace = true bytes.workspace = true camino.workspace = true @@ -68,6 +69,7 @@ serde_human_bytes.workspace = true serde_json = { workspace = true, features = ["raw_value"] } sha3.workspace = true sled-agent-client.workspace = true +sled-agent-types.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true diff --git a/sled-agent/bootstrap-agent-api/Cargo.toml b/sled-agent/bootstrap-agent-api/Cargo.toml new file mode 100644 index 0000000000..368c5afe93 --- /dev/null +++ b/sled-agent/bootstrap-agent-api/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "bootstrap-agent-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +dropshot.workspace = true +nexus-client.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true +sled-agent-types.workspace = true +sled-hardware-types.workspace = true diff --git a/sled-agent/bootstrap-agent-api/src/lib.rs 
b/sled-agent/bootstrap-agent-api/src/lib.rs new file mode 100644 index 0000000000..b1b8865351 --- /dev/null +++ b/sled-agent/bootstrap-agent-api/src/lib.rs @@ -0,0 +1,92 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The bootstrap agent's API. +//! +//! Note that the bootstrap agent also communicates over Sprockets, +//! and has a separate interface for establishing the trust quorum. + +use dropshot::{ + HttpError, HttpResponseOk, HttpResponseUpdatedNoContent, RequestContext, + TypedBody, +}; +use omicron_common::api::external::SemverVersion; +use omicron_uuid_kinds::{RackInitUuid, RackResetUuid}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_agent_types::{ + rack_init::RackInitializeRequest, rack_ops::RackOperationStatus, +}; +use sled_hardware_types::Baseboard; + +#[dropshot::api_description { + module = "bootstrap_agent_api_mod", +}] +pub trait BootstrapAgentApi { + type Context; + + /// Return the baseboard identity of this sled. + #[endpoint { + method = GET, + path = "/baseboard", + }] + async fn baseboard_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Provide a list of components known to the bootstrap agent. + /// + /// This API is intended to allow early boot services (such as Wicket) + /// to query the underlying component versions installed on a sled. + #[endpoint { + method = GET, + path = "/components", + }] + async fn components_get( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Get the current status of rack initialization or reset. + #[endpoint { + method = GET, + path = "/rack-initialize", + }] + async fn rack_initialization_status( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Initialize the rack with the provided configuration. 
+ #[endpoint { + method = POST, + path = "/rack-initialize", + }] + async fn rack_initialize( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; + + /// Reset the rack to an unconfigured state. + #[endpoint { + method = DELETE, + path = "/rack-initialize", + }] + async fn rack_reset( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Reset this particular sled to an unconfigured state. + #[endpoint { + method = DELETE, + path = "/sled-initialize", + }] + async fn sled_reset( + rqctx: RequestContext, + ) -> Result; +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Component { + pub name: String, + pub version: SemverVersion, +} diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index b8b5abf07f..6feeffd302 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -11,13 +11,11 @@ use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::server as bootstrap_server; use omicron_sled_agent::bootstrap::RssAccessError; -use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig; use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; +use sled_agent_types::rack_init::RackInitializeRequest; #[derive(Subcommand, Debug)] enum OpenapiFlavor { - /// Generates bootstrap agent openapi spec - Bootstrap, /// Generates sled agent openapi spec Sled, } @@ -54,8 +52,6 @@ async fn do_run() -> Result<(), CmdError> { Args::Openapi(flavor) => match flavor { OpenapiFlavor::Sled => sled_server::run_openapi() .map_err(|err| CmdError::Failure(anyhow!(err))), - OpenapiFlavor::Bootstrap => bootstrap_server::run_openapi() - .map_err(|err| CmdError::Failure(anyhow!(err))), }, Args::Run { config_path } => { let config = SledConfig::from_file(&config_path) @@ -81,7 +77,7 @@ async fn do_run() -> Result<(), CmdError> { }; let rss_config = if rss_config_path.exists() { Some( - 
RssConfig::from_file(rss_config_path) + RackInitializeRequest::from_file(rss_config_path) .map_err(|e| CmdError::Failure(anyhow!(e)))?, ) } else { diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 664e3242ab..742cff4e61 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -5,7 +5,6 @@ //! Network setup required to bring up the control plane use anyhow::{anyhow, Context}; -use bootstore::schemes::v0 as bootstore; use dpd_client::types::{ LinkCreate, LinkId, LinkSettings, PortId, PortSettings, }; @@ -26,9 +25,8 @@ use omicron_common::address::DENDRITE_PORT; use omicron_common::address::{MGD_PORT, MGS_PORT}; use omicron_common::api::external::{BfdMode, ImportExportPolicy}; use omicron_common::api::internal::shared::{ - BfdPeerConfig, BgpConfig, BgpPeerConfig, PortConfig, PortConfigV2, PortFec, - PortSpeed, RackNetworkConfig, RackNetworkConfigV2, RouteConfig, - SwitchLocation, UplinkAddressConfig, + BgpConfig, PortConfig, PortFec, PortSpeed, RackNetworkConfig, + SwitchLocation, }; use omicron_common::backoff::{ retry_notify, retry_policy_local, BackoffError, ExponentialBackoff, @@ -36,13 +34,10 @@ use omicron_common::backoff::{ }; use omicron_common::OMICRON_DPD_TAG; use omicron_ddm_admin_client::DdmError; -use oxnet::{IpNet, Ipv4Net, Ipv6Net}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use oxnet::IpNet; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; -use std::str::FromStr; use std::time::{Duration, Instant}; use thiserror::Error; @@ -728,418 +723,6 @@ fn retry_policy_switch_mapping() -> ExponentialBackoff { .build() } -/// Network configuration required to bring up the control plane -/// -/// The fields in this structure are those from -/// [`super::params::RackInitializeRequest`] necessary for use beyond RSS. 
This -/// is just for the initial rack configuration and cold boot purposes. Updates -/// come from Nexus. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct EarlyNetworkConfig { - // The current generation number of data as stored in CRDB. - // The initial generation is set during RSS time and then only mutated - // by Nexus. - pub generation: u64, - - // Which version of the data structure do we have. This is to help with - // deserialization and conversion in future updates. - pub schema_version: u32, - - // The actual configuration details - pub body: EarlyNetworkConfigBody, -} - -impl FromStr for EarlyNetworkConfig { - type Err = String; - - fn from_str(value: &str) -> Result { - #[derive(Deserialize)] - struct ShadowConfig { - generation: u64, - schema_version: u32, - body: EarlyNetworkConfigBody, - } - - let v2_err = match serde_json::from_str::(&value) { - Ok(cfg) => { - return Ok(EarlyNetworkConfig { - generation: cfg.generation, - schema_version: cfg.schema_version, - body: cfg.body, - }) - } - Err(e) => format!("unable to parse EarlyNetworkConfig: {e:?}"), - }; - // If we fail to parse the config as any known version, we return the - // error corresponding to the parse failure of the newest schema. - serde_json::from_str::(&value) - .map(|v1| EarlyNetworkConfig { - generation: v1.generation, - schema_version: Self::schema_version(), - body: v1.body.into(), - }) - .map_err(|_| v2_err) - } -} - -impl EarlyNetworkConfig { - pub fn schema_version() -> u32 { - 2 - } - - // Note: This currently only converts between v0 and v1 or deserializes v1 of - // `EarlyNetworkConfig`. - pub fn deserialize_bootstore_config( - log: &Logger, - config: &bootstore::NetworkConfig, - ) -> Result { - // Try to deserialize the latest version of the data structure (v2). If - // that succeeds we are done. 
- let v2_error = - match serde_json::from_slice::(&config.blob) { - Ok(val) => return Ok(val), - Err(error) => { - // Log this error and continue trying to deserialize older - // versions. - warn!( - log, - "Failed to deserialize EarlyNetworkConfig \ - as v2, trying next as v1: {}", - error, - ); - error - } - }; - - match serde_json::from_slice::( - &config.blob, - ) { - Ok(v1) => { - // Convert from v1 to v2 - return Ok(EarlyNetworkConfig { - generation: v1.generation, - schema_version: EarlyNetworkConfig::schema_version(), - body: v1.body.into(), - }); - } - Err(error) => { - // Log this error. - warn!( - log, - "Failed to deserialize EarlyNetworkConfig \ - as v1, trying next as v0: {}", - error - ); - } - }; - - match serde_json::from_slice::( - &config.blob, - ) { - Ok(val) => { - // Convert from v0 to v2 - return Ok(EarlyNetworkConfig { - generation: val.generation, - schema_version: 2, - body: EarlyNetworkConfigBody { - ntp_servers: val.ntp_servers, - rack_network_config: val.rack_network_config.map( - |v0_config| { - back_compat::RackNetworkConfigV0::to_v2( - val.rack_subnet, - v0_config, - ) - }, - ), - }, - }); - } - Err(error) => { - // Log this error. - warn!( - log, - "Failed to deserialize EarlyNetworkConfig as v0: {}", error, - ); - } - }; - - // If we fail to parse the config as any known version, we return the - // error corresponding to the parse failure of the newest schema. - Err(v2_error) - } -} - -/// This is the actual configuration of EarlyNetworking. -/// -/// We nest it below the "header" of `generation` and `schema_version` so that -/// we can perform partial deserialization of `EarlyNetworkConfig` to only read -/// the header and defer deserialization of the body once we know the schema -/// version. This is possible via the use of [`serde_json::value::RawValue`] in -/// future (post-v1) deserialization paths. 
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct EarlyNetworkConfigBody { - /// The external NTP server addresses. - pub ntp_servers: Vec, - - // Rack network configuration as delivered from RSS or Nexus - pub rack_network_config: Option, -} - -impl From for bootstore::NetworkConfig { - fn from(value: EarlyNetworkConfig) -> Self { - // Can this ever actually fail? - // We literally just deserialized the same data in RSS - let blob = serde_json::to_vec(&value).unwrap(); - - // Yes this is duplicated, but that seems fine. - let generation = value.generation; - - bootstore::NetworkConfig { generation, blob } - } -} - -/// Structures and routines used to maintain backwards compatibility. The -/// contents of this module should only be used to convert older data into the -/// current format, and not for any ongoing run-time operations. -pub mod back_compat { - use super::*; - - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] - pub struct EarlyNetworkConfigBodyV1 { - /// The external NTP server addresses. - pub ntp_servers: Vec, - - // Rack network configuration as delivered from RSS or Nexus - pub rack_network_config: Option, - } - - impl From for EarlyNetworkConfigBody { - fn from(v1: EarlyNetworkConfigBodyV1) -> Self { - EarlyNetworkConfigBody { - ntp_servers: v1.ntp_servers, - rack_network_config: v1 - .rack_network_config - .map(|v1_config| v1_config.into()), - } - } - } - - /// Deprecated, use `RackNetworkConfig` instead. Cannot actually deprecate due to - /// - /// - /// Our first version of `RackNetworkConfig`. If this exists in the bootstore, we - /// upgrade out of it into `RackNetworkConfigV1` or later versions if possible. 
- #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] - pub(crate) struct RackNetworkConfigV0 { - // TODO: #3591 Consider making infra-ip ranges implicit for uplinks - /// First ip address to be used for configuring network infrastructure - pub infra_ip_first: Ipv4Addr, - /// Last ip address to be used for configuring network infrastructure - pub infra_ip_last: Ipv4Addr, - /// Uplinks for connecting the rack to external networks - pub uplinks: Vec, - } - - impl RackNetworkConfigV0 { - /// Convert from `RackNetworkConfigV0` to `RackNetworkConfigV1` - /// - /// We cannot use `From for `RackNetworkConfigV2` - /// because the `rack_subnet` field does not exist in `RackNetworkConfigV0` - /// and must be passed in from the `EarlyNetworkConfigV0` struct which - /// contains the `RackNetworkConfigV0` struct. - pub fn to_v2( - rack_subnet: Ipv6Addr, - v0: RackNetworkConfigV0, - ) -> RackNetworkConfigV2 { - RackNetworkConfigV2 { - rack_subnet: Ipv6Net::new(rack_subnet, 56).unwrap(), - infra_ip_first: v0.infra_ip_first, - infra_ip_last: v0.infra_ip_last, - ports: v0 - .uplinks - .into_iter() - .map(|uplink| PortConfigV2::from(uplink)) - .collect(), - bgp: vec![], - bfd: vec![], - } - } - } - - /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to - /// - #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] - pub struct PortConfigV1 { - /// The set of routes associated with this port. - pub routes: Vec, - /// This port's addresses and optional vlan IDs - pub addresses: Vec, - /// Switch the port belongs to. - pub switch: SwitchLocation, - /// Nmae of the port this config applies to. - pub port: String, - /// Port speed. - pub uplink_port_speed: PortSpeed, - /// Port forward error correction type. 
- pub uplink_port_fec: PortFec, - /// BGP peers on this port - pub bgp_peers: Vec, - /// Whether or not to set autonegotiation - #[serde(default)] - pub autoneg: bool, - } - - impl From for PortConfigV2 { - fn from(v1: PortConfigV1) -> Self { - PortConfigV2 { - routes: v1.routes.clone(), - addresses: v1 - .addresses - .iter() - .map(|a| UplinkAddressConfig { address: *a, vlan_id: None }) - .collect(), - switch: v1.switch, - port: v1.port, - uplink_port_speed: v1.uplink_port_speed, - uplink_port_fec: v1.uplink_port_fec, - bgp_peers: v1.bgp_peers.clone(), - autoneg: v1.autoneg, - } - } - } - - /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to - /// - #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] - pub(crate) struct UplinkConfig { - /// Gateway address - pub gateway_ip: Ipv4Addr, - /// Switch to use for uplink - pub switch: SwitchLocation, - /// Switchport to use for external connectivity - pub uplink_port: String, - /// Speed for the Switchport - pub uplink_port_speed: PortSpeed, - /// Forward Error Correction setting for the uplink port - pub uplink_port_fec: PortFec, - /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport - /// (must be in infra_ip pool) - pub uplink_cidr: Ipv4Net, - /// VLAN id to use for uplink - pub uplink_vid: Option, - } - - impl From for PortConfigV2 { - fn from(value: UplinkConfig) -> Self { - PortConfigV2 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: value.gateway_ip.into(), - vlan_id: value.uplink_vid, - }], - addresses: vec![UplinkAddressConfig { - address: value.uplink_cidr.into(), - vlan_id: value.uplink_vid, - }], - switch: value.switch, - port: value.uplink_port, - uplink_port_speed: value.uplink_port_speed, - uplink_port_fec: value.uplink_port_fec, - bgp_peers: vec![], - autoneg: false, - } - } - } - - /// Deprecated, use `RackNetworkConfig` instead. 
Cannot actually deprecate due to - /// - /// - /// Our second version of `RackNetworkConfig`. If this exists in the bootstore, - /// we upgrade out of it into `RackNetworkConfigV1` or later versions if - /// possible. - #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] - pub struct RackNetworkConfigV1 { - pub rack_subnet: Ipv6Net, - // TODO: #3591 Consider making infra-ip ranges implicit for uplinks - /// First ip address to be used for configuring network infrastructure - pub infra_ip_first: Ipv4Addr, - /// Last ip address to be used for configuring network infrastructure - pub infra_ip_last: Ipv4Addr, - /// Uplinks for connecting the rack to external networks - pub ports: Vec, - /// BGP configurations for connecting the rack to external networks - pub bgp: Vec, - /// BFD configuration for connecting the rack to external networks - #[serde(default)] - pub bfd: Vec, - } - - impl From for RackNetworkConfigV2 { - fn from(v1: RackNetworkConfigV1) -> Self { - RackNetworkConfigV2 { - rack_subnet: v1.rack_subnet, - infra_ip_first: v1.infra_ip_first, - infra_ip_last: v1.infra_ip_last, - ports: v1 - .ports - .into_iter() - .map(|ports| PortConfigV2::from(ports)) - .collect(), - bgp: v1.bgp.clone(), - bfd: v1.bfd.clone(), - } - } - } - - // The second production version of the `EarlyNetworkConfig`. - // - // If this version is in the bootstore than we need to convert it to - // `EarlyNetworkConfigV2`. - // - // Once we do this for all customers that have initialized racks with the - // old version we can go ahead and remove this type and its conversion code - // altogether. - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] - pub struct EarlyNetworkConfigV1 { - // The current generation number of data as stored in CRDB. - // The initial generation is set during RSS time and then only mutated - // by Nexus. - pub generation: u64, - - // Which version of the data structure do we have. 
This is to help with - // deserialization and conversion in future updates. - pub schema_version: u32, - - // The actual configuration details - pub body: EarlyNetworkConfigBodyV1, - } - - // The first production version of the `EarlyNetworkConfig`. - // - // If this version is in the bootstore than we need to convert it to - // `EarlyNetworkConfigV2`. - // - // Once we do this for all customers that have initialized racks with the - // old version we can go ahead and remove this type and its conversion code - // altogether. - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] - pub(crate) struct EarlyNetworkConfigV0 { - // The current generation number of data as stored in CRDB. - // The initial generation is set during RSS time and then only mutated - // by Nexus. - pub generation: u64, - - pub rack_subnet: Ipv6Addr, - - /// The external NTP server addresses. - pub ntp_servers: Vec, - - // Rack network configuration as delivered from RSS and only existing at - // generation 1 - pub rack_network_config: Option, - } -} - // The following two conversion functions translate the speed and fec types used // in the internal API to the types used in the dpd-client API. 
The conversion // is done here, rather than with "impl From" at the definition, to avoid a @@ -1165,163 +748,3 @@ fn convert_fec(fec: &PortFec) -> dpd_client::types::PortFec { PortFec::Rs => dpd_client::types::PortFec::Rs, } } - -#[cfg(test)] -mod tests { - use super::*; - use omicron_common::api::internal::shared::RouteConfig; - use omicron_common::api::internal::shared::UplinkAddressConfig; - use omicron_test_utils::dev::test_setup_log; - - #[test] - fn serialized_early_network_config_v0_to_v2_conversion() { - let logctx = test_setup_log( - "serialized_early_network_config_v0_to_v2_conversion", - ); - let v0 = back_compat::EarlyNetworkConfigV0 { - generation: 1, - rack_subnet: Ipv6Addr::UNSPECIFIED, - ntp_servers: Vec::new(), - rack_network_config: Some(back_compat::RackNetworkConfigV0 { - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - uplinks: vec![back_compat::UplinkConfig { - gateway_ip: Ipv4Addr::UNSPECIFIED, - switch: SwitchLocation::Switch0, - uplink_port: "Port0".to_string(), - uplink_port_speed: PortSpeed::Speed100G, - uplink_port_fec: PortFec::None, - uplink_cidr: "192.168.0.1/16".parse().unwrap(), - uplink_vid: None, - }], - }), - }; - - let v0_serialized = serde_json::to_vec(&v0).unwrap(); - let bootstore_conf = - bootstore::NetworkConfig { generation: 1, blob: v0_serialized }; - - let v2 = EarlyNetworkConfig::deserialize_bootstore_config( - &logctx.log, - &bootstore_conf, - ) - .unwrap(); - let v0_rack_network_config = v0.rack_network_config.unwrap(); - let uplink = v0_rack_network_config.uplinks[0].clone(); - let expected = EarlyNetworkConfig { - generation: 1, - schema_version: EarlyNetworkConfig::schema_version(), - body: EarlyNetworkConfigBody { - ntp_servers: v0.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV2 { - rack_subnet: Ipv6Net::new(v0.rack_subnet, 56).unwrap(), - infra_ip_first: v0_rack_network_config.infra_ip_first, - infra_ip_last: v0_rack_network_config.infra_ip_last, - ports: 
vec![PortConfigV2 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: uplink.gateway_ip.into(), - vlan_id: None, - }], - addresses: vec![UplinkAddressConfig { - address: uplink.uplink_cidr.into(), - vlan_id: None, - }], - switch: uplink.switch, - port: uplink.uplink_port, - uplink_port_speed: uplink.uplink_port_speed, - uplink_port_fec: uplink.uplink_port_fec, - autoneg: false, - bgp_peers: vec![], - }], - bgp: vec![], - bfd: vec![], - }), - }, - }; - - assert_eq!(expected, v2); - - logctx.cleanup_successful(); - } - - #[test] - fn serialized_early_network_config_v1_to_v2_conversion() { - let logctx = test_setup_log( - "serialized_early_network_config_v1_to_v2_conversion", - ); - - let v1 = back_compat::EarlyNetworkConfigV1 { - generation: 1, - schema_version: 1, - body: back_compat::EarlyNetworkConfigBodyV1 { - ntp_servers: Vec::new(), - rack_network_config: Some(back_compat::RackNetworkConfigV1 { - rack_subnet: Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 56) - .unwrap(), - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - ports: vec![back_compat::PortConfigV1 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: "192.168.0.2".parse().unwrap(), - vlan_id: None, - }], - addresses: vec!["192.168.0.1/16".parse().unwrap()], - switch: SwitchLocation::Switch0, - port: "Port0".to_string(), - uplink_port_speed: PortSpeed::Speed100G, - uplink_port_fec: PortFec::None, - bgp_peers: Vec::new(), - autoneg: false, - }], - bgp: Vec::new(), - bfd: Vec::new(), - }), - }, - }; - - let v1_serialized = serde_json::to_vec(&v1).unwrap(); - let bootstore_conf = - bootstore::NetworkConfig { generation: 1, blob: v1_serialized }; - - let v2 = EarlyNetworkConfig::deserialize_bootstore_config( - &logctx.log, - &bootstore_conf, - ) - .unwrap(); - let v1_rack_network_config = v1.body.rack_network_config.unwrap(); - let port = v1_rack_network_config.ports[0].clone(); - let expected = EarlyNetworkConfig { 
- generation: 1, - schema_version: EarlyNetworkConfig::schema_version(), - body: EarlyNetworkConfigBody { - ntp_servers: v1.body.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV2 { - rack_subnet: v1_rack_network_config.rack_subnet, - infra_ip_first: v1_rack_network_config.infra_ip_first, - infra_ip_last: v1_rack_network_config.infra_ip_last, - ports: vec![PortConfigV2 { - routes: port.routes.clone(), - addresses: vec![UplinkAddressConfig { - address: port.addresses[0], - vlan_id: None, - }], - switch: port.switch, - port: port.port, - uplink_port_speed: port.uplink_port_speed, - uplink_port_fec: port.uplink_port_fec, - autoneg: false, - bgp_peers: vec![], - }], - bgp: vec![], - bfd: vec![], - }), - }, - }; - - assert_eq!(expected, v2); - - logctx.cleanup_successful(); - } -} diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index 2fa0b83f1d..2bd1745f77 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -10,20 +10,22 @@ use super::rack_ops::RssAccess; use super::BootstrapError; use super::RssAccessError; -use crate::bootstrap::params::RackInitializeRequest; -use crate::bootstrap::rack_ops::{RackInitId, RackResetId}; use crate::updates::ConfigUpdates; -use crate::updates::{Component, UpdateManager}; +use crate::updates::UpdateManager; use bootstore::schemes::v0 as bootstore; -use dropshot::ApiDescriptionRegisterError; +use bootstrap_agent_api::bootstrap_agent_api_mod; +use bootstrap_agent_api::BootstrapAgentApi; +use bootstrap_agent_api::Component; use dropshot::{ - endpoint, ApiDescription, HttpError, HttpResponseOk, - HttpResponseUpdatedNoContent, RequestContext, TypedBody, + ApiDescription, HttpError, HttpResponseOk, HttpResponseUpdatedNoContent, + RequestContext, TypedBody, }; use http::StatusCode; use omicron_common::api::external::Error; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use 
omicron_uuid_kinds::RackInitUuid; +use omicron_uuid_kinds::RackResetUuid; +use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::rack_ops::RackOperationStatus; use sled_hardware_types::Baseboard; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -47,7 +49,7 @@ impl BootstrapServerContext { pub(super) fn start_rack_initialize( &self, request: RackInitializeRequest, - ) -> Result { + ) -> Result { self.rss_access.start_initializing( &self.base_log, self.global_zone_bootstrap_ip, @@ -58,181 +60,98 @@ impl BootstrapServerContext { } } -type BootstrapApiDescription = ApiDescription; - /// Returns a description of the bootstrap agent API -pub(crate) fn api() -> BootstrapApiDescription { - fn register_endpoints( - api: &mut BootstrapApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(baseboard_get)?; - api.register(components_get)?; - api.register(rack_initialization_status)?; - api.register(rack_initialize)?; - api.register(rack_reset)?; - api.register(sled_reset)?; - Ok(()) - } - - let mut api = BootstrapApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api +pub(crate) fn api() -> ApiDescription { + bootstrap_agent_api_mod::api_description::() + .expect("registered entrypoints successfully") } -/// Current status of any rack-level operation being performed by this bootstrap -/// agent. -#[derive( - Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, -)] -#[serde(tag = "status", rename_all = "snake_case")] -pub enum RackOperationStatus { - Initializing { - id: RackInitId, - }, - /// `id` will be none if the rack was already initialized on startup. 
- Initialized { - id: Option, - }, - InitializationFailed { - id: RackInitId, - message: String, - }, - InitializationPanicked { - id: RackInitId, - }, - Resetting { - id: RackResetId, - }, - /// `reset_id` will be None if the rack is in an uninitialized-on-startup, - /// or Some if it is in an uninitialized state due to a reset operation - /// completing. - Uninitialized { - reset_id: Option, - }, - ResetFailed { - id: RackResetId, - message: String, - }, - ResetPanicked { - id: RackResetId, - }, -} +enum BootstrapAgentImpl {} -/// Return the baseboard identity of this sled. -#[endpoint { - method = GET, - path = "/baseboard", -}] -async fn baseboard_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let ctx = rqctx.context(); - Ok(HttpResponseOk(ctx.baseboard.clone())) -} +impl BootstrapAgentApi for BootstrapAgentImpl { + type Context = BootstrapServerContext; -/// Provides a list of components known to the bootstrap agent. -/// -/// This API is intended to allow early boot services (such as Wicket) -/// to query the underlying component versions installed on a sled. -#[endpoint { - method = GET, - path = "/components", -}] -async fn components_get( - rqctx: RequestContext, -) -> Result>, HttpError> { - let ctx = rqctx.context(); - let updates = UpdateManager::new(ctx.updates.clone()); - let components = updates - .components_get() - .await - .map_err(|err| HttpError::for_internal_error(err.to_string()))?; - Ok(HttpResponseOk(components)) -} + async fn baseboard_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + Ok(HttpResponseOk(ctx.baseboard.clone())) + } -/// Get the current status of rack initialization or reset. 
-#[endpoint { - method = GET, - path = "/rack-initialize", -}] -async fn rack_initialization_status( - rqctx: RequestContext, -) -> Result, HttpError> { - let ctx = rqctx.context(); - let status = ctx.rss_access.operation_status(); - Ok(HttpResponseOk(status)) -} + async fn components_get( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let ctx = rqctx.context(); + let updates = UpdateManager::new(ctx.updates.clone()); + let components = updates + .components_get() + .await + .map_err(|err| HttpError::for_internal_error(err.to_string()))?; + Ok(HttpResponseOk(components)) + } -/// Initializes the rack with the provided configuration. -#[endpoint { - method = POST, - path = "/rack-initialize", -}] -async fn rack_initialize( - rqctx: RequestContext, - body: TypedBody, -) -> Result, HttpError> { - let ctx = rqctx.context(); - let request = body.into_inner(); - let id = ctx - .start_rack_initialize(request) - .map_err(|err| HttpError::for_bad_request(None, err.to_string()))?; - Ok(HttpResponseOk(id)) -} + async fn rack_initialization_status( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let status = ctx.rss_access.operation_status(); + Ok(HttpResponseOk(status)) + } -/// Resets the rack to an unconfigured state. 
-#[endpoint { - method = DELETE, - path = "/rack-initialize", -}] -async fn rack_reset( - rqctx: RequestContext, -) -> Result, HttpError> { - let ctx = rqctx.context(); - let id = ctx - .rss_access - .start_reset(&ctx.base_log, ctx.global_zone_bootstrap_ip) - .map_err(|err| HttpError::for_bad_request(None, err.to_string()))?; - Ok(HttpResponseOk(id)) -} + async fn rack_initialize( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let request = body.into_inner(); + let id = ctx + .start_rack_initialize(request) + .map_err(|err| HttpError::for_bad_request(None, err.to_string()))?; + Ok(HttpResponseOk(id)) + } -/// Resets this particular sled to an unconfigured state. -#[endpoint { - method = DELETE, - path = "/sled-initialize", -}] -async fn sled_reset( - rqctx: RequestContext, -) -> Result { - let ctx = rqctx.context(); - let (response_tx, response_rx) = oneshot::channel(); - - let make_channel_closed_err = || { - Err(HttpError::for_internal_error( - "sled_reset channel closed: task panic?".to_string(), - )) - }; - - match ctx.sled_reset_tx.try_send(response_tx) { - Ok(()) => (), - Err(TrySendError::Full(_)) => { - return Err(HttpError::for_status( - Some("ResetPending".to_string()), - StatusCode::TOO_MANY_REQUESTS, - )); - } - Err(TrySendError::Closed(_)) => { - return make_channel_closed_err(); - } + async fn rack_reset( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let id = ctx + .rss_access + .start_reset(&ctx.base_log, ctx.global_zone_bootstrap_ip) + .map_err(|err| HttpError::for_bad_request(None, err.to_string()))?; + Ok(HttpResponseOk(id)) } - match response_rx.await { - Ok(result) => { - () = result.map_err(Error::from)?; - Ok(HttpResponseUpdatedNoContent()) + async fn sled_reset( + rqctx: RequestContext, + ) -> Result { + let ctx = rqctx.context(); + let (response_tx, response_rx) = oneshot::channel(); + + let make_channel_closed_err = || { + 
Err(HttpError::for_internal_error( + "sled_reset channel closed: task panic?".to_string(), + )) + }; + + match ctx.sled_reset_tx.try_send(response_tx) { + Ok(()) => (), + Err(TrySendError::Full(_)) => { + return Err(HttpError::for_status( + Some("ResetPending".to_string()), + StatusCode::TOO_MANY_REQUESTS, + )); + } + Err(TrySendError::Closed(_)) => { + return make_channel_closed_err(); + } + } + + match response_rx.await { + Ok(result) => { + () = result.map_err(Error::from)?; + Ok(HttpResponseUpdatedNoContent()) + } + Err(_) => make_channel_closed_err(), } - Err(_) => make_channel_closed_err(), } } diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 4a5b443dc3..9fe399419f 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,301 +4,17 @@ //! Request types for the bootstrap agent -use crate::bootstrap::early_networking::back_compat::RackNetworkConfigV1; -use anyhow::{bail, Result}; +use anyhow::Result; use async_trait::async_trait; use omicron_common::address::{self, Ipv6Subnet, SLED_PREFIX}; -use omicron_common::api::external::AllowedSourceIps; -use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::ledger::Ledgerable; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sha3::{Digest, Sha3_256}; -use sled_hardware_types::Baseboard; use std::borrow::Cow; -use std::collections::BTreeSet; use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] -#[serde(rename_all = "snake_case", tag = "type")] -pub enum BootstrapAddressDiscovery { - /// Ignore all bootstrap addresses except our own. - OnlyOurs, - /// Ignore all bootstrap addresses except the following. - OnlyThese { addrs: BTreeSet }, -} - -/// Structures and routines used to maintain backwards compatibility. 
The -/// contents of this module should only be used to convert older data into the -/// current format, and not for any ongoing run-time operations. -pub mod back_compat { - use super::*; - - #[derive(Clone, Deserialize)] - struct UnvalidatedRackInitializeRequestV1 { - trust_quorum_peers: Option>, - bootstrap_discovery: BootstrapAddressDiscovery, - ntp_servers: Vec, - dns_servers: Vec, - internal_services_ip_pool_ranges: Vec, - external_dns_ips: Vec, - external_dns_zone_name: String, - external_certificates: Vec, - recovery_silo: RecoverySiloConfig, - rack_network_config: RackNetworkConfigV1, - #[serde(default = "default_allowed_source_ips")] - allowed_source_ips: AllowedSourceIps, - } - - /// This is a deprecated format, maintained to allow importing from older - /// versions. - #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] - #[serde(try_from = "UnvalidatedRackInitializeRequestV1")] - pub struct RackInitializeRequestV1 { - pub trust_quorum_peers: Option>, - pub bootstrap_discovery: BootstrapAddressDiscovery, - pub ntp_servers: Vec, - pub dns_servers: Vec, - pub internal_services_ip_pool_ranges: Vec, - pub external_dns_ips: Vec, - pub external_dns_zone_name: String, - pub external_certificates: Vec, - pub recovery_silo: RecoverySiloConfig, - pub rack_network_config: RackNetworkConfigV1, - #[serde(default = "default_allowed_source_ips")] - pub allowed_source_ips: AllowedSourceIps, - } - - impl TryFrom for RackInitializeRequestV1 { - type Error = anyhow::Error; - - fn try_from(value: UnvalidatedRackInitializeRequestV1) -> Result { - validate_external_dns( - &value.external_dns_ips, - &value.internal_services_ip_pool_ranges, - )?; - - Ok(RackInitializeRequestV1 { - trust_quorum_peers: value.trust_quorum_peers, - bootstrap_discovery: value.bootstrap_discovery, - ntp_servers: value.ntp_servers, - dns_servers: value.dns_servers, - internal_services_ip_pool_ranges: value - .internal_services_ip_pool_ranges, - external_dns_ips: 
value.external_dns_ips, - external_dns_zone_name: value.external_dns_zone_name, - external_certificates: value.external_certificates, - recovery_silo: value.recovery_silo, - rack_network_config: value.rack_network_config, - allowed_source_ips: value.allowed_source_ips, - }) - } - } - impl From for RackInitializeRequest { - fn from(v1: RackInitializeRequestV1) -> Self { - RackInitializeRequest { - trust_quorum_peers: v1.trust_quorum_peers, - bootstrap_discovery: v1.bootstrap_discovery, - ntp_servers: v1.ntp_servers, - dns_servers: v1.dns_servers, - internal_services_ip_pool_ranges: v1 - .internal_services_ip_pool_ranges, - external_dns_ips: v1.external_dns_ips, - external_dns_zone_name: v1.external_dns_zone_name, - external_certificates: v1.external_certificates, - recovery_silo: v1.recovery_silo, - rack_network_config: v1.rack_network_config.into(), - allowed_source_ips: v1.allowed_source_ips, - } - } - } -} - -// "Shadow" copy of `RackInitializeRequest` that does no validation on its -// fields. -#[derive(Clone, Deserialize)] -struct UnvalidatedRackInitializeRequest { - trust_quorum_peers: Option>, - bootstrap_discovery: BootstrapAddressDiscovery, - ntp_servers: Vec, - dns_servers: Vec, - internal_services_ip_pool_ranges: Vec, - external_dns_ips: Vec, - external_dns_zone_name: String, - external_certificates: Vec, - recovery_silo: RecoverySiloConfig, - rack_network_config: RackNetworkConfig, - #[serde(default = "default_allowed_source_ips")] - allowed_source_ips: AllowedSourceIps, -} - -/// Configuration for the "rack setup service". -/// -/// The Rack Setup Service should be responsible for one-time setup actions, -/// such as CockroachDB placement and initialization. Without operator -/// intervention, however, these actions need a way to be automated in our -/// deployment. 
-#[derive(Clone, Deserialize, Serialize, PartialEq, JsonSchema)] -#[serde(try_from = "UnvalidatedRackInitializeRequest")] -pub struct RackInitializeRequest { - /// The set of peer_ids required to initialize trust quorum - /// - /// The value is `None` if we are not using trust quorum - pub trust_quorum_peers: Option>, - - /// Describes how bootstrap addresses should be collected during RSS. - pub bootstrap_discovery: BootstrapAddressDiscovery, - - /// The external NTP server addresses. - pub ntp_servers: Vec, - - /// The external DNS server addresses. - pub dns_servers: Vec, - - /// Ranges of the service IP pool which may be used for internal services. - // TODO(https://github.com/oxidecomputer/omicron/issues/1530): Eventually, - // we want to configure multiple pools. - pub internal_services_ip_pool_ranges: Vec, - - /// Service IP addresses on which we run external DNS servers. - /// - /// Each address must be present in `internal_services_ip_pool_ranges`. - pub external_dns_ips: Vec, - - /// DNS name for the DNS zone delegated to the rack for external DNS - pub external_dns_zone_name: String, - - /// initial TLS certificates for the external API - pub external_certificates: Vec, - - /// Configuration of the Recovery Silo (the initial Silo) - pub recovery_silo: RecoverySiloConfig, - - /// Initial rack network configuration - pub rack_network_config: RackNetworkConfig, - - /// IPs or subnets allowed to make requests to user-facing services - #[serde(default = "default_allowed_source_ips")] - pub allowed_source_ips: AllowedSourceIps, -} - -impl RackInitializeRequest { - pub fn from_toml_with_fallback( - data: &str, - ) -> Result { - let v2_err = match toml::from_str::(&data) { - Ok(req) => return Ok(req), - Err(e) => e, - }; - if let Ok(v1) = - toml::from_str::(&data) - { - return Ok(v1.into()); - } - - // If we fail to parse the request as any known version, we return the - // error corresponding to the parse failure of the newest schema. 
- Err(v2_err.into()) - } -} - -/// This field was added after several racks were already deployed. RSS plans -/// for those racks should default to allowing any source IP, since that is -/// effectively what they did. -const fn default_allowed_source_ips() -> AllowedSourceIps { - AllowedSourceIps::Any -} - -// This custom debug implementation hides the private keys. -impl std::fmt::Debug for RackInitializeRequest { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // If you find a compiler error here, and you just added a field to this - // struct, be sure to add it to the Debug impl below! - let RackInitializeRequest { - trust_quorum_peers: trust_qurorum_peers, - bootstrap_discovery, - ntp_servers, - dns_servers, - internal_services_ip_pool_ranges, - external_dns_ips, - external_dns_zone_name, - external_certificates: _, - recovery_silo, - rack_network_config, - allowed_source_ips, - } = &self; - - f.debug_struct("RackInitializeRequest") - .field("trust_quorum_peers", trust_qurorum_peers) - .field("bootstrap_discovery", bootstrap_discovery) - .field("ntp_servers", ntp_servers) - .field("dns_servers", dns_servers) - .field( - "internal_services_ip_pool_ranges", - internal_services_ip_pool_ranges, - ) - .field("external_dns_ips", external_dns_ips) - .field("external_dns_zone_name", external_dns_zone_name) - .field("external_certificates", &"") - .field("recovery_silo", recovery_silo) - .field("rack_network_config", rack_network_config) - .field("allowed_source_ips", allowed_source_ips) - .finish() - } -} - -fn validate_external_dns( - dns_ips: &Vec, - internal_ranges: &Vec, -) -> Result<()> { - if dns_ips.is_empty() { - bail!("At least one external DNS IP is required"); - } - - // Every external DNS IP should also be present in one of the internal - // services IP pool ranges. This check is O(N*M), but we expect both N - // and M to be small (~5 DNS servers, and a small number of pools). 
- for &dns_ip in dns_ips { - if !internal_ranges.iter().any(|range| range.contains(dns_ip)) { - bail!( - "External DNS IP {dns_ip} is not contained in \ - `internal_services_ip_pool_ranges`" - ); - } - } - Ok(()) -} - -impl TryFrom for RackInitializeRequest { - type Error = anyhow::Error; - - fn try_from(value: UnvalidatedRackInitializeRequest) -> Result { - validate_external_dns( - &value.external_dns_ips, - &value.internal_services_ip_pool_ranges, - )?; - - Ok(RackInitializeRequest { - trust_quorum_peers: value.trust_quorum_peers, - bootstrap_discovery: value.bootstrap_discovery, - ntp_servers: value.ntp_servers, - dns_servers: value.dns_servers, - internal_services_ip_pool_ranges: value - .internal_services_ip_pool_ranges, - external_dns_ips: value.external_dns_ips, - external_dns_zone_name: value.external_dns_zone_name, - external_certificates: value.external_certificates, - recovery_silo: value.recovery_silo, - rack_network_config: value.rack_network_config, - allowed_source_ips: value.allowed_source_ips, - }) - } -} - -pub type Certificate = nexus_client::types::Certificate; -pub type RecoverySiloConfig = nexus_client::types::RecoverySiloConfig; - /// A representation of a Baseboard ID as used in the inventory subsystem /// This type is essentially the same as a `Baseboard` except it doesn't have a /// revision or HW type (Gimlet, PC, Unknown). 
@@ -480,70 +196,11 @@ pub(super) mod version { pub(crate) const V1: u32 = 1; } -#[cfg(test)] -pub fn test_config() -> RackInitializeRequest { - let manifest = std::env::var("CARGO_MANIFEST_DIR") - .expect("Cannot access manifest directory"); - let manifest = camino::Utf8PathBuf::from(manifest); - let path = manifest.join("../smf/sled-agent/non-gimlet/config-rss.toml"); - let contents = std::fs::read_to_string(&path).unwrap(); - toml::from_str(&contents) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)) -} - #[cfg(test)] mod tests { - use std::net::Ipv4Addr; use std::net::Ipv6Addr; use super::*; - use camino::Utf8PathBuf; - use oxnet::Ipv6Net; - - #[test] - fn parse_rack_initialization() { - let manifest = std::env::var("CARGO_MANIFEST_DIR") - .expect("Cannot access manifest directory"); - let manifest = Utf8PathBuf::from(manifest); - - let path = - manifest.join("../smf/sled-agent/non-gimlet/config-rss.toml"); - let contents = std::fs::read_to_string(&path).unwrap(); - let _: RackInitializeRequest = toml::from_str(&contents) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); - - let path = manifest - .join("../smf/sled-agent/gimlet-standalone/config-rss.toml"); - let contents = std::fs::read_to_string(&path).unwrap(); - let _: RackInitializeRequest = toml::from_str(&contents) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); - } - - #[test] - fn parse_rack_initialization_weak_hash() { - let config = r#" - bootstrap_discovery.type = "only_ours" - ntp_servers = [ "ntp.eng.oxide.computer" ] - dns_servers = [ "1.1.1.1", "9.9.9.9" ] - external_dns_zone_name = "oxide.test" - - [[internal_services_ip_pool_ranges]] - first = "192.168.1.20" - last = "192.168.1.22" - - [recovery_silo] - silo_name = "recovery" - user_name = "recovery" - user_password_hash = "$argon2i$v=19$m=16,t=2,p=1$NVR0a2QxVXNiQjlObFJXbA$iGFJWOlUqN20B8KR4Fsmrg" - "#; - - let error = toml::from_str::(config) - .expect_err("unexpectedly parsed with bad 
password hash"); - println!("found error: {}", error); - assert!(error.to_string().contains( - "password hash: algorithm: expected argon2id, found argon2i" - )); - } #[test] fn json_serialization_round_trips() { @@ -600,123 +257,4 @@ mod tests { Ledgerable::deserialize(&serialized).unwrap(); assert_eq!(expected, actual); } - - #[test] - fn validate_external_dns_ips_must_be_in_internal_services_ip_pools() { - // Conjure up a config; we'll tweak the internal services pools and - // external DNS IPs, but no other fields matter. - let mut config = UnvalidatedRackInitializeRequest { - trust_quorum_peers: None, - bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, - ntp_servers: Vec::new(), - dns_servers: Vec::new(), - internal_services_ip_pool_ranges: Vec::new(), - external_dns_ips: Vec::new(), - external_dns_zone_name: "".to_string(), - external_certificates: Vec::new(), - recovery_silo: RecoverySiloConfig { - silo_name: "recovery".parse().unwrap(), - user_name: "recovery".parse().unwrap(), - user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(), - }, - rack_network_config: RackNetworkConfig { - rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST), - infra_ip_first: Ipv4Addr::LOCALHOST, - infra_ip_last: Ipv4Addr::LOCALHOST, - ports: Vec::new(), - bgp: Vec::new(), - bfd: Vec::new(), - }, - allowed_source_ips: AllowedSourceIps::Any, - }; - - // Valid configs: all external DNS IPs are contained in the IP pool - // ranges. 
- for (ip_pool_ranges, dns_ips) in [ - ( - &[("fd00::1", "fd00::10")] as &[(&str, &str)], - &["fd00::1", "fd00::5", "fd00::10"] as &[&str], - ), - ( - &[("192.168.1.10", "192.168.1.20")], - &["192.168.1.10", "192.168.1.15", "192.168.1.20"], - ), - ( - &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], - &[ - "fd00::1", - "fd00::5", - "fd00::10", - "192.168.1.10", - "192.168.1.15", - "192.168.1.20", - ], - ), - ] { - config.internal_services_ip_pool_ranges = ip_pool_ranges - .iter() - .map(|(a, b)| { - address::IpRange::try_from(( - a.parse::().unwrap(), - b.parse::().unwrap(), - )) - .unwrap() - }) - .collect(); - config.external_dns_ips = - dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); - - match RackInitializeRequest::try_from(config.clone()) { - Ok(_) => (), - Err(err) => panic!( - "failure on {ip_pool_ranges:?} with DNS IPs {dns_ips:?}: \ - {err}" - ), - } - } - - // Invalid configs: either no DNS IPs, or one or more DNS IPs are not - // contained in the ip pool ranges. 
- for (ip_pool_ranges, dns_ips) in [ - (&[("fd00::1", "fd00::10")] as &[(&str, &str)], &[] as &[&str]), - (&[("fd00::1", "fd00::10")], &["fd00::1", "fd00::5", "fd00::11"]), - ( - &[("192.168.1.10", "192.168.1.20")], - &["192.168.1.9", "192.168.1.15", "192.168.1.20"], - ), - ( - &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], - &[ - "fd00::1", - "fd00::5", - "fd00::10", - "192.168.1.10", - "192.168.1.15", - "192.168.1.20", - "192.168.1.21", - ], - ), - ] { - config.internal_services_ip_pool_ranges = ip_pool_ranges - .iter() - .map(|(a, b)| { - address::IpRange::try_from(( - a.parse::().unwrap(), - b.parse::().unwrap(), - )) - .unwrap() - }) - .collect(); - config.external_dns_ips = - dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); - - match RackInitializeRequest::try_from(config.clone()) { - Ok(_) => panic!( - "unexpected success on {ip_pool_ranges:?} with \ - DNS IPs {dns_ips:?}" - ), - Err(_) => (), - } - } - } } diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index 5cfd0b074a..3eb00b419a 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -4,14 +4,13 @@ //! Internal API for rack-level bootstrap agent operations. 
-use crate::bootstrap::http_entrypoints::RackOperationStatus; -use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rss_handle::RssHandle; use crate::rack_setup::service::SetupServiceError; use bootstore::schemes::v0 as bootstore; -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; +use omicron_uuid_kinds::RackInitUuid; +use omicron_uuid_kinds::RackResetUuid; +use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::rack_ops::RackOperationStatus; use sled_storage::manager::StorageHandle; use slog::Logger; use std::mem; @@ -20,37 +19,6 @@ use std::sync::Arc; use std::sync::Mutex; use tokio::sync::oneshot; use tokio::sync::oneshot::error::TryRecvError; -use uuid::Uuid; - -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - Hash, - PartialOrd, - Ord, - Serialize, - Deserialize, - JsonSchema, -)] -pub struct RackInitId(pub Uuid); - -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - Hash, - PartialOrd, - Ord, - Serialize, - Deserialize, - JsonSchema, -)] -pub struct RackResetId(pub Uuid); #[derive(Debug, Clone, thiserror::Error)] pub enum RssAccessError { @@ -174,7 +142,7 @@ impl RssAccess { storage_manager: &StorageHandle, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, - ) -> Result { + ) -> Result { let mut status = self.status.lock().unwrap(); match &*status { @@ -202,7 +170,7 @@ impl RssAccess { } RssStatus::Uninitialized { .. } => { let (completion_tx, completion) = oneshot::channel(); - let id = RackInitId(Uuid::new_v4()); + let id = RackInitUuid::new_v4(); *status = RssStatus::Initializing { id, completion }; mem::drop(status); @@ -240,7 +208,7 @@ impl RssAccess { &self, parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - ) -> Result { + ) -> Result { let mut status = self.status.lock().unwrap(); match &*status { @@ -267,7 +235,7 @@ impl RssAccess { } RssStatus::Initialized { .. 
} => { let (completion_tx, completion) = oneshot::channel(); - let id = RackResetId(Uuid::new_v4()); + let id = RackResetUuid::new_v4(); *status = RssStatus::Resetting { id, completion }; mem::drop(status); @@ -302,40 +270,40 @@ enum RssStatus { // We can either be uninitialized on startup (in which case `reset_id` // is None) or because a reset has completed (in which case `reset_id` // is Some). - reset_id: Option, + reset_id: Option, }, Initialized { // We can either be initialized on startup (in which case `id` // is None) or because initialization has completed (in which case `id` // is Some). - id: Option, + id: Option, }, // Tranistory states (which we may be in for a long time, even on human time // scales, but should eventually leave). Initializing { - id: RackInitId, + id: RackInitUuid, completion: oneshot::Receiver<()>, }, Resetting { - id: RackResetId, + id: RackResetUuid, completion: oneshot::Receiver<()>, }, // Terminal failure states; these require support intervention. InitializationFailed { - id: RackInitId, + id: RackInitUuid, err: SetupServiceError, }, InitializationPanicked { - id: RackInitId, + id: RackInitUuid, }, ResetFailed { - id: RackResetId, + id: RackResetUuid, err: SetupServiceError, }, ResetPanicked { - id: RackResetId, + id: RackResetUuid, }, } diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index 9baf0e7ef3..73f7537853 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -6,7 +6,6 @@ use super::client as bootstrap_agent_client; use super::params::StartSledAgentRequest; -use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; use ::bootstrap_agent_client::Client as BootstrapAgentClient; @@ -16,6 +15,7 @@ use futures::StreamExt; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use 
omicron_common::backoff::BackoffError; +use sled_agent_types::rack_init::RackInitializeRequest; use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; @@ -44,7 +44,7 @@ impl RssHandle { /// Executes the rack setup service until it has completed pub(super) async fn run_rss( log: &Logger, - config: SetupServiceConfig, + config: RackInitializeRequest, our_bootstrap_address: Ipv6Addr, storage_manager: StorageHandle, bootstore: bootstore::NodeHandle, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 369437d3aa..fa1d781a96 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -6,14 +6,11 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; -use super::params::RackInitializeRequest; use super::params::StartSledAgentRequest; -use super::rack_ops::RackInitId; use super::views::SledAgentResponse; use super::BootstrapError; use super::RssAccessError; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; -use crate::bootstrap::http_entrypoints::api as http_api; use crate::bootstrap::http_entrypoints::BootstrapServerContext; use crate::bootstrap::maghemite; use crate::bootstrap::pre_server::BootstrapAgentStartup; @@ -42,6 +39,8 @@ use omicron_common::ledger; use omicron_common::ledger::Ledger; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_ddm_admin_client::DdmError; +use omicron_uuid_kinds::RackInitUuid; +use sled_agent_types::rack_init::RackInitializeRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; @@ -290,7 +289,7 @@ impl Server { pub fn start_rack_initialize( &self, request: RackInitializeRequest, - ) -> Result { + ) -> Result { self.bootstrap_http_server.app_private().start_rack_initialize(request) } @@ -501,17 +500,6 @@ async fn sled_config_paths( Ok(paths) } -/// Runs the OpenAPI generator, emitting the spec to stdout. 
-pub fn run_openapi() -> Result<(), String> { - http_api() - .openapi("Oxide Bootstrap Agent API", "0.0.1") - .description("API for interacting with individual sleds") - .contact_url("https://oxide.computer") - .contact_email("api@oxide.computer") - .write(&mut std::io::stdout()) - .map_err(|e| e.to_string()) -} - struct Inner { config: SledConfig, state: SledAgentState, diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 9c1d5a4e11..ba4fafcb54 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -5,7 +5,6 @@ //! HTTP entrypoint functions for the sled agent's exposed API use super::sled_agent::SledAgent; -use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ BootstoreStatus, CleanupContextUpdate, DatasetsConfig, DiskEnsureBody, @@ -38,6 +37,7 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_hardware::DiskVariant; use sled_storage::resources::DatasetsManagementResult; use sled_storage::resources::DisksManagementResult; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 836b030a87..a421bda3a6 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -599,9 +599,6 @@ impl crate::smf_helper::Service for OmicronZoneType { fn smf_name(&self) -> String { format!("svc:/oxide/{}", self.service_name()) } - fn should_import(&self) -> bool { - true - } } impl From for sled_agent_client::types::OmicronZoneType { diff --git a/sled-agent/src/profile.rs b/sled-agent/src/profile.rs index 33e30d1d7b..a30c15acfc 100644 --- a/sled-agent/src/profile.rs +++ b/sled-agent/src/profile.rs @@ -163,6 +163,7 @@ impl Display for ServiceInstanceBuilder { } } +#[derive(Clone)] pub struct PropertyGroupBuilder { name: String, /// names 
of the properties that were added, in the order they were added @@ -233,7 +234,7 @@ impl Display for PropertyGroupBuilder { if values.len() == 1 { write!( f, - r#" + r#" "#, name = property_name, value = &values[0], @@ -302,7 +303,7 @@ mod tests { - + "#, @@ -384,7 +385,7 @@ mod tests { - + @@ -429,11 +430,11 @@ mod tests { - - + + - + diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs deleted file mode 100644 index 43664cfd04..0000000000 --- a/sled-agent/src/rack_setup/config.rs +++ /dev/null @@ -1,249 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Interfaces for working with RSS config. - -use crate::config::ConfigError; -use camino::Utf8Path; -use omicron_common::address::{ - get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, -}; - -pub use crate::bootstrap::params::back_compat::RackInitializeRequestV1 as SetupServiceConfigV1; -use crate::bootstrap::params::Certificate; -pub use crate::bootstrap::params::RackInitializeRequest as SetupServiceConfig; - -impl SetupServiceConfig { - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let contents = std::fs::read_to_string(&path) - .map_err(|err| ConfigError::Io { path: path.into(), err })?; - let mut raw_config = - SetupServiceConfig::from_toml_with_fallback(&contents) - .map_err(|err| ConfigError::Parse { path: path.into(), err })?; - - // In the same way that sled-agent itself (our caller) discovers the - // optional config-rss.toml in a well-known path relative to its config - // file, we look for a pair of well-known paths adjacent to - // config-rss.toml that specify an extra TLS certificate and private - // key. This is used by the end-to-end tests. Any developer can also - // use this to inject a TLS certificate into their setup. 
- // (config-rss.toml is only used for dev/test, not production - // deployments, which will always get their RSS configuration from - // Wicket.) - if let Some(parent) = path.parent() { - let cert_path = parent.join("initial-tls-cert.pem"); - let key_path = parent.join("initial-tls-key.pem"); - let cert_bytes = std::fs::read_to_string(&cert_path); - let key_bytes = std::fs::read_to_string(&key_path); - match (cert_bytes, key_bytes) { - (Ok(cert), Ok(key)) => { - raw_config - .external_certificates - .push(Certificate { key, cert }); - } - (Err(cert_error), Err(key_error)) - if cert_error.kind() == std::io::ErrorKind::NotFound - && key_error.kind() == std::io::ErrorKind::NotFound => - { - // Fine. No extra cert was provided. - } - (Err(cert_error), _) => { - return Err(ConfigError::Certificate( - anyhow::Error::new(cert_error).context(format!( - "loading certificate from {:?}", - cert_path - )), - )); - } - (_, Err(key_error)) => { - return Err(ConfigError::Certificate( - anyhow::Error::new(key_error).context(format!( - "loading private key from {:?}", - key_path - )), - )); - } - }; - } - - Ok(raw_config) - } - - pub fn az_subnet(&self) -> Ipv6Subnet { - Ipv6Subnet::::new( - self.rack_network_config.rack_subnet.addr(), - ) - } - - /// Returns the subnet for our rack. - pub fn rack_subnet(&self) -> Ipv6Subnet { - Ipv6Subnet::::new( - self.rack_network_config.rack_subnet.addr(), - ) - } - - /// Returns the subnet for the `index`-th sled in the rack. 
- pub fn sled_subnet(&self, index: u8) -> Ipv6Subnet { - get_64_subnet(self.rack_subnet(), index) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::bootstrap::params::BootstrapAddressDiscovery; - use crate::bootstrap::params::RecoverySiloConfig; - use anyhow::Context; - use camino::Utf8PathBuf; - use omicron_common::address::IpRange; - use omicron_common::api::internal::shared::AllowedSourceIps; - use omicron_common::api::internal::shared::RackNetworkConfig; - use oxnet::Ipv6Net; - use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; - - #[test] - fn test_subnets() { - let cfg = SetupServiceConfig { - trust_quorum_peers: None, - bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, - ntp_servers: vec![String::from("test.pool.example.com")], - dns_servers: vec!["1.1.1.1".parse().unwrap()], - external_dns_zone_name: String::from("oxide.test"), - internal_services_ip_pool_ranges: vec![IpRange::from(IpAddr::V4( - Ipv4Addr::new(129, 168, 1, 20), - ))], - external_dns_ips: vec![], - external_certificates: vec![], - recovery_silo: RecoverySiloConfig { - silo_name: "test-silo".parse().unwrap(), - user_name: "dummy".parse().unwrap(), - // This is a hash for the password "oxide". It doesn't matter, - // though; it's not used. 
- user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$\ - RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/\ - ek3GL0el/oProgTwWpHJZ8lsQQoY" - .parse() - .unwrap(), - }, - rack_network_config: RackNetworkConfig { - rack_subnet: Ipv6Net::new( - "fd00:1122:3344:0100::".parse().unwrap(), - RACK_PREFIX, - ) - .unwrap(), - infra_ip_first: Ipv4Addr::LOCALHOST, - infra_ip_last: Ipv4Addr::LOCALHOST, - ports: Vec::new(), - bgp: Vec::new(), - bfd: Vec::new(), - }, - allowed_source_ips: AllowedSourceIps::Any, - }; - - assert_eq!( - Ipv6Subnet::::new( - // Masked out in AZ Subnet - // vv - "fd00:1122:3344:0000::".parse::().unwrap(), - ), - cfg.az_subnet() - ); - assert_eq!( - Ipv6Subnet::::new( - // Shows up from Rack Subnet - // vv - "fd00:1122:3344:0100::".parse::().unwrap(), - ), - cfg.rack_subnet() - ); - assert_eq!( - Ipv6Subnet::::new( - // 0th Sled Subnet - // vv - "fd00:1122:3344:0100::".parse::().unwrap(), - ), - cfg.sled_subnet(0) - ); - assert_eq!( - Ipv6Subnet::::new( - // 1st Sled Subnet - // vv - "fd00:1122:3344:0101::".parse::().unwrap(), - ), - cfg.sled_subnet(1) - ); - assert_eq!( - Ipv6Subnet::::new( - // Last Sled Subnet - // vv - "fd00:1122:3344:01ff::".parse::().unwrap(), - ), - cfg.sled_subnet(255) - ); - } - - #[test] - fn test_extra_certs() { - // The stock non-Gimlet config has no TLS certificates. - let path = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../smf/sled-agent/non-gimlet/config-rss.toml"); - let cfg = SetupServiceConfig::from_file(&path) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); - assert!(cfg.external_certificates.is_empty()); - - // Now let's create a configuration that does have an adjacent - // certificate and key. - let tempdir = - camino_tempfile::tempdir().expect("creating temporary directory"); - println!("using temp path: {:?}", tempdir); - - // Generate the certificate. 
- let domain = format!( - "{}.sys.{}", - cfg.external_dns_zone_name, - cfg.recovery_silo.silo_name.as_str(), - ); - let cert = rcgen::generate_simple_self_signed(vec![domain.clone()]) - .unwrap_or_else(|error| { - panic!( - "generating certificate for domain {:?}: {}", - domain, error - ) - }); - - // Write the configuration file. - let cfg_path = tempdir.path().join("config-rss.toml"); - let _ = std::fs::copy(&path, &cfg_path) - .with_context(|| { - format!("failed to copy file {:?} to {:?}", &path, &cfg_path) - }) - .unwrap(); - - // Write the certificate. - let cert_bytes = cert - .serialize_pem() - .expect("serializing generated certificate") - .into_bytes(); - let cert_path = tempdir.path().join("initial-tls-cert.pem"); - std::fs::write(&cert_path, &cert_bytes) - .with_context(|| format!("failed to write to {:?}", &cert_path)) - .unwrap(); - - // Write the private key. - let key_path = tempdir.path().join("initial-tls-key.pem"); - let key_bytes = cert.serialize_private_key_pem().into_bytes(); - std::fs::write(&key_path, &key_bytes) - .with_context(|| format!("failed to write to {:?}", &key_path)) - .unwrap(); - - // Now try to load it all. - let read_cfg = SetupServiceConfig::from_file(&cfg_path) - .expect("failed to read generated config with certificate"); - assert_eq!(read_cfg.external_certificates.len(), 1); - let cert = read_cfg.external_certificates.first().unwrap(); - let _ = rcgen::KeyPair::from_pem(&cert.key) - .expect("generated PEM did not parse as KeyPair"); - } -} diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index 0ad8e0ce71..0ec14138fc 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -4,8 +4,6 @@ //! Rack Setup Service -/// Configuration files which automate input to RSS. -pub mod config; mod plan; /// The main implementation of the RSS service. 
pub mod service; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 9493361d19..d23c6715c6 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -9,7 +9,6 @@ use crate::params::{ OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, }; -use crate::rack_setup::config::SetupServiceConfig as Config; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -37,6 +36,7 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_agent_types::rack_init::RackInitializeRequest as Config; use sled_storage::dataset::{DatasetKind, DatasetName, CONFIG_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -1180,12 +1180,12 @@ impl ServicePortBuilder { #[cfg(test)] mod tests { use super::*; - use crate::bootstrap::params::BootstrapAddressDiscovery; - use crate::bootstrap::params::RecoverySiloConfig; use omicron_common::address::IpRange; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; use oxnet::Ipv6Net; + use sled_agent_types::rack_init::BootstrapAddressDiscovery; + use sled_agent_types::rack_init::RecoverySiloConfig; const EXPECTED_RESERVED_ADDRESSES: u16 = 2; const EXPECTED_USABLE_ADDRESSES: u16 = diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index c6d2e73ccd..3d5b90a22d 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -8,12 +8,12 @@ use crate::bootstrap::params::StartSledAgentRequestBody; use crate::bootstrap::{ config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, }; -use crate::rack_setup::config::SetupServiceConfig as Config; -use 
crate::rack_setup::config::SetupServiceConfigV1 as ConfigV1; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::rack_init::back_compat::RackInitializeRequestV1 as ConfigV1; +use sled_agent_types::rack_init::RackInitializeRequest as Config; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 2d7a355440..c8e56ae9f4 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -64,14 +64,11 @@ //! completing execution, and unconditionally calls the "handoff to Nexus" API //! thereafter. -use super::config::SetupServiceConfig as Config; use super::plan::service::SledConfig; use crate::bootstrap::config::BOOTSTRAP_AGENT_HTTP_PORT; use crate::bootstrap::early_networking::{ - EarlyNetworkConfig, EarlyNetworkConfigBody, EarlyNetworkSetup, - EarlyNetworkSetupError, + EarlyNetworkSetup, EarlyNetworkSetupError, }; -use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::{d2n_params, ConvertInto}; @@ -111,6 +108,12 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_agent_types::early_networking::{ + EarlyNetworkConfig, EarlyNetworkConfigBody, +}; +use sled_agent_types::rack_init::{ + BootstrapAddressDiscovery, RackInitializeRequest as Config, +}; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; @@ -1545,7 +1548,7 @@ impl<'a> OmicronZonesConfigGenerator<'a> { #[cfg(test)] mod test { - use super::OmicronZonesConfigGenerator; + use super::{Config, 
OmicronZonesConfigGenerator}; use crate::{ params::OmicronZoneType, rack_setup::plan::service::{Plan as ServicePlan, SledInfo}, @@ -1594,7 +1597,7 @@ mod test { } fn make_test_service_plan() -> ServicePlan { - let rss_config = crate::bootstrap::params::test_config(); + let rss_config = Config::test_config(); let fake_sleds = vec![ make_sled_info( SledUuid::new_v4(), diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index f702e4c67d..ec86066096 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -68,7 +68,7 @@ impl Server { let dropshot_config = dropshot::ConfigDropshot { bind_address: SocketAddr::V6(sled_address), - ..config.dropshot + ..config.dropshot.clone() }; let dropshot_log = log.new(o!("component" => "dropshot (SledAgent)")); let http_server = dropshot::HttpServerStarter::new( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index bbc91eee64..943ff44e06 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -93,7 +93,6 @@ use rand::prelude::SliceRandom; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::SledMode; -use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; use sled_storage::dataset::{ @@ -119,6 +118,7 @@ use illumos_utils::zone::MockZones as Zones; use illumos_utils::zone::Zones; const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED); +pub const SWITCH_ZONE_BASEBOARD_FILE: &str = "/opt/oxide/baseboard.json"; #[derive(thiserror::Error, Debug, slog_error_chain::SlogInlineError)] pub enum Error { @@ -514,9 +514,6 @@ impl crate::smf_helper::Service for SwitchService { fn smf_name(&self) -> String { format!("svc:/oxide/{}", self.service_name()) } - fn should_import(&self) -> bool { - true - } } /// Combines the generic `SwitchZoneConfig` with other locally-determined @@ -1341,19 +1338,33 @@ impl ServiceManager { } fn zone_network_setup_install( - gw_addr: &Ipv6Addr, 
+ gw_addr: Option<&Ipv6Addr>, zone: &InstalledZone, - static_addr: &Ipv6Addr, + static_addrs: &[Ipv6Addr], ) -> Result { let datalink = zone.get_control_vnic_name(); - let gateway = &gw_addr.to_string(); - let static_addr = &static_addr.to_string(); let mut config_builder = PropertyGroupBuilder::new("config"); - config_builder = config_builder - .add_property("datalink", "astring", datalink) - .add_property("gateway", "astring", gateway) - .add_property("static_addr", "astring", static_addr); + config_builder = + config_builder.add_property("datalink", "astring", datalink); + + // The switch zone is the only zone that will sometimes have an + // unknown underlay address at zone boot on the first scrimlet. + if let Some(gateway) = gw_addr { + config_builder = config_builder.add_property( + "gateway", + "astring", + gateway.to_string(), + ); + } + + for s in static_addrs { + config_builder = config_builder.add_property( + "static_addr", + "astring", + &s.to_string(), + ); + } Ok(ServiceBuilder::new("oxide/zone-network-setup") .add_property_group(config_builder) @@ -1514,13 +1525,13 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let listen_addr = underlay_address; + let listen_addr = *underlay_address; let listen_port = &CLICKHOUSE_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - listen_addr, + &[listen_addr], )?; let dns_service = Self::dns_install(info, None, &None).await?; @@ -1567,13 +1578,13 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let listen_addr = underlay_address; + let listen_addr = *underlay_address; let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - listen_addr, + &[listen_addr], )?; let dns_service = Self::dns_install(info, None, &None).await?; @@ -1635,9 +1646,9 @@ impl 
ServiceManager { .to_string(); let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - &crdb_listen_ip, + &[crdb_listen_ip], )?; let dns_service = Self::dns_install(info, None, &None).await?; @@ -1696,13 +1707,13 @@ impl ServiceManager { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; - let listen_addr = &underlay_address; + let listen_addr = *underlay_address; let listen_port = &CRUCIBLE_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - listen_addr, + &[listen_addr], )?; let dataset_name = DatasetName::new( @@ -1755,13 +1766,13 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let listen_addr = &underlay_address; + let listen_addr = *underlay_address; let listen_port = &CRUCIBLE_PANTRY_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - listen_addr, + &[listen_addr], )?; let config = PropertyGroupBuilder::new("config") @@ -1811,9 +1822,9 @@ impl ServiceManager { ); let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - underlay_address, + &[*underlay_address], )?; let oximeter_config = PropertyGroupBuilder::new("config") @@ -1852,11 +1863,10 @@ impl ServiceManager { }; let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - underlay_address, + &[*underlay_address], )?; - // Like Nexus, we need to be reachable externally via // `dns_address` but we don't listen on that address // directly but instead on a VPC private IP. 
OPTE will @@ -1940,9 +1950,9 @@ impl ServiceManager { }; let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - underlay_address, + &[*underlay_address], )?; let is_boundary = matches!( @@ -2033,9 +2043,9 @@ impl ServiceManager { .. }) => { let nw_setup_service = Self::zone_network_setup_install( - gz_address, + Some(gz_address), &installed_zone, - underlay_address, + &[*underlay_address], )?; // Internal DNS zones require a special route through @@ -2117,9 +2127,9 @@ impl ServiceManager { }; let nw_setup_service = Self::zone_network_setup_install( - &info.underlay_address, + Some(&info.underlay_address), &installed_zone, - underlay_address, + &[*underlay_address], )?; // While Nexus will be reachable via `external_ip`, it @@ -2160,6 +2170,7 @@ impl ServiceManager { request_body_max_bytes: 8192 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }, }, dropshot_internal: dropshot::ConfigDropshot { @@ -2170,6 +2181,7 @@ impl ServiceManager { // rack setup. 
request_body_max_bytes: 10 * 1024 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }, internal_dns: nexus_config::InternalDns::FromSubnet { subnet: Ipv6Subnet::::new( @@ -2242,191 +2254,134 @@ impl ServiceManager { })?; return Ok(RunningZone::boot(installed_zone).await?); } - _ => {} - } - - let running_zone = RunningZone::boot(installed_zone).await?; - - for (link, needs_link_local) in - running_zone.links().iter().zip(links_need_link_local) - { - if needs_link_local { - info!( - self.inner.log, - "Ensuring {}/{} exists in zone", - link.name(), - IPV6_LINK_LOCAL_NAME - ); - Zones::ensure_has_link_local_v6_address( - Some(running_zone.name()), - &AddrObject::new(link.name(), IPV6_LINK_LOCAL_NAME) - .unwrap(), - )?; - } - } - - if let Some((bootstrap_name, bootstrap_address)) = - bootstrap_name_and_address.as_ref() - { - info!( - self.inner.log, - "Ensuring bootstrap address {} exists in {} zone", - bootstrap_address.to_string(), - &zone_type_str, - ); - running_zone.ensure_bootstrap_address(*bootstrap_address).await?; - info!( - self.inner.log, - "Forwarding bootstrap traffic via {} to {}", - bootstrap_name, - self.inner.global_zone_bootstrap_link_local_address, - ); - running_zone - .add_bootstrap_route( - BOOTSTRAP_PREFIX, - self.inner.global_zone_bootstrap_link_local_address, - bootstrap_name, - ) - .map_err(|err| Error::ZoneCommand { - intent: "add bootstrap network route".to_string(), - err, - })?; - } - - let addresses = match &request { - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: OmicronZoneConfig { underlay_address, .. }, + ZoneArgs::Switch(SwitchZoneConfigLocal { + zone: SwitchZoneConfig { id, services, addresses }, .. 
- }) => std::slice::from_ref(underlay_address), - ZoneArgs::Switch(req) => &req.zone.addresses, - }; - for addr in addresses { - if *addr == Ipv6Addr::LOCALHOST { - continue; - } - info!( - self.inner.log, - "Ensuring address {} exists", - addr.to_string() - ); - let addr_request = - AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await?; - info!( - self.inner.log, - "Ensuring address {} exists - OK", - addr.to_string() - ); - } + }) => { + let info = self.inner.sled_info.get(); - let maybe_gateway = if let Some(info) = self.inner.sled_info.get() { - // Only consider a route to the sled's underlay address if the - // underlay is up. - let sled_underlay_subnet = - Ipv6Subnet::::new(info.underlay_address); + let gw_addr = match info { + Some(i) => Some(&i.underlay_address), + None => None, + }; - if addresses - .iter() - .any(|ip| sled_underlay_subnet.net().contains(*ip)) - { - // If the underlay is up, provide a route to it through an - // existing address in the Zone on the same subnet. - info!(self.inner.log, "Zone using sled underlay as gateway"); - Some(info.underlay_address) - } else { - // If no such address exists in the sled's subnet, don't route - // to anything. - info!( - self.inner.log, - "Zone not using gateway (even though underlay is up)" - ); - None - } - } else { - // If the underlay doesn't exist, no routing occurs. 
- info!( - self.inner.log, - "Zone not using gateway (underlay is not up)" - ); - None - }; + let nw_setup_service = Self::zone_network_setup_install( + gw_addr, + &installed_zone, + addresses, + )?; - let sidecar_revision = match &self.inner.sidecar_revision { - SidecarRevision::Physical(rev) => rev.to_string(), - SidecarRevision::SoftZone(rev) - | SidecarRevision::SoftPropolis(rev) => format!( - "softnpu_front_{}_rear_{}", - rev.front_port_count, rev.rear_port_count - ), - }; + let sidecar_revision = match &self.inner.sidecar_revision { + SidecarRevision::Physical(rev) => rev.to_string(), + SidecarRevision::SoftZone(rev) + | SidecarRevision::SoftPropolis(rev) => format!( + "softnpu_front_{}_rear_{}", + rev.front_port_count, rev.rear_port_count + ), + }; - if let Some(gateway) = maybe_gateway { - running_zone.add_default_route(gateway).map_err(|err| { - Error::ZoneCommand { intent: "Adding Route".to_string(), err } - })?; - } + // Define all services in the switch zone + let mut mgs_service = ServiceBuilder::new("oxide/mgs"); + let mut wicketd_service = ServiceBuilder::new("oxide/wicketd"); + let mut switch_zone_setup_service = + ServiceBuilder::new("oxide/switch_zone_setup"); + let mut dendrite_service = + ServiceBuilder::new("oxide/dendrite"); + let mut tfport_service = ServiceBuilder::new("oxide/tfport"); + let mut lldpd_service = ServiceBuilder::new("oxide/lldpd"); + let mut pumpkind_service = + ServiceBuilder::new("oxide/pumpkind"); + let mut mgd_service = ServiceBuilder::new("oxide/mgd"); + let mut mg_ddm_service = ServiceBuilder::new("oxide/mg-ddm"); + let mut uplink_service = ServiceBuilder::new("oxide/uplink"); + + let mut switch_zone_setup_config = + PropertyGroupBuilder::new("config").add_property( + "gz_local_link_addr", + "astring", + &format!( + "{}", + self.inner.global_zone_bootstrap_link_local_address + ), + ); - match &request { - ZoneArgs::Omicron(zone_config) => { - match &zone_config.zone.zone_type { - OmicronZoneType::BoundaryNtp { .. 
} - | OmicronZoneType::Clickhouse { .. } - | OmicronZoneType::ClickhouseKeeper { .. } - | OmicronZoneType::CockroachDb { .. } - | OmicronZoneType::Crucible { .. } - | OmicronZoneType::CruciblePantry { .. } - | OmicronZoneType::ExternalDns { .. } - | OmicronZoneType::InternalDns { .. } - | OmicronZoneType::InternalNtp { .. } - | OmicronZoneType::Nexus { .. } - | OmicronZoneType::Oximeter { .. } => { - panic!( - "{} is a service which exists as part of a \ - self-assembling zone", - &zone_config.zone.zone_type.zone_type_str(), - ) + for (link, needs_link_local) in + installed_zone.links().iter().zip(links_need_link_local) + { + if needs_link_local { + switch_zone_setup_config = switch_zone_setup_config + .add_property( + "link_local_links", + "astring", + link.name(), + ); } - }; - } - ZoneArgs::Switch(request) => { - for service in &request.zone.services { - // TODO: Related to - // https://github.com/oxidecomputer/omicron/pull/1124 , should we - // avoid importing this manifest? - debug!(self.inner.log, "importing manifest"); + } - let smfh = SmfHelper::new(&running_zone, service); - smfh.import_manifest()?; + if let Some((bootstrap_name, bootstrap_address)) = + bootstrap_name_and_address.as_ref() + { + switch_zone_setup_config = switch_zone_setup_config + .add_property( + "link_local_links", + "astring", + bootstrap_name, + ) + .add_property( + "bootstrap_addr", + "astring", + &format!("{bootstrap_address}"), + ) + .add_property( + "bootstrap_vnic", + "astring", + bootstrap_name, + ); + } + // Set properties for each service + for service in services { match service { SwitchService::ManagementGatewayService => { info!(self.inner.log, "Setting up MGS service"); - smfh.setprop("config/id", request.zone.id)?; - - // Always tell MGS to listen on localhost so wicketd - // can contact it even before we have an underlay - // network. 
- smfh.addpropvalue( - "config/address", - &format!("[::1]:{MGS_PORT}"), - )?; + let mut mgs_config = + PropertyGroupBuilder::new("config") + // Always tell MGS to listen on localhost so wicketd + // can contact it even before we have an underlay + // network. + .add_property( + "address", + "astring", + &format!("[::1]:{MGS_PORT}"), + ) + .add_property( + "id", + "astring", + &id.to_string(), + ); + + if let Some(i) = info { + mgs_config = mgs_config.add_property( + "rack_id", + "astring", + &i.rack_id.to_string(), + ); + } - if let Some(address) = request.zone.addresses.get(0) - { + if let Some(address) = addresses.get(0) { // Don't use localhost twice if *address != Ipv6Addr::LOCALHOST { - smfh.addpropvalue( - "config/address", + mgs_config = mgs_config.add_property( + "address", + "astring", &format!("[{address}]:{MGS_PORT}"), - )?; + ); } } - - if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_id", info.rack_id)?; - } - - smfh.refresh()?; + mgs_service = mgs_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(mgs_config), + ); } SwitchService::SpSim => { info!( @@ -2436,12 +2391,6 @@ impl ServiceManager { } SwitchService::Wicketd { baseboard } => { info!(self.inner.log, "Setting up wicketd service"); - - smfh.setprop( - "config/address", - &format!("[::1]:{WICKETD_PORT}"), - )?; - // If we're launching the switch zone, we'll have a // bootstrap_address based on our call to // `self.bootstrap_address_needed` (which always @@ -2462,103 +2411,111 @@ impl ServiceManager { .to_string(), }); }; - smfh.setprop( - "config/artifact-address", - &format!( - "[{bootstrap_address}]:{BOOTSTRAP_ARTIFACT_PORT}" - ), - )?; - smfh.setprop( - "config/mgs-address", - &format!("[::1]:{MGS_PORT}"), - )?; - - // We intentionally bind `nexus-proxy-address` to - // `::` so wicketd will serve this on all - // interfaces, particularly the tech port - // interfaces, allowing external clients to connect - // to this Nexus proxy. 
- smfh.setprop( - "config/nexus-proxy-address", - &format!("[::]:{WICKETD_NEXUS_PROXY_PORT}"), - )?; - if let Some(underlay_address) = self - .inner - .sled_info - .get() - .map(|info| info.underlay_address) - { + let mut wicketd_config = + PropertyGroupBuilder::new("config") + .add_property( + "address", + "astring", + &format!("[::1]:{WICKETD_PORT}"), + ) + .add_property( + "artifact-address", + "astring", + &format!("[{bootstrap_address}]:{BOOTSTRAP_ARTIFACT_PORT}"), + ) + .add_property( + "baseboard-file", + "astring", + SWITCH_ZONE_BASEBOARD_FILE, + ) + .add_property( + "mgs-address", + "astring", + &format!("[::1]:{MGS_PORT}"), + ) + // We intentionally bind `nexus-proxy-address` to + // `::` so wicketd will serve this on all + // interfaces, particularly the tech port + // interfaces, allowing external clients to connect + // to this Nexus proxy. + .add_property( + "nexus-proxy-address", + "astring", + &format!("[::]:{WICKETD_NEXUS_PROXY_PORT}"), + ); + + if let Some(i) = info { let rack_subnet = Ipv6Subnet::::new( - underlay_address, + i.underlay_address, ); - smfh.setprop( - "config/rack-subnet", + + wicketd_config = wicketd_config.add_property( + "rack-subnet", + "astring", &rack_subnet.net().addr().to_string(), - )?; + ); } - let serialized_baseboard = - serde_json::to_string_pretty(&baseboard)?; - let serialized_baseboard_path = - Utf8PathBuf::from(format!( - "{}/opt/oxide/baseboard.json", - running_zone.root() - )); - tokio::fs::write( - &serialized_baseboard_path, - &serialized_baseboard, - ) - .await - .map_err(|err| { - Error::io_path(&serialized_baseboard_path, err) - })?; - smfh.setprop( - "config/baseboard-file", - String::from("/opt/oxide/baseboard.json"), - )?; + wicketd_service = wicketd_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(wicketd_config), + ); - smfh.refresh()?; + let baseboard_info = + serde_json::to_string(&baseboard)?; + + switch_zone_setup_config = + 
switch_zone_setup_config.clone().add_property( + "baseboard_info", + "astring", + &baseboard_info, + ); } SwitchService::Dendrite { asic } => { info!( self.inner.log, "Setting up dendrite service" ); - - if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_id", info.rack_id)?; - smfh.setprop( - "config/sled_id", - info.config.sled_id, - )?; - } else { - info!( - self.inner.log, - "no rack_id/sled_id available yet" - ); + let mut dendrite_config = + PropertyGroupBuilder::new("config"); + + if let Some(i) = info { + dendrite_config = dendrite_config + .add_property( + "sled_id", + "astring", + &i.config.sled_id.to_string(), + ) + .add_property( + "rack_id", + "astring", + &i.rack_id.to_string(), + ); } - smfh.delpropvalue("config/address", "*")?; - smfh.delpropvalue("config/dns_server", "*")?; - for address in &request.zone.addresses { - smfh.addpropvalue( - "config/address", + for address in addresses { + dendrite_config = dendrite_config.add_property( + "address", + "astring", &format!("[{}]:{}", address, DENDRITE_PORT), - )?; + ); if *address != Ipv6Addr::LOCALHOST { let az_prefix = Ipv6Subnet::::new(*address); for addr in Resolver::servers_from_subnet(az_prefix) { - smfh.addpropvalue( - "config/dns_server", - &format!("{addr}"), - )?; + dendrite_config = dendrite_config + .add_property( + "dns_server", + "astring", + &format!("{addr}"), + ); } } } + match asic { DendriteAsic::TofinoAsic => { // There should be exactly one device_name @@ -2566,43 +2523,43 @@ impl ServiceManager { // for the tofino ASIC. 
let dev_cnt = device_names.len(); if dev_cnt == 1 { - smfh.setprop( - "config/dev_path", - device_names[0].clone(), - )?; + dendrite_config = dendrite_config + .add_property( + "dev_path", + "astring", + &device_names[0].clone(), + ); } else { return Err(Error::SledLocalZone( anyhow::anyhow!( "{dev_cnt} devices needed \ - for tofino asic" + for tofino asic" ), )); } - smfh.setprop( - "config/port_config", - "/opt/oxide/dendrite/misc/sidecar_config.toml", - )?; - smfh.setprop("config/board_rev", &sidecar_revision)?; + dendrite_config = dendrite_config + .add_property( + "port_config", + "astring", + "/opt/oxide/dendrite/misc/sidecar_config.toml", + ) + .add_property("board_rev", "astring", &sidecar_revision); + } + DendriteAsic::TofinoStub => { + dendrite_config = dendrite_config + .add_property( + "port_config", + "astring", + "/opt/oxide/dendrite/misc/model_config.toml", + ); } - DendriteAsic::TofinoStub => smfh.setprop( - "config/port_config", - "/opt/oxide/dendrite/misc/model_config.toml", - )?, asic @ (DendriteAsic::SoftNpuZone | DendriteAsic::SoftNpuPropolisDevice) => { - if asic == &DendriteAsic::SoftNpuZone { - smfh.setprop("config/mgmt", "uds")?; - smfh.setprop( - "config/uds_path", - "/opt/softnpu/stuff", - )?; - } - if asic == &DendriteAsic::SoftNpuPropolisDevice { - smfh.setprop("config/mgmt", "uart")?; - } let s = match self.inner.sidecar_revision { SidecarRevision::SoftZone(ref s) => s, - SidecarRevision::SoftPropolis(ref s) => s, + SidecarRevision::SoftPropolis( + ref s, + ) => s, _ => { return Err(Error::SidecarRevision( anyhow::anyhow!( @@ -2612,24 +2569,66 @@ impl ServiceManager { )) } }; - smfh.setprop( - "config/front_ports", - &s.front_port_count.to_string(), - )?; - smfh.setprop( - "config/rear_ports", - &s.rear_port_count.to_string(), - )?; - smfh.setprop( - "config/port_config", - "/opt/oxide/dendrite/misc/softnpu_single_sled_config.toml", - )? 
+ + dendrite_config = dendrite_config + .add_property( + "front_ports", + "astring", + &s.front_port_count.to_string(), + ) + .add_property( + "rear_ports", + "astring", + &s.rear_port_count.to_string(), + ) + .add_property( + "port_config", + "astring", + "/opt/oxide/dendrite/misc/softnpu_single_sled_config.toml", + ); + + if asic == &DendriteAsic::SoftNpuZone { + dendrite_config = dendrite_config + .add_property( + "mgmt", "astring", "uds", + ) + .add_property( + "uds_path", + "astring", + "/opt/softnpu/stuff", + ); + } + + if asic + == &DendriteAsic::SoftNpuPropolisDevice + { + dendrite_config = dendrite_config + .add_property( + "mgmt", "astring", "uart", + ); + } } - }; - smfh.refresh()?; + } + + dendrite_service = dendrite_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(dendrite_config), + ); } SwitchService::Tfport { pkt_source, asic } => { info!(self.inner.log, "Setting up tfport service"); + let mut tfport_config = + PropertyGroupBuilder::new("config") + .add_property( + "host", + "astring", + &format!("[{}]", Ipv6Addr::LOCALHOST), + ) + .add_property( + "port", + "astring", + &format!("{}", DENDRITE_PORT), + ); let is_gimlet = is_gimlet().map_err(|e| { Error::Underlay( @@ -2660,62 +2659,80 @@ impl ServiceManager { // Each `prefix` is an `Ipv6Subnet` // including a netmask. Stringify just the // network address, without the mask. 
- smfh.setprop( - format!("config/techport{i}_prefix"), - prefix.net().addr(), - )?; + tfport_config = tfport_config.add_property( + &format!("techport{i}_prefix"), + "astring", + prefix.net().addr().to_string(), + ) } - smfh.setprop("config/pkt_source", pkt_source)?; - } + }; + + if is_gimlet + || asic == &DendriteAsic::SoftNpuPropolisDevice + { + tfport_config = tfport_config.add_property( + "pkt_source", + "astring", + pkt_source, + ); + }; + if asic == &DendriteAsic::SoftNpuZone { - smfh.setprop("config/flags", "--sync-only")?; - } - if asic == &DendriteAsic::SoftNpuPropolisDevice { - smfh.setprop("config/pkt_source", pkt_source)?; + tfport_config = tfport_config.add_property( + "flags", + "astring", + "--sync-only", + ); } - smfh.setprop( - "config/host", - &format!("[{}]", Ipv6Addr::LOCALHOST), - )?; - smfh.setprop( - "config/port", - &format!("{}", DENDRITE_PORT), - )?; - smfh.refresh()?; + tfport_service = tfport_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(tfport_config), + ); } SwitchService::Lldpd { baseboard } => { info!(self.inner.log, "Setting up lldpd service"); + let mut lldpd_config = + PropertyGroupBuilder::new("config") + .add_property( + "board_rev", + "astring", + &sidecar_revision, + ); + match baseboard { Baseboard::Gimlet { identifier, model, .. } | Baseboard::Pc { identifier, model, .. 
} => { - smfh.setprop( - "config/scrimlet_id", - identifier, - )?; - smfh.setprop( - "config/scrimlet_model", - model, - )?; + lldpd_config = lldpd_config + .add_property( + "scrimlet_id", + "astring", + identifier, + ) + .add_property( + "scrimlet_model", + "astring", + model, + ); } Baseboard::Unknown => {} } - smfh.setprop( - "config/board_rev", - &sidecar_revision, - )?; - smfh.delpropvalue("config/address", "*")?; - for address in &request.zone.addresses { - smfh.addpropvalue( - "config/address", + for address in addresses { + lldpd_config = lldpd_config.add_property( + "address", + "astring", &format!("[{}]:{}", address, LLDP_PORT), - )?; + ); } - smfh.refresh()?; + + lldpd_service = lldpd_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(lldpd_config), + ); } SwitchService::Pumpkind { asic } => { // The pumpkin daemon is only needed when running on @@ -2725,64 +2742,114 @@ impl ServiceManager { self.inner.log, "Setting up pumpkind service" ); - smfh.setprop("config/mode", "switch")?; - smfh.refresh()?; + let pumpkind_config = + PropertyGroupBuilder::new("config") + .add_property( + "mode", "astring", "switch", + ); + + pumpkind_service = pumpkind_service + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group( + pumpkind_config, + ), + ); + } else { + pumpkind_service = pumpkind_service + .add_instance( + ServiceInstanceBuilder::new("default") + .disable(), + ); } } SwitchService::Uplink => { // Nothing to do here - this service is special and // configured in // `ensure_switch_zone_uplinks_configured` + uplink_service = uplink_service.add_instance( + ServiceInstanceBuilder::new("default"), + ); } SwitchService::Mgd => { info!(self.inner.log, "Setting up mgd service"); - smfh.delpropvalue("config/dns_servers", "*")?; - if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_uuid", info.rack_id)?; - smfh.setprop( - "config/sled_uuid", - info.config.sled_id, - )?; + + let mut 
mgd_config = + PropertyGroupBuilder::new("config"); + + if let Some(i) = info { + mgd_config = mgd_config + .add_property( + "sled_uuid", + "astring", + &i.config.sled_id.to_string(), + ) + .add_property( + "rack_uuid", + "astring", + &i.rack_id.to_string(), + ); } - for address in &request.zone.addresses { + + for address in addresses { if *address != Ipv6Addr::LOCALHOST { let az_prefix = Ipv6Subnet::::new(*address); for addr in Resolver::servers_from_subnet(az_prefix) { - smfh.addpropvalue( - "config/dns_servers", + mgd_config = mgd_config.add_property( + "dns_servers", + "astring", &format!("{addr}"), - )?; + ); } break; } } - smfh.refresh()?; + + mgd_service = mgd_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(mgd_config), + ); } SwitchService::MgDdm { mode } => { info!(self.inner.log, "Setting up mg-ddm service"); - smfh.setprop("config/mode", &mode)?; - if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_uuid", info.rack_id)?; - smfh.setprop( - "config/sled_uuid", - info.config.sled_id, - )?; + + let mut mg_ddm_config = + PropertyGroupBuilder::new("config") + .add_property("mode", "astring", mode) + .add_property( + "dendrite", "astring", "true", + ); + + if let Some(i) = info { + mg_ddm_config = mg_ddm_config + .add_property( + "sled_uuid", + "astring", + &i.config.sled_id.to_string(), + ) + .add_property( + "rack_uuid", + "astring", + &i.rack_id.to_string(), + ); } - smfh.delpropvalue("config/dns_servers", "*")?; - for address in &request.zone.addresses { + + for address in addresses { if *address != Ipv6Addr::LOCALHOST { let az_prefix = Ipv6Subnet::::new(*address); for addr in Resolver::servers_from_subnet(az_prefix) { - smfh.addpropvalue( - "config/dns_servers", - &format!("{addr}"), - )?; + mg_ddm_config = mg_ddm_config + .add_property( + "dns_servers", + "astring", + &format!("{addr}"), + ); } break; } @@ -2831,45 +2898,62 @@ impl ServiceManager { .collect() }; - smfh.setprop( - 
"config/interfaces", - // `svccfg setprop` requires a list of values to - // be enclosed in `()`, and each string value to - // be enclosed in `""`. Note that we do _not_ - // need to escape the parentheses, since this is - // not passed through a shell, but directly to - // `exec(2)` in the zone. - format!( - "({})", - maghemite_interfaces - .iter() - .map(|interface| format!( - r#""{}""#, - interface - )) - .join(" "), - ), - )?; + for i in maghemite_interfaces { + mg_ddm_config = mg_ddm_config.add_property( + "interfaces", + "astring", + &i.to_string(), + ); + } if is_gimlet { - // Ddm for a scrimlet needs to be configured to - // talk to dendrite - smfh.setprop("config/dpd_host", "[::1]")?; - smfh.setprop("config/dpd_port", DENDRITE_PORT)?; + mg_ddm_config = mg_ddm_config + .add_property( + "dpd_host", "astring", "[::1]", + ) + .add_property( + "dpd_port", + "astring", + &DENDRITE_PORT.to_string(), + ) } - smfh.setprop("config/dendrite", "true")?; - smfh.refresh()?; + mg_ddm_service = mg_ddm_service.add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(mg_ddm_config), + ); } } - - debug!(self.inner.log, "enabling service"); - smfh.enable()?; } - } - }; - Ok(running_zone) + switch_zone_setup_service = switch_zone_setup_service + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(switch_zone_setup_config), + ); + + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_dns_client_service) + .add_service(mgs_service) + .add_service(wicketd_service) + .add_service(switch_zone_setup_service) + .add_service(dendrite_service) + .add_service(tfport_service) + .add_service(lldpd_service) + .add_service(pumpkind_service) + .add_service(mgd_service) + .add_service(mg_ddm_service) + .add_service(uplink_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup Switch zone profile", err) + })?; + return 
Ok(RunningZone::boot(installed_zone).await?); + } + } } // Ensures that a single Omicron zone is running. @@ -3737,6 +3821,7 @@ impl ServiceManager { } }; + info!(self.inner.log, "Setting up uplinkd service"); let smfh = SmfHelper::new(&zone, &SwitchService::Uplink); // We want to delete all the properties in the `uplinks` group, but we @@ -3747,6 +3832,7 @@ impl ServiceManager { for port_config in &our_ports { for addr in &port_config.addrs { + info!(self.inner.log, "configuring port: {port_config:?}"); smfh.addpropvalue_type( &format!("uplinks/{}_0", port_config.port,), &addr.to_string(), @@ -3865,13 +3951,33 @@ impl ServiceManager { ); } + // When the request addresses have changed this means the underlay is + // available now as well. if let Some(info) = self.inner.sled_info.get() { - zone.add_default_route(info.underlay_address).map_err( + info!( + self.inner.log, + "Ensuring there is a default route"; + "gateway" => ?info.underlay_address, + ); + match zone.add_default_route(info.underlay_address).map_err( |err| Error::ZoneCommand { intent: "Adding Route".to_string(), err, }, - )?; + ) { + Ok(_) => (), + Err(e) => { + if e.to_string().contains("entry exists") { + info!( + self.inner.log, + "Default route already exists"; + "gateway" => ?info.underlay_address, + ) + } else { + return Err(e); + } + } + }; } for service in &request.services { @@ -3879,28 +3985,37 @@ impl ServiceManager { match service { SwitchService::ManagementGatewayService => { + info!(self.inner.log, "configuring MGS service"); // Remove any existing `config/address` values // without deleting the property itself. - smfh.delpropvalue("config/address", "*")?; + smfh.delpropvalue_default_instance( + "config/address", + "*", + )?; // Restore the localhost address that we always add // when setting up MGS. - smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/address", &format!("[::1]:{MGS_PORT}"), + "astring", )?; // Add the underlay address. 
- smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/address", &format!("[{address}]:{MGS_PORT}"), + "astring", )?; // It should be impossible for the `sled_info` not // to be set here, as the underlay is set at the // same time. if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_id", info.rack_id)?; + smfh.setprop_default_instance( + "config/rack_id", + info.rack_id, + )?; } else { error!( self.inner.log, @@ -3912,6 +4027,10 @@ impl ServiceManager { } smfh.refresh()?; + info!( + self.inner.log, + "refreshed MGS service with new configuration" + ) } SwitchService::Dendrite { .. } => { info!( @@ -3919,8 +4038,11 @@ impl ServiceManager { "configuring dendrite service" ); if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_id", info.rack_id)?; - smfh.setprop( + smfh.setprop_default_instance( + "config/rack_id", + info.rack_id, + )?; + smfh.setprop_default_instance( "config/sled_id", info.config.sled_id, )?; @@ -3930,12 +4052,19 @@ impl ServiceManager { "no rack_id/sled_id available yet" ); } - smfh.delpropvalue("config/address", "*")?; - smfh.delpropvalue("config/dns_server", "*")?; + smfh.delpropvalue_default_instance( + "config/address", + "*", + )?; + smfh.delpropvalue_default_instance( + "config/dns_server", + "*", + )?; for address in &request.addresses { - smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/address", &format!("[{}]:{}", address, DENDRITE_PORT), + "astring", )?; if *address != Ipv6Addr::LOCALHOST { let az_prefix = @@ -3943,14 +4072,16 @@ impl ServiceManager { for addr in Resolver::servers_from_subnet(az_prefix) { - smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/dns_server", &format!("{addr}"), + "astring", )?; } } } smfh.refresh()?; + info!(self.inner.log, "refreshed dendrite service with new configuration") } SwitchService::Wicketd { .. 
} => { if let Some(&address) = first_address { @@ -3962,12 +4093,13 @@ impl ServiceManager { "rack_subnet" => %rack_subnet.net().addr(), ); - smfh.setprop( + smfh.setprop_default_instance( "config/rack-subnet", &rack_subnet.net().addr().to_string(), )?; smfh.refresh()?; + info!(self.inner.log, "refreshed wicketd service with new configuration") } else { error!( self.inner.log, @@ -3977,14 +4109,19 @@ impl ServiceManager { } SwitchService::Lldpd { .. } => { info!(self.inner.log, "configuring lldp service"); - smfh.delpropvalue("config/address", "*")?; + smfh.delpropvalue_default_instance( + "config/address", + "*", + )?; for address in &request.addresses { - smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/address", &format!("[{}]:{}", address, LLDP_PORT), + "astring", )?; } smfh.refresh()?; + info!(self.inner.log, "refreshed lldpd service with new configuration") } SwitchService::Tfport { .. } => { // Since tfport and dpd communicate using localhost, @@ -4005,10 +4142,16 @@ impl ServiceManager { } SwitchService::Mgd => { info!(self.inner.log, "configuring mgd service"); - smfh.delpropvalue("config/dns_servers", "*")?; + smfh.delpropvalue_default_instance( + "config/dns_servers", + "*", + )?; if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_uuid", info.rack_id)?; - smfh.setprop( + smfh.setprop_default_instance( + "config/rack_uuid", + info.rack_id, + )?; + smfh.setprop_default_instance( "config/sled_uuid", info.config.sled_id, )?; @@ -4020,28 +4163,46 @@ impl ServiceManager { for addr in Resolver::servers_from_subnet(az_prefix) { - smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/dns_servers", &format!("{addr}"), + "astring", )?; } break; } } smfh.refresh()?; + info!( + self.inner.log, + "refreshed mgd service with new configuration" + ) } SwitchService::MgDdm { mode } => { info!(self.inner.log, "configuring mg-ddm service"); - smfh.delpropvalue("config/mode", "*")?; - 
smfh.addpropvalue("config/mode", &mode)?; + smfh.delpropvalue_default_instance( + "config/mode", + "*", + )?; + smfh.addpropvalue_type_default_instance( + "config/mode", + &mode, + "astring", + )?; if let Some(info) = self.inner.sled_info.get() { - smfh.setprop("config/rack_uuid", info.rack_id)?; - smfh.setprop( + smfh.setprop_default_instance( + "config/rack_uuid", + info.rack_id, + )?; + smfh.setprop_default_instance( "config/sled_uuid", info.config.sled_id, )?; } - smfh.delpropvalue("config/dns_servers", "*")?; + smfh.delpropvalue_default_instance( + "config/dns_servers", + "*", + )?; for address in &request.addresses { if *address != Ipv6Addr::LOCALHOST { let az_prefix = @@ -4049,15 +4210,17 @@ impl ServiceManager { for addr in Resolver::servers_from_subnet(az_prefix) { - smfh.addpropvalue( + smfh.addpropvalue_type_default_instance( "config/dns_servers", &format!("{addr}"), + "astring", )?; } break; } } smfh.refresh()?; + info!(self.inner.log, "refreshed mg-ddm service with new configuration") } } } @@ -4106,6 +4269,7 @@ impl ServiceManager { let zone_request = SwitchZoneConfigLocal { root, zone: request.clone() }; let zone_args = ZoneArgs::Switch(&zone_request); + info!(self.inner.log, "Starting switch zone",); let zone = self .initialize_zone(zone_args, filesystems, data_links, None) .await?; diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 78d48be0ff..399ec334f4 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -4,7 +4,6 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API -use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, @@ -30,6 +29,7 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_storage::resources::DisksManagementResult; use std::sync::Arc; use uuid::Uuid; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 9cb146531b..f23b14c377 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -10,9 +10,6 @@ use super::disk::SimDisk; use super::instance::SimInstance; use super::storage::CrucibleData; use super::storage::Storage; -use crate::bootstrap::early_networking::{ - EarlyNetworkConfig, EarlyNetworkConfigBody, -}; use crate::nexus::NexusClient; use crate::params::{ DiskStateRequested, InstanceExternalIpBody, InstanceHardware, @@ -47,6 +44,9 @@ use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; use propolis_mock_server::Context as PropolisContext; +use sled_agent_types::early_networking::{ + EarlyNetworkConfig, EarlyNetworkConfigBody, +}; use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 5077120fdd..0d534b9c4e 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -975,6 +975,7 @@ impl PantryServer { // - bulk writes into disks request_body_max_bytes: 8192 * 1024, default_handler_task_mode: HandlerTaskMode::Detached, + log_headers: vec![], }, super::http_entrypoints_pantry::api(), pantry.clone(), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 
66e457b181..6b212c96ce 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -6,9 +6,7 @@ use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; -use crate::bootstrap::early_networking::{ - EarlyNetworkConfig, EarlyNetworkSetupError, -}; +use crate::bootstrap::early_networking::EarlyNetworkSetupError; use crate::bootstrap::params::{BaseboardId, StartSledAgentRequest}; use crate::config::Config; use crate::instance_manager::InstanceManager; @@ -64,6 +62,7 @@ use omicron_common::backoff::{ use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; use oximeter::types::ProducerRegistry; +use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; diff --git a/sled-agent/src/smf_helper.rs b/sled-agent/src/smf_helper.rs index 837aa59157..230f146323 100644 --- a/sled-agent/src/smf_helper.rs +++ b/sled-agent/src/smf_helper.rs @@ -17,53 +17,27 @@ pub enum Error { pub trait Service { fn service_name(&self) -> String; fn smf_name(&self) -> String; - fn should_import(&self) -> bool; } pub struct SmfHelper<'t> { running_zone: &'t RunningZone, - service_name: String, smf_name: String, default_smf_name: String, - import: bool, } impl<'t> SmfHelper<'t> { pub fn new(running_zone: &'t RunningZone, service: &impl Service) -> Self { - let service_name = service.service_name(); let smf_name = service.smf_name(); - let import = service.should_import(); let default_smf_name = format!("{}:default", smf_name); - SmfHelper { - running_zone, - service_name, - smf_name, - default_smf_name, - import, - } + SmfHelper { running_zone, smf_name, default_smf_name } } - pub fn import_manifest(&self) -> Result<(), Error> { - if self.import { - self.running_zone - .run_cmd(&[ - illumos_utils::zone::SVCCFG, - "import", - &format!( - 
"/var/svc/manifest/site/{}/manifest.xml", - self.service_name - ), - ]) - .map_err(|err| Error::ZoneCommand { - intent: "importing manifest".to_string(), - err, - })?; - } - Ok(()) - } - - pub fn setprop(&self, prop: P, val: V) -> Result<(), Error> + pub fn setprop_default_instance( + &self, + prop: P, + val: V, + ) -> Result<(), Error> where P: ToString, V: ToString, @@ -72,7 +46,7 @@ impl<'t> SmfHelper<'t> { .run_cmd(&[ illumos_utils::zone::SVCCFG, "-s", - &self.smf_name, + &self.default_smf_name, "setprop", &format!("{}={}", prop.to_string(), val.to_string()), ]) @@ -111,18 +85,25 @@ impl<'t> SmfHelper<'t> { Ok(()) } - pub fn addpropvalue(&self, prop: P, val: V) -> Result<(), Error> + pub fn addpropvalue_type_default_instance( + &self, + prop: P, + val: V, + valtype: T, + ) -> Result<(), Error> where P: ToString, V: ToString, + T: ToString, { self.running_zone .run_cmd(&[ illumos_utils::zone::SVCCFG, "-s", - &self.smf_name, + &self.default_smf_name, "addpropvalue", &prop.to_string(), + &format!("{}:", valtype.to_string()), &val.to_string(), ]) .map_err(|err| Error::ZoneCommand { @@ -183,16 +164,21 @@ impl<'t> SmfHelper<'t> { Ok(()) } - pub fn delpropvalue(&self, prop: P, val: V) -> Result<(), Error> + pub fn delpropvalue_default_instance( + &self, + prop: P, + val: V, + ) -> Result<(), Error> where P: ToString, V: ToString, { - self.running_zone + match self + .running_zone .run_cmd(&[ illumos_utils::zone::SVCCFG, "-s", - &self.smf_name, + &self.default_smf_name, "delpropvalue", &prop.to_string(), &val.to_string(), @@ -200,7 +186,17 @@ impl<'t> SmfHelper<'t> { .map_err(|err| Error::ZoneCommand { intent: format!("del {} smf property value", prop.to_string()), err, - })?; + }) { + Ok(_) => (), + Err(e) => { + // If a property already doesn't exist we don't need to + // return an error + if !e.to_string().contains("No such property") { + return Err(e); + } + } + }; + Ok(()) } @@ -221,19 +217,4 @@ impl<'t> SmfHelper<'t> { })?; Ok(()) } - - pub fn enable(&self) -> 
Result<(), Error> { - self.running_zone - .run_cmd(&[ - illumos_utils::zone::SVCADM, - "enable", - "-t", - &self.default_smf_name, - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!("Enable {} service", self.default_smf_name), - err, - })?; - Ok(()) - } } diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 13a1ec7623..9193a855b0 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -5,14 +5,13 @@ //! Management of per-sled updates use crate::nexus::NexusClient; +use bootstrap_agent_api::Component; use camino::{Utf8Path, Utf8PathBuf}; use camino_tempfile::NamedUtf8TempFile; use futures::{TryFutureExt, TryStreamExt}; -use omicron_common::api::external::SemverVersion; use omicron_common::api::internal::nexus::{ KnownArtifactKind, UpdateArtifactId, }; -use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::io::Read; use tokio::io::AsyncWriteExt; @@ -68,12 +67,6 @@ impl Default for ConfigUpdates { } } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct Component { - pub name: String, - pub version: SemverVersion, -} - // Helper functions for returning errors fn version_malformed_err(path: &Utf8Path, key: &str) -> Error { Error::VersionMalformed { @@ -261,7 +254,7 @@ mod test { use crate::fakes::nexus::FakeNexusServer; use flate2::write::GzEncoder; use nexus_client::Client as NexusClient; - use omicron_common::api::external::Error; + use omicron_common::api::external::{Error, SemverVersion}; use omicron_common::api::internal::nexus::UpdateArtifactId; use omicron_test_utils::dev::test_setup_log; use std::io::Write; diff --git a/sled-agent/tests/integration_tests/commands.rs b/sled-agent/tests/integration_tests/commands.rs index 132c3d78e4..26c82e488e 100644 --- a/sled-agent/tests/integration_tests/commands.rs +++ b/sled-agent/tests/integration_tests/commands.rs @@ -79,26 +79,3 @@ fn test_sled_agent_openapi_sled() { // that the changes match your expectations. 
assert_contents("../openapi/sled-agent.json", &stdout_text); } - -#[test] -fn test_bootstrap_agent_openapi_sled() { - let exec = Exec::cmd(path_to_sled_agent()).arg("openapi").arg("bootstrap"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - assert_contents( - "tests/output/cmd-bootstrap-agent-openapi-sled-stderr", - &stderr_text, - ); - - let spec: OpenAPI = serde_json::from_str(&stdout_text) - .expect("stdout was not valid OpenAPI"); - - // Check for lint errors. - let errors = openapi_lint::validate(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - // Confirm that the output hasn't changed. It's expected that we'll change - // this file as the API evolves, but pay attention to the diffs to ensure - // that the changes match your expectations. - assert_contents("../openapi/bootstrap-agent.json", &stdout_text); -} diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 28fc0fd010..6fa91e0e4a 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -15,10 +15,10 @@ use omicron_common::api::{ RackNetworkConfig, RouteConfig, }, }; -use omicron_sled_agent::bootstrap::early_networking::{ +use omicron_test_utils::dev::test_setup_log; +use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; -use omicron_test_utils::dev::test_setup_log; const BLOB_PATH: &str = "tests/data/early_network_blobs.txt"; diff --git a/sled-agent/tests/output/cmd-bootstrap-agent-openapi-sled-stderr b/sled-agent/tests/output/cmd-bootstrap-agent-openapi-sled-stderr deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml new file mode 100644 index 0000000000..57881a37d1 --- /dev/null +++ b/sled-agent/types/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = 
"sled-agent-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +bootstore.workspace = true +camino.workspace = true +nexus-client.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +oxnet.workspace = true +schemars.workspace = true +serde.workspace = true +serde_json.workspace = true +sled-hardware-types.workspace = true +slog.workspace = true +thiserror.workspace = true +toml.workspace = true + +[dev-dependencies] +camino-tempfile.workspace = true +omicron-test-utils.workspace = true +rcgen.workspace = true diff --git a/sled-agent/types/src/early_networking.rs b/sled-agent/types/src/early_networking.rs new file mode 100644 index 0000000000..dc93aa1300 --- /dev/null +++ b/sled-agent/types/src/early_networking.rs @@ -0,0 +1,606 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for network setup required to bring up the control plane. + +use std::str::FromStr; + +use bootstore::schemes::v0 as bootstore; +use omicron_common::api::internal::shared::RackNetworkConfig; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use slog::{warn, Logger}; + +/// Network configuration required to bring up the control plane +/// +/// The fields in this structure are those from +/// [`crate::rack_init::RackInitializeRequest`] necessary for use beyond RSS. +/// This is just for the initial rack configuration and cold boot purposes. +/// Updates come from Nexus. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct EarlyNetworkConfig { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. 
+ pub generation: u64, + + // Which version of the data structure do we have. This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: EarlyNetworkConfigBody, +} + +impl FromStr for EarlyNetworkConfig { + type Err = String; + + fn from_str(value: &str) -> Result { + #[derive(Deserialize)] + struct ShadowConfig { + generation: u64, + schema_version: u32, + body: EarlyNetworkConfigBody, + } + + let v2_err = match serde_json::from_str::(&value) { + Ok(cfg) => { + return Ok(EarlyNetworkConfig { + generation: cfg.generation, + schema_version: cfg.schema_version, + body: cfg.body, + }) + } + Err(e) => format!("unable to parse EarlyNetworkConfig: {e:?}"), + }; + // If we fail to parse the config as any known version, we return the + // error corresponding to the parse failure of the newest schema. + serde_json::from_str::(&value) + .map(|v1| EarlyNetworkConfig { + generation: v1.generation, + schema_version: Self::schema_version(), + body: v1.body.into(), + }) + .map_err(|_| v2_err) + } +} + +impl EarlyNetworkConfig { + pub fn schema_version() -> u32 { + 2 + } + + // Note: This currently only converts between v0 and v1 or deserializes v1 of + // `EarlyNetworkConfig`. + pub fn deserialize_bootstore_config( + log: &Logger, + config: &bootstore::NetworkConfig, + ) -> Result { + // Try to deserialize the latest version of the data structure (v2). If + // that succeeds we are done. + let v2_error = + match serde_json::from_slice::(&config.blob) { + Ok(val) => return Ok(val), + Err(error) => { + // Log this error and continue trying to deserialize older + // versions. 
+ warn!( + log, + "Failed to deserialize EarlyNetworkConfig \ + as v2, trying next as v1: {}", + error, + ); + error + } + }; + + match serde_json::from_slice::( + &config.blob, + ) { + Ok(v1) => { + // Convert from v1 to v2 + return Ok(EarlyNetworkConfig { + generation: v1.generation, + schema_version: EarlyNetworkConfig::schema_version(), + body: v1.body.into(), + }); + } + Err(error) => { + // Log this error. + warn!( + log, + "Failed to deserialize EarlyNetworkConfig \ + as v1, trying next as v0: {}", + error + ); + } + }; + + match serde_json::from_slice::( + &config.blob, + ) { + Ok(val) => { + // Convert from v0 to v2 + return Ok(EarlyNetworkConfig { + generation: val.generation, + schema_version: 2, + body: EarlyNetworkConfigBody { + ntp_servers: val.ntp_servers, + rack_network_config: val.rack_network_config.map( + |v0_config| { + back_compat::RackNetworkConfigV0::to_v2( + val.rack_subnet, + v0_config, + ) + }, + ), + }, + }); + } + Err(error) => { + // Log this error. + warn!( + log, + "Failed to deserialize EarlyNetworkConfig as v0: {}", error, + ); + } + }; + + // If we fail to parse the config as any known version, we return the + // error corresponding to the parse failure of the newest schema. + Err(v2_error) + } +} + +/// This is the actual configuration of EarlyNetworking. +/// +/// We nest it below the "header" of `generation` and `schema_version` so that +/// we can perform partial deserialization of `EarlyNetworkConfig` to only read +/// the header and defer deserialization of the body once we know the schema +/// version. This is possible via the use of [`serde_json::value::RawValue`] in +/// future (post-v1) deserialization paths. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct EarlyNetworkConfigBody { + /// The external NTP server addresses. 
+ pub ntp_servers: Vec, + + // Rack network configuration as delivered from RSS or Nexus + pub rack_network_config: Option, +} + +impl From for bootstore::NetworkConfig { + fn from(value: EarlyNetworkConfig) -> Self { + // Can this ever actually fail? + // We literally just deserialized the same data in RSS + let blob = serde_json::to_vec(&value).unwrap(); + + // Yes this is duplicated, but that seems fine. + let generation = value.generation; + + bootstore::NetworkConfig { generation, blob } + } +} + +/// Structures and routines used to maintain backwards compatibility. The +/// contents of this module should only be used to convert older data into the +/// current format, and not for any ongoing run-time operations. +pub mod back_compat { + use std::net::{Ipv4Addr, Ipv6Addr}; + + use omicron_common::api::{ + external::SwitchLocation, + internal::shared::{ + BfdPeerConfig, BgpConfig, BgpPeerConfig, PortConfigV2, PortFec, + PortSpeed, RackNetworkConfigV2, RouteConfig, UplinkAddressConfig, + }, + }; + use oxnet::{IpNet, Ipv4Net, Ipv6Net}; + + use super::*; + + #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] + pub struct EarlyNetworkConfigBodyV1 { + /// The external NTP server addresses. + pub ntp_servers: Vec, + + // Rack network configuration as delivered from RSS or Nexus + pub rack_network_config: Option, + } + + impl From for EarlyNetworkConfigBody { + fn from(v1: EarlyNetworkConfigBodyV1) -> Self { + EarlyNetworkConfigBody { + ntp_servers: v1.ntp_servers, + rack_network_config: v1 + .rack_network_config + .map(|v1_config| v1_config.into()), + } + } + } + + /// Deprecated, use `RackNetworkConfig` instead. Cannot actually deprecate due to + /// + /// + /// Our first version of `RackNetworkConfig`. If this exists in the bootstore, we + /// upgrade out of it into `RackNetworkConfigV1` or later versions if possible. 
+ #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] + pub(crate) struct RackNetworkConfigV0 { + // TODO: #3591 Consider making infra-ip ranges implicit for uplinks + /// First ip address to be used for configuring network infrastructure + pub infra_ip_first: Ipv4Addr, + /// Last ip address to be used for configuring network infrastructure + pub infra_ip_last: Ipv4Addr, + /// Uplinks for connecting the rack to external networks + pub uplinks: Vec, + } + + impl RackNetworkConfigV0 { + /// Convert from `RackNetworkConfigV0` to `RackNetworkConfigV1` + /// + /// We cannot use `From for `RackNetworkConfigV2` + /// because the `rack_subnet` field does not exist in `RackNetworkConfigV0` + /// and must be passed in from the `EarlyNetworkConfigV0` struct which + /// contains the `RackNetworkConfigV0` struct. + pub fn to_v2( + rack_subnet: Ipv6Addr, + v0: RackNetworkConfigV0, + ) -> RackNetworkConfigV2 { + RackNetworkConfigV2 { + rack_subnet: Ipv6Net::new(rack_subnet, 56).unwrap(), + infra_ip_first: v0.infra_ip_first, + infra_ip_last: v0.infra_ip_last, + ports: v0 + .uplinks + .into_iter() + .map(|uplink| PortConfigV2::from(uplink)) + .collect(), + bgp: vec![], + bfd: vec![], + } + } + } + + /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to + /// + #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] + pub struct PortConfigV1 { + /// The set of routes associated with this port. + pub routes: Vec, + /// This port's addresses and optional vlan IDs + pub addresses: Vec, + /// Switch the port belongs to. + pub switch: SwitchLocation, + /// Nmae of the port this config applies to. + pub port: String, + /// Port speed. + pub uplink_port_speed: PortSpeed, + /// Port forward error correction type. 
+ pub uplink_port_fec: PortFec, + /// BGP peers on this port + pub bgp_peers: Vec, + /// Whether or not to set autonegotiation + #[serde(default)] + pub autoneg: bool, + } + + impl From for PortConfigV2 { + fn from(v1: PortConfigV1) -> Self { + PortConfigV2 { + routes: v1.routes.clone(), + addresses: v1 + .addresses + .iter() + .map(|a| UplinkAddressConfig { address: *a, vlan_id: None }) + .collect(), + switch: v1.switch, + port: v1.port, + uplink_port_speed: v1.uplink_port_speed, + uplink_port_fec: v1.uplink_port_fec, + bgp_peers: v1.bgp_peers.clone(), + autoneg: v1.autoneg, + } + } + } + + /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to + /// + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] + pub(crate) struct UplinkConfig { + /// Gateway address + pub gateway_ip: Ipv4Addr, + /// Switch to use for uplink + pub switch: SwitchLocation, + /// Switchport to use for external connectivity + pub uplink_port: String, + /// Speed for the Switchport + pub uplink_port_speed: PortSpeed, + /// Forward Error Correction setting for the uplink port + pub uplink_port_fec: PortFec, + /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport + /// (must be in infra_ip pool) + pub uplink_cidr: Ipv4Net, + /// VLAN id to use for uplink + pub uplink_vid: Option, + } + + impl From for PortConfigV2 { + fn from(value: UplinkConfig) -> Self { + PortConfigV2 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: value.gateway_ip.into(), + vlan_id: value.uplink_vid, + }], + addresses: vec![UplinkAddressConfig { + address: value.uplink_cidr.into(), + vlan_id: value.uplink_vid, + }], + switch: value.switch, + port: value.uplink_port, + uplink_port_speed: value.uplink_port_speed, + uplink_port_fec: value.uplink_port_fec, + bgp_peers: vec![], + autoneg: false, + } + } + } + + /// Deprecated, use `RackNetworkConfig` instead. 
Cannot actually deprecate due to + /// + /// + /// Our second version of `RackNetworkConfig`. If this exists in the bootstore, + /// we upgrade out of it into `RackNetworkConfigV1` or later versions if + /// possible. + #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] + pub struct RackNetworkConfigV1 { + pub rack_subnet: Ipv6Net, + // TODO: #3591 Consider making infra-ip ranges implicit for uplinks + /// First ip address to be used for configuring network infrastructure + pub infra_ip_first: Ipv4Addr, + /// Last ip address to be used for configuring network infrastructure + pub infra_ip_last: Ipv4Addr, + /// Uplinks for connecting the rack to external networks + pub ports: Vec, + /// BGP configurations for connecting the rack to external networks + pub bgp: Vec, + /// BFD configuration for connecting the rack to external networks + #[serde(default)] + pub bfd: Vec, + } + + impl From for RackNetworkConfigV2 { + fn from(v1: RackNetworkConfigV1) -> Self { + RackNetworkConfigV2 { + rack_subnet: v1.rack_subnet, + infra_ip_first: v1.infra_ip_first, + infra_ip_last: v1.infra_ip_last, + ports: v1 + .ports + .into_iter() + .map(|ports| PortConfigV2::from(ports)) + .collect(), + bgp: v1.bgp.clone(), + bfd: v1.bfd.clone(), + } + } + } + + // The second production version of the `EarlyNetworkConfig`. + // + // If this version is in the bootstore than we need to convert it to + // `EarlyNetworkConfigV2`. + // + // Once we do this for all customers that have initialized racks with the + // old version we can go ahead and remove this type and its conversion code + // altogether. + #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] + pub struct EarlyNetworkConfigV1 { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. + pub generation: u64, + + // Which version of the data structure do we have. 
This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: EarlyNetworkConfigBodyV1, + } + + // The first production version of the `EarlyNetworkConfig`. + // + // If this version is in the bootstore than we need to convert it to + // `EarlyNetworkConfigV2`. + // + // Once we do this for all customers that have initialized racks with the + // old version we can go ahead and remove this type and its conversion code + // altogether. + #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] + pub(crate) struct EarlyNetworkConfigV0 { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. + pub generation: u64, + + pub rack_subnet: Ipv6Addr, + + /// The external NTP server addresses. + pub ntp_servers: Vec, + + // Rack network configuration as delivered from RSS and only existing at + // generation 1 + pub rack_network_config: Option, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + + use omicron_common::api::external::SwitchLocation; + use omicron_common::api::internal::shared::PortConfigV2; + use omicron_common::api::internal::shared::PortFec; + use omicron_common::api::internal::shared::PortSpeed; + use omicron_common::api::internal::shared::RackNetworkConfigV2; + use omicron_common::api::internal::shared::RouteConfig; + use omicron_common::api::internal::shared::UplinkAddressConfig; + use omicron_test_utils::dev::test_setup_log; + use oxnet::Ipv6Net; + + #[test] + fn serialized_early_network_config_v0_to_v2_conversion() { + let logctx = test_setup_log( + "serialized_early_network_config_v0_to_v2_conversion", + ); + let v0 = back_compat::EarlyNetworkConfigV0 { + generation: 1, + rack_subnet: Ipv6Addr::UNSPECIFIED, + ntp_servers: Vec::new(), + rack_network_config: Some(back_compat::RackNetworkConfigV0 { + 
infra_ip_first: Ipv4Addr::UNSPECIFIED, + infra_ip_last: Ipv4Addr::UNSPECIFIED, + uplinks: vec![back_compat::UplinkConfig { + gateway_ip: Ipv4Addr::UNSPECIFIED, + switch: SwitchLocation::Switch0, + uplink_port: "Port0".to_string(), + uplink_port_speed: PortSpeed::Speed100G, + uplink_port_fec: PortFec::None, + uplink_cidr: "192.168.0.1/16".parse().unwrap(), + uplink_vid: None, + }], + }), + }; + + let v0_serialized = serde_json::to_vec(&v0).unwrap(); + let bootstore_conf = + bootstore::NetworkConfig { generation: 1, blob: v0_serialized }; + + let v2 = EarlyNetworkConfig::deserialize_bootstore_config( + &logctx.log, + &bootstore_conf, + ) + .unwrap(); + let v0_rack_network_config = v0.rack_network_config.unwrap(); + let uplink = v0_rack_network_config.uplinks[0].clone(); + let expected = EarlyNetworkConfig { + generation: 1, + schema_version: EarlyNetworkConfig::schema_version(), + body: EarlyNetworkConfigBody { + ntp_servers: v0.ntp_servers.clone(), + rack_network_config: Some(RackNetworkConfigV2 { + rack_subnet: Ipv6Net::new(v0.rack_subnet, 56).unwrap(), + infra_ip_first: v0_rack_network_config.infra_ip_first, + infra_ip_last: v0_rack_network_config.infra_ip_last, + ports: vec![PortConfigV2 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: uplink.gateway_ip.into(), + vlan_id: None, + }], + addresses: vec![UplinkAddressConfig { + address: uplink.uplink_cidr.into(), + vlan_id: None, + }], + switch: uplink.switch, + port: uplink.uplink_port, + uplink_port_speed: uplink.uplink_port_speed, + uplink_port_fec: uplink.uplink_port_fec, + autoneg: false, + bgp_peers: vec![], + }], + bgp: vec![], + bfd: vec![], + }), + }, + }; + + assert_eq!(expected, v2); + + logctx.cleanup_successful(); + } + + #[test] + fn serialized_early_network_config_v1_to_v2_conversion() { + let logctx = test_setup_log( + "serialized_early_network_config_v1_to_v2_conversion", + ); + + let v1 = back_compat::EarlyNetworkConfigV1 { + generation: 1, + schema_version: 1, 
+ body: back_compat::EarlyNetworkConfigBodyV1 { + ntp_servers: Vec::new(), + rack_network_config: Some(back_compat::RackNetworkConfigV1 { + rack_subnet: Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 56) + .unwrap(), + infra_ip_first: Ipv4Addr::UNSPECIFIED, + infra_ip_last: Ipv4Addr::UNSPECIFIED, + ports: vec![back_compat::PortConfigV1 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: "192.168.0.2".parse().unwrap(), + vlan_id: None, + }], + addresses: vec!["192.168.0.1/16".parse().unwrap()], + switch: SwitchLocation::Switch0, + port: "Port0".to_string(), + uplink_port_speed: PortSpeed::Speed100G, + uplink_port_fec: PortFec::None, + bgp_peers: Vec::new(), + autoneg: false, + }], + bgp: Vec::new(), + bfd: Vec::new(), + }), + }, + }; + + let v1_serialized = serde_json::to_vec(&v1).unwrap(); + let bootstore_conf = + bootstore::NetworkConfig { generation: 1, blob: v1_serialized }; + + let v2 = EarlyNetworkConfig::deserialize_bootstore_config( + &logctx.log, + &bootstore_conf, + ) + .unwrap(); + let v1_rack_network_config = v1.body.rack_network_config.unwrap(); + let port = v1_rack_network_config.ports[0].clone(); + let expected = EarlyNetworkConfig { + generation: 1, + schema_version: EarlyNetworkConfig::schema_version(), + body: EarlyNetworkConfigBody { + ntp_servers: v1.body.ntp_servers.clone(), + rack_network_config: Some(RackNetworkConfigV2 { + rack_subnet: v1_rack_network_config.rack_subnet, + infra_ip_first: v1_rack_network_config.infra_ip_first, + infra_ip_last: v1_rack_network_config.infra_ip_last, + ports: vec![PortConfigV2 { + routes: port.routes.clone(), + addresses: vec![UplinkAddressConfig { + address: port.addresses[0], + vlan_id: None, + }], + switch: port.switch, + port: port.port, + uplink_port_speed: port.uplink_port_speed, + uplink_port_fec: port.uplink_port_fec, + autoneg: false, + bgp_peers: vec![], + }], + bgp: vec![], + bfd: vec![], + }), + }, + }; + + assert_eq!(expected, v2); + + logctx.cleanup_successful(); + } +} diff 
--git a/cockroach-admin/tests/mod.rs b/sled-agent/types/src/lib.rs similarity index 67% rename from cockroach-admin/tests/mod.rs rename to sled-agent/types/src/lib.rs index 99aeeb8299..12e8f049f9 100644 --- a/cockroach-admin/tests/mod.rs +++ b/sled-agent/types/src/lib.rs @@ -2,4 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -mod integration_tests; +//! Common types for sled-agent. + +pub mod early_networking; +pub mod rack_init; +pub mod rack_ops; diff --git a/sled-agent/types/src/rack_init.rs b/sled-agent/types/src/rack_init.rs new file mode 100644 index 0000000000..8fcf3c93fd --- /dev/null +++ b/sled-agent/types/src/rack_init.rs @@ -0,0 +1,732 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Rack initialization types. + +use std::{ + collections::BTreeSet, + net::{IpAddr, Ipv6Addr}, +}; + +use anyhow::{bail, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use omicron_common::{ + address::{ + get_64_subnet, IpRange, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, + }, + api::{external::AllowedSourceIps, internal::shared::RackNetworkConfig}, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_hardware_types::Baseboard; + +pub type Certificate = nexus_client::types::Certificate; +pub type RecoverySiloConfig = nexus_client::types::RecoverySiloConfig; + +/// Structures and routines used to maintain backwards compatibility. The +/// contents of this module should only be used to convert older data into the +/// current format, and not for any ongoing run-time operations. 
+pub mod back_compat { + use crate::early_networking::back_compat::RackNetworkConfigV1; + + use super::*; + + #[derive(Clone, Deserialize)] + struct UnvalidatedRackInitializeRequestV1 { + trust_quorum_peers: Option>, + bootstrap_discovery: BootstrapAddressDiscovery, + ntp_servers: Vec, + dns_servers: Vec, + internal_services_ip_pool_ranges: Vec, + external_dns_ips: Vec, + external_dns_zone_name: String, + external_certificates: Vec, + recovery_silo: RecoverySiloConfig, + rack_network_config: RackNetworkConfigV1, + #[serde(default = "default_allowed_source_ips")] + allowed_source_ips: AllowedSourceIps, + } + + /// This is a deprecated format, maintained to allow importing from older + /// versions. + #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] + #[serde(try_from = "UnvalidatedRackInitializeRequestV1")] + pub struct RackInitializeRequestV1 { + pub trust_quorum_peers: Option>, + pub bootstrap_discovery: BootstrapAddressDiscovery, + pub ntp_servers: Vec, + pub dns_servers: Vec, + pub internal_services_ip_pool_ranges: Vec, + pub external_dns_ips: Vec, + pub external_dns_zone_name: String, + pub external_certificates: Vec, + pub recovery_silo: RecoverySiloConfig, + pub rack_network_config: RackNetworkConfigV1, + #[serde(default = "default_allowed_source_ips")] + pub allowed_source_ips: AllowedSourceIps, + } + + impl TryFrom for RackInitializeRequestV1 { + type Error = anyhow::Error; + + fn try_from(value: UnvalidatedRackInitializeRequestV1) -> Result { + validate_external_dns( + &value.external_dns_ips, + &value.internal_services_ip_pool_ranges, + )?; + + Ok(RackInitializeRequestV1 { + trust_quorum_peers: value.trust_quorum_peers, + bootstrap_discovery: value.bootstrap_discovery, + ntp_servers: value.ntp_servers, + dns_servers: value.dns_servers, + internal_services_ip_pool_ranges: value + .internal_services_ip_pool_ranges, + external_dns_ips: value.external_dns_ips, + external_dns_zone_name: value.external_dns_zone_name, + 
external_certificates: value.external_certificates, + recovery_silo: value.recovery_silo, + rack_network_config: value.rack_network_config, + allowed_source_ips: value.allowed_source_ips, + }) + } + } + impl From for RackInitializeRequest { + fn from(v1: RackInitializeRequestV1) -> Self { + RackInitializeRequest { + trust_quorum_peers: v1.trust_quorum_peers, + bootstrap_discovery: v1.bootstrap_discovery, + ntp_servers: v1.ntp_servers, + dns_servers: v1.dns_servers, + internal_services_ip_pool_ranges: v1 + .internal_services_ip_pool_ranges, + external_dns_ips: v1.external_dns_ips, + external_dns_zone_name: v1.external_dns_zone_name, + external_certificates: v1.external_certificates, + recovery_silo: v1.recovery_silo, + rack_network_config: v1.rack_network_config.into(), + allowed_source_ips: v1.allowed_source_ips, + } + } + } +} + +// "Shadow" copy of `RackInitializeRequest` that does no validation on its +// fields. +#[derive(Clone, Deserialize)] +struct UnvalidatedRackInitializeRequest { + trust_quorum_peers: Option>, + bootstrap_discovery: BootstrapAddressDiscovery, + ntp_servers: Vec, + dns_servers: Vec, + internal_services_ip_pool_ranges: Vec, + external_dns_ips: Vec, + external_dns_zone_name: String, + external_certificates: Vec, + recovery_silo: RecoverySiloConfig, + rack_network_config: RackNetworkConfig, + #[serde(default = "default_allowed_source_ips")] + allowed_source_ips: AllowedSourceIps, +} + +fn validate_external_dns( + dns_ips: &Vec, + internal_ranges: &Vec, +) -> Result<()> { + if dns_ips.is_empty() { + bail!("At least one external DNS IP is required"); + } + + // Every external DNS IP should also be present in one of the internal + // services IP pool ranges. This check is O(N*M), but we expect both N + // and M to be small (~5 DNS servers, and a small number of pools). 
+ for &dns_ip in dns_ips { + if !internal_ranges.iter().any(|range| range.contains(dns_ip)) { + bail!( + "External DNS IP {dns_ip} is not contained in \ + `internal_services_ip_pool_ranges`" + ); + } + } + Ok(()) +} + +impl TryFrom for RackInitializeRequest { + type Error = anyhow::Error; + + fn try_from(value: UnvalidatedRackInitializeRequest) -> Result { + validate_external_dns( + &value.external_dns_ips, + &value.internal_services_ip_pool_ranges, + )?; + + Ok(RackInitializeRequest { + trust_quorum_peers: value.trust_quorum_peers, + bootstrap_discovery: value.bootstrap_discovery, + ntp_servers: value.ntp_servers, + dns_servers: value.dns_servers, + internal_services_ip_pool_ranges: value + .internal_services_ip_pool_ranges, + external_dns_ips: value.external_dns_ips, + external_dns_zone_name: value.external_dns_zone_name, + external_certificates: value.external_certificates, + recovery_silo: value.recovery_silo, + rack_network_config: value.rack_network_config, + allowed_source_ips: value.allowed_source_ips, + }) + } +} + +/// Configuration for the "rack setup service". +/// +/// The Rack Setup Service should be responsible for one-time setup actions, +/// such as CockroachDB placement and initialization. Without operator +/// intervention, however, these actions need a way to be automated in our +/// deployment. +#[derive(Clone, Deserialize, Serialize, PartialEq, JsonSchema)] +#[serde(try_from = "UnvalidatedRackInitializeRequest")] +pub struct RackInitializeRequest { + /// The set of peer_ids required to initialize trust quorum + /// + /// The value is `None` if we are not using trust quorum + pub trust_quorum_peers: Option>, + + /// Describes how bootstrap addresses should be collected during RSS. + pub bootstrap_discovery: BootstrapAddressDiscovery, + + /// The external NTP server addresses. + pub ntp_servers: Vec, + + /// The external DNS server addresses. + pub dns_servers: Vec, + + /// Ranges of the service IP pool which may be used for internal services. 
+ // TODO(https://github.com/oxidecomputer/omicron/issues/1530): Eventually, + // we want to configure multiple pools. + pub internal_services_ip_pool_ranges: Vec, + + /// Service IP addresses on which we run external DNS servers. + /// + /// Each address must be present in `internal_services_ip_pool_ranges`. + pub external_dns_ips: Vec, + + /// DNS name for the DNS zone delegated to the rack for external DNS + pub external_dns_zone_name: String, + + /// initial TLS certificates for the external API + pub external_certificates: Vec, + + /// Configuration of the Recovery Silo (the initial Silo) + pub recovery_silo: RecoverySiloConfig, + + /// Initial rack network configuration + pub rack_network_config: RackNetworkConfig, + + /// IPs or subnets allowed to make requests to user-facing services + #[serde(default = "default_allowed_source_ips")] + pub allowed_source_ips: AllowedSourceIps, +} + +impl RackInitializeRequest { + pub fn from_file>( + path: P, + ) -> Result { + let path = path.as_ref(); + let contents = std::fs::read_to_string(&path).map_err(|err| { + RackInitializeRequestParseError::Io { path: path.into(), err } + })?; + let mut raw_config = + Self::from_toml_with_fallback(&contents).map_err(|err| { + RackInitializeRequestParseError::Deserialize { + path: path.into(), + err, + } + })?; + + // In the same way that sled-agent itself (our caller) discovers the + // optional config-rss.toml in a well-known path relative to its config + // file, we look for a pair of well-known paths adjacent to + // config-rss.toml that specify an extra TLS certificate and private + // key. This is used by the end-to-end tests. Any developer can also + // use this to inject a TLS certificate into their setup. + // (config-rss.toml is only used for dev/test, not production + // deployments, which will always get their RSS configuration from + // Wicket.) 
+ if let Some(parent) = path.parent() { + let cert_path = parent.join("initial-tls-cert.pem"); + let key_path = parent.join("initial-tls-key.pem"); + let cert_bytes = std::fs::read_to_string(&cert_path); + let key_bytes = std::fs::read_to_string(&key_path); + match (cert_bytes, key_bytes) { + (Ok(cert), Ok(key)) => { + raw_config + .external_certificates + .push(Certificate { key, cert }); + } + (Err(cert_error), Err(key_error)) + if cert_error.kind() == std::io::ErrorKind::NotFound + && key_error.kind() == std::io::ErrorKind::NotFound => + { + // Fine. No extra cert was provided. + } + (Err(cert_error), _) => { + return Err(RackInitializeRequestParseError::Certificate( + anyhow::Error::new(cert_error).context(format!( + "loading certificate from {:?}", + cert_path + )), + )); + } + (_, Err(key_error)) => { + return Err(RackInitializeRequestParseError::Certificate( + anyhow::Error::new(key_error).context(format!( + "loading private key from {:?}", + key_path + )), + )); + } + }; + } + + Ok(raw_config) + } + + pub fn from_toml_with_fallback( + data: &str, + ) -> Result { + let v2_err = match toml::from_str::(&data) { + Ok(req) => return Ok(req), + Err(e) => e, + }; + if let Ok(v1) = + toml::from_str::(&data) + { + return Ok(v1.into()); + } + + // If we fail to parse the request as any known version, we return the + // error corresponding to the parse failure of the newest schema. + Err(v2_err.into()) + } + + /// Return a configuration suitable for testing. + pub fn test_config() -> Self { + // Use env! rather than std::env::var because this might be called from + // a dependent crate. 
+ let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR")); + let path = manifest_dir + .join("../../smf/sled-agent/non-gimlet/config-rss.toml"); + let contents = std::fs::read_to_string(&path).unwrap(); + toml::from_str(&contents) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)) + } + + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new( + self.rack_network_config.rack_subnet.addr(), + ) + } + + /// Returns the subnet for our rack. + pub fn rack_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new( + self.rack_network_config.rack_subnet.addr(), + ) + } + + /// Returns the subnet for the `index`-th sled in the rack. + pub fn sled_subnet(&self, index: u8) -> Ipv6Subnet { + get_64_subnet(self.rack_subnet(), index) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum RackInitializeRequestParseError { + #[error("Failed to read config from {path}: {err}")] + Io { + path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + #[error("Failed to deserialize config from {path}: {err}")] + Deserialize { + path: Utf8PathBuf, + #[source] + err: anyhow::Error, + }, + #[error("Loading certificate: {0}")] + Certificate(#[source] anyhow::Error), +} + +/// This field was added after several racks were already deployed. RSS plans +/// for those racks should default to allowing any source IP, since that is +/// effectively what they did. +const fn default_allowed_source_ips() -> AllowedSourceIps { + AllowedSourceIps::Any +} + +// This custom debug implementation hides the private keys. +impl std::fmt::Debug for RackInitializeRequest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // If you find a compiler error here, and you just added a field to this + // struct, be sure to add it to the Debug impl below! 
+ let RackInitializeRequest { + trust_quorum_peers: trust_qurorum_peers, + bootstrap_discovery, + ntp_servers, + dns_servers, + internal_services_ip_pool_ranges, + external_dns_ips, + external_dns_zone_name, + external_certificates: _, + recovery_silo, + rack_network_config, + allowed_source_ips, + } = &self; + + f.debug_struct("RackInitializeRequest") + .field("trust_quorum_peers", trust_qurorum_peers) + .field("bootstrap_discovery", bootstrap_discovery) + .field("ntp_servers", ntp_servers) + .field("dns_servers", dns_servers) + .field( + "internal_services_ip_pool_ranges", + internal_services_ip_pool_ranges, + ) + .field("external_dns_ips", external_dns_ips) + .field("external_dns_zone_name", external_dns_zone_name) + .field("external_certificates", &"") + .field("recovery_silo", recovery_silo) + .field("rack_network_config", rack_network_config) + .field("allowed_source_ips", allowed_source_ips) + .finish() + } +} + +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum BootstrapAddressDiscovery { + /// Ignore all bootstrap addresses except our own. + OnlyOurs, + /// Ignore all bootstrap addresses except the following. 
+ OnlyThese { addrs: BTreeSet }, +} + +#[cfg(test)] +mod tests { + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + + use super::*; + use anyhow::Context; + use oxnet::Ipv6Net; + + #[test] + fn parse_rack_initialization() { + let manifest = std::env::var("CARGO_MANIFEST_DIR") + .expect("Cannot access manifest directory"); + let manifest = Utf8PathBuf::from(manifest); + + let path = + manifest.join("../../smf/sled-agent/non-gimlet/config-rss.toml"); + let contents = std::fs::read_to_string(&path).unwrap(); + let _: RackInitializeRequest = toml::from_str(&contents) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); + + let path = manifest + .join("../../smf/sled-agent/gimlet-standalone/config-rss.toml"); + let contents = std::fs::read_to_string(&path).unwrap(); + let _: RackInitializeRequest = toml::from_str(&contents) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); + } + + #[test] + fn parse_rack_initialization_weak_hash() { + let config = r#" + bootstrap_discovery.type = "only_ours" + ntp_servers = [ "ntp.eng.oxide.computer" ] + dns_servers = [ "1.1.1.1", "9.9.9.9" ] + external_dns_zone_name = "oxide.test" + + [[internal_services_ip_pool_ranges]] + first = "192.168.1.20" + last = "192.168.1.22" + + [recovery_silo] + silo_name = "recovery" + user_name = "recovery" + user_password_hash = "$argon2i$v=19$m=16,t=2,p=1$NVR0a2QxVXNiQjlObFJXbA$iGFJWOlUqN20B8KR4Fsmrg" + "#; + + let error = toml::from_str::(config) + .expect_err("unexpectedly parsed with bad password hash"); + println!("found error: {}", error); + assert!(error.to_string().contains( + "password hash: algorithm: expected argon2id, found argon2i" + )); + } + + #[test] + fn validate_external_dns_ips_must_be_in_internal_services_ip_pools() { + // Conjure up a config; we'll tweak the internal services pools and + // external DNS IPs, but no other fields matter. 
+ let mut config = UnvalidatedRackInitializeRequest { + trust_quorum_peers: None, + bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, + ntp_servers: Vec::new(), + dns_servers: Vec::new(), + internal_services_ip_pool_ranges: Vec::new(), + external_dns_ips: Vec::new(), + external_dns_zone_name: "".to_string(), + external_certificates: Vec::new(), + recovery_silo: RecoverySiloConfig { + silo_name: "recovery".parse().unwrap(), + user_name: "recovery".parse().unwrap(), + user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(), + }, + rack_network_config: RackNetworkConfig { + rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + allowed_source_ips: AllowedSourceIps::Any, + }; + + // Valid configs: all external DNS IPs are contained in the IP pool + // ranges. + for (ip_pool_ranges, dns_ips) in [ + ( + &[("fd00::1", "fd00::10")] as &[(&str, &str)], + &["fd00::1", "fd00::5", "fd00::10"] as &[&str], + ), + ( + &[("192.168.1.10", "192.168.1.20")], + &["192.168.1.10", "192.168.1.15", "192.168.1.20"], + ), + ( + &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], + &[ + "fd00::1", + "fd00::5", + "fd00::10", + "192.168.1.10", + "192.168.1.15", + "192.168.1.20", + ], + ), + ] { + config.internal_services_ip_pool_ranges = ip_pool_ranges + .iter() + .map(|(a, b)| { + IpRange::try_from(( + a.parse::().unwrap(), + b.parse::().unwrap(), + )) + .unwrap() + }) + .collect(); + config.external_dns_ips = + dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); + + match RackInitializeRequest::try_from(config.clone()) { + Ok(_) => (), + Err(err) => panic!( + "failure on {ip_pool_ranges:?} with DNS IPs {dns_ips:?}: \ + {err}" + ), + } + } + + // Invalid configs: either no DNS IPs, or one or more DNS IPs are not + // contained in the ip pool ranges. 
+ for (ip_pool_ranges, dns_ips) in [ + (&[("fd00::1", "fd00::10")] as &[(&str, &str)], &[] as &[&str]), + (&[("fd00::1", "fd00::10")], &["fd00::1", "fd00::5", "fd00::11"]), + ( + &[("192.168.1.10", "192.168.1.20")], + &["192.168.1.9", "192.168.1.15", "192.168.1.20"], + ), + ( + &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], + &[ + "fd00::1", + "fd00::5", + "fd00::10", + "192.168.1.10", + "192.168.1.15", + "192.168.1.20", + "192.168.1.21", + ], + ), + ] { + config.internal_services_ip_pool_ranges = ip_pool_ranges + .iter() + .map(|(a, b)| { + IpRange::try_from(( + a.parse::().unwrap(), + b.parse::().unwrap(), + )) + .unwrap() + }) + .collect(); + config.external_dns_ips = + dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); + + match RackInitializeRequest::try_from(config.clone()) { + Ok(_) => panic!( + "unexpected success on {ip_pool_ranges:?} with \ + DNS IPs {dns_ips:?}" + ), + Err(_) => (), + } + } + } + + #[test] + fn test_subnets() { + let cfg = RackInitializeRequest { + trust_quorum_peers: None, + bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, + ntp_servers: vec![String::from("test.pool.example.com")], + dns_servers: vec!["1.1.1.1".parse().unwrap()], + external_dns_zone_name: String::from("oxide.test"), + internal_services_ip_pool_ranges: vec![IpRange::from(IpAddr::V4( + Ipv4Addr::new(129, 168, 1, 20), + ))], + external_dns_ips: vec![], + external_certificates: vec![], + recovery_silo: RecoverySiloConfig { + silo_name: "test-silo".parse().unwrap(), + user_name: "dummy".parse().unwrap(), + // This is a hash for the password "oxide". It doesn't matter, + // though; it's not used. 
+ user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$\ + RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/\ + ek3GL0el/oProgTwWpHJZ8lsQQoY" + .parse() + .unwrap(), + }, + rack_network_config: RackNetworkConfig { + rack_subnet: Ipv6Net::new( + "fd00:1122:3344:0100::".parse().unwrap(), + RACK_PREFIX, + ) + .unwrap(), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + allowed_source_ips: AllowedSourceIps::Any, + }; + + assert_eq!( + Ipv6Subnet::::new( + // Masked out in AZ Subnet + // vv + "fd00:1122:3344:0000::".parse::().unwrap(), + ), + cfg.az_subnet() + ); + assert_eq!( + Ipv6Subnet::::new( + // Shows up from Rack Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), + cfg.rack_subnet() + ); + assert_eq!( + Ipv6Subnet::::new( + // 0th Sled Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), + cfg.sled_subnet(0) + ); + assert_eq!( + Ipv6Subnet::::new( + // 1st Sled Subnet + // vv + "fd00:1122:3344:0101::".parse::().unwrap(), + ), + cfg.sled_subnet(1) + ); + assert_eq!( + Ipv6Subnet::::new( + // Last Sled Subnet + // vv + "fd00:1122:3344:01ff::".parse::().unwrap(), + ), + cfg.sled_subnet(255) + ); + } + + #[test] + fn test_extra_certs() { + // The stock non-Gimlet config has no TLS certificates. + let path = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("../../smf/sled-agent/non-gimlet/config-rss.toml"); + let cfg = RackInitializeRequest::from_file(&path) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); + assert!(cfg.external_certificates.is_empty()); + + // Now let's create a configuration that does have an adjacent + // certificate and key. + let tempdir = + camino_tempfile::tempdir().expect("creating temporary directory"); + println!("using temp path: {:?}", tempdir); + + // Generate the certificate. 
+ let domain = format!( + "{}.sys.{}", + cfg.external_dns_zone_name, + cfg.recovery_silo.silo_name.as_str(), + ); + let cert = rcgen::generate_simple_self_signed(vec![domain.clone()]) + .unwrap_or_else(|error| { + panic!( + "generating certificate for domain {:?}: {}", + domain, error + ) + }); + + // Write the configuration file. + let cfg_path = tempdir.path().join("config-rss.toml"); + let _ = std::fs::copy(&path, &cfg_path) + .with_context(|| { + format!("failed to copy file {:?} to {:?}", &path, &cfg_path) + }) + .unwrap(); + + // Write the certificate. + let cert_bytes = cert + .serialize_pem() + .expect("serializing generated certificate") + .into_bytes(); + let cert_path = tempdir.path().join("initial-tls-cert.pem"); + std::fs::write(&cert_path, &cert_bytes) + .with_context(|| format!("failed to write to {:?}", &cert_path)) + .unwrap(); + + // Write the private key. + let key_path = tempdir.path().join("initial-tls-key.pem"); + let key_bytes = cert.serialize_private_key_pem().into_bytes(); + std::fs::write(&key_path, &key_bytes) + .with_context(|| format!("failed to write to {:?}", &key_path)) + .unwrap(); + + // Now try to load it all. + let read_cfg = RackInitializeRequest::from_file(&cfg_path) + .expect("failed to read generated config with certificate"); + assert_eq!(read_cfg.external_certificates.len(), 1); + let cert = read_cfg.external_certificates.first().unwrap(); + let _ = rcgen::KeyPair::from_pem(&cert.key) + .expect("generated PEM did not parse as KeyPair"); + } +} diff --git a/sled-agent/types/src/rack_ops.rs b/sled-agent/types/src/rack_ops.rs new file mode 100644 index 0000000000..d8c0fa1c88 --- /dev/null +++ b/sled-agent/types/src/rack_ops.rs @@ -0,0 +1,46 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_uuid_kinds::{RackInitUuid, RackResetUuid}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Current status of any rack-level operation being performed by this bootstrap +/// agent. +#[derive( + Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, +)] +#[serde(tag = "status", rename_all = "snake_case")] +pub enum RackOperationStatus { + Initializing { + id: RackInitUuid, + }, + /// `id` will be none if the rack was already initialized on startup. + Initialized { + id: Option, + }, + InitializationFailed { + id: RackInitUuid, + message: String, + }, + InitializationPanicked { + id: RackInitUuid, + }, + Resetting { + id: RackResetUuid, + }, + /// `reset_id` will be None if the rack is in an uninitialized-on-startup, + /// or Some if it is in an uninitialized state due to a reset operation + /// completing. + Uninitialized { + reset_id: Option, + }, + ResetFailed { + id: RackResetUuid, + message: String, + }, + ResetPanicked { + id: RackResetUuid, + }, +} diff --git a/smf/mgs/manifest.xml b/smf/mgs/manifest.xml index 125c32ce2b..e129ccf35a 100644 --- a/smf/mgs/manifest.xml +++ b/smf/mgs/manifest.xml @@ -4,13 +4,18 @@ - + + + + +