From eb46e27065eb63e0f33901188f78715a95f418a9 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Fri, 26 Apr 2024 14:53:50 +0300 Subject: [PATCH] fstree: make combined writer configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to combine for SSDs: goos: linux goarch: amd64 pkg: github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor cpu: AMD Ryzen 7 PRO 7840U w/ Radeon 780M Graphics │ ssd.fstree-original │ ssd.fstree-combined │ │ sec/op │ sec/op vs base │ Put/size=1,thread=1-16 91.98µ ± 2% 14040.16µ ± 1% +15164.94% (p=0.000 n=10) Put/size=1,thread=20-16 1.010m ± 1% 15.095m ± 2% +1393.89% (p=0.000 n=10) Put/size=1,thread=100-16 5.272m ± 1% 17.306m ± 3% +228.27% (p=0.000 n=10) Put/size=1,thread=200-16 10.77m ± 1% 33.77m ± 2% +213.52% (p=0.000 n=10) Put/size=256,thread=1-16 91.37µ ± 2% 14018.07µ ± 1% +15242.09% (p=0.000 n=10) Put/size=256,thread=20-16 1.028m ± 4% 15.583m ± 2% +1415.81% (p=0.000 n=10) Put/size=256,thread=100-16 5.420m ± 1% 16.869m ± 6% +211.22% (p=0.000 n=10) Put/size=256,thread=200-16 12.42m ± 11% 29.55m ± 2% +137.89% (p=0.000 n=10) Put/size=1024,thread=1-16 85.38µ ± 7% 12301.06µ ± 0% +14306.75% (p=0.000 n=10) Put/size=1024,thread=20-16 1.053m ± 4% 13.855m ± 2% +1216.07% (p=0.000 n=10) Put/size=1024,thread=100-16 5.629m ± 1% 15.694m ± 2% +178.82% (p=0.000 n=10) Put/size=1024,thread=200-16 12.55m ± 9% 30.10m ± 1% +139.83% (p=0.000 n=10) Put/size=4096,thread=1-16 150.6µ ± 6% 12864.7µ ± 1% +8441.32% (p=0.000 n=10) Put/size=4096,thread=20-16 1.308m ± 1% 14.136m ± 2% +980.81% (p=0.000 n=10) Put/size=4096,thread=100-16 6.670m ± 2% 16.101m ± 2% +141.40% (p=0.000 n=10) Put/size=4096,thread=200-16 13.45m ± 1% 31.72m ± 2% +135.88% (p=0.000 n=10) Put/size=16384,thread=1-16 181.7µ ± 5% 12891.6µ ± 1% +6994.13% (p=0.000 n=18+10) Put/size=16384,thread=20-16 1.131m ± 4% 14.592m ± 1% +1189.98% (p=0.000 n=10) Put/size=16384,thread=100-16 6.060m ± 31% 17.599m ± 2% +190.41% (p=0.000 n=10) 
Put/size=16384,thread=200-16 12.38m ± 2% 35.42m ± 1% +185.99% (p=0.000 n=10) Put/size=65536,thread=1-16 282.8µ ± 5% 13109.3µ ± 1% +4535.47% (p=0.000 n=10) Put/size=65536,thread=20-16 1.599m ± 10% 16.549m ± 2% +935.19% (p=0.000 n=10) Put/size=65536,thread=100-16 8.472m ± 4% 23.581m ± 7% +178.34% (p=0.000 n=10) Put/size=65536,thread=200-16 16.60m ± 2% 42.20m ± 2% +154.30% (p=0.000 n=10) Put/size=262144,thread=1-16 577.9µ ± 4% 13673.9µ ± 4% +2266.11% (p=0.000 n=10) Put/size=262144,thread=20-16 3.961m ± 30% 18.688m ± 4% +371.76% (p=0.000 n=10) Put/size=262144,thread=100-16 20.09m ± 36% 68.14m ± 2% +239.16% (p=0.000 n=10) Put/size=262144,thread=200-16 38.53m ± 33% 113.45m ± 23% +194.48% (p=0.000 n=10) Put/size=1048576,thread=1-16 1.766m ± 5% 17.167m ± 1% +871.86% (p=0.000 n=10) Put/size=1048576,thread=20-16 15.71m ± 21% 40.61m ± 18% +158.54% (p=0.000 n=10) Put/size=1048576,thread=100-16 61.86m ± 25% 197.93m ± 3% +219.98% (p=0.000 n=10) Put/size=1048576,thread=200-16 123.7m ± 18% 353.1m ± 4% +185.47% (p=0.000 n=10) Put/size=4194304,thread=1-16 5.485m ± 2% 20.737m ± 1% +278.07% (p=0.000 n=10) Put/size=4194304,thread=20-16 51.55m ± 16% 143.20m ± 9% +177.79% (p=0.000 n=10) Put/size=4194304,thread=100-16 260.4m ± 20% 700.5m ± 4% +169.04% (p=0.000 n=10) Put/size=4194304,thread=200-16 521.9m ± 11% 1356.2m ± 10% +159.86% (p=0.000 n=10) geomean 4.278m 31.30m +631.70% │ ssd.fstree-original │ ssd.fstree-combined │ │ B/op │ B/op vs base │ Put/size=1,thread=1-16 1.904Ki ± 0% 2.678Ki ± 1% +40.68% (p=0.000 n=10) Put/size=1,thread=20-16 36.94Ki ± 0% 42.40Ki ± 1% +14.76% (p=0.000 n=10) Put/size=1,thread=100-16 186.9Ki ± 0% 194.3Ki ± 0% +3.99% (p=0.000 n=10) Put/size=1,thread=200-16 370.9Ki ± 1% 389.5Ki ± 0% +5.01% (p=0.000 n=10) Put/size=256,thread=1-16 1.917Ki ± 0% 2.703Ki ± 0% +40.98% (p=0.000 n=10) Put/size=256,thread=20-16 37.51Ki ± 0% 42.44Ki ± 0% +13.14% (p=0.000 n=10) Put/size=256,thread=100-16 188.3Ki ± 1% 195.0Ki ± 1% +3.53% (p=0.000 n=10) Put/size=256,thread=200-16 378.5Ki ± 
1% 389.8Ki ± 1% +3.00% (p=0.000 n=10) Put/size=1024,thread=1-16 1.914Ki ± 0% 2.699Ki ± 0% +40.99% (p=0.000 n=10) Put/size=1024,thread=20-16 37.57Ki ± 1% 42.43Ki ± 1% +12.95% (p=0.000 n=10) Put/size=1024,thread=100-16 190.3Ki ± 1% 196.5Ki ± 1% +3.28% (p=0.000 n=10) Put/size=1024,thread=200-16 380.9Ki ± 0% 392.4Ki ± 1% +3.02% (p=0.000 n=10) Put/size=4096,thread=1-16 1.952Ki ± 0% 2.696Ki ± 1% +38.12% (p=0.000 n=10) Put/size=4096,thread=20-16 38.12Ki ± 1% 42.28Ki ± 1% +10.91% (p=0.000 n=10) Put/size=4096,thread=100-16 191.5Ki ± 1% 196.8Ki ± 0% +2.81% (p=0.000 n=10) Put/size=4096,thread=200-16 382.1Ki ± 1% 392.8Ki ± 0% +2.80% (p=0.000 n=10) Put/size=16384,thread=1-16 1.977Ki ± 1% 2.707Ki ± 0% +36.90% (p=0.000 n=18+10) Put/size=16384,thread=20-16 38.00Ki ± 0% 42.83Ki ± 1% +12.71% (p=0.000 n=10) Put/size=16384,thread=100-16 191.1Ki ± 0% 198.0Ki ± 0% +3.63% (p=0.000 n=10) Put/size=16384,thread=200-16 382.2Ki ± 0% 395.3Ki ± 0% +3.43% (p=0.000 n=10) Put/size=65536,thread=1-16 2.020Ki ± 1% 2.715Ki ± 0% +34.40% (p=0.000 n=10) Put/size=65536,thread=20-16 38.42Ki ± 1% 43.22Ki ± 0% +12.51% (p=0.000 n=10) Put/size=65536,thread=100-16 193.1Ki ± 0% 200.1Ki ± 1% +3.64% (p=0.000 n=10) Put/size=65536,thread=200-16 386.1Ki ± 0% 400.0Ki ± 0% +3.61% (p=0.000 n=10) Put/size=262144,thread=1-16 2.119Ki ± 0% 2.725Ki ± 1% +28.57% (p=0.000 n=10) Put/size=262144,thread=20-16 39.47Ki ± 1% 43.49Ki ± 0% +10.18% (p=0.000 n=10) Put/size=262144,thread=100-16 197.9Ki ± 1% 214.4Ki ± 1% +8.31% (p=0.000 n=10) Put/size=262144,thread=200-16 395.9Ki ± 1% 422.7Ki ± 2% +6.77% (p=0.000 n=10) Put/size=1048576,thread=1-16 2.243Ki ± 0% 2.770Ki ± 1% +23.51% (p=0.000 n=10) Put/size=1048576,thread=20-16 42.91Ki ± 3% 46.23Ki ± 5% +7.73% (p=0.000 n=10) Put/size=1048576,thread=100-16 210.3Ki ± 2% 229.8Ki ± 2% +9.27% (p=0.000 n=10) Put/size=1048576,thread=200-16 427.9Ki ± 2% 460.2Ki ± 1% +7.55% (p=0.000 n=10) Put/size=4194304,thread=1-16 2.325Ki ± 1% 2.771Ki ± 1% +19.17% (p=0.000 n=10) Put/size=4194304,thread=20-16 
45.24Ki ± 2% 50.57Ki ± 1% +11.79% (p=0.000 n=10) Put/size=4194304,thread=100-16 231.7Ki ± 2% 257.8Ki ± 3% +11.26% (p=0.000 n=10) Put/size=4194304,thread=200-16 474.7Ki ± 4% 509.5Ki ± 11% +7.33% (p=0.000 n=10) geomean 50.02Ki 56.68Ki +13.32% │ ssd.fstree-original │ ssd.fstree-combined │ │ allocs/op │ allocs/op vs base │ Put/size=1,thread=1-16 22.00 ± 0% 32.00 ± 3% +45.45% (p=0.000 n=10) Put/size=1,thread=20-16 413.0 ± 0% 473.5 ± 1% +14.65% (p=0.000 n=10) Put/size=1,thread=100-16 2.069k ± 0% 2.167k ± 0% +4.71% (p=0.000 n=10) Put/size=1,thread=200-16 4.137k ± 0% 4.338k ± 0% +4.87% (p=0.000 n=10) Put/size=256,thread=1-16 22.00 ± 0% 32.00 ± 0% +45.45% (p=0.000 n=10) Put/size=256,thread=20-16 415.0 ± 0% 473.5 ± 0% +14.10% (p=0.000 n=10) Put/size=256,thread=100-16 2.070k ± 0% 2.163k ± 0% +4.47% (p=0.000 n=10) Put/size=256,thread=200-16 4.160k ± 0% 4.319k ± 0% +3.83% (p=0.000 n=10) Put/size=1024,thread=1-16 22.00 ± 5% 32.00 ± 0% +45.45% (p=0.000 n=10) Put/size=1024,thread=20-16 413.0 ± 1% 470.0 ± 1% +13.80% (p=0.000 n=10) Put/size=1024,thread=100-16 2.074k ± 0% 2.160k ± 0% +4.15% (p=0.000 n=10) Put/size=1024,thread=200-16 4.155k ± 0% 4.319k ± 0% +3.96% (p=0.000 n=10) Put/size=4096,thread=1-16 22.00 ± 0% 32.00 ± 0% +45.45% (p=0.000 n=10) Put/size=4096,thread=20-16 419.0 ± 0% 469.0 ± 0% +11.93% (p=0.000 n=10) Put/size=4096,thread=100-16 2.088k ± 0% 2.162k ± 0% +3.54% (p=0.000 n=10) Put/size=4096,thread=200-16 4.173k ± 0% 4.319k ± 0% +3.50% (p=0.000 n=10) Put/size=16384,thread=1-16 22.00 ± 0% 32.00 ± 0% +45.45% (p=0.000 n=18+10) Put/size=16384,thread=20-16 414.0 ± 0% 471.0 ± 0% +13.77% (p=0.000 n=10) Put/size=16384,thread=100-16 2.082k ± 1% 2.173k ± 0% +4.37% (p=0.000 n=10) Put/size=16384,thread=200-16 4.162k ± 0% 4.340k ± 0% +4.29% (p=0.000 n=10) Put/size=65536,thread=1-16 23.00 ± 4% 32.00 ± 0% +39.13% (p=0.000 n=10) Put/size=65536,thread=20-16 420.0 ± 0% 476.5 ± 1% +13.45% (p=0.000 n=10) Put/size=65536,thread=100-16 2.103k ± 0% 2.197k ± 1% +4.49% (p=0.000 n=10) 
Put/size=65536,thread=200-16 4.204k ± 0% 4.389k ± 0% +4.41% (p=0.000 n=10) Put/size=262144,thread=1-16 23.00 ± 0% 32.00 ± 0% +39.13% (p=0.000 n=10) Put/size=262144,thread=20-16 430.0 ± 1% 478.5 ± 0% +11.28% (p=0.000 n=10) Put/size=262144,thread=100-16 2.162k ± 2% 2.354k ± 1% +8.91% (p=0.000 n=10) Put/size=262144,thread=200-16 4.362k ± 2% 4.641k ± 2% +6.38% (p=0.000 n=10) Put/size=1048576,thread=1-16 25.00 ± 0% 33.00 ± 3% +32.00% (p=0.000 n=10) Put/size=1048576,thread=20-16 464.5 ± 3% 509.5 ± 2% +9.69% (p=0.000 n=10) Put/size=1048576,thread=100-16 2.322k ± 2% 2.535k ± 1% +9.17% (p=0.000 n=10) Put/size=1048576,thread=200-16 4.760k ± 1% 5.062k ± 1% +6.36% (p=0.000 n=10) Put/size=4194304,thread=1-16 26.00 ± 4% 33.00 ± 0% +26.92% (p=0.000 n=10) Put/size=4194304,thread=20-16 491.0 ± 2% 573.0 ± 1% +16.70% (p=0.000 n=10) Put/size=4194304,thread=100-16 2.550k ± 2% 2.914k ± 3% +14.29% (p=0.000 n=10) Put/size=4194304,thread=200-16 5.254k ± 4% 5.767k ± 10% +9.76% (p=0.000 n=10) geomean 552.5 638.2 +15.50% This makes flush_interval common for peapod and fstree since they're almost the same in meaning and then there are no peapod-specific configurations left. 
Signed-off-by: Roman Khimov --- CHANGELOG.md | 1 + cmd/neofs-lens/internal/storage/root.go | 14 +++-- cmd/neofs-node/config.go | 9 ++-- cmd/neofs-node/config/engine/config_test.go | 7 +-- .../engine/shard/blobstor/fstree/config.go | 53 ++++++++++++++++++- .../engine/shard/blobstor/peapod/config.go | 34 ------------ .../engine/shard/blobstor/storage/config.go | 23 +++++++- cmd/neofs-node/storage.go | 6 ++- cmd/neofs-node/storage/config.go | 17 +++--- config/example/node.json | 6 ++- config/example/node.yaml | 4 ++ docs/storage-node-configuration.md | 17 +++--- .../blobstor/fstree/control.go | 2 +- .../blobstor/fstree/fstree.go | 11 ++++ .../blobstor/fstree/fstree_write_linux.go | 36 +++++++------ .../blobstor/fstree/fstree_write_specific.go | 6 +-- .../blobstor/fstree/option.go | 25 +++++++++ 17 files changed, 180 insertions(+), 91 deletions(-) delete mode 100644 cmd/neofs-node/config/engine/shard/blobstor/peapod/config.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 749517baea..58a69a4cbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Changelog for NeoFS Node ## [Unreleased] ### Added +- More effective FSTree writer for HDDs, new configuration options for it (#2814) ### Fixed diff --git a/cmd/neofs-lens/internal/storage/root.go b/cmd/neofs-lens/internal/storage/root.go index 6cb0ce1180..3393f47f93 100644 --- a/cmd/neofs-lens/internal/storage/root.go +++ b/cmd/neofs-lens/internal/storage/root.go @@ -9,7 +9,6 @@ import ( engineconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine" shardconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard" fstreeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/fstree" - peapodconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/peapod" "github.com/nspcc-dev/neofs-node/cmd/neofs-node/storage" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor" 
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree" @@ -118,15 +117,18 @@ func openEngine(cmd *cobra.Command) *engine.StorageEngine { sCfg.Typ = storagesCfg[i].Type() sCfg.Path = storagesCfg[i].Path() sCfg.Perm = storagesCfg[i].Perm() + sCfg.FlushInterval = storagesCfg[i].FlushInterval() switch storagesCfg[i].Type() { case fstree.Type: sub := fstreeconfig.From((*config.Config)(storagesCfg[i])) sCfg.Depth = sub.Depth() sCfg.NoSync = sub.NoSync() + sCfg.CombinedCountLimit = sub.CombinedCountLimit() + sCfg.CombinedSizeLimit = sub.CombinedSizeLimit() + sCfg.CombinedSizeThreshold = sub.CombinedSizeThreshold() case peapod.Type: - peapodCfg := peapodconfig.From((*config.Config)(storagesCfg[i])) - sCfg.FlushInterval = peapodCfg.FlushInterval() + // Nothing peapod-specific, but it should work. default: return fmt.Errorf("can't initiate storage. invalid storage type: %s", storagesCfg[i].Type()) } @@ -192,7 +194,11 @@ func openEngine(cmd *cobra.Command) *engine.StorageEngine { fstree.WithPath(sRead.Path), fstree.WithPerm(sRead.Perm), fstree.WithDepth(sRead.Depth), - fstree.WithNoSync(sRead.NoSync)), + fstree.WithNoSync(sRead.NoSync), + fstree.WithCombinedCountLimit(sRead.CombinedCountLimit), + fstree.WithCombinedSizeLimit(sRead.CombinedSizeLimit), + fstree.WithCombinedSizeThreshold(sRead.CombinedSizeThreshold), + fstree.WithCombinedWriteInterval(sRead.FlushInterval)), Policy: func(_ *objectSDK.Object, data []byte) bool { return true }, diff --git a/cmd/neofs-node/config.go b/cmd/neofs-node/config.go index 37d8ec46e1..7620224067 100644 --- a/cmd/neofs-node/config.go +++ b/cmd/neofs-node/config.go @@ -23,7 +23,6 @@ import ( engineconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine" shardconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard" fstreeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/fstree" - peapodconfig 
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/peapod" loggerconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/logger" metricsconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/metrics" morphconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/morph" @@ -36,7 +35,6 @@ import ( "github.com/nspcc-dev/neofs-node/pkg/core/container" netmapCore "github.com/nspcc-dev/neofs-node/pkg/core/netmap" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree" - "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/peapod" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine" "github.com/nspcc-dev/neofs-node/pkg/metrics" "github.com/nspcc-dev/neofs-node/pkg/morph/client" @@ -224,15 +222,18 @@ func (a *applicationConfiguration) readConfig(c *config.Config) error { sCfg.Typ = storagesCfg[i].Type() sCfg.Path = storagesCfg[i].Path() sCfg.Perm = storagesCfg[i].Perm() + sCfg.FlushInterval = storagesCfg[i].FlushInterval() switch storagesCfg[i].Type() { case fstree.Type: sub := fstreeconfig.From((*config.Config)(storagesCfg[i])) sCfg.Depth = sub.Depth() sCfg.NoSync = sub.NoSync() + sCfg.CombinedCountLimit = sub.CombinedCountLimit() + sCfg.CombinedSizeLimit = sub.CombinedSizeLimit() + sCfg.CombinedSizeThreshold = sub.CombinedSizeThreshold() case peapod.Type: - peapodCfg := peapodconfig.From((*config.Config)(storagesCfg[i])) - sCfg.FlushInterval = peapodCfg.FlushInterval() + // No specific configs, but it's a valid storage type. 
default: return fmt.Errorf("invalid storage type: %s", storagesCfg[i].Type()) } diff --git a/cmd/neofs-node/config/engine/config_test.go b/cmd/neofs-node/config/engine/config_test.go index 160f455d4f..de3b66a6aa 100644 --- a/cmd/neofs-node/config/engine/config_test.go +++ b/cmd/neofs-node/config/engine/config_test.go @@ -9,7 +9,6 @@ import ( engineconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine" shardconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard" fstreeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/fstree" - peapodconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/peapod" piloramaconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/pilorama" configtest "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/test" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/peapod" @@ -87,11 +86,10 @@ func TestEngineSection(t *testing.T) { require.EqualValues(t, 102400, sc.SmallSizeLimit()) require.Equal(t, 2, len(ss)) - ppd := peapodconfig.From((*config.Config)(ss[0])) require.Equal(t, "tmp/0/blob/peapod.db", ss[0].Path()) require.EqualValues(t, 0644, ss[0].Perm()) require.EqualValues(t, peapod.Type, ss[0].Type()) - require.EqualValues(t, 10*time.Millisecond, ppd.FlushInterval()) + require.EqualValues(t, 10*time.Millisecond, ss[0].FlushInterval()) require.Equal(t, "tmp/0/blob", ss[1].Path()) require.EqualValues(t, 0644, ss[1].Perm()) @@ -131,11 +129,10 @@ func TestEngineSection(t *testing.T) { require.EqualValues(t, 102400, sc.SmallSizeLimit()) require.Equal(t, 2, len(ss)) - ppd := peapodconfig.From((*config.Config)(ss[0])) require.Equal(t, "tmp/1/blob/peapod.db", ss[0].Path()) require.EqualValues(t, 0644, ss[0].Perm()) require.EqualValues(t, peapod.Type, ss[0].Type()) - require.EqualValues(t, 30*time.Millisecond, ppd.FlushInterval()) + require.EqualValues(t, 30*time.Millisecond, ss[0].FlushInterval()) require.Equal(t, 
"tmp/1/blob", ss[1].Path()) require.EqualValues(t, 0644, ss[1].Perm()) diff --git a/cmd/neofs-node/config/engine/shard/blobstor/fstree/config.go b/cmd/neofs-node/config/engine/shard/blobstor/fstree/config.go index 6595e9375d..43c2de6d82 100644 --- a/cmd/neofs-node/config/engine/shard/blobstor/fstree/config.go +++ b/cmd/neofs-node/config/engine/shard/blobstor/fstree/config.go @@ -1,16 +1,27 @@ package fstree import ( + "math" + "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree" + "github.com/spf13/cast" ) // Config is a wrapper over the config section // which provides access to FSTree configurations. type Config config.Config -// DepthDefault is a default shallow dir depth. -const DepthDefault = 4 +const ( + // DepthDefault is the default shallow dir depth. + DepthDefault = 4 + // CombinedCountLimitDefault is the default for the maximum number of objects to write into a single file. + CombinedCountLimitDefault = 128 + // CombinedSizeLimitDefault is the default for the maximum size of the combined object file. + CombinedSizeLimitDefault = 8 * 1024 * 1024 + // CombinedSizeThresholdDefault is the default for the minimal size of the object that won't be combined with others for writes. + CombinedSizeThresholdDefault = 128 * 1024 +) // From wraps config section into Config. func From(c *config.Config) *Config { @@ -45,3 +56,41 @@ func (x *Config) Depth() uint64 { func (x *Config) NoSync() bool { return config.BoolSafe((*config.Config)(x), "no_sync") } + +// CombinedCountLimit returns the value of "combined_count_limit" config parameter. +// +// Returns [CombinedCountLimitDefault] if the value is missing or can't be converted to an integer; any other value (including 0 and negative ones) is returned as is. 
+func (x *Config) CombinedCountLimit() int { + var v = (*config.Config)(x).Value("combined_count_limit") + if v == nil { + return CombinedCountLimitDefault + } + + i, err := cast.ToIntE(v) + if err != nil { + return CombinedCountLimitDefault + } + return i +} + +// CombinedSizeLimit returns the value of "combined_size_limit" config parameter. +// +// Returns [CombinedSizeLimitDefault] if the value is missing, equal to 0 or not a proper size specification. +func (x *Config) CombinedSizeLimit() int { + var s = config.SizeInBytesSafe((*config.Config)(x), "combined_size_limit") + if s == 0 || s > math.MaxInt { + return CombinedSizeLimitDefault + } + return int(s) +} + +// CombinedSizeThreshold returns the value of "combined_size_threshold" config parameter. +// +// Returns [CombinedSizeThresholdDefault] if the value is missing, equal to 0 or not a proper size specification. +func (x *Config) CombinedSizeThreshold() int { + var s = config.SizeInBytesSafe((*config.Config)(x), "combined_size_threshold") + if s == 0 || s > math.MaxInt { + return CombinedSizeThresholdDefault + } + return int(s) +} diff --git a/cmd/neofs-node/config/engine/shard/blobstor/peapod/config.go b/cmd/neofs-node/config/engine/shard/blobstor/peapod/config.go deleted file mode 100644 index cd01d21ca9..0000000000 --- a/cmd/neofs-node/config/engine/shard/blobstor/peapod/config.go +++ /dev/null @@ -1,34 +0,0 @@ -package peapodconfig - -import ( - "time" - - "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config" -) - -// Config is a wrapper over the config section -// which provides access to Peapod configurations. -type Config config.Config - -// Various Peapod config defaults. -const ( - // DefaultFlushInterval is a default time interval between Peapod's batch writes - // to disk. - DefaultFlushInterval = 10 * time.Millisecond -) - -// From wraps config section into Config. 
-func From(c *config.Config) *Config { - return (*Config)(c) -} - -// FlushInterval returns the value of "flush_interval" config parameter. -// -// Returns DefaultFlushInterval if the value is not a positive duration. -func (x *Config) FlushInterval() time.Duration { - d := config.DurationSafe((*config.Config)(x), "flush_interval") - if d > 0 { - return d - } - return DefaultFlushInterval -} diff --git a/cmd/neofs-node/config/engine/shard/blobstor/storage/config.go b/cmd/neofs-node/config/engine/shard/blobstor/storage/config.go index 4a7d879e0d..32b78686a1 100644 --- a/cmd/neofs-node/config/engine/shard/blobstor/storage/config.go +++ b/cmd/neofs-node/config/engine/shard/blobstor/storage/config.go @@ -2,14 +2,22 @@ package storage import ( "io/fs" + "time" "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config" ) type Config config.Config -// PermDefault are default permission bits for BlobStor data. -const PermDefault = 0o640 +// Various config defaults. +const ( + // PermDefault are default permission bits for BlobStor data. + PermDefault = 0o640 + + // DefaultFlushInterval is the default time interval between batch writes + // to disk. + DefaultFlushInterval = 10 * time.Millisecond +) func From(x *config.Config) *Config { return (*Config)(x) @@ -53,3 +61,14 @@ func (x *Config) Perm() fs.FileMode { return fs.FileMode(p) } + +// FlushInterval returns the value of "flush_interval" config parameter. +// +// Returns DefaultFlushInterval if the value is not a positive duration. 
+func (x *Config) FlushInterval() time.Duration { + d := config.DurationSafe((*config.Config)(x), "flush_interval") + if d > 0 { + return d + } + return DefaultFlushInterval +} diff --git a/cmd/neofs-node/storage.go b/cmd/neofs-node/storage.go index f613281f58..13714fbfbb 100644 --- a/cmd/neofs-node/storage.go +++ b/cmd/neofs-node/storage.go @@ -153,7 +153,11 @@ func (c *cfg) shardOpts() []shardOptsWithID { fstree.WithPath(sRead.Path), fstree.WithPerm(sRead.Perm), fstree.WithDepth(sRead.Depth), - fstree.WithNoSync(sRead.NoSync)), + fstree.WithNoSync(sRead.NoSync), + fstree.WithCombinedCountLimit(sRead.CombinedCountLimit), + fstree.WithCombinedSizeLimit(sRead.CombinedSizeLimit), + fstree.WithCombinedSizeThreshold(sRead.CombinedSizeThreshold), + fstree.WithCombinedWriteInterval(sRead.FlushInterval)), Policy: func(_ *objectSDK.Object, data []byte) bool { return true }, diff --git a/cmd/neofs-node/storage/config.go b/cmd/neofs-node/storage/config.go index b4b69d7d49..8ed85196ac 100644 --- a/cmd/neofs-node/storage/config.go +++ b/cmd/neofs-node/storage/config.go @@ -53,16 +53,17 @@ type ShardCfg struct { } type SubStorageCfg struct { // common for all storages - Typ string - Path string - Perm fs.FileMode + Typ string + Path string + Perm fs.FileMode + FlushInterval time.Duration // tree-specific (FS) - Depth uint64 - NoSync bool - - // Peapod-specific - FlushInterval time.Duration + Depth uint64 + NoSync bool + CombinedCountLimit int + CombinedSizeLimit int + CombinedSizeThreshold int } // ID returns persistent id of a shard. 
It is different from the ID used in runtime diff --git a/config/example/node.json b/config/example/node.json index d07a7fd8e6..a0f3e786d5 100644 --- a/config/example/node.json +++ b/config/example/node.json @@ -199,7 +199,11 @@ "path": "tmp/1/blob", "no_sync": true, "perm": "0644", - "depth": 5 + "depth": 5, + "flush_interval": "20ms", + "combined_count_limit": 64, + "combined_size_limit": "16M", + "combined_size_threshold": "512K" } ], "pilorama": { diff --git a/config/example/node.yaml b/config/example/node.yaml index 9dad016afc..5b3b2d9ac9 100644 --- a/config/example/node.yaml +++ b/config/example/node.yaml @@ -196,6 +196,10 @@ storage: - type: fstree path: tmp/1/blob # blobstor path no_sync: true + flush_interval: 20ms # time interval between combined file writes to disk (defaults to 10ms) + combined_count_limit: 64 # number of small objects to write into a single file (defaults to 128) + combined_size_limit: 16M # limit for the multi-object file size (defaults to 8M) + combined_size_threshold: 512K # threshold for combined object writing (defaults to 128K) pilorama: path: tmp/1/blob/pilorama.db diff --git a/docs/storage-node-configuration.md b/docs/storage-node-configuration.md index dfeafb8153..d39ebeee55 100644 --- a/docs/storage-node-configuration.md +++ b/docs/storage-node-configuration.md @@ -191,21 +191,24 @@ blobstor: |-------------------------------------|-----------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `path` | `string` | | Path to the root of the blobstor. | | `perm` | file mode | `0640` | Default permission for created files and directories. | +| `flush_interval` | `duration` | `10ms` | Time interval between batch writes to disk. 
| #### `fstree` type options -| Parameter | Type | Default value | Description | -|---------------------|-----------|---------------|-------------------------------------------------------| -| `path` | `string` | | Path to the root of the blobstor. | -| `perm` | file mode | `0640` | Default permission for created files and directories. | -| `depth` | `int` | `4` | File-system tree depth. | -| `no_sync` | `bool` | `false` | Disable write synchronization, makes writes faster, but can lead to data loss. | +| Parameter | Type | Default value | Description | +|---------------------------|-----------|---------------|------------------------------------------------------------------------------------------------------------------------------| +| `path` | `string` | | Path to the root of the blobstor. | +| `perm` | file mode | `0640` | Default permission for created files and directories. | +| `depth` | `int` | `4` | File-system tree depth. | +| `no_sync` | `bool` | `false` | Disable write synchronization, makes writes faster, but can lead to data loss. | +| `combined_count_limit` | `int` | `128` | Maximum number of objects to write into a single file, 0 or 1 disables combined writing (disabling is recommended for SSDs). | +| `combined_size_limit` | `size` | `8M` | Maximum size of a multi-object file. | +| `combined_size_threshold` | `size` | `128K` | Minimum size of object that won't be combined with others when writing to disk. | #### `peapod` type options | Parameter | Type | Default value | Description | |---------------------|-----------|---------------|-------------------------------------------------------| | `path` | `string` | | Path to the Peapod database file. | | `perm` | file mode | `0640` | Default permission for created files and directories. | -| `flush_interval` | `duration`| `10ms` | Time interval between batch writes to disk. 
| ### `gc` subsection diff --git a/pkg/local_object_storage/blobstor/fstree/control.go b/pkg/local_object_storage/blobstor/fstree/control.go index 243151d9f6..b6fc940042 100644 --- a/pkg/local_object_storage/blobstor/fstree/control.go +++ b/pkg/local_object_storage/blobstor/fstree/control.go @@ -19,7 +19,7 @@ func (t *FSTree) Init() error { return fmt.Errorf("mkdir all for %q: %w", t.RootPath, err) } if !t.readOnly { - var w = newSpecificWriter(t.RootPath, t.Permissions, t.noSync) + var w = newSpecificWriter(t) if w != nil { t.writer = w } diff --git a/pkg/local_object_storage/blobstor/fstree/fstree.go b/pkg/local_object_storage/blobstor/fstree/fstree.go index 3b5f2c4915..325b4811b8 100644 --- a/pkg/local_object_storage/blobstor/fstree/fstree.go +++ b/pkg/local_object_storage/blobstor/fstree/fstree.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/common" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/compression" @@ -32,6 +33,11 @@ type FSTree struct { noSync bool readOnly bool + + combinedCountLimit int + combinedSizeLimit int + combinedSizeThreshold int + combinedWriteInterval time.Duration } // Info groups the information about file storage. 
@@ -73,6 +79,11 @@ func New(opts ...Option) *FSTree { Config: nil, Depth: 4, DirNameLen: DirNameLen, + + combinedCountLimit: 128, + combinedSizeLimit: 8 * 1024 * 1024, + combinedSizeThreshold: 128 * 1024, + combinedWriteInterval: 10 * time.Millisecond, } for i := range opts { opts[i](f) diff --git a/pkg/local_object_storage/blobstor/fstree/fstree_write_linux.go b/pkg/local_object_storage/blobstor/fstree/fstree_write_linux.go index 42f4a22e0f..26acc76e54 100644 --- a/pkg/local_object_storage/blobstor/fstree/fstree_write_linux.go +++ b/pkg/local_object_storage/blobstor/fstree/fstree_write_linux.go @@ -6,7 +6,6 @@ import ( "encoding/binary" "errors" "fmt" - "io/fs" "strconv" "sync" "time" @@ -16,13 +15,6 @@ import ( "golang.org/x/sys/unix" ) -const ( - defaultTick = 10 * time.Millisecond - combinedSizeThresh = 128 * 1024 - combinedSizeLimit = 8 * 1024 * 1024 - combinedCountLimit = 128 -) - type linuxWriter struct { root string perm uint32 @@ -30,6 +22,11 @@ type linuxWriter struct { bFlags int noSync bool + combinedCountLimit int + combinedSizeLimit int + combinedSizeThreshold int + combinedWriteInterval time.Duration + batchLock sync.Mutex batch *syncBatch } @@ -46,23 +43,28 @@ type syncBatch struct { err error } -func newSpecificWriter(root string, perm fs.FileMode, noSync bool) writer { +func newSpecificWriter(t *FSTree) writer { flags := unix.O_WRONLY | unix.O_TMPFILE | unix.O_CLOEXEC bFlags := flags - if !noSync { + if !t.noSync { flags |= unix.O_DSYNC } - fd, err := unix.Open(root, flags, uint32(perm)) + fd, err := unix.Open(t.RootPath, flags, uint32(t.Permissions)) if err != nil { return nil // Which means that OS-specific writeData can't be created and FSTree should use the generic one. } _ = unix.Close(fd) // Don't care about error. 
w := &linuxWriter{ - root: root, - perm: uint32(perm), + root: t.RootPath, + perm: uint32(t.Permissions), flags: flags, bFlags: bFlags, - noSync: noSync, + noSync: t.noSync, + + combinedCountLimit: t.combinedCountLimit, + combinedSizeLimit: t.combinedSizeLimit, + combinedSizeThreshold: t.combinedSizeThreshold, + combinedWriteInterval: t.combinedWriteInterval, } return w } @@ -79,7 +81,7 @@ func (w *linuxWriter) newSyncBatch() (*syncBatch, error) { noSync: w.noSync, } sb.lock.Lock() - sb.timer = time.AfterFunc(defaultTick, sb.sync) + sb.timer = time.AfterFunc(w.combinedWriteInterval, sb.sync) return sb, nil } @@ -165,7 +167,7 @@ func (w *linuxWriter) finalize() error { func (w *linuxWriter) writeData(id oid.ID, p string, data []byte) error { var err error - if len(data) > combinedSizeThresh { + if len(data) > w.combinedSizeThreshold || w.combinedCountLimit < 2 { err = w.writeFile(p, data) } else { err = w.writeCombinedFile(id, p, data) @@ -202,7 +204,7 @@ func (w *linuxWriter) writeCombinedFile(id oid.ID, p string, data []byte) error return err } err = sb.write(id, p, data) - if err == nil && sb.cnt >= combinedCountLimit || sb.size >= combinedSizeLimit { + if err == nil && sb.cnt >= w.combinedCountLimit || sb.size >= w.combinedSizeLimit { sb.intSync() } sb.lock.Unlock() diff --git a/pkg/local_object_storage/blobstor/fstree/fstree_write_specific.go b/pkg/local_object_storage/blobstor/fstree/fstree_write_specific.go index b8f2cf1e4d..fc0ebf840f 100644 --- a/pkg/local_object_storage/blobstor/fstree/fstree_write_specific.go +++ b/pkg/local_object_storage/blobstor/fstree/fstree_write_specific.go @@ -2,10 +2,6 @@ package fstree -import ( - "io/fs" -) - -func newSpecificWriter(_ string, _ fs.FileMode, _ bool) writer { +func newSpecificWriter(_ *FSTree) writer { return nil } diff --git a/pkg/local_object_storage/blobstor/fstree/option.go b/pkg/local_object_storage/blobstor/fstree/option.go index 07e5474445..c16f7b190f 100644 --- 
a/pkg/local_object_storage/blobstor/fstree/option.go +++ b/pkg/local_object_storage/blobstor/fstree/option.go @@ -2,6 +2,7 @@ package fstree import ( "io/fs" + "time" ) type Option func(*FSTree) @@ -35,3 +36,27 @@ func WithNoSync(noSync bool) Option { f.noSync = noSync } } + +func WithCombinedCountLimit(limit int) Option { + return func(f *FSTree) { + f.combinedCountLimit = limit + } +} + +func WithCombinedSizeLimit(size int) Option { + return func(f *FSTree) { + f.combinedSizeLimit = size + } +} + +func WithCombinedSizeThreshold(size int) Option { + return func(f *FSTree) { + f.combinedSizeThreshold = size + } +} + +func WithCombinedWriteInterval(t time.Duration) Option { + return func(f *FSTree) { + f.combinedWriteInterval = t + } +}