Skip to content

Commit

Permalink
feat(CosmosFullNode): Allow configuring the chain home directory (#338)
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidNix authored Aug 30, 2023
1 parent 8914bea commit ff32bc2
Show file tree
Hide file tree
Showing 12 changed files with 227 additions and 84 deletions.
10 changes: 10 additions & 0 deletions api/v1/cosmosfullnode_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ type ChainSpec struct {
// +kubebuilder:validation:MinLength:=1
Binary string `json:"binary"`

// The chain's home directory is where the chain's data and config are stored.
// This should be a single folder name, e.g. .gaia, .dydxprotocol, .osmosisd, etc.
// Set via the --home flag when running the binary.
// If empty, defaults to "cosmos", which translates to `chain start --home /home/operator/cosmos`.
// Historically, several chains have not respected the --home flag and have saved data outside of it, which crashes the pods.
// Therefore, this option was introduced to mitigate those edge cases, so that you can specify the home directory
// to match the chain's default home dir.
// +optional
HomeDir string `json:"homeDir"`

// CometBFT (formerly Tendermint) configuration applied to config.toml.
// Although optional, it's highly recommended you configure this field.
// +optional
Expand Down
11 changes: 11 additions & 0 deletions config/crd/bases/cosmos.strange.love_cosmosfullnodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,17 @@ spec:
.zip Use GenesisScript if the chain has an unconventional file
format or genesis location.'
type: string
homeDir:
description: The chain's home directory is where the chain's data
and config is stored. This should be a single folder. E.g. .gaia,
.dydxprotocol, .osmosisd, etc. Set via --home flag when running
the binary. If empty, defaults to "cosmos" which translates
to `chain start --home /home/operator/cosmos`. Historically,
several chains do not respect the --home and save data outside
--home which crashes the pods. Therefore, this option was introduced
to mitigate those edge cases, so that you can specify the home
directory to match the chain's default home dir.
type: string
logFormat:
description: One of plain or json. If not set, defaults to plain.
enum:
Expand Down
1 change: 1 addition & 0 deletions config/samples/cosmos_v1_cosmosfullnode_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ spec:
network: mainnet
chainID: cosmoshub-4
binary: gaiad
homeDir: .gaia # optional, defaults to "cosmos"
skipInvariants: true
genesisURL: "https://github.com/cosmos/mainnet/raw/master/genesis.cosmoshub-4.json.gz"
genesisScript: "arbitrary script to download genesis file. e.g. curl https://url-to-genesis.com | jq '.genesis' > $GENESIS_FILE"
Expand Down
10 changes: 2 additions & 8 deletions healtcheck_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"
"github.com/strangelove-ventures/cosmos-operator/internal/cosmos"
"github.com/strangelove-ventures/cosmos-operator/internal/fullnode"
"github.com/strangelove-ventures/cosmos-operator/internal/healthcheck"
"golang.org/x/sync/errgroup"
)
Expand Down Expand Up @@ -49,14 +48,9 @@ func startHealthCheckServer(cmd *cobra.Command, args []string) error {
)
defer func() { _ = zlog.Sync() }()

var (
tm = healthcheck.NewComet(logger, cometClient, rpcHost, timeout)
disk = healthcheck.DiskUsage(fullnode.ChainHomeDir)
)

mux := http.NewServeMux()
mux.Handle("/", tm)
mux.Handle("/disk", disk)
mux.Handle("/", healthcheck.NewComet(logger, cometClient, rpcHost, timeout))
mux.HandleFunc("/disk", healthcheck.DiskUsage)

srv := &http.Server{
Addr: listenAddr,
Expand Down
54 changes: 31 additions & 23 deletions internal/fullnode/pod_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func NewPodBuilder(crd *cosmosv1.CosmosFullNode) PodBuilder {
//Args: []string{"-c", `trap : TERM INT; sleep infinity & wait`},
Command: []string{startCmd},
Args: startArgs,
Env: envVars,
Env: envVars(crd),
Ports: buildPorts(crd.Spec.Type),
Resources: tpl.Resources,
ReadinessProbe: probes[0],
Expand Down Expand Up @@ -223,7 +223,7 @@ func (b PodBuilder) WithOrdinal(ordinal int32) PodBuilder {

// Mounts required by all containers.
mounts := []corev1.VolumeMount{
{Name: volChainHome, MountPath: ChainHomeDir},
{Name: volChainHome, MountPath: ChainHomeDir(b.crd)},
{Name: volSystemTmp, MountPath: systemTmpDir},
}
// Additional mounts only needed for init containers.
Expand All @@ -236,22 +236,19 @@ func (b PodBuilder) WithOrdinal(ordinal int32) PodBuilder {

// At this point, guaranteed to have at least 2 containers.
pod.Spec.Containers[0].VolumeMounts = append(mounts, corev1.VolumeMount{
Name: volNodeKey, MountPath: path.Join(ChainHomeDir, "config", nodeKeyFile), SubPath: nodeKeyFile,
Name: volNodeKey, MountPath: path.Join(ChainHomeDir(b.crd), "config", nodeKeyFile), SubPath: nodeKeyFile,
})
pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{
// The healthcheck sidecar needs access to the home directory so it can read disk usage.
{Name: volChainHome, MountPath: ChainHomeDir, ReadOnly: true},
{Name: volChainHome, MountPath: ChainHomeDir(b.crd), ReadOnly: true},
}

b.pod = pod
return b
}

const (
workDir = "/home/operator"
// ChainHomeDir is the abs filepath for the chain's home directory.
ChainHomeDir = workDir + "/cosmos"

workDir = "/home/operator"
tmpDir = workDir + "/.tmp"
tmpConfigDir = workDir + "/.config"
infraToolImage = "ghcr.io/strangelove-ventures/infra-toolkit:v0.0.1"
Expand All @@ -260,20 +257,30 @@ const (
systemTmpDir = "/tmp"
)

var (
envVars = []corev1.EnvVar{
// ChainHomeDir returns the absolute filepath of the chain's home directory for crd.
// When crd.Spec.ChainSpec.HomeDir is empty the result is workDir + "/cosmos";
// otherwise it is HomeDir joined onto workDir.
func ChainHomeDir(crd *cosmosv1.CosmosFullNode) string {
	configured := crd.Spec.ChainSpec.HomeDir
	if configured == "" {
		// Nothing configured: fall back to the default home directory.
		return workDir + "/cosmos"
	}
	return path.Join(workDir, configured)
}

// envVars returns the environment variables set on the containers built for crd.
// HOME is workDir and CHAIN_HOME is the chain home directory from ChainHomeDir(crd);
// GENESIS_FILE, CONFIG_DIR and DATA_DIR are paths beneath that chain home directory.
func envVars(crd *cosmosv1.CosmosFullNode) []corev1.EnvVar {
home := ChainHomeDir(crd)
return []corev1.EnvVar{
{Name: "HOME", Value: workDir},
{Name: "CHAIN_HOME", Value: ChainHomeDir},
{Name: "GENESIS_FILE", Value: path.Join(ChainHomeDir, "config", "genesis.json")},
{Name: "CONFIG_DIR", Value: path.Join(ChainHomeDir, "config")},
{Name: "DATA_DIR", Value: path.Join(ChainHomeDir, "data")},
{Name: "CHAIN_HOME", Value: home},
{Name: "GENESIS_FILE", Value: path.Join(home, "config", "genesis.json")},
{Name: "CONFIG_DIR", Value: path.Join(home, "config")},
{Name: "DATA_DIR", Value: path.Join(home, "data")},
}
)
}

func initContainers(crd *cosmosv1.CosmosFullNode, moniker string) []corev1.Container {
tpl := crd.Spec.PodTemplate
binary := crd.Spec.ChainSpec.Binary
genesisCmd, genesisArgs := DownloadGenesisCommand(crd.Spec.ChainSpec)
env := envVars(crd)

initCmd := fmt.Sprintf("%s init %s --chain-id %s", binary, moniker, crd.Spec.ChainSpec.ChainID)
required := []corev1.Container{
Expand All @@ -282,7 +289,7 @@ func initContainers(crd *cosmosv1.CosmosFullNode, moniker string) []corev1.Conta
Image: infraToolImage,
Command: []string{"sh"},
Args: []string{"-c", `rm -rf "$HOME/.tmp/*"`},
Env: envVars,
Env: env,
ImagePullPolicy: tpl.ImagePullPolicy,
WorkingDir: workDir,
},
Expand All @@ -304,7 +311,7 @@ echo "Initializing into tmp dir for downstream processing..."
%s --home "$HOME/.tmp"
`, initCmd, initCmd),
},
Env: envVars,
Env: env,
ImagePullPolicy: tpl.ImagePullPolicy,
WorkingDir: workDir,
},
Expand All @@ -314,7 +321,7 @@ echo "Initializing into tmp dir for downstream processing..."
Image: infraToolImage,
Command: []string{genesisCmd},
Args: genesisArgs,
Env: envVars,
Env: env,
ImagePullPolicy: tpl.ImagePullPolicy,
WorkingDir: workDir,
},
Expand Down Expand Up @@ -342,7 +349,7 @@ config-merge -f toml "$TMP_DIR/config.toml" "$OVERLAY_DIR/config-overlay.toml" >
config-merge -f toml "$TMP_DIR/app.toml" "$OVERLAY_DIR/app-overlay.toml" > "$CONFIG_DIR/app.toml"
`,
},
Env: envVars,
Env: env,
ImagePullPolicy: tpl.ImagePullPolicy,
WorkingDir: workDir,
},
Expand All @@ -355,7 +362,7 @@ config-merge -f toml "$TMP_DIR/app.toml" "$OVERLAY_DIR/app-overlay.toml" > "$CON
Image: infraToolImage,
Command: []string{cmd},
Args: args,
Env: envVars,
Env: env,
ImagePullPolicy: tpl.ImagePullPolicy,
WorkingDir: workDir,
})
Expand All @@ -367,7 +374,7 @@ config-merge -f toml "$TMP_DIR/app.toml" "$OVERLAY_DIR/app-overlay.toml" > "$CON
func startCmdAndArgs(crd *cosmosv1.CosmosFullNode) (string, []string) {
var (
binary = crd.Spec.ChainSpec.Binary
args = startCommandArgs(crd.Spec.ChainSpec)
args = startCommandArgs(crd)
privvalSleep int32 = 10
)
if v := crd.Spec.ChainSpec.PrivvalSleepSeconds; v != nil {
Expand All @@ -383,8 +390,9 @@ func startCmdAndArgs(crd *cosmosv1.CosmosFullNode) (string, []string) {
return binary, args
}

func startCommandArgs(cfg cosmosv1.ChainSpec) []string {
args := []string{"start", "--home", ChainHomeDir}
func startCommandArgs(crd *cosmosv1.CosmosFullNode) []string {
args := []string{"start", "--home", ChainHomeDir(crd)}
cfg := crd.Spec.ChainSpec
if cfg.SkipInvariants {
args = append(args, "--x-crisis-skip-assert-invariants")
}
Expand Down
59 changes: 54 additions & 5 deletions internal/fullnode/pod_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func TestPodBuilder(t *testing.T) {
require.Equal(t, startContainer.Env[3].Value, "/home/operator/cosmos/config")
require.Equal(t, startContainer.Env[4].Name, "DATA_DIR")
require.Equal(t, startContainer.Env[4].Value, "/home/operator/cosmos/data")
require.Equal(t, envVars, startContainer.Env)
require.Equal(t, envVars(&crd), startContainer.Env)

healthContainer := pod.Spec.Containers[1]
require.Equal(t, "healthcheck", healthContainer.Name)
Expand Down Expand Up @@ -252,7 +252,7 @@ func TestPodBuilder(t *testing.T) {
}))

for _, c := range pod.Spec.InitContainers {
require.Equal(t, envVars, startContainer.Env, c.Name)
require.Equal(t, envVars(&crd), startContainer.Env, c.Name)
require.Equal(t, wantWrkDir, c.WorkingDir)
}

Expand All @@ -269,6 +269,38 @@ func TestPodBuilder(t *testing.T) {
require.Contains(t, mergeConfig.Args[1], `config-merge -f toml "$TMP_DIR/app.toml" "$OVERLAY_DIR/app-overlay.toml" > "$CONFIG_DIR/app.toml`)
})

t.Run("containers - configured home dir", func(t *testing.T) {
crd := defaultCRD()
crd.Spec.ChainSpec.HomeDir = ".osmosisd"
builder := NewPodBuilder(&crd)
pod, err := builder.WithOrdinal(6).Build()
require.NoError(t, err)

require.Len(t, pod.Spec.Containers, 2)

container := pod.Spec.Containers[0]
require.Equal(t, "node", container.Name)
require.Empty(t, container.ImagePullPolicy)
require.Equal(t, crd.Spec.PodTemplate.Resources, container.Resources)

require.Equal(t, container.Env[0].Name, "HOME")
require.Equal(t, container.Env[0].Value, "/home/operator")
require.Equal(t, container.Env[1].Name, "CHAIN_HOME")
require.Equal(t, container.Env[1].Value, "/home/operator/.osmosisd")
require.Equal(t, container.Env[2].Name, "GENESIS_FILE")
require.Equal(t, container.Env[2].Value, "/home/operator/.osmosisd/config/genesis.json")
require.Equal(t, container.Env[3].Name, "CONFIG_DIR")
require.Equal(t, container.Env[3].Value, "/home/operator/.osmosisd/config")
require.Equal(t, container.Env[4].Name, "DATA_DIR")
require.Equal(t, container.Env[4].Value, "/home/operator/.osmosisd/data")

require.NotEmpty(t, pod.Spec.InitContainers)

for _, c := range pod.Spec.InitContainers {
require.Equal(t, container.Env, c.Env, c.Name)
}
})

t.Run("volumes", func(t *testing.T) {
crd := defaultCRD()
builder := NewPodBuilder(&crd)
Expand Down Expand Up @@ -352,6 +384,8 @@ func TestPodBuilder(t *testing.T) {
})

t.Run("start container command", func(t *testing.T) {
const defaultHome = "/home/operator/cosmos"

cmdCrd := defaultCRD()
cmdCrd.Spec.ChainSpec.Binary = "gaiad"
cmdCrd.Spec.PodTemplate.Image = "ghcr.io/cosmoshub:v1.2.3"
Expand All @@ -363,23 +397,30 @@ func TestPodBuilder(t *testing.T) {
require.Equal(t, "ghcr.io/cosmoshub:v1.2.3", c.Image)

require.Equal(t, []string{"gaiad"}, c.Command)
require.Equal(t, []string{"start", "--home", "/home/operator/cosmos"}, c.Args)
require.Equal(t, []string{"start", "--home", defaultHome}, c.Args)

cmdCrd.Spec.ChainSpec.SkipInvariants = true
pod, err = NewPodBuilder(&cmdCrd).WithOrdinal(1).Build()
require.NoError(t, err)
c = pod.Spec.Containers[0]

require.Equal(t, []string{"gaiad"}, c.Command)
require.Equal(t, []string{"start", "--home", "/home/operator/cosmos", "--x-crisis-skip-assert-invariants"}, c.Args)
require.Equal(t, []string{"start", "--home", defaultHome, "--x-crisis-skip-assert-invariants"}, c.Args)

cmdCrd.Spec.ChainSpec.LogLevel = ptr("debug")
cmdCrd.Spec.ChainSpec.LogFormat = ptr("json")
pod, err = NewPodBuilder(&cmdCrd).WithOrdinal(1).Build()
require.NoError(t, err)
c = pod.Spec.Containers[0]

require.Equal(t, []string{"start", "--home", "/home/operator/cosmos", "--x-crisis-skip-assert-invariants", "--log_level", "debug", "--log_format", "json"}, c.Args)
require.Equal(t, []string{"start", "--home", defaultHome, "--x-crisis-skip-assert-invariants", "--log_level", "debug", "--log_format", "json"}, c.Args)

cmdCrd.Spec.ChainSpec.HomeDir = ".other"
pod, err = NewPodBuilder(&cmdCrd).WithOrdinal(1).Build()
require.NoError(t, err)

c = pod.Spec.Containers[0]
require.Equal(t, []string{"start", "--home", "/home/operator/.other", "--x-crisis-skip-assert-invariants", "--log_level", "debug", "--log_format", "json"}, c.Args)
})

t.Run("sentry start container command ", func(t *testing.T) {
Expand Down Expand Up @@ -514,6 +555,14 @@ gaiad start --home /home/operator/cosmos`
})
}

// TestChainHomeDir checks ChainHomeDir for the default CRD and for a CRD with HomeDir configured.
func TestChainHomeDir(t *testing.T) {
	const (
		wantDefault    = "/home/operator/cosmos"
		wantConfigured = "/home/operator/.gaia"
	)

	crd := defaultCRD()

	// The CRD from defaultCRD() is expected to resolve to the default home directory.
	gotDefault := ChainHomeDir(&crd)
	require.Equal(t, wantDefault, gotDefault)

	// A configured HomeDir is resolved beneath /home/operator.
	crd.Spec.ChainSpec.HomeDir = ".gaia"
	gotConfigured := ChainHomeDir(&crd)
	require.Equal(t, wantConfigured, gotConfigured)
}

func TestPVCName(t *testing.T) {
crd := defaultCRD()
builder := NewPodBuilder(&crd)
Expand Down
4 changes: 2 additions & 2 deletions internal/fullnode/pvc_disk_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (

// DiskUsager fetches disk usage statistics.
// CollectDiskUsage calls it with host set to "http://" plus a pod's IP and homeDir set to
// ChainHomeDir(crd). It returns a healthcheck.DiskUsageResponse or an error.
type DiskUsager interface {
DiskUsage(ctx context.Context, host string) (healthcheck.DiskUsageResponse, error)
DiskUsage(ctx context.Context, host, homeDir string) (healthcheck.DiskUsageResponse, error)
}

type PVCDiskUsage struct {
Expand Down Expand Up @@ -66,7 +66,7 @@ func (c DiskUsageCollector) CollectDiskUsage(ctx context.Context, crd *cosmosv1.
pod := pods.Items[i]
cctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
resp, err := c.diskClient.DiskUsage(cctx, "http://"+pod.Status.PodIP)
resp, err := c.diskClient.DiskUsage(cctx, "http://"+pod.Status.PodIP, ChainHomeDir(crd))
if err != nil {
errs[i] = fmt.Errorf("pod %s %s: %w", pod.Name, resp.Dir, err)
return nil
Expand Down
Loading

0 comments on commit ff32bc2

Please sign in to comment.