From 5db2cde5fab2b27f6cb778ffe6e7e525891bec19 Mon Sep 17 00:00:00 2001 From: Paul Jolly Date: Thu, 15 Feb 2024 14:05:06 +0000 Subject: [PATCH] preprocessor: use a shared /caches mount in multi-step scripts For Go API guides, use of cuelang.org/go will be common. In serve mode, making changes and re-running a guide as it progresses is common. Where a dependency on cuelang.org/go exists, this requires downloading that module and its dependencies on every run because we currently start from a cold set of caches. For such guides it is therefore useful to be able to leverage at least the module download caches. WIP Preprocessor-No-Write-Cache: true Signed-off-by: Paul Jolly Change-Id: I67f17f53e68a8b719546ccf05e6767ec6f8a9e5d Dispatch-Trailer: {"type":"trybot","CL":1177000,"patchset":1,"ref":"refs/changes/00/1177000/1","targetBranch":"alpha"} --- go.mod | 2 +- .../preprocessor/cmd/_docker/entrypoint.sh | 16 +++++ internal/cmd/preprocessor/cmd/execute.go | 58 +++++++++++++++---- internal/cmd/preprocessor/cmd/execute_doc.go | 11 ++++ .../preprocessor/cmd/gen_dockerimagetag.go | 2 +- internal/cmd/preprocessor/cmd/rootfile.go | 9 +++ .../testdata/execute_multistagescript.txtar | 8 +-- 7 files changed, 90 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index 882406b7e..29c7322cc 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/cue-lang/cuelang.org -go 1.20 +go 1.22 require ( cuelang.org/go v0.7.0 diff --git a/internal/cmd/preprocessor/cmd/_docker/entrypoint.sh b/internal/cmd/preprocessor/cmd/_docker/entrypoint.sh index ebe3fdf04..5a9e00469 100644 --- a/internal/cmd/preprocessor/cmd/_docker/entrypoint.sh +++ b/internal/cmd/preprocessor/cmd/_docker/entrypoint.sh @@ -22,14 +22,30 @@ then exit 1 fi fi + # In case we didn't actually create a user # add the $USER_UID user user=$(getent passwd $USER_UID | cut -d: -f1) usermod -a -G $group $user +# We are still root at this point. Ensure /caches exists and that everything +# inside /caches is rwX for all. 
+mkdir -p /caches +chmod a+rwX /caches +if compgen -G "/caches/*" > /dev/null; then + chmod a+rwX /caches/* +fi + # Create the home dir if it does not exist mkdir -p /home/runner chown $user:$group /home/runner cd /home/runner export HOME=/home/runner + +# Set a umask to 0000 so that writes to /caches (if that is a mount) are done +# so in a way which won't conflict with users of other UIDs in other +# containers. Worst case, writes to /caches between containers will interfere +# with each other but this will be caught in CI. +umask 0000 + exec setpriv --reuid $USER_UID --regid $USER_GID --init-groups "$@" diff --git a/internal/cmd/preprocessor/cmd/execute.go b/internal/cmd/preprocessor/cmd/execute.go index e5061e2ee..c8aec0de8 100644 --- a/internal/cmd/preprocessor/cmd/execute.go +++ b/internal/cmd/preprocessor/cmd/execute.go @@ -16,14 +16,18 @@ package cmd import ( "bytes" + "context" "crypto/sha256" "fmt" "hash" "io" "os" + "os/exec" "path/filepath" "regexp" "strings" + "sync" + "time" "cuelang.org/go/cmd/cue/cmd" "cuelang.org/go/cue" @@ -35,16 +39,18 @@ type lang string const ( langEn lang = "en" - flagDir flagName = "dir" - flagDebug flagName = "debug" - flagServe flagName = "serve" - flagUpdate flagName = "update" - flagReadonlyCache flagName = "readonlycache" - flagSkipCache flagName = "skipcache" - flagHugoFlag flagName = "hugo" - flagNoWriteCache flagName = "nowritecache" - flagCheck flagName = "check" - flagList flagName = "ls" + flagDir flagName = "dir" + flagDebug flagName = "debug" + flagServe flagName = "serve" + flagUpdate flagName = "update" + flagReadonlyCache flagName = "readonlycache" + flagSkipCache flagName = "skipcache" + flagHugoFlag flagName = "hugo" + flagNoWriteCache flagName = "nowritecache" + flagCheck flagName = "check" + flagList flagName = "ls" + flagCacheVolumeName flagName = "cachevolumename" + flagNoCacheVolume flagName = "nocachevolume" ) const ( @@ -144,6 +150,34 @@ type executionContext struct { // siteSchema is a CUE schema 
that validates a preprocessor site siteSchema cue.Value + + // cacheVolumeName is the name of the docker volume to use for + // general-purpose caches. The volume will be mounted at /caches in the container + // unless noCacheVolume is set. + cacheVolumeName string + + // noCacheVolume can be set to avoid using a shared docker volume for + // multi-step scripts. + noCacheVolume bool + + // cacheVolumeCheck ensures we run our docker cache volume check only + // once per instance of the preprocessor (which is why this field is on + // executionContext and not executeContext). It returns the name of the + // volume to use for caches if the volume exists, and an error otherwise. + cacheVolumeCheck func() (string, error) +} + +// dockerCacheVolumeCheck ensures that a docker volume named volumeName exists +// by running "docker volume create", and returns an error if that command fails. +func dockerCacheVolumeCheck(volumeName string) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + cmd := exec.CommandContext(ctx, "docker", "volume", "create", volumeName) + out, err := cmd.CombinedOutput() + cancel() + if err != nil { + err = fmt.Errorf("failed to ensure cache volume exists via [%v]: %v\n%s", cmd, err, out) + } + return volumeName, err } // tempDir creates a new temporary directory within the @@ -215,6 +249,10 @@ func executeDef(c *Command, args []string) error { skipCache: flagSkipCache.Bool(c), noWriteCache: flagNoWriteCache.Bool(c), siteSchema: schema, + cacheVolumeCheck: sync.OnceValues(func() (string, error) { + return dockerCacheVolumeCheck(flagCacheVolumeName.String(c)) + }), + noCacheVolume: flagNoCacheVolume.Bool(c), } // Calculate which levels of debug-level logging to enable, processing each diff --git a/internal/cmd/preprocessor/cmd/execute_doc.go b/internal/cmd/preprocessor/cmd/execute_doc.go index 94764a7e7..b7b24846d 100644 --- a/internal/cmd/preprocessor/cmd/execute_doc.go +++ b/internal/cmd/preprocessor/cmd/execute_doc.go @@ -158,5 +158,16 @@ func newExecuteCmd(c 
*Command) *cobra.Command { cmd.Flags().Bool(string(flagCheck), false, "check CUE in page roots is properly namespaced") cmd.Flags().Bool(string(flagList), false, "list all .cue files that form part of site configuration") cmd.Flags().StringSliceVar(&hugoArgs, string(flagHugoFlag), nil, "list of flags to pass to hugo") + cmd.Flags().String(string(flagCacheVolumeName), envOrDefault("CUE_CACHE_VOLUME", "cuelang_org_caches"), "the name of the cache volume to use; this flag overrides CUE_CACHE_VOLUME") + cmd.Flags().Bool(string(flagNoCacheVolume), false, "do not use a shared docker volume cache for mult-step scripts") return cmd } + +// envOrDefault returns the value of the environment variable named by v if +// non-empty, else the value d. +func envOrDefault(v, d string) string { + if res := os.Getenv(v); res != "" { + return res + } + return d +} diff --git a/internal/cmd/preprocessor/cmd/gen_dockerimagetag.go b/internal/cmd/preprocessor/cmd/gen_dockerimagetag.go index e8797f051..59a442b40 100644 --- a/internal/cmd/preprocessor/cmd/gen_dockerimagetag.go +++ b/internal/cmd/preprocessor/cmd/gen_dockerimagetag.go @@ -2,4 +2,4 @@ package cmd -const dockerImageTag = "preprocessor:c57835e52cdeefd7917ee22e47b2d846d4c7b19107e243378aa3776e2fc3310e" +const dockerImageTag = "preprocessor:6e7cb00a0a5ab45565a39b9daa398f8fa8e2faa58a7b3baae3101c59a43764b2" diff --git a/internal/cmd/preprocessor/cmd/rootfile.go b/internal/cmd/preprocessor/cmd/rootfile.go index f55aa37e8..95255775f 100644 --- a/internal/cmd/preprocessor/cmd/rootfile.go +++ b/internal/cmd/preprocessor/cmd/rootfile.go @@ -598,6 +598,15 @@ func (m *multiStepScript) run() (runerr error) { "-v", fmt.Sprintf("%s:/scripts", scriptsDir), ) + // Ensure we have a docker cache volume if one is required + if !m.noCacheVolume { + volumeName, err := m.cacheVolumeCheck() + if err != nil { + return m.errorf("%v: failed to ensure cache volume exists: %v", m, err) + } + args = append(args, "-v", fmt.Sprintf("%s:/caches", volumeName)) + 
} + // We cannot perform the --network=host trick here, even if the user wants // to be unsafe, because we might, for example, run cue mod registry which // requires its own networking isolation for binding to the port it will diff --git a/internal/cmd/preprocessor/cmd/testdata/execute_multistagescript.txtar b/internal/cmd/preprocessor/cmd/testdata/execute_multistagescript.txtar index 756f17a97..e4fb98986 100644 --- a/internal/cmd/preprocessor/cmd/testdata/execute_multistagescript.txtar +++ b/internal/cmd/preprocessor/cmd/testdata/execute_multistagescript.txtar @@ -215,12 +215,12 @@ package site page: { cache: { upload: { - "upload-some-cue": "Qeh5dBYv44+X5MjEvYJg7vlFPAdlUWMv82XbC1Sz2PE=" - "upload-some-json": "UcdyFGMTs9LUPE01F0z18B1V3EABbFvRtJ2WgBWhW7Y=" - "in-subdir": "zHoWoXaq9p6lYZoULKSi7hLssBPZmLU6v3l1DfYXv/o=" + "upload-some-cue": "7IDjGd2+UaDqL/CtCzWTF/n9JAghLOal/XYmGOXLTwU=" + "upload-some-json": "r7ZxaLhkjrkTxjeWFPVjLXXQEAk5pMHlkexqx+RnHKU=" + "in-subdir": "J43PmA4U4WvVNMgjwuRuxRYV01FPIWNFmKMuvNPDah4=" } multi_step: { - "3UEAH8JLUEFNADS5TNBJMM22ME58OUESIJUIE3BI40BJ4O3O3AO0====": [{ + "8OE47KRJN899MV3TUJQ0JF5DR4IV4E6K09S3J26JI2QFPPH0A1EG====": [{ doc: """ # script doc comment #scripttag