diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b9f6a6417..d90500b92 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -17,6 +17,11 @@ jobs: BENCHMARK_SAMPLES_NUM: 5 BENCHMARK_PERCENTILE: 95 BENCHMARK_PERCENTILES_GRANULARITY: 25 + strategy: + fail-fast: false + max-parallel: 1 + matrix: + runtime: ["podman", "containerd"] steps: - name: Install tools run: | @@ -31,9 +36,11 @@ jobs: jq '{ location : .compute.location, vmSize : .compute.vmSize }' | \ tee ${{ env.BENCHMARK_RESULT_DIR }}/instance.json - name: Run benchmark + env: + BENCHMARK_RUNTIME_MODE: ${{ matrix.runtime }} run: make benchmark - uses: actions/upload-artifact@v1 if: ${{ always() }} with: - name: benchmarking-result + name: benchmarking-result-${{ matrix.runtime }} path: ${{ env.BENCHMARK_RESULT_DIR }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bc1995f83..85373af27 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -89,9 +89,9 @@ jobs: BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }} run: make test-pullsecrets - test-cri: + test-cri-containerd: runs-on: ubuntu-20.04 - name: CRIValidation + name: CRIValidationContainerd strategy: fail-fast: false matrix: @@ -102,11 +102,19 @@ jobs: builtin: "true" steps: - uses: actions/checkout@v2 - - name: Varidate the runtime through CRI + - name: Validate containerd through CRI env: DOCKER_BUILD_ARGS: ${{ matrix.buildargs }} BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }} - run: make test-cri + run: make test-cri-containerd + + test-cri-cri-o: + runs-on: ubuntu-20.04 + name: CRIValidationCRIO + steps: + - uses: actions/checkout@v2 + - name: Validate CRI-O through CRI + run: make test-cri-o # # Project checks diff --git a/Dockerfile b/Dockerfile index 32dec1df2..31e199250 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,16 @@ ARG RUNC_VERSION=v1.0.0-rc93 ARG CNI_PLUGINS_VERSION=v0.9.1 ARG NERDCTL_VERSION=0.7.3 +ARG 
PODMAN_VERSION=2314af70bdacf75135a11b48b87dba8e461a43ea +ARG CRIO_VERSION=v1.20.2 +ARG CONMON_VERSION=v2.0.26 + +# TODO: switch them to the official repo after patches are merged +ARG CONTAINERS_IMAGE_REPO=https://github.com/ktock/image +ARG CONTAINERS_IMAGE_VERSION=fc75c96e7ac713f26d6de140ea6a5f5b313bf8ee + +ARG CRIO_TEST_PAUSE_IMAGE_NAME=k8s.gcr.io/pause:3.5 + # Legacy builder that doesn't support TARGETARCH should set this explicitly using --build-arg. # If TARGETARCH isn't supported by the builder, the default value is "amd64". @@ -65,6 +75,57 @@ RUN cd $GOPATH/src/github.com/containerd/stargz-snapshotter && \ PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${SNAPSHOTTER_BUILD_FLAGS} make containerd-stargz-grpc && \ PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${CTR_REMOTE_BUILD_FLAGS} make ctr-remote +# Build stargz store +FROM golang-base AS stargz-store-dev +ARG TARGETARCH +ARG GOARM +ARG SNAPSHOTTER_BUILD_FLAGS +ARG CTR_REMOTE_BUILD_FLAGS +COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter +RUN cd $GOPATH/src/github.com/containerd/stargz-snapshotter && \ + PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${SNAPSHOTTER_BUILD_FLAGS} make stargz-store + +# Build podman +FROM golang-base AS podman-dev +ARG PODMAN_VERSION +ARG CONTAINERS_IMAGE_REPO +ARG CONTAINERS_IMAGE_VERSION +RUN apt-get update -y && apt-get install -y libseccomp-dev libgpgme-dev && \ + git clone ${CONTAINERS_IMAGE_REPO} $GOPATH/src/github.com/containers/image && \ + cd $GOPATH/src/github.com/containers/image && \ + git checkout ${CONTAINERS_IMAGE_VERSION} && \ + git clone https://github.com/containers/podman $GOPATH/src/github.com/containers/podman && \ + cd $GOPATH/src/github.com/containers/podman && \ + git checkout ${PODMAN_VERSION} && \ + sed -i "s/-mod=vendor//g" $GOPATH/src/github.com/containers/podman/Makefile && \ + echo "replace github.com/containers/image/v5 => /go/src/github.com/containers/image" >> $GOPATH/src/github.com/containers/podman/go.mod 
&& \ + make && make install PREFIX=/out/ + +# Build CRI-O +FROM golang-base AS cri-o-dev +ARG CRIO_VERSION +ARG CONTAINERS_IMAGE_REPO +ARG CONTAINERS_IMAGE_VERSION +RUN apt-get update -y && apt-get install -y libseccomp-dev libgpgme-dev && \ + git clone ${CONTAINERS_IMAGE_REPO} $GOPATH/src/github.com/containers/image && \ + cd $GOPATH/src/github.com/containers/image && \ + git checkout ${CONTAINERS_IMAGE_VERSION} && \ + git clone https://github.com/cri-o/cri-o $GOPATH/src/github.com/cri-o/cri-o && \ + cd $GOPATH/src/github.com/cri-o/cri-o && \ + git checkout ${CRIO_VERSION} && \ + echo "replace github.com/containers/image/v5 => /go/src/github.com/containers/image" >> $GOPATH/src/github.com/cri-o/cri-o/go.mod && \ + go mod vendor && \ + make && make install PREFIX=/out/ + +# Build conmon +FROM golang-base AS conmon-dev +ARG CONMON_VERSION +RUN apt-get update -y && apt-get install -y gcc git libc6-dev libglib2.0-dev pkg-config make && \ + git clone -b ${CONMON_VERSION} --depth 1 \ + https://github.com/containers/conmon $GOPATH/src/github.com/containers/conmon && \ + cd $GOPATH/src/github.com/containers/conmon && \ + mkdir /out/ && make && make install PREFIX=/out/ + # Binaries for release FROM scratch AS release-binaries COPY --from=snapshotter-dev /out/* / @@ -98,6 +159,36 @@ COPY --from=runc-dev /out/sbin/* /usr/local/sbin/ COPY --from=snapshotter-dev /out/ctr-remote /usr/local/bin/ RUN ln -s /usr/local/bin/ctr-remote /usr/local/bin/ctr +# Base image which contains podman with stargz-store +FROM golang-base AS podman-base +ARG TARGETARCH +ARG CNI_PLUGINS_VERSION +ARG PODMAN_VERSION +RUN apt-get update -y && apt-get --no-install-recommends install -y fuse libgpgme-dev \ + iptables libyajl-dev && \ + # Make CNI plugins manipulate iptables instead of nftables + # as this test runs in a Docker container that network is configured with iptables. + # c.f. 
https://github.com/moby/moby/issues/26824 + update-alternatives --set iptables /usr/sbin/iptables-legacy && \ + mkdir -p /etc/containers /etc/cni/net.d /opt/cni/bin && \ + curl -qsSL https://raw.githubusercontent.com/containers/podman/${PODMAN_VERSION}/cni/87-podman-bridge.conflist | tee /etc/cni/net.d/87-podman-bridge.conflist && \ + curl -Ls https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin + +COPY --from=podman-dev /out/bin/* /usr/local/bin/ +COPY --from=runc-dev /out/sbin/* /usr/local/sbin/ +COPY --from=conmon-dev /out/bin/* /usr/local/bin/ +COPY --from=stargz-store-dev /out/* /usr/local/bin/ + +# Image which can be used as all-in-one single node demo environment +FROM snapshotter-base AS cind +COPY ./script/config/ / +COPY ./script/cind/ / +VOLUME /var/lib/containerd +VOLUME /var/lib/containerd-stargz-grpc +VOLUME /run/containerd-stargz-grpc +ENV CONTAINERD_SNAPSHOTTER=stargz +ENTRYPOINT [ "/entrypoint.sh" ] + # Image which can be used for interactive demo environment FROM containerd-base AS demo ARG CNI_PLUGINS_VERSION @@ -118,6 +209,32 @@ COPY ./script/config/ / RUN apt-get update -y && apt-get install --no-install-recommends -y fuse ENTRYPOINT [ "/usr/local/bin/entrypoint", "/sbin/init" ] +# Image for testing CRI-O with Stargz Store. +# NOTE: This cannot be used for the node image of KinD. +FROM ubuntu:20.04 AS crio-stargz-store +ARG CRIO_VERSION +ARG CNI_PLUGINS_VERSION +ARG CRIO_TEST_PAUSE_IMAGE_NAME +RUN apt-get update -y && apt-get install --no-install-recommends -y \ + ca-certificates fuse libgpgme-dev libglib2.0-dev curl \ + iptables conntrack && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y tzdata && \ + # Make CNI plugins manipulate iptables instead of nftables + # as this test runs in a Docker container that network is configured with iptables. + # c.f. 
https://github.com/moby/moby/issues/26824 + update-alternatives --set iptables /usr/sbin/iptables-legacy && \ + mkdir -p /opt/cni/bin && \ + curl -sSL https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin && \ + echo ${CRIO_TEST_PAUSE_IMAGE_NAME} > /pause_name + +COPY --from=stargz-store-dev /out/* /usr/local/bin/ +COPY --from=cri-o-dev /out/bin/* /usr/local/bin/ +COPY --from=runc-dev /out/sbin/* /usr/local/sbin/ +COPY --from=conmon-dev /out/bin/* /usr/local/bin/ +COPY ./script/cri-o/config/ / + +ENTRYPOINT [ "/usr/local/bin/entrypoint", "/bin/bash" ] + # Image which can be used as a node image for KinD FROM kindest/node:v1.20.0 COPY --from=containerd-dev /out/bin/containerd /out/bin/containerd-shim-runc-v2 /usr/local/bin/ diff --git a/Makefile b/Makefile index 1f5c29541..6a46b2272 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ VERSION=$(shell git describe --match 'v[0-9]*' --dirty='.m' --always --tags) REVISION=$(shell git rev-parse HEAD)$(shell if ! 
git diff --no-ext-diff --quiet --exit-code; then echo .m; fi) GO_LD_FLAGS=-ldflags '-s -w -X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) $(GO_EXTRA_LDFLAGS)' -CMD=containerd-stargz-grpc ctr-remote +CMD=containerd-stargz-grpc ctr-remote stargz-store CMD_BINARIES=$(addprefix $(PREFIX),$(CMD)) @@ -41,6 +41,9 @@ containerd-stargz-grpc: FORCE ctr-remote: FORCE GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./cmd/ctr-remote +stargz-store: FORCE + GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./cmd/stargz-store + check: @echo "$@" @GO111MODULE=$(GO111MODULE_VALUE) golangci-lint run @@ -85,5 +88,8 @@ benchmark: test-pullsecrets: @./script/pullsecrets/test.sh -test-cri: - @./script/cri/test.sh +test-cri-containerd: + @./script/cri-containerd/test.sh + +test-cri-o: + @./script/cri-o/test.sh diff --git a/cmd/stargz-store/fs.go b/cmd/stargz-store/fs.go new file mode 100644 index 000000000..f3a748d81 --- /dev/null +++ b/cmd/stargz-store/fs.go @@ -0,0 +1,321 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package main + +import ( + "context" + "encoding/base64" + "syscall" + + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/reference" + fusefs "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + digest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" +) + +const ( + defaultLinkMode = syscall.S_IFLNK | 0400 // -r-------- + defaultDirMode = syscall.S_IFDIR | 0500 // dr-x------ + + poolLink = "pool" + layerLink = "diff" + debugManifestLink = "manifest" + debugConfigLink = "config" + layerInfoLink = "info" + layerUseFile = "use" +) + +// node is a filesystem inode abstraction. +type rootnode struct { + fusefs.Inode + pool *pool +} + +var _ = (fusefs.InodeEmbedder)((*rootnode)(nil)) + +var _ = (fusefs.NodeLookuper)((*rootnode)(nil)) + +// Lookup loads manifest and config of specified name (imgae reference) +// and returns refnode of the specified name +func (n *rootnode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { + switch name { + case poolLink: + return n.NewInode(ctx, + &linknode{linkname: n.pool.root()}, defaultLinkAttr(&out.Attr)), 0 + } + refBytes, err := base64.StdEncoding.DecodeString(name) + if err != nil { + log.G(ctx).WithError(err).Debugf("failed to decode ref base64 %q", name) + return nil, syscall.EINVAL + } + ref := string(refBytes) + refspec, err := reference.Parse(ref) + if err != nil { + log.G(ctx).WithError(err).Warnf("invalid reference %q for %q", ref, name) + return nil, syscall.EINVAL + } + manifest, mPath, config, cPath, err := n.pool.loadManifestAndConfig(ctx, refspec) + if err != nil { + log.G(ctx).WithError(err). 
+ Warnf("failed to fetch manifest and config of %q(%q)", ref, name) + return nil, syscall.EIO + } + return n.NewInode(ctx, &refnode{ + pool: n.pool, + ref: refspec, + manifest: manifest, + manifestPath: mPath, + config: config, + configPath: cPath, + }, defaultDirAttr(&out.Attr)), 0 +} + +// node is a filesystem inode abstraction. +type refnode struct { + fusefs.Inode + pool *pool + + ref reference.Spec + manifest ocispec.Manifest + manifestPath string + config ocispec.Image + configPath string +} + +var _ = (fusefs.InodeEmbedder)((*refnode)(nil)) + +var _ = (fusefs.NodeLookuper)((*refnode)(nil)) + +// Lookup returns layernode of the specified name +func (n *refnode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { + switch name { + case debugManifestLink: + return n.NewInode(ctx, + &linknode{linkname: n.manifestPath}, defaultLinkAttr(&out.Attr)), 0 + case debugConfigLink: + return n.NewInode(ctx, + &linknode{linkname: n.configPath}, defaultLinkAttr(&out.Attr)), 0 + } + targetDigest, err := digest.Parse(name) + if err != nil { + log.G(ctx).WithError(err).Warnf("invalid digest for %q", name) + return nil, syscall.EINVAL + } + var layer *ocispec.Descriptor + for _, l := range n.manifest.Layers { + if l.Digest == targetDigest { + layer = &l + break + } + } + if layer == nil { + log.G(ctx).WithError(err).Warnf("invalid digest for %q: %q", name, targetDigest.String()) + return nil, syscall.EINVAL + } + return n.NewInode(ctx, &layernode{ + pool: n.pool, + layer: *layer, + layers: n.manifest.Layers, + refnode: n, + }, defaultDirAttr(&out.Attr)), 0 +} + +var _ = (fusefs.NodeRmdirer)((*refnode)(nil)) + +// Rmdir marks this layer as "release". +// We don't use layernode.Unlink because Unlink event doesn't reach here when "use" file isn't visible +// to the filesystem client. 
+func (n *refnode) Rmdir(ctx context.Context, name string) syscall.Errno { + if name == debugManifestLink || name == debugConfigLink { + return syscall.EROFS // nop + } + targetDigest, err := digest.Parse(name) + if err != nil { + log.G(ctx).WithError(err).Warnf("invalid digest for %q during release", name) + return syscall.EINVAL + } + current, err := n.pool.release(n.ref, targetDigest) + if err != nil { + log.G(ctx).WithError(err).Warnf("failed to release layer %v / %v", n.ref, targetDigest) + return syscall.EIO + + } + log.G(ctx).WithField("refcounter", current).Warnf("layer %v / %v is marked as RELEASE", n.ref, targetDigest) + return syscall.ENOENT +} + +// node is a filesystem inode abstraction. +type layernode struct { + fusefs.Inode + pool *pool + + layer ocispec.Descriptor + layers []ocispec.Descriptor + + refnode *refnode +} + +var _ = (fusefs.InodeEmbedder)((*layernode)(nil)) + +var _ = (fusefs.NodeCreater)((*layernode)(nil)) + +// Create marks this layer as "using". +// We don't use refnode.Mkdir because Mkdir event doesn't reach here if layernode already exists. +func (n *layernode) Create(ctx context.Context, name string, flags uint32, mode uint32, out *fuse.EntryOut) (node *fusefs.Inode, fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) { + if name == layerUseFile { + current := n.pool.use(n.refnode.ref, n.layer.Digest) + log.G(ctx).WithField("refcounter", current).Warnf("layer %v / %v is marked as USING", + n.refnode.ref, n.layer.Digest) + } + + // TODO: implement cleanup + return nil, nil, 0, syscall.ENOENT +} + +var _ = (fusefs.NodeLookuper)((*layernode)(nil)) + +// Lookup routes to the target file stored in the pool, based on the specified file name. +func (n *layernode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { + switch name { + case layerInfoLink: + var err error + infopath, err := n.pool.loadLayerInfo(ctx, n.refnode.ref, n.layer.Digest) + if err != nil { + log.G(ctx).WithError(err). 
+ Warnf("failed to get layer info for %q: %q", name, n.layer.Digest) + return nil, syscall.EIO + } + return n.NewInode(ctx, &linknode{linkname: infopath}, defaultLinkAttr(&out.Attr)), 0 + case layerLink: + l, err := n.pool.loadLayer(ctx, n.refnode.ref, n.layer, n.layers) + if err != nil { + cErr := ctx.Err() + if errors.Is(cErr, context.Canceled) || errors.Is(err, context.Canceled) { + // When filesystem client canceled to lookup this layer, + // do not log this as "preparation failure" because it's + // intensional. + log.G(ctx).WithError(err). + Debugf("error resolving layer (context error: %v)", cErr) + return nil, syscall.EIO + } + log.G(ctx).WithField(remoteSnapshotLogKey, prepareFailed). + WithField("layerdigest", n.layer.Digest). + WithError(err). + Debugf("error resolving layer (context error: %v)", cErr) + log.G(ctx).WithError(err).Warnf("failed to mount layer %q: %q", + name, n.layer.Digest) + return nil, syscall.EIO + } + ln, err := l.RootNode() + if err != nil { + log.G(ctx).WithField(remoteSnapshotLogKey, prepareFailed). + WithField("layerdigest", n.layer.Digest). + WithError(err). + Debugf("failed to get root node") + return nil, syscall.EIO + } + var ao fuse.AttrOut + if errno := ln.(fusefs.NodeGetattrer).Getattr(ctx, nil, &ao); errno != 0 { + log.G(ctx).WithField(remoteSnapshotLogKey, prepareFailed). + WithField("layerdigest", n.layer.Digest). + WithError(err). 
+ Debugf("failed to get root node") + return nil, errno + } + copyAttr(&out.Attr, &ao.Attr) + return n.NewInode(ctx, ln, fusefs.StableAttr{ + Mode: out.Attr.Mode, + Ino: out.Attr.Ino, + }), 0 + case layerUseFile: + log.G(ctx).Debugf("\"use\" file is referred but return ENOENT for reference management") + return nil, syscall.ENOENT + default: + log.G(ctx).Warnf("unknown filename %q", name) + return nil, syscall.ENOENT + } +} + +type linknode struct { + fusefs.Inode + linkname string +} + +var _ = (fusefs.InodeEmbedder)((*linknode)(nil)) + +var _ = (fusefs.NodeReadlinker)((*linknode)(nil)) + +func (n *linknode) Readlink(ctx context.Context) ([]byte, syscall.Errno) { + return []byte(n.linkname), 0 // TODO: linkname shouldn't statically embedded? +} + +func copyAttr(dest, src *fuse.Attr) { + dest.Ino = src.Ino + dest.Size = src.Size + dest.Blocks = src.Blocks + dest.Atime = src.Atime + dest.Mtime = src.Mtime + dest.Ctime = src.Ctime + dest.Atimensec = src.Atimensec + dest.Mtimensec = src.Mtimensec + dest.Ctimensec = src.Ctimensec + dest.Mode = src.Mode + dest.Nlink = src.Nlink + dest.Owner = src.Owner + dest.Rdev = src.Rdev + dest.Blksize = src.Blksize + dest.Padding = src.Padding +} + +func defaultDirAttr(out *fuse.Attr) fusefs.StableAttr { + // out.Ino + out.Size = 0 + // out.Blksize + // out.Blocks + // out.Nlink + out.Mode = defaultDirMode + out.Owner = fuse.Owner{Uid: 0, Gid: 0} + // out.Mtime + // out.Mtimensec + // out.Rdev + // out.Padding + + return fusefs.StableAttr{ + Mode: out.Mode, + } +} + +func defaultLinkAttr(out *fuse.Attr) fusefs.StableAttr { + // out.Ino + out.Size = 0 + // out.Blksize + // out.Blocks + // out.Nlink + out.Mode = defaultLinkMode + out.Owner = fuse.Owner{Uid: 0, Gid: 0} + // out.Mtime + // out.Mtimensec + // out.Rdev + // out.Padding + + return fusefs.StableAttr{ + Mode: out.Mode, + } +} diff --git a/cmd/stargz-store/main.go b/cmd/stargz-store/main.go new file mode 100644 index 000000000..8ac7e995e --- /dev/null +++ 
b/cmd/stargz-store/main.go @@ -0,0 +1,263 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "flag" + golog "log" + "net/http" + "os" + "os/signal" + "path/filepath" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/remotes/docker" + "github.com/containerd/stargz-snapshotter/fs/config" + "github.com/containerd/stargz-snapshotter/fs/layer" + fsmetrics "github.com/containerd/stargz-snapshotter/fs/metrics" + "github.com/containerd/stargz-snapshotter/service/keychain" + "github.com/containerd/stargz-snapshotter/task" + sddaemon "github.com/coreos/go-systemd/v22/daemon" + metrics "github.com/docker/go-metrics" + fusefs "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const ( + defaultLogLevel = logrus.InfoLevel + defaultRootDir = "/var/lib/stargz-store" + defaultMaxConcurrency = 2 +) + +var ( + configPath = flag.String("config", "", "path to the configuration file") + logLevel = flag.String("log-level", defaultLogLevel.String(), "set the logging level [trace, debug, info, warn, error, fatal, panic]") + rootDir = flag.String("root", defaultRootDir, "path to the root directory for this snapshotter") +) + +type Config struct { + config.Config + + // KubeconfigKeychainConfig is config for kubeconfig-based keychain. 
+ KubeconfigKeychainConfig `toml:"kubeconfig_keychain"` + + // ResolverConfig is config for resolving registries. + ResolverConfig `toml:"resolver"` +} + +type KubeconfigKeychainConfig struct { + EnableKeychain bool `toml:"enable_keychain"` + KubeconfigPath string `toml:"kubeconfig_path"` +} + +type ResolverConfig struct { + Host map[string]HostConfig `toml:"host"` +} + +type HostConfig struct { + Mirrors []MirrorConfig `toml:"mirrors"` +} + +type MirrorConfig struct { + Host string `toml:"host"` + Insecure bool `toml:"insecure"` +} + +func main() { + flag.Parse() + mountPoint := flag.Arg(0) + lvl, err := logrus.ParseLevel(*logLevel) + if err != nil { + log.L.WithError(err).Fatal("failed to prepare logger") + } + logrus.SetLevel(lvl) + logrus.SetFormatter(&logrus.JSONFormatter{ + TimestampFormat: log.RFC3339NanoFixed, + }) + var ( + ctx = log.WithLogger(context.Background(), log.L) + config Config + ) + // Streams log of standard lib (go-fuse uses this) into debug log + // Snapshotter should use "github.com/containerd/containerd/log" otherwize + // logs are always printed as "debug" mode. 
+ golog.SetOutput(log.G(ctx).WriterLevel(logrus.DebugLevel)) + + if mountPoint == "" { + log.G(ctx).Fatalf("mount point must be specified") + } + + // Get configuration from specified file + if *configPath != "" { + if _, err := toml.DecodeFile(*configPath, &config); err != nil { + log.G(ctx).WithError(err).Fatalf("failed to load config file %q", *configPath) + } + } + + // Prepare kubeconfig-based keychain if required + credsFuncs := []func(string) (string, string, error){keychain.NewDockerconfigKeychain(ctx)} + if config.KubeconfigKeychainConfig.EnableKeychain { + var opts []keychain.KubeconfigOption + if kcp := config.KubeconfigKeychainConfig.KubeconfigPath; kcp != "" { + opts = append(opts, keychain.WithKubeconfigPath(kcp)) + } + credsFuncs = append(credsFuncs, keychain.NewKubeconfigKeychain(ctx, opts...)) + } + + // Use RegistryHosts based on ResolverConfig and keychain + hosts := hostsFromConfig(config.ResolverConfig, credsFuncs...) + + // Configure and mount filesystem + if _, err := os.Stat(mountPoint); err != nil { + if err2 := os.MkdirAll(mountPoint, 0755); err2 != nil && !os.IsExist(err2) { + log.G(ctx).WithError(err).WithError(err2). 
+ Fatalf("failed to prepare mountpoint %q", mountPoint) + } + } + if err := mountStore(mountPoint, *rootDir, hosts, config.Config); err != nil { + log.G(ctx).WithError(err).Fatalf("failed to mount fs at %q", mountPoint) + } + defer func() { + syscall.Unmount(mountPoint, 0) + log.G(ctx).Info("Exiting") + }() + + if os.Getenv("NOTIFY_SOCKET") != "" { + notified, notifyErr := sddaemon.SdNotify(false, sddaemon.SdNotifyReady) + log.G(ctx).Debugf("SdNotifyReady notified=%v, err=%v", notified, notifyErr) + } + defer func() { + if os.Getenv("NOTIFY_SOCKET") != "" { + notified, notifyErr := sddaemon.SdNotify(false, sddaemon.SdNotifyStopping) + log.G(ctx).Debugf("SdNotifyStopping notified=%v, err=%v", notified, notifyErr) + } + }() + + waitForSIGINT() + log.G(ctx).Info("Got SIGINT") +} + +func waitForSIGINT() { + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt) + <-c +} + +func hostsFromConfig(cfg ResolverConfig, credsFuncs ...func(string) (string, string, error)) docker.RegistryHosts { + return func(host string) (hosts []docker.RegistryHost, _ error) { + for _, h := range append(cfg.Host[host].Mirrors, MirrorConfig{ + Host: host, + }) { + tr := &http.Client{Transport: http.DefaultTransport.(*http.Transport).Clone()} + config := docker.RegistryHost{ + Client: tr, + Host: h.Host, + Scheme: "https", + Path: "/v2", + Capabilities: docker.HostCapabilityPull | docker.HostCapabilityResolve, + Authorizer: docker.NewDockerAuthorizer( + docker.WithAuthClient(tr), + docker.WithAuthCreds(func(host string) (string, string, error) { + for _, f := range credsFuncs { + if username, secret, err := f(host); err != nil { + return "", "", err + } else if !(username == "" && secret == "") { + return username, secret, nil + } + } + return "", "", nil + })), + } + if localhost, _ := docker.MatchLocalhost(config.Host); localhost || h.Insecure { + config.Scheme = "http" + } + if config.Host == "docker.io" { + config.Host = "registry-1.docker.io" + } + hosts = append(hosts, config) + } 
+ return + } +} + +func mountStore(mountpoint, root string, hosts docker.RegistryHosts, cfg config.Config) error { + var ( + fsroot = filepath.Join(root, "stargz") + poolroot = filepath.Join(root, "pool") + ) + if err := os.MkdirAll(fsroot, 0700); err != nil { + return err + } + if err := os.MkdirAll(poolroot, 0700); err != nil { + return err + } + + maxConcurrency := cfg.MaxConcurrency + if maxConcurrency == 0 { + maxConcurrency = defaultMaxConcurrency + } + tm := task.NewBackgroundTaskManager(maxConcurrency, 5*time.Second) + r, err := layer.NewResolver(root, tm, cfg) + if err != nil { + return errors.Wrapf(err, "failed to setup resolver") + } + var ns *metrics.Namespace + if !cfg.NoPrometheus { + ns = metrics.NewNamespace("stargz", "fs", nil) + } + c := fsmetrics.NewLayerMetrics(ns) + if ns != nil { + metrics.Register(ns) + } + timeSec := time.Second + rawFS := fusefs.NewNodeFS(&rootnode{ + pool: &pool{ + path: poolroot, + layer: make(map[string]layer.Layer), + hosts: hosts, + refcounter: make(map[string]map[string]int), + resolver: r, + prefetchSize: cfg.PrefetchSize, + noprefetch: cfg.NoPrefetch, + noBackgroundFetch: cfg.NoBackgroundFetch, + backgroundTaskManager: tm, + allowNoVerification: cfg.AllowNoVerification, + disableVerification: cfg.DisableVerification, + metricsController: c, + }, + }, &fusefs.Options{ + AttrTimeout: &timeSec, + EntryTimeout: &timeSec, + NullPermissions: true, + }) + server, err := fuse.NewServer(rawFS, mountpoint, &fuse.MountOptions{ + AllowOther: true, // allow users other than root&mounter to access fs + Options: []string{"suid"}, // allow setuid inside container + Debug: cfg.Debug, + }) + if err != nil { + return err + } + go server.Serve() + return server.WaitMount() +} diff --git a/cmd/stargz-store/pool.go b/cmd/stargz-store/pool.go new file mode 100644 index 000000000..d2eb83594 --- /dev/null +++ b/cmd/stargz-store/pool.go @@ -0,0 +1,493 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/containerd/containerd/images" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/platforms" + "github.com/containerd/containerd/reference" + "github.com/containerd/containerd/remotes" + "github.com/containerd/containerd/remotes/docker" + "github.com/containerd/stargz-snapshotter/estargz" + "github.com/containerd/stargz-snapshotter/fs/layer" + fsmetrics "github.com/containerd/stargz-snapshotter/fs/metrics" + "github.com/containerd/stargz-snapshotter/task" + "github.com/containers/storage/pkg/archive" + digest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "golang.org/x/sync/singleflight" +) + +const ( + // remoteSnapshotLogKey is a key for log line, which indicates whether + // `Prepare` method successfully prepared targeting remote snapshot or not, as + // defined in the following: + // - "true" : indicates the snapshot has been successfully prepared as a + // remote snapshot + // - "false" : indicates the snapshot failed to be prepared as a remote + // snapshot + // - null : undetermined + remoteSnapshotLogKey = "remote-snapshot-prepared" + prepareSucceeded = "true" + prepareFailed = "false" +) + +// pool provides manifests, configs and layers of images. 
+// This also manages caches for these resources. +type pool struct { + path string + layer map[string]layer.Layer + layerMu sync.Mutex + hosts docker.RegistryHosts + refcounter map[string]map[string]int + refcounterMu sync.Mutex + + resolver *layer.Resolver + prefetchSize int64 + noprefetch bool + noBackgroundFetch bool + backgroundTaskManager *task.BackgroundTaskManager + allowNoVerification bool + disableVerification bool + metricsController *fsmetrics.Controller + resolveG singleflight.Group +} + +func (p *pool) root() string { + return p.path +} + +func (p *pool) metadataDir(refspec reference.Spec) string { + return filepath.Join(p.path, + "metadata--"+colon2dash(digest.FromString(refspec.String()).String())) +} + +func (p *pool) manifestFile(refspec reference.Spec) string { + return filepath.Join(p.metadataDir(refspec), "manifest") +} + +func (p *pool) configFile(refspec reference.Spec) string { + return filepath.Join(p.metadataDir(refspec), "config") +} + +func (p *pool) layerInfoFile(refspec reference.Spec, dgst digest.Digest) string { + return filepath.Join(p.metadataDir(refspec), colon2dash(dgst.String())) +} + +func (p *pool) loadManifestAndConfig(ctx context.Context, refspec reference.Spec) (manifest ocispec.Manifest, mPath string, config ocispec.Image, cPath string, err error) { + manifest, mPath, config, cPath, err = p.readManifestAndConfig(refspec) + if err == nil { + log.G(ctx).Debugf("reusing manifest and config of %q", refspec.String()) + return + } + log.G(ctx).WithError(err).Debugf("fetching manifest and config of %q", refspec.String()) + manifest, config, err = fetchManifestAndConfig(ctx, p.hosts, refspec) + if err != nil { + return ocispec.Manifest{}, "", ocispec.Image{}, "", err + } + mPath, cPath, err = p.writeManifestAndConfig(refspec, manifest, config) + if err != nil { + return ocispec.Manifest{}, "", ocispec.Image{}, "", err + } + return manifest, mPath, config, cPath, err +} + +func (p *pool) loadLayerInfo(ctx context.Context, refspec 
reference.Spec, dgst digest.Digest) (layerInfoPath string, err error) { + layerInfoPath = p.layerInfoFile(refspec, dgst) + if _, err := os.Stat(layerInfoPath); err == nil { + log.G(ctx).Debugf("reusing layer info of %q/%q: %q", + refspec.String(), dgst.String(), layerInfoPath) + return layerInfoPath, nil + } + manifest, _, config, _, err := p.loadManifestAndConfig(ctx, refspec) + if err != nil { + return "", errors.Wrapf(err, "failed to get manifest and config") + } + info, err := genLayerInfo(dgst, manifest, config) + if err != nil { + return "", errors.Wrapf(err, "failed to generate layer info") + } + if err := os.MkdirAll(filepath.Dir(layerInfoPath), 0700); err != nil { + return "", err + } + infoF, err := os.Create(layerInfoPath) // TODO: file mode + if err != nil { + return "", err + } + defer infoF.Close() + return layerInfoPath, json.NewEncoder(infoF).Encode(&info) +} + +func (p *pool) loadLayer(ctx context.Context, refspec reference.Spec, target ocispec.Descriptor, preResolve []ocispec.Descriptor) (layer.Layer, error) { + var ( + result layer.Layer + resultChan = make(chan layer.Layer) + errChan = make(chan error) + ) + + for _, l := range append([]ocispec.Descriptor{target}, preResolve...) { + l := l + // Prevents the goroutine to run + key := refspec.String() + "/" + l.Digest.String() + p.layerMu.Lock() + gotL, ok := p.layer[key] + p.layerMu.Unlock() + if ok { + // Layer already resolved + if l.Digest.String() != target.Digest.String() { + continue // This is not the target layer; nop + } + result = gotL + } + go func() { + // Avoids to get canceled by client. + ctx := context.Background() + gotL, err := p.resolveLayer(ctx, refspec, l) + if l.Digest.String() != target.Digest.String() { + return // This is not target layer + } + if err != nil { + errChan <- errors.Wrapf(err, "failed to resolve layer %q / %q", + refspec, l.Digest) + return + } + // Log this as preparation success + log.G(ctx).WithField(remoteSnapshotLogKey, prepareSucceeded). 
+ Debugf("successfully resolved layer") + resultChan <- gotL + }() + } + + if result != nil { + return result, nil + } + + // Wait for resolving completion + var l layer.Layer + select { + case l = <-resultChan: + case err := <-errChan: + log.G(ctx).WithError(err).Debug("failed to resolve layer") + return nil, errors.Wrapf(err, "failed to resolve layer") + case <-time.After(30 * time.Second): + log.G(ctx).Debug("failed to resolve layer (timeout)") + return nil, fmt.Errorf("failed to resolve layer (timeout)") + } + + return l, nil +} + +func (p *pool) resolveLayer(ctx context.Context, refspec reference.Spec, target ocispec.Descriptor) (layer.Layer, error) { + key := refspec.String() + "/" + target.Digest.String() + + p.layerMu.Lock() + gotL, ok := p.layer[key] + p.layerMu.Unlock() + if ok { + return gotL, nil + } + + resultChan := p.resolveG.DoChan(key, func() (interface{}, error) { + return p.resolve(ctx, refspec, target) + }) + var res singleflight.Result + select { + case res = <-resultChan: + case <-time.After(30 * time.Second): + p.resolveG.Forget(key) + return nil, fmt.Errorf("failed to resolve layer (timeout)") + } + if res.Err != nil || res.Val == nil { + return nil, fmt.Errorf("failed to resolve layer: %v", res.Err) + } + + l := res.Val.(layer.Layer) + p.layerMu.Lock() + p.layer[key] = l + p.layerMu.Unlock() + p.metricsController.Add(key, l) + + return l, nil +} + +func (p *pool) resolve(ctx context.Context, refspec reference.Spec, target ocispec.Descriptor) (layer.Layer, error) { + l, err := p.resolver.Resolve(ctx, p.hosts, refspec, target) + if err != nil { + return nil, err + } + + // Verify layer's content + labels := target.Annotations + if labels == nil { + labels = make(map[string]string) + } + if p.disableVerification { + // Skip if verification is disabled completely + l.SkipVerify() + log.G(ctx).Debugf("Verification forcefully skipped") + } else if tocDigest, ok := labels[estargz.TOCJSONDigestAnnotation]; ok { + // Verify this layer using the TOC 
JSON digest passed through label. + dgst, err := digest.Parse(tocDigest) + if err != nil { + log.G(ctx).WithError(err).Debugf("failed to parse passed TOC digest %q", dgst) + return nil, errors.Wrapf(err, "invalid TOC digest: %v", tocDigest) + } + if err := l.Verify(dgst); err != nil { + log.G(ctx).WithError(err).Debugf("invalid layer") + return nil, errors.Wrapf(err, "invalid stargz layer") + } + log.G(ctx).Debugf("verified") + } else { + // Verification must be done. Don't mount this layer. + return nil, fmt.Errorf("digest of TOC JSON must be passed") + } + + // Prefetch this layer. We prefetch several layers in parallel. The first + // Check() for this layer waits for the prefetch completion. + if !p.noprefetch { + go func() { + p.backgroundTaskManager.DoPrioritizedTask() + defer p.backgroundTaskManager.DonePrioritizedTask() + if err := l.Prefetch(p.prefetchSize); err != nil { + log.G(ctx).WithError(err).Debug("failed to prefetched layer") + return + } + log.G(ctx).Debug("completed to prefetch") + }() + } + + // Fetch whole layer aggressively in background. We use background + // reader for this so prioritized tasks(Mount, Check, etc...) can + // interrupt the reading. This can avoid disturbing prioritized tasks + // about NW traffic. 
+ if !p.noBackgroundFetch { + go func() { + if err := l.BackgroundFetch(); err != nil { + log.G(ctx).WithError(err).Debug("failed to fetch whole layer") + return + } + log.G(ctx).Debug("completed to fetch all layer data in background") + }() + } + + return l, nil +} + +func (p *pool) release(ref reference.Spec, dgst digest.Digest) (int, error) { + // TODO: implement GC + targetRef := ref.String() + targetDgst := dgst.String() + p.refcounterMu.Lock() + defer p.refcounterMu.Unlock() + if _, ok := p.refcounter[targetRef]; !ok { + return 0, fmt.Errorf("ref %q not found during release", targetRef) + } + if c, ok := p.refcounter[targetRef][targetDgst]; !ok { + return 0, fmt.Errorf("layer %q/%q not found during release", targetRef, targetDgst) + } else if c <= 0 { + return 0, fmt.Errorf("layer %q/%q isn't used", targetRef, targetDgst) + } + p.refcounter[targetRef][targetDgst]-- + return p.refcounter[targetRef][targetDgst], nil +} + +func (p *pool) use(ref reference.Spec, dgst digest.Digest) int { + // TODO: implement GC + targetRef := ref.String() + targetDgst := dgst.String() + p.refcounterMu.Lock() + defer p.refcounterMu.Unlock() + if _, ok := p.refcounter[targetRef]; !ok { + p.refcounter[targetRef] = make(map[string]int) + } + p.refcounter[targetRef][targetDgst]++ + return p.refcounter[targetRef][targetDgst] +} + +func (p *pool) readManifestAndConfig(refspec reference.Spec) (manifest ocispec.Manifest, mPath string, config ocispec.Image, cPath string, _ error) { + mPath, cPath = p.manifestFile(refspec), p.configFile(refspec) + mf, err := os.Open(mPath) + if err != nil { + return ocispec.Manifest{}, "", ocispec.Image{}, "", err + } + defer mf.Close() + if err := json.NewDecoder(mf).Decode(&manifest); err != nil { + return ocispec.Manifest{}, "", ocispec.Image{}, "", err + } + cf, err := os.Open(cPath) + if err != nil { + return ocispec.Manifest{}, "", ocispec.Image{}, "", err + } + defer cf.Close() + if err := json.NewDecoder(cf).Decode(&config); err != nil { + return 
ocispec.Manifest{}, "", ocispec.Image{}, "", err + } + return manifest, mPath, config, cPath, nil +} + +func (p *pool) writeManifestAndConfig(refspec reference.Spec, manifest ocispec.Manifest, config ocispec.Image) (mPath string, cPath string, _ error) { + mPath, cPath = p.manifestFile(refspec), p.configFile(refspec) + if err := os.MkdirAll(filepath.Dir(mPath), 0700); err != nil { + return "", "", err + } + if err := os.MkdirAll(filepath.Dir(cPath), 0700); err != nil { + return "", "", err + } + mf, err := os.Create(mPath) // TODO: file mode + if err != nil { + return "", "", err + } + defer mf.Close() + if err := json.NewEncoder(mf).Encode(&manifest); err != nil { + return "", "", err + } + cf, err := os.Create(cPath) // TODO: file mode + if err != nil { + return "", "", err + } + defer cf.Close() + if err := json.NewEncoder(cf).Encode(&config); err != nil { + return "", "", err + } + return mPath, cPath, nil +} + +func fetchManifestAndConfig(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec) (ocispec.Manifest, ocispec.Image, error) { + resolver := docker.NewResolver(docker.ResolverOptions{ + Hosts: hosts, + }) + _, img, err := resolver.Resolve(ctx, refspec.String()) + if err != nil { + return ocispec.Manifest{}, ocispec.Image{}, err + } + fetcher, err := resolver.Fetcher(ctx, refspec.String()) + if err != nil { + return ocispec.Manifest{}, ocispec.Image{}, err + } + plt := platforms.DefaultSpec() // TODO: should we make this configurable? 
+ manifest, err := fetchManifestPlatform(ctx, fetcher, img, plt) + if err != nil { + return ocispec.Manifest{}, ocispec.Image{}, err + } + r, err := fetcher.Fetch(ctx, manifest.Config) + if err != nil { + return ocispec.Manifest{}, ocispec.Image{}, err + } + defer r.Close() + var config ocispec.Image + if err := json.NewDecoder(r).Decode(&config); err != nil { + return ocispec.Manifest{}, ocispec.Image{}, err + } + + return manifest, config, nil +} + +func fetchManifestPlatform(ctx context.Context, fetcher remotes.Fetcher, desc ocispec.Descriptor, platform ocispec.Platform) (ocispec.Manifest, error) { + ctx, cancel := context.WithTimeout(ctx, time.Minute) + defer cancel() + + r, err := fetcher.Fetch(ctx, desc) + if err != nil { + return ocispec.Manifest{}, err + } + defer r.Close() + + var manifest ocispec.Manifest + switch desc.MediaType { + case images.MediaTypeDockerSchema2Manifest, ocispec.MediaTypeImageManifest: + err = json.NewDecoder(r).Decode(&manifest) + case images.MediaTypeDockerSchema2ManifestList, ocispec.MediaTypeImageIndex: + var index ocispec.Index + if err = json.NewDecoder(r).Decode(&index); err != nil { + return ocispec.Manifest{}, err + } + var target ocispec.Descriptor + found := false + for _, m := range index.Manifests { + p := platforms.DefaultSpec() + if m.Platform != nil { + p = *m.Platform + } + if !platforms.NewMatcher(platform).Match(p) { + continue + } + target = m + found = true + break + } + if !found { + return ocispec.Manifest{}, fmt.Errorf("no manifest found for platform") + } + manifest, err = fetchManifestPlatform(ctx, fetcher, target, platform) + default: + err = fmt.Errorf("unknown mediatype %q", desc.MediaType) + } + return manifest, err +} + +func colon2dash(s string) string { + return strings.ReplaceAll(s, ":", "-") +} + +// Layer represents the layer information. Format is compatible to the one required by +// "additional layer store" of github.com/containers/storage. 
+type Layer struct { + CompressedDigest digest.Digest `json:"compressed-diff-digest,omitempty"` + CompressedSize int64 `json:"compressed-size,omitempty"` + UncompressedDigest digest.Digest `json:"diff-digest,omitempty"` + UncompressedSize int64 `json:"diff-size,omitempty"` + CompressionType archive.Compression `json:"compression,omitempty"` + ReadOnly bool `json:"-"` +} + +func genLayerInfo(dgst digest.Digest, manifest ocispec.Manifest, config ocispec.Image) (Layer, error) { + if len(manifest.Layers) != len(config.RootFS.DiffIDs) { + return Layer{}, fmt.Errorf( + "len(manifest.Layers) != len(config.Rootfs): %d != %d", + len(manifest.Layers), len(config.RootFS.DiffIDs)) + } + var ( + layerIndex = -1 + ) + for i, l := range manifest.Layers { + if l.Digest == dgst { + layerIndex = i + } + } + if layerIndex == -1 { + return Layer{}, fmt.Errorf("layer %q not found in the manifest", dgst.String()) + } + return Layer{ + CompressedDigest: manifest.Layers[layerIndex].Digest, + CompressedSize: manifest.Layers[layerIndex].Size, + UncompressedDigest: config.RootFS.DiffIDs[layerIndex], + UncompressedSize: 0, // TODO + CompressionType: archive.Gzip, + ReadOnly: true, + }, nil +} diff --git a/fs/fs.go b/fs/fs.go index 681faa471..2ac910c29 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -37,19 +37,12 @@ package fs import ( - "bytes" "context" - "encoding/json" "fmt" - "io" - "os" - "sort" "strconv" - "strings" "sync" "syscall" "time" - "unsafe" "github.com/containerd/containerd/log" "github.com/containerd/containerd/remotes/docker" @@ -58,7 +51,7 @@ import ( "github.com/containerd/stargz-snapshotter/fs/layer" fsmetrics "github.com/containerd/stargz-snapshotter/fs/metrics" "github.com/containerd/stargz-snapshotter/fs/source" - snbase "github.com/containerd/stargz-snapshotter/snapshot" + "github.com/containerd/stargz-snapshotter/snapshot" "github.com/containerd/stargz-snapshotter/task" metrics "github.com/docker/go-metrics" fusefs "github.com/hanwen/go-fuse/v2/fs" @@ -66,21 +59,9 @@ import 
( digest "github.com/opencontainers/go-digest" ocispec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" - "golang.org/x/sys/unix" ) -var opaqueXattrs = []string{"trusted.overlay.opaque", "user.overlay.opaque"} - -const ( - blockSize = 4096 - whiteoutPrefix = ".wh." - whiteoutOpaqueDir = whiteoutPrefix + whiteoutPrefix + ".opq" - opaqueXattrValue = "y" - stateDirName = ".stargz-snapshotter" - defaultMaxConcurrency = 2 - statFileMode = syscall.S_IFREG | 0400 // -r-------- - stateDirMode = syscall.S_IFDIR | 0500 // dr-x------ -) +const defaultMaxConcurrency = 2 type Option func(*options) @@ -94,7 +75,7 @@ func WithGetSources(s source.GetSources) Option { } } -func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snbase.FileSystem, err error) { +func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snapshot.FileSystem, err error) { var fsOpts options for _, o := range opts { o(&fsOpts) @@ -194,8 +175,7 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s desc := desc go func() { // Avoids to get canceled by client. - ctx := log.WithLogger(context.Background(), - log.G(ctx).WithField("mountpoint", mountpoint)) + ctx := context.Background() _, err := fs.resolver.Resolve(ctx, preResolve.Hosts, preResolve.Name, desc) if err != nil { log.G(ctx).WithError(err).Debug("failed to pre-resolve") @@ -242,6 +222,11 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s // Verification must be done. Don't mount this layer. 
return fmt.Errorf("digest of TOC JSON must be passed") } + node, err := l.RootNode() + if err != nil { + log.G(ctx).WithError(err).Warnf("Failed to get root node") + return errors.Wrapf(err, "failed to get root node") + } // Register the mountpoint layer fs.layerMu.Lock() @@ -283,16 +268,10 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s }() } - // Mounting stargz + // mount the node to the specified mountpoint // TODO: bind mount the state directory as a read-only fs on snapshotter's side timeSec := time.Second - rawFS := fusefs.NewNodeFS(&node{ - fs: fs, - layer: l, - e: l.Root(), - s: newState(l), - root: mountpoint, - }, &fusefs.Options{ + rawFS := fusefs.NewNodeFS(node, &fusefs.Options{ AttrTimeout: &timeSec, EntryTimeout: &timeSec, NullPermissions: true, @@ -405,571 +384,3 @@ func neighboringLayers(manifest ocispec.Manifest, target ocispec.Descriptor) (de } return } - -type fileReader interface { - OpenFile(name string) (io.ReaderAt, error) -} - -// node is a filesystem inode abstraction. -type node struct { - fusefs.Inode - fs *filesystem - layer fileReader - e *estargz.TOCEntry - s *state - root string - opaque bool // true if this node is an overlayfs opaque directory -} - -var _ = (fusefs.InodeEmbedder)((*node)(nil)) - -var _ = (fusefs.NodeReaddirer)((*node)(nil)) - -func (n *node) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) { - var ents []fuse.DirEntry - whiteouts := map[string]*estargz.TOCEntry{} - normalEnts := map[string]bool{} - n.e.ForeachChild(func(baseName string, ent *estargz.TOCEntry) bool { - - // We don't want to show prefetch landmarks in "/". - if n.e.Name == "" && (baseName == estargz.PrefetchLandmark || baseName == estargz.NoPrefetchLandmark) { - return true - } - - // We don't want to show whiteouts. - if strings.HasPrefix(baseName, whiteoutPrefix) { - if baseName == whiteoutOpaqueDir { - return true - } - // Add the overlayfs-compiant whiteout later. 
- whiteouts[baseName] = ent - return true - } - - // This is a normal entry. - normalEnts[baseName] = true - ents = append(ents, fuse.DirEntry{ - Mode: modeOfEntry(ent), - Name: baseName, - Ino: inodeOfEnt(ent), - }) - return true - }) - - // Append whiteouts if no entry replaces the target entry in the lower layer. - for w, ent := range whiteouts { - if !normalEnts[w[len(whiteoutPrefix):]] { - ents = append(ents, fuse.DirEntry{ - Mode: syscall.S_IFCHR, - Name: w[len(whiteoutPrefix):], - Ino: inodeOfEnt(ent), - }) - - } - } - - // Avoid undeterministic order of entries on each call - sort.Slice(ents, func(i, j int) bool { - return ents[i].Name < ents[j].Name - }) - - return fusefs.NewListDirStream(ents), 0 -} - -var _ = (fusefs.NodeLookuper)((*node)(nil)) - -func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { - // We don't want to show prefetch landmarks in "/". - if n.e.Name == "" && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) { - return nil, syscall.ENOENT - } - - // We don't want to show whiteouts. - if strings.HasPrefix(name, whiteoutPrefix) { - return nil, syscall.ENOENT - } - - // state directory - if n.e.Name == "" && name == stateDirName { - return n.NewInode(ctx, n.s, stateToAttr(n.s, &out.Attr)), 0 - } - - // lookup stargz TOCEntry - ce, ok := n.e.LookupChild(name) - if !ok { - // If the entry exists as a whiteout, show an overlayfs-styled whiteout node. - if wh, ok := n.e.LookupChild(fmt.Sprintf("%s%s", whiteoutPrefix, name)); ok { - return n.NewInode(ctx, &whiteout{ - e: wh, - }, entryToWhAttr(wh, &out.Attr)), 0 - } - return nil, syscall.ENOENT - } - var opaque bool - if _, ok := ce.LookupChild(whiteoutOpaqueDir); ok { - // This entry is an opaque directory so make it recognizable for overlayfs. 
- opaque = true - } - - return n.NewInode(ctx, &node{ - fs: n.fs, - layer: n.layer, - e: ce, - s: n.s, - root: n.root, - opaque: opaque, - }, entryToAttr(ce, &out.Attr)), 0 -} - -var _ = (fusefs.NodeOpener)((*node)(nil)) - -func (n *node) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) { - ra, err := n.layer.OpenFile(n.e.Name) - if err != nil { - n.s.report(fmt.Errorf("failed to open node: %v", err)) - return nil, 0, syscall.EIO - } - return &file{ - n: n, - e: n.e, - ra: ra, - }, 0, 0 -} - -var _ = (fusefs.NodeGetattrer)((*node)(nil)) - -func (n *node) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { - entryToAttr(n.e, &out.Attr) - return 0 -} - -var _ = (fusefs.NodeGetxattrer)((*node)(nil)) - -func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32, syscall.Errno) { - for _, opaqueXattr := range opaqueXattrs { - if attr == opaqueXattr && n.opaque { - // This node is an opaque directory so give overlayfs-compliant indicator. - if len(dest) < len(opaqueXattrValue) { - return uint32(len(opaqueXattrValue)), syscall.ERANGE - } - return uint32(copy(dest, opaqueXattrValue)), 0 - } - } - if v, ok := n.e.Xattrs[attr]; ok { - if len(dest) < len(v) { - return uint32(len(v)), syscall.ERANGE - } - return uint32(copy(dest, v)), 0 - } - return 0, syscall.ENODATA -} - -var _ = (fusefs.NodeListxattrer)((*node)(nil)) - -func (n *node) Listxattr(ctx context.Context, dest []byte) (uint32, syscall.Errno) { - var attrs []byte - if n.opaque { - // This node is an opaque directory so add overlayfs-compliant indicator. - for _, opaqueXattr := range opaqueXattrs { - attrs = append(attrs, []byte(opaqueXattr+"\x00")...) - } - } - for k := range n.e.Xattrs { - attrs = append(attrs, []byte(k+"\x00")...) 
- } - if len(dest) < len(attrs) { - return uint32(len(attrs)), syscall.ERANGE - } - return uint32(copy(dest, attrs)), 0 -} - -var _ = (fusefs.NodeReadlinker)((*node)(nil)) - -func (n *node) Readlink(ctx context.Context) ([]byte, syscall.Errno) { - return []byte(n.e.LinkName), 0 -} - -var _ = (fusefs.NodeStatfser)((*node)(nil)) - -func (n *node) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { - defaultStatfs(out) - return 0 -} - -// file is a file abstraction which implements file handle in go-fuse. -type file struct { - n *node - e *estargz.TOCEntry - ra io.ReaderAt -} - -var _ = (fusefs.FileReader)((*file)(nil)) - -func (f *file) Read(ctx context.Context, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { - n, err := f.ra.ReadAt(dest, off) - if err != nil && err != io.EOF { - f.n.s.report(fmt.Errorf("failed to read node: %v", err)) - return nil, syscall.EIO - } - return fuse.ReadResultData(dest[:n]), 0 -} - -var _ = (fusefs.FileGetattrer)((*file)(nil)) - -func (f *file) Getattr(ctx context.Context, out *fuse.AttrOut) syscall.Errno { - entryToAttr(f.e, &out.Attr) - return 0 -} - -// whiteout is a whiteout abstraction compliant to overlayfs. -type whiteout struct { - fusefs.Inode - e *estargz.TOCEntry -} - -var _ = (fusefs.NodeGetattrer)((*whiteout)(nil)) - -func (w *whiteout) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { - entryToWhAttr(w.e, &out.Attr) - return 0 -} - -var _ = (fusefs.NodeStatfser)((*whiteout)(nil)) - -func (w *whiteout) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { - defaultStatfs(out) - return 0 -} - -// newState provides new state directory node. -// It creates statFile at the same time to give it stable inode number. 
-func newState(layer layer.Layer) *state { - info := layer.Info() - return &state{ - statFile: &statFile{ - name: info.Digest.String() + ".json", - statJSON: statJSON{ - Digest: info.Digest.String(), - Size: info.Size, - }, - layer: layer, - }, - } -} - -// state is a directory which contain a "state file" of this layer aming to -// observability. This filesystem uses it to report something(e.g. error) to -// the clients(e.g. Kubernetes's livenessProbe). -// This directory has mode "dr-x------ root root". -type state struct { - fusefs.Inode - statFile *statFile -} - -var _ = (fusefs.NodeReaddirer)((*state)(nil)) - -func (s *state) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) { - return fusefs.NewListDirStream([]fuse.DirEntry{ - { - Mode: statFileMode, - Name: s.statFile.name, - Ino: inodeOfStatFile(s.statFile), - }, - }), 0 -} - -var _ = (fusefs.NodeLookuper)((*state)(nil)) - -func (s *state) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { - if name != s.statFile.name { - return nil, syscall.ENOENT - } - attr, errno := s.statFile.attr(&out.Attr) - if errno != 0 { - return nil, errno - } - return s.NewInode(ctx, s.statFile, attr), 0 -} - -var _ = (fusefs.NodeGetattrer)((*state)(nil)) - -func (s *state) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { - stateToAttr(s, &out.Attr) - return 0 -} - -var _ = (fusefs.NodeStatfser)((*state)(nil)) - -func (s *state) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { - defaultStatfs(out) - return 0 -} - -func (s *state) report(err error) { - s.statFile.report(err) -} - -type statJSON struct { - Error string `json:"error,omitempty"` - Digest string `json:"digest"` - // URL is excluded for potential security reason - Size int64 `json:"size"` - FetchedSize int64 `json:"fetchedSize"` - FetchedPercent float64 `json:"fetchedPercent"` // Fetched / Size * 100.0 -} - -// statFile is a file which contain something to be 
reported from this layer. -// This filesystem uses statFile.report() to report something(e.g. error) to -// the clients(e.g. Kubernetes's livenessProbe). -// This file has mode "-r-------- root root". -type statFile struct { - fusefs.Inode - name string - layer layer.Layer - statJSON statJSON - mu sync.Mutex -} - -var _ = (fusefs.NodeOpener)((*statFile)(nil)) - -func (sf *statFile) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) { - return nil, 0, 0 -} - -var _ = (fusefs.NodeReader)((*statFile)(nil)) - -func (sf *statFile) Read(ctx context.Context, f fusefs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { - sf.mu.Lock() - defer sf.mu.Unlock() - st, err := sf.updateStatUnlocked() - if err != nil { - return nil, syscall.EIO - } - n, err := bytes.NewReader(st).ReadAt(dest, off) - if err != nil && err != io.EOF { - return nil, syscall.EIO - } - return fuse.ReadResultData(dest[:n]), 0 -} - -var _ = (fusefs.NodeGetattrer)((*statFile)(nil)) - -func (sf *statFile) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { - _, errno := sf.attr(&out.Attr) - return errno -} - -var _ = (fusefs.NodeStatfser)((*statFile)(nil)) - -func (sf *statFile) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { - defaultStatfs(out) - return 0 -} - -func (sf *statFile) report(err error) { - sf.mu.Lock() - defer sf.mu.Unlock() - sf.statJSON.Error = err.Error() -} - -func (sf *statFile) attr(out *fuse.Attr) (fusefs.StableAttr, syscall.Errno) { - sf.mu.Lock() - defer sf.mu.Unlock() - - st, err := sf.updateStatUnlocked() - if err != nil { - return fusefs.StableAttr{}, syscall.EIO - } - - return statFileToAttr(sf, uint64(len(st)), out), 0 -} - -func (sf *statFile) updateStatUnlocked() ([]byte, error) { - sf.statJSON.FetchedSize = sf.layer.Info().FetchedSize - sf.statJSON.FetchedPercent = float64(sf.statJSON.FetchedSize) / float64(sf.statJSON.Size) * 100.0 - j, err := 
json.Marshal(&sf.statJSON) - if err != nil { - return nil, err - } - j = append(j, []byte("\n")...) - return j, nil -} - -// inodeOfEnt calculates the inode number which is one-to-one conresspondence -// with the TOCEntry insntance. -func inodeOfEnt(e *estargz.TOCEntry) uint64 { - return uint64(uintptr(unsafe.Pointer(e))) -} - -// entryToAttr converts stargz's TOCEntry to go-fuse's Attr. -func entryToAttr(e *estargz.TOCEntry, out *fuse.Attr) fusefs.StableAttr { - out.Ino = inodeOfEnt(e) - out.Size = uint64(e.Size) - out.Blksize = blockSize - out.Blocks = out.Size / uint64(out.Blksize) - if out.Size%uint64(out.Blksize) > 0 { - out.Blocks++ - } - mtime := e.ModTime() - out.SetTimes(nil, &mtime, nil) - out.Mode = modeOfEntry(e) - out.Owner = fuse.Owner{Uid: uint32(e.UID), Gid: uint32(e.GID)} - out.Rdev = uint32(unix.Mkdev(uint32(e.DevMajor), uint32(e.DevMinor))) - out.Nlink = uint32(e.NumLink) - if out.Nlink == 0 { - out.Nlink = 1 // zero "NumLink" means one. - } - out.Padding = 0 // TODO - - return fusefs.StableAttr{ - Mode: out.Mode, - Ino: out.Ino, - // NOTE: The inode number is unique throughout the lifettime of - // this filesystem so we don't consider about generation at this - // moment. - } -} - -// entryToWhAttr converts stargz's TOCEntry to go-fuse's Attr of whiteouts. -func entryToWhAttr(e *estargz.TOCEntry, out *fuse.Attr) fusefs.StableAttr { - fi := e.Stat() - out.Ino = inodeOfEnt(e) - out.Size = 0 - out.Blksize = blockSize - out.Blocks = 0 - mtime := fi.ModTime() - out.SetTimes(nil, &mtime, nil) - out.Mode = syscall.S_IFCHR - out.Owner = fuse.Owner{Uid: 0, Gid: 0} - out.Rdev = uint32(unix.Mkdev(0, 0)) - out.Nlink = 1 - out.Padding = 0 // TODO - - return fusefs.StableAttr{ - Mode: out.Mode, - Ino: out.Ino, - // NOTE: The inode number is unique throughout the lifettime of - // this filesystem so we don't consider about generation at this - // moment. 
- } -} - -// inodeOfState calculates the inode number which is one-to-one conresspondence -// with the state directory insntance which was created on mount. -func inodeOfState(s *state) uint64 { - return uint64(uintptr(unsafe.Pointer(s))) -} - -// stateToAttr converts state directory to go-fuse's Attr. -func stateToAttr(s *state, out *fuse.Attr) fusefs.StableAttr { - out.Ino = inodeOfState(s) - out.Size = 0 - out.Blksize = blockSize - out.Blocks = 0 - out.Nlink = 1 - - // root can read and open it (dr-x------ root root). - out.Mode = stateDirMode - out.Owner = fuse.Owner{Uid: 0, Gid: 0} - - // dummy - out.Mtime = 0 - out.Mtimensec = 0 - out.Rdev = 0 - out.Padding = 0 - - return fusefs.StableAttr{ - Mode: out.Mode, - Ino: out.Ino, - // NOTE: The inode number is unique throughout the lifettime of - // this filesystem so we don't consider about generation at this - // moment. - } -} - -// inodeOfStatFile calculates the inode number which is one-to-one conresspondence -// with the stat file insntance which was created on mount. -func inodeOfStatFile(s *statFile) uint64 { - return uint64(uintptr(unsafe.Pointer(s))) -} - -// statFileToAttr converts stat file to go-fuse's Attr. -func statFileToAttr(sf *statFile, size uint64, out *fuse.Attr) fusefs.StableAttr { - out.Ino = inodeOfStatFile(sf) - out.Size = size - out.Blksize = blockSize - out.Blocks = out.Size / uint64(out.Blksize) - out.Nlink = 1 - - // Root can read it ("-r-------- root root"). - out.Mode = statFileMode - out.Owner = fuse.Owner{Uid: 0, Gid: 0} - - // dummy - out.Mtime = 0 - out.Mtimensec = 0 - out.Rdev = 0 - out.Padding = 0 - - return fusefs.StableAttr{ - Mode: out.Mode, - Ino: out.Ino, - // NOTE: The inode number is unique throughout the lifettime of - // this filesystem so we don't consider about generation at this - // moment. 
- } -} - -// modeOfEntry gets system's mode bits from TOCEntry -func modeOfEntry(e *estargz.TOCEntry) uint32 { - m := e.Stat().Mode() - - // Permission bits - res := uint32(m & os.ModePerm) - - // File type bits - switch m & os.ModeType { - case os.ModeDevice: - res |= syscall.S_IFBLK - case os.ModeDevice | os.ModeCharDevice: - res |= syscall.S_IFCHR - case os.ModeDir: - res |= syscall.S_IFDIR - case os.ModeNamedPipe: - res |= syscall.S_IFIFO - case os.ModeSymlink: - res |= syscall.S_IFLNK - case os.ModeSocket: - res |= syscall.S_IFSOCK - default: // regular file. - res |= syscall.S_IFREG - } - - // suid, sgid, sticky bits - if m&os.ModeSetuid != 0 { - res |= syscall.S_ISUID - } - if m&os.ModeSetgid != 0 { - res |= syscall.S_ISGID - } - if m&os.ModeSticky != 0 { - res |= syscall.S_ISVTX - } - - return res -} - -func defaultStatfs(stat *fuse.StatfsOut) { - - // http://man7.org/linux/man-pages/man2/statfs.2.html - stat.Blocks = 0 // dummy - stat.Bfree = 0 - stat.Bavail = 0 - stat.Files = 0 // dummy - stat.Ffree = 0 - stat.Bsize = blockSize - stat.NameLen = 1<<32 - 1 - stat.Frsize = blockSize - stat.Padding = 0 - stat.Spare = [6]uint32{} -} diff --git a/fs/fs_test.go b/fs/fs_test.go index 57b30c6f5..a58a04a0b 100644 --- a/fs/fs_test.go +++ b/fs/fs_test.go @@ -23,39 +23,19 @@ package fs import ( - "bytes" "context" - "crypto/sha256" - "encoding/json" "fmt" - "io" - "math/rand" - "os" - "path/filepath" - "strings" - "syscall" "testing" "time" "github.com/containerd/containerd/reference" "github.com/containerd/containerd/remotes/docker" - "github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/fs/layer" "github.com/containerd/stargz-snapshotter/fs/source" "github.com/containerd/stargz-snapshotter/task" - "github.com/containerd/stargz-snapshotter/util/testutil" fusefs "github.com/hanwen/go-fuse/v2/fs" - "github.com/hanwen/go-fuse/v2/fuse" digest "github.com/opencontainers/go-digest" ocispec 
"github.com/opencontainers/image-spec/specs-go/v1" - "golang.org/x/sys/unix" -) - -const ( - sampleChunkSize = 3 - sampleMiddleOffset = sampleChunkSize / 2 - sampleData1 = "0123456789" - lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize) ) func TestCheck(t *testing.T) { @@ -83,14 +63,13 @@ type breakableLayer struct { success bool } -func (l *breakableLayer) Info() layer.Info { return layer.Info{} } -func (l *breakableLayer) Verify(tocDigest digest.Digest) error { return nil } -func (l *breakableLayer) SkipVerify() {} -func (l *breakableLayer) Root() *estargz.TOCEntry { return nil } -func (l *breakableLayer) OpenFile(name string) (io.ReaderAt, error) { return nil, fmt.Errorf("fail") } -func (l *breakableLayer) Prefetch(prefetchSize int64) error { return fmt.Errorf("fail") } -func (l *breakableLayer) WaitForPrefetchCompletion() error { return fmt.Errorf("fail") } -func (l *breakableLayer) BackgroundFetch() error { return fmt.Errorf("fail") } +func (l *breakableLayer) Info() layer.Info { return layer.Info{} } +func (l *breakableLayer) RootNode() (fusefs.InodeEmbedder, error) { return nil, nil } +func (l *breakableLayer) Verify(tocDigest digest.Digest) error { return nil } +func (l *breakableLayer) SkipVerify() {} +func (l *breakableLayer) Prefetch(prefetchSize int64) error { return fmt.Errorf("fail") } +func (l *breakableLayer) WaitForPrefetchCompletion() error { return fmt.Errorf("fail") } +func (l *breakableLayer) BackgroundFetch() error { return fmt.Errorf("fail") } func (l *breakableLayer) Check() error { if !l.success { return fmt.Errorf("failed") @@ -103,618 +82,3 @@ func (l *breakableLayer) Refresh(ctx context.Context, hosts docker.RegistryHosts } return nil } - -// Tests Read method of each file node. 
-func TestNodeRead(t *testing.T) { - sizeCond := map[string]int64{ - "single_chunk": sampleChunkSize - sampleMiddleOffset, - "multi_chunks": sampleChunkSize + sampleMiddleOffset, - } - innerOffsetCond := map[string]int64{ - "at_top": 0, - "at_middle": sampleMiddleOffset, - } - baseOffsetCond := map[string]int64{ - "of_1st_chunk": sampleChunkSize * 0, - "of_2nd_chunk": sampleChunkSize * 1, - "of_last_chunk": lastChunkOffset1, - } - fileSizeCond := map[string]int64{ - "in_1_chunk_file": sampleChunkSize * 1, - "in_2_chunks_file": sampleChunkSize * 2, - "in_max_size_file": int64(len(sampleData1)), - } - for sn, size := range sizeCond { - for in, innero := range innerOffsetCond { - for bo, baseo := range baseOffsetCond { - for fn, filesize := range fileSizeCond { - t.Run(fmt.Sprintf("reading_%s_%s_%s_%s", sn, in, bo, fn), func(t *testing.T) { - if filesize > int64(len(sampleData1)) { - t.Fatal("sample file size is larger than sample data") - } - - wantN := size - offset := baseo + innero - if remain := filesize - offset; remain < wantN { - if wantN = remain; wantN < 0 { - wantN = 0 - } - } - - // use constant string value as a data source. - want := strings.NewReader(sampleData1) - - // data we want to get. - wantData := make([]byte, wantN) - _, err := want.ReadAt(wantData, offset) - if err != nil && err != io.EOF { - t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err) - } - - // data we get from the file node. 
- f := makeNodeReader(t, []byte(sampleData1)[:filesize], sampleChunkSize) - tmpbuf := make([]byte, size) // fuse library can request bigger than remain - rr, errno := f.Read(context.Background(), tmpbuf, offset) - if errno != 0 { - t.Errorf("failed to read off=%d, size=%d, filesize=%d: %v", offset, size, filesize, err) - return - } - if rsize := rr.Size(); int64(rsize) != wantN { - t.Errorf("read size: %d; want: %d", rsize, wantN) - return - } - tmpbuf = make([]byte, len(tmpbuf)) - respData, fs := rr.Bytes(tmpbuf) - if fs != fuse.OK { - t.Errorf("failed to read result data for off=%d, size=%d, filesize=%d: %v", offset, size, filesize, err) - } - - if !bytes.Equal(wantData, respData) { - t.Errorf("off=%d, filesize=%d; read data{size=%d,data=%q}; want (size=%d,data=%q)", - offset, filesize, len(respData), string(respData), wantN, string(wantData)) - return - } - }) - } - } - } - } -} - -func makeNodeReader(t *testing.T, contents []byte, chunkSize int64) *file { - testName := "test" - sgz, _ := buildStargz(t, []testutil.TarEntry{testutil.File(testName, string(contents))}, chunkSizeInfo(chunkSize)) - r, err := estargz.Open(sgz) - if err != nil { - t.Fatal("failed to make stargz") - } - rootNode := getRootNode(t, r) - var eo fuse.EntryOut - inode, errno := rootNode.Lookup(context.Background(), testName, &eo) - if errno != 0 { - t.Fatalf("failed to lookup test node; errno: %v", errno) - } - f, _, errno := inode.Operations().(fusefs.NodeOpener).Open(context.Background(), 0) - if errno != 0 { - t.Fatalf("failed to open test file; errno: %v", errno) - } - return f.(*file) -} - -func TestExistence(t *testing.T) { - tests := []struct { - name string - in []testutil.TarEntry - want []check - }{ - { - name: "1_whiteout_with_sibling", - in: []testutil.TarEntry{ - testutil.Dir("foo/"), - testutil.File("foo/bar.txt", ""), - testutil.File("foo/.wh.foo.txt", ""), - }, - want: []check{ - hasValidWhiteout("foo/foo.txt"), - fileNotExist("foo/.wh.foo.txt"), - }, - }, - { - name: 
"1_whiteout_with_duplicated_name", - in: []testutil.TarEntry{ - testutil.Dir("foo/"), - testutil.File("foo/bar.txt", "test"), - testutil.File("foo/.wh.bar.txt", ""), - }, - want: []check{ - hasFileDigest("foo/bar.txt", digestFor("test")), - fileNotExist("foo/.wh.bar.txt"), - }, - }, - { - name: "1_opaque", - in: []testutil.TarEntry{ - testutil.Dir("foo/"), - testutil.File("foo/.wh..wh..opq", ""), - }, - want: []check{ - hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue), - hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue), - fileNotExist("foo/.wh..wh..opq"), - }, - }, - { - name: "1_opaque_with_sibling", - in: []testutil.TarEntry{ - testutil.Dir("foo/"), - testutil.File("foo/.wh..wh..opq", ""), - testutil.File("foo/bar.txt", "test"), - }, - want: []check{ - hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue), - hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue), - hasFileDigest("foo/bar.txt", digestFor("test")), - fileNotExist("foo/.wh..wh..opq"), - }, - }, - { - name: "1_opaque_with_xattr", - in: []testutil.TarEntry{ - testutil.Dir("foo/", testutil.WithDirXattrs(map[string]string{"foo": "bar"})), - testutil.File("foo/.wh..wh..opq", ""), - }, - want: []check{ - hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue), - hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue), - hasNodeXattrs("foo/", "foo", "bar"), - fileNotExist("foo/.wh..wh..opq"), - }, - }, - { - name: "prefetch_landmark", - in: []testutil.TarEntry{ - testutil.File(estargz.PrefetchLandmark, "test"), - testutil.Dir("foo/"), - testutil.File(fmt.Sprintf("foo/%s", estargz.PrefetchLandmark), "test"), - }, - want: []check{ - fileNotExist(estargz.PrefetchLandmark), - hasFileDigest(fmt.Sprintf("foo/%s", estargz.PrefetchLandmark), digestFor("test")), - }, - }, - { - name: "no_prefetch_landmark", - in: []testutil.TarEntry{ - testutil.File(estargz.NoPrefetchLandmark, "test"), - testutil.Dir("foo/"), - testutil.File(fmt.Sprintf("foo/%s", estargz.NoPrefetchLandmark), "test"), - }, - 
want: []check{ - fileNotExist(estargz.NoPrefetchLandmark), - hasFileDigest(fmt.Sprintf("foo/%s", estargz.NoPrefetchLandmark), digestFor("test")), - }, - }, - { - name: "state_file", - in: []testutil.TarEntry{ - testutil.File("test", "test"), - }, - want: []check{ - hasFileDigest("test", digestFor("test")), - hasStateFile(t, testStateLayerDigest.String()+".json"), - }, - }, - { - name: "file_suid", - in: []testutil.TarEntry{ - testutil.File("test", "test", testutil.WithFileMode(0644|os.ModeSetuid)), - }, - want: []check{ - hasExtraMode("test", os.ModeSetuid), - }, - }, - { - name: "dir_sgid", - in: []testutil.TarEntry{ - testutil.Dir("test/", testutil.WithDirMode(0755|os.ModeSetgid)), - }, - want: []check{ - hasExtraMode("test/", os.ModeSetgid), - }, - }, - { - name: "file_sticky", - in: []testutil.TarEntry{ - testutil.File("test", "test", testutil.WithFileMode(0644|os.ModeSticky)), - }, - want: []check{ - hasExtraMode("test", os.ModeSticky), - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - sgz, _ := buildStargz(t, tt.in) - r, err := estargz.Open(sgz) - if err != nil { - t.Fatalf("stargz.Open: %v", err) - } - rootNode := getRootNode(t, r) - for _, want := range tt.want { - want(t, rootNode) - } - }) - } -} - -func getRootNode(t *testing.T, r *estargz.Reader) *node { - root, ok := r.Lookup("") - if !ok { - t.Fatalf("failed to find root in stargz") - } - l := &testLayer{r} - rootNode := &node{ - layer: l, - e: root, - s: newState(l), - } - fusefs.NewNodeFS(rootNode, &fusefs.Options{}) - return rootNode -} - -type testLayer struct { - r *estargz.Reader -} - -var testStateLayerDigest = digest.FromString("dummy") - -func (tl *testLayer) OpenFile(name string) (io.ReaderAt, error) { - return tl.r.OpenFile(name) -} -func (tl *testLayer) Info() layer.Info { - return layer.Info{ - Digest: testStateLayerDigest, - Size: 10, - FetchedSize: 5, - } -} -func (tl *testLayer) Verify(tocDigest digest.Digest) error { return nil } -func (tl 
*testLayer) SkipVerify() {} -func (tl *testLayer) Root() *estargz.TOCEntry { return nil } -func (tl *testLayer) Prefetch(prefetchSize int64) error { return fmt.Errorf("fail") } -func (tl *testLayer) WaitForPrefetchCompletion() error { return fmt.Errorf("fail") } -func (tl *testLayer) BackgroundFetch() error { return fmt.Errorf("fail") } -func (tl *testLayer) Check() error { return nil } -func (tl *testLayer) Refresh(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error { - return nil -} - -type chunkSizeInfo int - -func buildStargz(t *testing.T, ents []testutil.TarEntry, opts ...interface{}) (*io.SectionReader, digest.Digest) { - var chunkSize chunkSizeInfo - for _, opt := range opts { - if v, ok := opt.(chunkSizeInfo); ok { - chunkSize = v - } else { - t.Fatalf("unsupported opt") - } - } - - tarBuf := new(bytes.Buffer) - if _, err := io.Copy(tarBuf, testutil.BuildTar(ents)); err != nil { - t.Fatalf("failed to build tar: %v", err) - } - tarData := tarBuf.Bytes() - rc, err := estargz.Build( - io.NewSectionReader(bytes.NewReader(tarData), 0, int64(len(tarData))), - estargz.WithChunkSize(int(chunkSize)), - ) - if err != nil { - t.Fatalf("failed to build verifiable stargz: %v", err) - } - defer rc.Close() - vsb := new(bytes.Buffer) - if _, err := io.Copy(vsb, rc); err != nil { - t.Fatalf("failed to copy built stargz blob: %v", err) - } - vsbb := vsb.Bytes() - - return io.NewSectionReader(bytes.NewReader(vsbb), 0, int64(len(vsbb))), rc.TOCDigest() -} - -type check func(*testing.T, *node) - -func fileNotExist(file string) check { - return func(t *testing.T, root *node) { - if _, _, err := getDirentAndNode(t, root, file); err == nil { - t.Errorf("Node %q exists", file) - } - } -} - -func hasFileDigest(file string, digest string) check { - return func(t *testing.T, root *node) { - _, n, err := getDirentAndNode(t, root, file) - if err != nil { - t.Fatalf("failed to get node %q: %v", file, err) - } - if ndgst := 
n.Operations().(*node).e.Digest; ndgst != digest { - t.Fatalf("Digest(%q) = %q, want %q", file, ndgst, digest) - } - } -} - -func hasExtraMode(name string, mode os.FileMode) check { - return func(t *testing.T, root *node) { - _, n, err := getDirentAndNode(t, root, name) - if err != nil { - t.Fatalf("failed to get node %q: %v", name, err) - } - var ao fuse.AttrOut - if errno := n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao); errno != 0 { - t.Fatalf("failed to get attributes of node %q: %v", name, errno) - } - a := ao.Attr - gotMode := a.Mode & (syscall.S_ISUID | syscall.S_ISGID | syscall.S_ISVTX) - wantMode := extraModeToTarMode(mode) - if gotMode != uint32(wantMode) { - t.Fatalf("got mode = %b, want %b", gotMode, wantMode) - } - } -} - -func hasValidWhiteout(name string) check { - return func(t *testing.T, root *node) { - ent, n, err := getDirentAndNode(t, root, name) - if err != nil { - t.Fatalf("failed to get node %q: %v", name, err) - } - var ao fuse.AttrOut - if errno := n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao); errno != 0 { - t.Fatalf("failed to get attributes of file %q: %v", name, errno) - } - a := ao.Attr - if a.Ino != ent.Ino { - t.Errorf("inconsistent inodes %d(Node) != %d(Dirent)", a.Ino, ent.Ino) - return - } - - // validate the direntry - if ent.Mode != syscall.S_IFCHR { - t.Errorf("whiteout entry %q isn't a char device", name) - return - } - - // validate the node - if a.Mode != syscall.S_IFCHR { - t.Errorf("whiteout %q has an invalid mode %o; want %o", - name, a.Mode, syscall.S_IFCHR) - return - } - if a.Rdev != uint32(unix.Mkdev(0, 0)) { - t.Errorf("whiteout %q has invalid device numbers (%d, %d); want (0, 0)", - name, unix.Major(uint64(a.Rdev)), unix.Minor(uint64(a.Rdev))) - return - } - } -} - -func hasNodeXattrs(entry, name, value string) check { - return func(t *testing.T, root *node) { - _, n, err := getDirentAndNode(t, root, entry) - if err != nil { - t.Fatalf("failed to get 
node %q: %v", entry, err) - } - - // check xattr exists in the xattrs list. - buf := make([]byte, 1000) - nb, errno := n.Operations().(fusefs.NodeListxattrer).Listxattr(context.Background(), buf) - if errno != 0 { - t.Fatalf("failed to get xattrs list of node %q: %v", entry, err) - } - attrs := strings.Split(string(buf[:nb]), "\x00") - var found bool - for _, x := range attrs { - if x == name { - found = true - } - } - if !found { - t.Errorf("node %q doesn't have an opaque xattr %q", entry, value) - return - } - - // check the xattr has valid value. - v := make([]byte, len(value)) - nv, errno := n.Operations().(fusefs.NodeGetxattrer).Getxattr(context.Background(), name, v) - if errno != 0 { - t.Fatalf("failed to get xattr %q of node %q: %v", name, entry, err) - } - if int(nv) != len(value) { - t.Fatalf("invalid xattr size for file %q, value %q got %d; want %d", - name, value, nv, len(value)) - } - if string(v) != value { - t.Errorf("node %q has an invalid xattr %q; want %q", entry, v, value) - return - } - } -} - -func hasEntry(t *testing.T, name string, ents fusefs.DirStream) (fuse.DirEntry, bool) { - for ents.HasNext() { - de, errno := ents.Next() - if errno != 0 { - t.Fatalf("faield to read entries for %q", name) - } - if de.Name == name { - return de, true - } - } - return fuse.DirEntry{}, false -} - -func hasStateFile(t *testing.T, id string) check { - return func(t *testing.T, root *node) { - - // Check the state dir is hidden on OpenDir for "/" - ents, errno := root.Readdir(context.Background()) - if errno != 0 { - t.Errorf("failed to open root directory: %v", errno) - return - } - if _, ok := hasEntry(t, stateDirName, ents); ok { - t.Errorf("state direntry %q should not be listed", stateDirName) - return - } - - // Check existence of state dir - var eo fuse.EntryOut - sti, errno := root.Lookup(context.Background(), stateDirName, &eo) - if errno != 0 { - t.Errorf("failed to lookup directory %q: %v", stateDirName, errno) - return - } - st, ok := 
sti.Operations().(*state) - if !ok { - t.Errorf("directory %q isn't a state node", stateDirName) - return - } - - // Check existence of state file - ents, errno = st.Readdir(context.Background()) - if errno != 0 { - t.Errorf("failed to open directory %q: %v", stateDirName, errno) - return - } - if _, ok := hasEntry(t, id, ents); !ok { - t.Errorf("direntry %q not found in %q", id, stateDirName) - return - } - inode, errno := st.Lookup(context.Background(), id, &eo) - if errno != 0 { - t.Errorf("failed to lookup node %q in %q: %v", id, stateDirName, errno) - return - } - n, ok := inode.Operations().(*statFile) - if !ok { - t.Errorf("entry %q isn't a normal node", id) - return - } - - // wanted data - rand.Seed(time.Now().UnixNano()) - wantErr := fmt.Errorf("test-%d", rand.Int63()) - - // report the data - root.s.report(wantErr) - - // obtain file size (check later) - var ao fuse.AttrOut - errno = n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao) - if errno != 0 { - t.Errorf("failed to get attr of state file: %v", errno) - return - } - attr := ao.Attr - - // get data via state file - tmp := make([]byte, 4096) - res, errno := n.Read(context.Background(), nil, tmp, 0) - if errno != 0 { - t.Errorf("failed to read state file: %v", errno) - return - } - gotState, status := res.Bytes(nil) - if status != fuse.OK { - t.Errorf("failed to get result bytes of state file: %v", errno) - return - } - if attr.Size != uint64(len(string(gotState))) { - t.Errorf("size %d; want %d", attr.Size, len(string(gotState))) - return - } - - var j statJSON - if err := json.Unmarshal(gotState, &j); err != nil { - t.Errorf("failed to unmarshal %q: %v", string(gotState), err) - return - } - if wantErr.Error() != j.Error { - t.Errorf("expected error %q, got %q", wantErr.Error(), j.Error) - return - } - } -} - -// getDirentAndNode gets dirent and node at the specified path at once and makes -// sure that the both of them exist. 
-func getDirentAndNode(t *testing.T, root *node, path string) (ent fuse.DirEntry, n *fusefs.Inode, err error) { - dir, base := filepath.Split(filepath.Clean(path)) - - // get the target's parent directory. - var eo fuse.EntryOut - d := root - for _, name := range strings.Split(dir, "/") { - if len(name) == 0 { - continue - } - di, errno := d.Lookup(context.Background(), name, &eo) - if errno != 0 { - err = fmt.Errorf("failed to lookup directory %q: %v", name, errno) - return - } - var ok bool - if d, ok = di.Operations().(*node); !ok { - err = fmt.Errorf("directory %q isn't a normal node", name) - return - } - - } - - // get the target's direntry. - ents, errno := d.Readdir(context.Background()) - if errno != 0 { - err = fmt.Errorf("failed to open directory %q: %v", path, errno) - } - ent, ok := hasEntry(t, base, ents) - if !ok { - err = fmt.Errorf("direntry %q not found in the parent directory of %q", base, path) - } - - // get the target's node. - n, errno = d.Lookup(context.Background(), base, &eo) - if errno != 0 { - err = fmt.Errorf("failed to lookup node %q: %v", path, errno) - } - - return -} - -func digestFor(content string) string { - sum := sha256.Sum256([]byte(content)) - return fmt.Sprintf("sha256:%x", sum) -} - -// suid, guid, sticky bits for archive/tar -// https://github.com/golang/go/blob/release-branch.go1.13/src/archive/tar/common.go#L607-L609 -const ( - cISUID = 04000 // Set uid - cISGID = 02000 // Set gid - cISVTX = 01000 // Save text (sticky bit) -) - -func extraModeToTarMode(fm os.FileMode) (tm int64) { - if fm&os.ModeSetuid != 0 { - tm |= cISUID - } - if fm&os.ModeSetgid != 0 { - tm |= cISGID - } - if fm&os.ModeSticky != 0 { - tm |= cISVTX - } - return -} diff --git a/fs/layer/layer.go b/fs/layer/layer.go index db007f090..528681148 100644 --- a/fs/layer/layer.go +++ b/fs/layer/layer.go @@ -43,6 +43,7 @@ import ( "github.com/containerd/stargz-snapshotter/task" "github.com/containerd/stargz-snapshotter/util/lrucache" 
"github.com/golang/groupcache/lru" + fusefs "github.com/hanwen/go-fuse/v2/fs" digest "github.com/opencontainers/go-digest" ocispec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" @@ -59,12 +60,11 @@ const ( // Layer represents a layer. type Layer interface { - // Info returns the information of this layer. Info() Info - // Root returns the root node of this layer. - Root() *estargz.TOCEntry + // RootNode returns the root node of this layer. + RootNode() (fusefs.InodeEmbedder, error) // Check checks if the layer is still connectable. Check() error @@ -78,10 +78,6 @@ type Layer interface { // SkipVerify skips verification for this layer. SkipVerify() - // OpenFile opens a file. - // Calling this function before calling Verify or SkipVerify will fail. - OpenFile(name string) (io.ReaderAt, error) - // Prefetch prefetches the specified size. If the layer is eStargz and contains landmark files, // the range indicated by these files is respected. // Calling this function before calling Verify or SkipVerify will fail. 
@@ -216,13 +212,13 @@ func (r *Resolver) Resolve(ctx context.Context, hosts docker.RegistryHosts, refs defer r.backgroundTaskManager.DonePrioritizedTask() return blobR.ReadAt(p, offset) }), 0, blobR.Size()) - vr, root, err := reader.NewReader(sr, r.fsCache) + vr, err := reader.NewReader(sr, r.fsCache) if err != nil { return nil, errors.Wrap(err, "failed to read layer") } // Combine layer information together - l := newLayer(r, desc, blobR, vr, root) + l := newLayer(r, desc, blobR, vr) r.layerCacheMu.Lock() r.layerCache.Add(name, l) r.layerCacheMu.Unlock() @@ -279,14 +275,12 @@ func newLayer( desc ocispec.Descriptor, blob remote.Blob, vr *reader.VerifiableReader, - root *estargz.TOCEntry, ) *layer { return &layer{ resolver: resolver, desc: desc, blob: blob, verifiableReader: vr, - root: root, prefetchWaiter: newWaiter(), } } @@ -296,7 +290,6 @@ type layer struct { desc ocispec.Descriptor blob remote.Blob verifiableReader *reader.VerifiableReader - root *estargz.TOCEntry prefetchWaiter *waiter r reader.Reader @@ -310,10 +303,6 @@ func (l *layer) Info() Info { } } -func (l *layer) Root() *estargz.TOCEntry { - return l.root -} - func (l *layer) Check() error { return l.blob.Check() } @@ -331,13 +320,6 @@ func (l *layer) SkipVerify() { l.r = l.verifiableReader.SkipVerify() } -func (l *layer) OpenFile(name string) (io.ReaderAt, error) { - if l.r == nil { - return nil, fmt.Errorf("layer hasn't been verified yet") - } - return l.r.OpenFile(name) -} - func (l *layer) Prefetch(prefetchSize int64) error { defer l.prefetchWaiter.done() // Notify the completion @@ -397,6 +379,13 @@ func (l *layer) BackgroundFetch() error { ) } +func (l *layer) RootNode() (fusefs.InodeEmbedder, error) { + if l.r == nil { + return nil, fmt.Errorf("layer hasn't been verified yet") + } + return newNode(l.desc.Digest, l.r, l.blob) +} + func newWaiter() *waiter { return &waiter{ completionCond: sync.NewCond(&sync.Mutex{}), diff --git a/fs/layer/layer_test.go b/fs/layer/layer_test.go index 
108e9b309..66cee536c 100644 --- a/fs/layer/layer_test.go +++ b/fs/layer/layer_test.go @@ -23,7 +23,6 @@ package layer import ( - "bytes" "context" "io" "io/ioutil" @@ -62,9 +61,6 @@ func TestPrefetch(t *testing.T) { } return defaultPrefetchSize } - defaultPrefetchPosition := func(t *testing.T, l *layer) int64 { - return l.Info().Size - } tests := []struct { name string in []testutil.TarEntry @@ -72,19 +68,7 @@ func TestPrefetch(t *testing.T) { wants []string // filenames to compare prefetchSize func(*testing.T, *layer) int64 prioritizedFiles []string - stargz bool }{ - { - name: "default_prefetch", - in: []testutil.TarEntry{ - testutil.File("foo.txt", sampleData1), - }, - wantNum: chunkNum(sampleData1), - wants: []string{"foo.txt"}, - prefetchSize: defaultPrefetchPosition, - prioritizedFiles: nil, - stargz: true, - }, { name: "no_prefetch", in: []testutil.TarEntry{ @@ -121,13 +105,17 @@ func TestPrefetch(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - sr, dgst := buildStargz(t, tt.in, - chunkSizeInfo(sampleChunkSize), - prioritizedFilesInfo(tt.prioritizedFiles), - stargzOnlyInfo(tt.stargz)) + sr, dgst, err := testutil.BuildEStargz(tt.in, + testutil.WithEStargzOptions( + estargz.WithChunkSize(sampleChunkSize), + estargz.WithPrioritizedFiles(tt.prioritizedFiles), + )) + if err != nil { + t.Fatalf("failed to build eStargz: %v", err) + } blob := newBlob(sr) mcache := cache.NewMemoryCache() - vr, _, err := reader.NewReader(sr, mcache) + vr, err := reader.NewReader(sr, mcache) if err != nil { t.Fatalf("failed to make stargz reader: %v", err) } @@ -138,11 +126,8 @@ func TestPrefetch(t *testing.T) { ocispec.Descriptor{Digest: testStateLayerDigest}, blob, vr, - nil, ) - if tt.stargz { - l.SkipVerify() - } else if err := l.Verify(dgst); err != nil { + if err := l.Verify(dgst); err != nil { t.Errorf("failed to verify reader: %v", err) return } @@ -253,62 +238,3 @@ func TestWaiter(t *testing.T) { t.Errorf("wait time is too short: %v; want %v", 
doneTime.Sub(startTime), waitTime) } } - -type chunkSizeInfo int -type prioritizedFilesInfo []string -type stargzOnlyInfo bool - -func buildStargz(t *testing.T, ents []testutil.TarEntry, opts ...interface{}) (*io.SectionReader, digest.Digest) { - var chunkSize chunkSizeInfo - var prioritizedFiles prioritizedFilesInfo - var stargzOnly bool - for _, opt := range opts { - if v, ok := opt.(chunkSizeInfo); ok { - chunkSize = v - } else if v, ok := opt.(prioritizedFilesInfo); ok { - prioritizedFiles = v - } else if v, ok := opt.(stargzOnlyInfo); ok { - stargzOnly = bool(v) - } else { - t.Fatalf("unsupported opt") - } - } - - tarBuf := new(bytes.Buffer) - if _, err := io.Copy(tarBuf, testutil.BuildTar(ents)); err != nil { - t.Fatalf("failed to build tar: %v", err) - } - tarData := tarBuf.Bytes() - - if stargzOnly { - stargzBuf := new(bytes.Buffer) - w := estargz.NewWriter(stargzBuf) - if chunkSize > 0 { - w.ChunkSize = int(chunkSize) - } - if err := w.AppendTar(bytes.NewReader(tarData)); err != nil { - t.Fatalf("failed to append tar file to stargz: %q", err) - } - if _, err := w.Close(); err != nil { - t.Fatalf("failed to close stargz writer: %q", err) - } - stargzData := stargzBuf.Bytes() - return io.NewSectionReader(bytes.NewReader(stargzData), 0, int64(len(stargzData))), "" - } - rc, err := estargz.Build( - io.NewSectionReader(bytes.NewReader(tarData), 0, int64(len(tarData))), - estargz.WithPrioritizedFiles([]string(prioritizedFiles)), - estargz.WithChunkSize(int(chunkSize)), - ) - if err != nil { - t.Fatalf("failed to build verifiable stargz: %v", err) - } - defer rc.Close() - vsb := new(bytes.Buffer) - if _, err := io.Copy(vsb, rc); err != nil { - t.Fatalf("failed to copy built stargz blob: %v", err) - } - vsbb := vsb.Bytes() - - return io.NewSectionReader(bytes.NewReader(vsbb), 0, int64(len(vsbb))), rc.TOCDigest() -} diff --git a/fs/layer/node.go b/fs/layer/node.go new file mode 100644 index 000000000..58cf04716 --- /dev/null +++ b/fs/layer/node.go @@ -0,0 +1,628 @@ 
+/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the NOTICE.md file. +*/ + +package layer + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "sort" + "strings" + "sync" + "syscall" + "unsafe" + + "github.com/containerd/stargz-snapshotter/estargz" + "github.com/containerd/stargz-snapshotter/fs/reader" + "github.com/containerd/stargz-snapshotter/fs/remote" + fusefs "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + digest "github.com/opencontainers/go-digest" + "golang.org/x/sys/unix" +) + +const ( + blockSize = 4096 + whiteoutPrefix = ".wh." + whiteoutOpaqueDir = whiteoutPrefix + whiteoutPrefix + ".opq" + opaqueXattrValue = "y" + stateDirName = ".stargz-snapshotter" + statFileMode = syscall.S_IFREG | 0400 // -r-------- + stateDirMode = syscall.S_IFDIR | 0500 // dr-x------ +) + +var opaqueXattrs = []string{"trusted.overlay.opaque", "user.overlay.opaque"} + +func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob) (fusefs.InodeEmbedder, error) { + root, ok := r.Lookup("") + if !ok { + return nil, fmt.Errorf("failed to get a TOCEntry of the root") + } + return &node{ + r: r, + e: root, + s: newState(layerDgst, blob), + }, nil +} + +// node is a filesystem inode abstraction. 
+type node struct { + fusefs.Inode + r reader.Reader + e *estargz.TOCEntry + s *state + opaque bool // true if this node is an overlayfs opaque directory +} + +var _ = (fusefs.InodeEmbedder)((*node)(nil)) + +var _ = (fusefs.NodeReaddirer)((*node)(nil)) + +func (n *node) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) { + var ents []fuse.DirEntry + whiteouts := map[string]*estargz.TOCEntry{} + normalEnts := map[string]bool{} + n.e.ForeachChild(func(baseName string, ent *estargz.TOCEntry) bool { + + // We don't want to show prefetch landmarks in "/". + if n.e.Name == "" && (baseName == estargz.PrefetchLandmark || baseName == estargz.NoPrefetchLandmark) { + return true + } + + // We don't want to show whiteouts. + if strings.HasPrefix(baseName, whiteoutPrefix) { + if baseName == whiteoutOpaqueDir { + return true + } + // Add the overlayfs-compiant whiteout later. + whiteouts[baseName] = ent + return true + } + + // This is a normal entry. + normalEnts[baseName] = true + ents = append(ents, fuse.DirEntry{ + Mode: modeOfEntry(ent), + Name: baseName, + Ino: inodeOfEnt(ent), + }) + return true + }) + + // Append whiteouts if no entry replaces the target entry in the lower layer. + for w, ent := range whiteouts { + if !normalEnts[w[len(whiteoutPrefix):]] { + ents = append(ents, fuse.DirEntry{ + Mode: syscall.S_IFCHR, + Name: w[len(whiteoutPrefix):], + Ino: inodeOfEnt(ent), + }) + + } + } + + // Avoid undeterministic order of entries on each call + sort.Slice(ents, func(i, j int) bool { + return ents[i].Name < ents[j].Name + }) + + return fusefs.NewListDirStream(ents), 0 +} + +var _ = (fusefs.NodeLookuper)((*node)(nil)) + +func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { + // We don't want to show prefetch landmarks in "/". + if n.e.Name == "" && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) { + return nil, syscall.ENOENT + } + + // We don't want to show whiteouts. 
+ if strings.HasPrefix(name, whiteoutPrefix) { + return nil, syscall.ENOENT + } + + // state directory + if n.e.Name == "" && name == stateDirName { + return n.NewInode(ctx, n.s, stateToAttr(n.s, &out.Attr)), 0 + } + + // lookup stargz TOCEntry + ce, ok := n.e.LookupChild(name) + if !ok { + // If the entry exists as a whiteout, show an overlayfs-styled whiteout node. + if wh, ok := n.e.LookupChild(fmt.Sprintf("%s%s", whiteoutPrefix, name)); ok { + return n.NewInode(ctx, &whiteout{ + e: wh, + }, entryToWhAttr(wh, &out.Attr)), 0 + } + return nil, syscall.ENOENT + } + var opaque bool + if _, ok := ce.LookupChild(whiteoutOpaqueDir); ok { + // This entry is an opaque directory so make it recognizable for overlayfs. + opaque = true + } + + return n.NewInode(ctx, &node{ + r: n.r, + e: ce, + s: n.s, + opaque: opaque, + }, entryToAttr(ce, &out.Attr)), 0 +} + +var _ = (fusefs.NodeOpener)((*node)(nil)) + +func (n *node) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) { + ra, err := n.r.OpenFile(n.e.Name) + if err != nil { + n.s.report(fmt.Errorf("failed to open node: %v", err)) + return nil, 0, syscall.EIO + } + return &file{ + n: n, + e: n.e, + ra: ra, + }, 0, 0 +} + +var _ = (fusefs.NodeGetattrer)((*node)(nil)) + +func (n *node) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { + entryToAttr(n.e, &out.Attr) + return 0 +} + +var _ = (fusefs.NodeGetxattrer)((*node)(nil)) + +func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32, syscall.Errno) { + for _, opaqueXattr := range opaqueXattrs { + if attr == opaqueXattr && n.opaque { + // This node is an opaque directory so give overlayfs-compliant indicator. 
+ if len(dest) < len(opaqueXattrValue) { + return uint32(len(opaqueXattrValue)), syscall.ERANGE + } + return uint32(copy(dest, opaqueXattrValue)), 0 + } + } + if v, ok := n.e.Xattrs[attr]; ok { + if len(dest) < len(v) { + return uint32(len(v)), syscall.ERANGE + } + return uint32(copy(dest, v)), 0 + } + return 0, syscall.ENODATA +} + +var _ = (fusefs.NodeListxattrer)((*node)(nil)) + +func (n *node) Listxattr(ctx context.Context, dest []byte) (uint32, syscall.Errno) { + var attrs []byte + if n.opaque { + // This node is an opaque directory so add overlayfs-compliant indicator. + for _, opaqueXattr := range opaqueXattrs { + attrs = append(attrs, []byte(opaqueXattr+"\x00")...) + } + } + for k := range n.e.Xattrs { + attrs = append(attrs, []byte(k+"\x00")...) + } + if len(dest) < len(attrs) { + return uint32(len(attrs)), syscall.ERANGE + } + return uint32(copy(dest, attrs)), 0 +} + +var _ = (fusefs.NodeReadlinker)((*node)(nil)) + +func (n *node) Readlink(ctx context.Context) ([]byte, syscall.Errno) { + return []byte(n.e.LinkName), 0 +} + +var _ = (fusefs.NodeStatfser)((*node)(nil)) + +func (n *node) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { + defaultStatfs(out) + return 0 +} + +// file is a file abstraction which implements file handle in go-fuse. +type file struct { + n *node + e *estargz.TOCEntry + ra io.ReaderAt +} + +var _ = (fusefs.FileReader)((*file)(nil)) + +func (f *file) Read(ctx context.Context, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { + n, err := f.ra.ReadAt(dest, off) + if err != nil && err != io.EOF { + f.n.s.report(fmt.Errorf("failed to read node: %v", err)) + return nil, syscall.EIO + } + return fuse.ReadResultData(dest[:n]), 0 +} + +var _ = (fusefs.FileGetattrer)((*file)(nil)) + +func (f *file) Getattr(ctx context.Context, out *fuse.AttrOut) syscall.Errno { + entryToAttr(f.e, &out.Attr) + return 0 +} + +// whiteout is a whiteout abstraction compliant to overlayfs. 
+type whiteout struct { + fusefs.Inode + e *estargz.TOCEntry +} + +var _ = (fusefs.NodeGetattrer)((*whiteout)(nil)) + +func (w *whiteout) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { + entryToWhAttr(w.e, &out.Attr) + return 0 +} + +var _ = (fusefs.NodeStatfser)((*whiteout)(nil)) + +func (w *whiteout) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { + defaultStatfs(out) + return 0 +} + +// newState provides new state directory node. +// It creates statFile at the same time to give it stable inode number. +func newState(layerDigest digest.Digest, blob remote.Blob) *state { + return &state{ + statFile: &statFile{ + name: layerDigest.String() + ".json", + statJSON: statJSON{ + Digest: layerDigest.String(), + Size: blob.Size(), + }, + blob: blob, + }, + } +} + +// state is a directory which contain a "state file" of this layer aming to +// observability. This filesystem uses it to report something(e.g. error) to +// the clients(e.g. Kubernetes's livenessProbe). +// This directory has mode "dr-x------ root root". 
+type state struct { + fusefs.Inode + statFile *statFile +} + +var _ = (fusefs.NodeReaddirer)((*state)(nil)) + +func (s *state) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) { + return fusefs.NewListDirStream([]fuse.DirEntry{ + { + Mode: statFileMode, + Name: s.statFile.name, + Ino: inodeOfStatFile(s.statFile), + }, + }), 0 +} + +var _ = (fusefs.NodeLookuper)((*state)(nil)) + +func (s *state) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { + if name != s.statFile.name { + return nil, syscall.ENOENT + } + attr, errno := s.statFile.attr(&out.Attr) + if errno != 0 { + return nil, errno + } + return s.NewInode(ctx, s.statFile, attr), 0 +} + +var _ = (fusefs.NodeGetattrer)((*state)(nil)) + +func (s *state) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { + stateToAttr(s, &out.Attr) + return 0 +} + +var _ = (fusefs.NodeStatfser)((*state)(nil)) + +func (s *state) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { + defaultStatfs(out) + return 0 +} + +func (s *state) report(err error) { + s.statFile.report(err) +} + +type statJSON struct { + Error string `json:"error,omitempty"` + Digest string `json:"digest"` + // URL is excluded for potential security reason + Size int64 `json:"size"` + FetchedSize int64 `json:"fetchedSize"` + FetchedPercent float64 `json:"fetchedPercent"` // Fetched / Size * 100.0 +} + +// statFile is a file which contain something to be reported from this layer. +// This filesystem uses statFile.report() to report something(e.g. error) to +// the clients(e.g. Kubernetes's livenessProbe). +// This file has mode "-r-------- root root". 
+type statFile struct { + fusefs.Inode + name string + blob remote.Blob + statJSON statJSON + mu sync.Mutex +} + +var _ = (fusefs.NodeOpener)((*statFile)(nil)) + +func (sf *statFile) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) { + return nil, 0, 0 +} + +var _ = (fusefs.NodeReader)((*statFile)(nil)) + +func (sf *statFile) Read(ctx context.Context, f fusefs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { + sf.mu.Lock() + defer sf.mu.Unlock() + st, err := sf.updateStatUnlocked() + if err != nil { + return nil, syscall.EIO + } + n, err := bytes.NewReader(st).ReadAt(dest, off) + if err != nil && err != io.EOF { + return nil, syscall.EIO + } + return fuse.ReadResultData(dest[:n]), 0 +} + +var _ = (fusefs.NodeGetattrer)((*statFile)(nil)) + +func (sf *statFile) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { + _, errno := sf.attr(&out.Attr) + return errno +} + +var _ = (fusefs.NodeStatfser)((*statFile)(nil)) + +func (sf *statFile) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { + defaultStatfs(out) + return 0 +} + +func (sf *statFile) report(err error) { + sf.mu.Lock() + defer sf.mu.Unlock() + sf.statJSON.Error = err.Error() +} + +func (sf *statFile) attr(out *fuse.Attr) (fusefs.StableAttr, syscall.Errno) { + sf.mu.Lock() + defer sf.mu.Unlock() + + st, err := sf.updateStatUnlocked() + if err != nil { + return fusefs.StableAttr{}, syscall.EIO + } + + return statFileToAttr(sf, uint64(len(st)), out), 0 +} + +func (sf *statFile) updateStatUnlocked() ([]byte, error) { + sf.statJSON.FetchedSize = sf.blob.FetchedSize() + sf.statJSON.FetchedPercent = float64(sf.statJSON.FetchedSize) / float64(sf.statJSON.Size) * 100.0 + j, err := json.Marshal(&sf.statJSON) + if err != nil { + return nil, err + } + j = append(j, []byte("\n")...) 
+ return j, nil +} + +// inodeOfEnt calculates the inode number which is in one-to-one correspondence +// with the TOCEntry instance. +func inodeOfEnt(e *estargz.TOCEntry) uint64 { + return uint64(uintptr(unsafe.Pointer(e))) +} + +// entryToAttr converts stargz's TOCEntry to go-fuse's Attr. +func entryToAttr(e *estargz.TOCEntry, out *fuse.Attr) fusefs.StableAttr { + out.Ino = inodeOfEnt(e) + out.Size = uint64(e.Size) + out.Blksize = blockSize + out.Blocks = out.Size / uint64(out.Blksize) + if out.Size%uint64(out.Blksize) > 0 { + out.Blocks++ + } + mtime := e.ModTime() + out.SetTimes(nil, &mtime, nil) + out.Mode = modeOfEntry(e) + out.Owner = fuse.Owner{Uid: uint32(e.UID), Gid: uint32(e.GID)} + out.Rdev = uint32(unix.Mkdev(uint32(e.DevMajor), uint32(e.DevMinor))) + out.Nlink = uint32(e.NumLink) + if out.Nlink == 0 { + out.Nlink = 1 // zero "NumLink" means one. + } + out.Padding = 0 // TODO + + return fusefs.StableAttr{ + Mode: out.Mode, + Ino: out.Ino, + // NOTE: The inode number is unique throughout the lifetime of + // this filesystem so we don't consider generation at this + // moment. + } +} + +// entryToWhAttr converts stargz's TOCEntry to go-fuse's Attr of whiteouts. +func entryToWhAttr(e *estargz.TOCEntry, out *fuse.Attr) fusefs.StableAttr { + fi := e.Stat() + out.Ino = inodeOfEnt(e) + out.Size = 0 + out.Blksize = blockSize + out.Blocks = 0 + mtime := fi.ModTime() + out.SetTimes(nil, &mtime, nil) + out.Mode = syscall.S_IFCHR + out.Owner = fuse.Owner{Uid: 0, Gid: 0} + out.Rdev = uint32(unix.Mkdev(0, 0)) + out.Nlink = 1 + out.Padding = 0 // TODO + + return fusefs.StableAttr{ + Mode: out.Mode, + Ino: out.Ino, + // NOTE: The inode number is unique throughout the lifetime of + // this filesystem so we don't consider generation at this + // moment. + } +} + +// inodeOfState calculates the inode number which is in one-to-one correspondence +// with the state directory instance which was created on mount.
+func inodeOfState(s *state) uint64 { + return uint64(uintptr(unsafe.Pointer(s))) +} + +// stateToAttr converts state directory to go-fuse's Attr. +func stateToAttr(s *state, out *fuse.Attr) fusefs.StableAttr { + out.Ino = inodeOfState(s) + out.Size = 0 + out.Blksize = blockSize + out.Blocks = 0 + out.Nlink = 1 + + // root can read and open it (dr-x------ root root). + out.Mode = stateDirMode + out.Owner = fuse.Owner{Uid: 0, Gid: 0} + + // dummy + out.Mtime = 0 + out.Mtimensec = 0 + out.Rdev = 0 + out.Padding = 0 + + return fusefs.StableAttr{ + Mode: out.Mode, + Ino: out.Ino, + // NOTE: The inode number is unique throughout the lifetime of + // this filesystem so we don't consider generation at this + // moment. + } +} + +// inodeOfStatFile calculates the inode number which is in one-to-one correspondence +// with the stat file instance which was created on mount. +func inodeOfStatFile(s *statFile) uint64 { + return uint64(uintptr(unsafe.Pointer(s))) +} + +// statFileToAttr converts stat file to go-fuse's Attr. +func statFileToAttr(sf *statFile, size uint64, out *fuse.Attr) fusefs.StableAttr { + out.Ino = inodeOfStatFile(sf) + out.Size = size + out.Blksize = blockSize + out.Blocks = out.Size / uint64(out.Blksize) + out.Nlink = 1 + + // Root can read it ("-r-------- root root"). + out.Mode = statFileMode + out.Owner = fuse.Owner{Uid: 0, Gid: 0} + + // dummy + out.Mtime = 0 + out.Mtimensec = 0 + out.Rdev = 0 + out.Padding = 0 + + return fusefs.StableAttr{ + Mode: out.Mode, + Ino: out.Ino, + // NOTE: The inode number is unique throughout the lifetime of + // this filesystem so we don't consider generation at this + // moment.
+ } +} + +// modeOfEntry gets system's mode bits from TOCEntry +func modeOfEntry(e *estargz.TOCEntry) uint32 { + m := e.Stat().Mode() + + // Permission bits + res := uint32(m & os.ModePerm) + + // File type bits + switch m & os.ModeType { + case os.ModeDevice: + res |= syscall.S_IFBLK + case os.ModeDevice | os.ModeCharDevice: + res |= syscall.S_IFCHR + case os.ModeDir: + res |= syscall.S_IFDIR + case os.ModeNamedPipe: + res |= syscall.S_IFIFO + case os.ModeSymlink: + res |= syscall.S_IFLNK + case os.ModeSocket: + res |= syscall.S_IFSOCK + default: // regular file. + res |= syscall.S_IFREG + } + + // suid, sgid, sticky bits + if m&os.ModeSetuid != 0 { + res |= syscall.S_ISUID + } + if m&os.ModeSetgid != 0 { + res |= syscall.S_ISGID + } + if m&os.ModeSticky != 0 { + res |= syscall.S_ISVTX + } + + return res +} + +func defaultStatfs(stat *fuse.StatfsOut) { + + // http://man7.org/linux/man-pages/man2/statfs.2.html + stat.Blocks = 0 // dummy + stat.Bfree = 0 + stat.Bavail = 0 + stat.Files = 0 // dummy + stat.Ffree = 0 + stat.Bsize = blockSize + stat.NameLen = 1<<32 - 1 + stat.Frsize = blockSize + stat.Padding = 0 + stat.Spare = [6]uint32{} +} diff --git a/fs/layer/node_test.go b/fs/layer/node_test.go new file mode 100644 index 000000000..3b8c4bcab --- /dev/null +++ b/fs/layer/node_test.go @@ -0,0 +1,636 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. 
+ Use of this source code is governed by a BSD-style + license that can be found in the NOTICE.md file. +*/ + +package layer + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "math/rand" + "os" + "path/filepath" + "strings" + "syscall" + "testing" + "time" + + "github.com/containerd/containerd/reference" + "github.com/containerd/containerd/remotes/docker" + "github.com/containerd/stargz-snapshotter/estargz" + "github.com/containerd/stargz-snapshotter/fs/reader" + "github.com/containerd/stargz-snapshotter/fs/remote" + "github.com/containerd/stargz-snapshotter/util/testutil" + fusefs "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "golang.org/x/sys/unix" +) + +const ( + sampleMiddleOffset = sampleChunkSize / 2 + lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize) +) + +// Tests Read method of each file node. +func TestNodeRead(t *testing.T) { + sizeCond := map[string]int64{ + "single_chunk": sampleChunkSize - sampleMiddleOffset, + "multi_chunks": sampleChunkSize + sampleMiddleOffset, + } + innerOffsetCond := map[string]int64{ + "at_top": 0, + "at_middle": sampleMiddleOffset, + } + baseOffsetCond := map[string]int64{ + "of_1st_chunk": sampleChunkSize * 0, + "of_2nd_chunk": sampleChunkSize * 1, + "of_last_chunk": lastChunkOffset1, + } + fileSizeCond := map[string]int64{ + "in_1_chunk_file": sampleChunkSize * 1, + "in_2_chunks_file": sampleChunkSize * 2, + "in_max_size_file": int64(len(sampleData1)), + } + for sn, size := range sizeCond { + for in, innero := range innerOffsetCond { + for bo, baseo := range baseOffsetCond { + for fn, filesize := range fileSizeCond { + t.Run(fmt.Sprintf("reading_%s_%s_%s_%s", sn, in, bo, fn), func(t *testing.T) { + if filesize > int64(len(sampleData1)) { + t.Fatal("sample file size is larger than sample data") + } + + wantN := size + offset := baseo + innero + if remain := filesize - 
offset; remain < wantN { + if wantN = remain; wantN < 0 { + wantN = 0 + } + } + + // use constant string value as a data source. + want := strings.NewReader(sampleData1) + + // data we want to get. + wantData := make([]byte, wantN) + _, err := want.ReadAt(wantData, offset) + if err != nil && err != io.EOF { + t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err) + } + + // data we get from the file node. + f := makeNodeReader(t, []byte(sampleData1)[:filesize], sampleChunkSize) + tmpbuf := make([]byte, size) // fuse library can request bigger than remain + rr, errno := f.Read(context.Background(), tmpbuf, offset) + if errno != 0 { + t.Errorf("failed to read off=%d, size=%d, filesize=%d: %v", offset, size, filesize, err) + return + } + if rsize := rr.Size(); int64(rsize) != wantN { + t.Errorf("read size: %d; want: %d", rsize, wantN) + return + } + tmpbuf = make([]byte, len(tmpbuf)) + respData, fs := rr.Bytes(tmpbuf) + if fs != fuse.OK { + t.Errorf("failed to read result data for off=%d, size=%d, filesize=%d: %v", offset, size, filesize, err) + } + + if !bytes.Equal(wantData, respData) { + t.Errorf("off=%d, filesize=%d; read data{size=%d,data=%q}; want (size=%d,data=%q)", + offset, filesize, len(respData), string(respData), wantN, string(wantData)) + return + } + }) + } + } + } + } +} + +func makeNodeReader(t *testing.T, contents []byte, chunkSize int) *file { + testName := "test" + sgz, _, err := testutil.BuildEStargz( + []testutil.TarEntry{testutil.File(testName, string(contents))}, + testutil.WithEStargzOptions(estargz.WithChunkSize(chunkSize)), + ) + if err != nil { + t.Fatalf("failed to build sample eStargz: %v", err) + } + r, err := estargz.Open(sgz) + if err != nil { + t.Fatal("failed to make stargz") + } + rootNode := getRootNode(t, r) + var eo fuse.EntryOut + inode, errno := rootNode.Lookup(context.Background(), testName, &eo) + if errno != 0 { + t.Fatalf("failed to lookup test node; errno: %v", errno) + } + f, _, errno := 
inode.Operations().(fusefs.NodeOpener).Open(context.Background(), 0) + if errno != 0 { + t.Fatalf("failed to open test file; errno: %v", errno) + } + return f.(*file) +} + +func TestExistence(t *testing.T) { + tests := []struct { + name string + in []testutil.TarEntry + want []check + }{ + { + name: "1_whiteout_with_sibling", + in: []testutil.TarEntry{ + testutil.Dir("foo/"), + testutil.File("foo/bar.txt", ""), + testutil.File("foo/.wh.foo.txt", ""), + }, + want: []check{ + hasValidWhiteout("foo/foo.txt"), + fileNotExist("foo/.wh.foo.txt"), + }, + }, + { + name: "1_whiteout_with_duplicated_name", + in: []testutil.TarEntry{ + testutil.Dir("foo/"), + testutil.File("foo/bar.txt", "test"), + testutil.File("foo/.wh.bar.txt", ""), + }, + want: []check{ + hasFileDigest("foo/bar.txt", digestFor("test")), + fileNotExist("foo/.wh.bar.txt"), + }, + }, + { + name: "1_opaque", + in: []testutil.TarEntry{ + testutil.Dir("foo/"), + testutil.File("foo/.wh..wh..opq", ""), + }, + want: []check{ + hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue), + hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue), + fileNotExist("foo/.wh..wh..opq"), + }, + }, + { + name: "1_opaque_with_sibling", + in: []testutil.TarEntry{ + testutil.Dir("foo/"), + testutil.File("foo/.wh..wh..opq", ""), + testutil.File("foo/bar.txt", "test"), + }, + want: []check{ + hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue), + hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue), + hasFileDigest("foo/bar.txt", digestFor("test")), + fileNotExist("foo/.wh..wh..opq"), + }, + }, + { + name: "1_opaque_with_xattr", + in: []testutil.TarEntry{ + testutil.Dir("foo/", testutil.WithDirXattrs(map[string]string{"foo": "bar"})), + testutil.File("foo/.wh..wh..opq", ""), + }, + want: []check{ + hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue), + hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue), + hasNodeXattrs("foo/", "foo", "bar"), + fileNotExist("foo/.wh..wh..opq"), + }, + }, + { + name: 
"prefetch_landmark", + in: []testutil.TarEntry{ + testutil.File(estargz.PrefetchLandmark, "test"), + testutil.Dir("foo/"), + testutil.File(fmt.Sprintf("foo/%s", estargz.PrefetchLandmark), "test"), + }, + want: []check{ + fileNotExist(estargz.PrefetchLandmark), + hasFileDigest(fmt.Sprintf("foo/%s", estargz.PrefetchLandmark), digestFor("test")), + }, + }, + { + name: "no_prefetch_landmark", + in: []testutil.TarEntry{ + testutil.File(estargz.NoPrefetchLandmark, "test"), + testutil.Dir("foo/"), + testutil.File(fmt.Sprintf("foo/%s", estargz.NoPrefetchLandmark), "test"), + }, + want: []check{ + fileNotExist(estargz.NoPrefetchLandmark), + hasFileDigest(fmt.Sprintf("foo/%s", estargz.NoPrefetchLandmark), digestFor("test")), + }, + }, + { + name: "state_file", + in: []testutil.TarEntry{ + testutil.File("test", "test"), + }, + want: []check{ + hasFileDigest("test", digestFor("test")), + hasStateFile(t, testStateLayerDigest.String()+".json"), + }, + }, + { + name: "file_suid", + in: []testutil.TarEntry{ + testutil.File("test", "test", testutil.WithFileMode(0644|os.ModeSetuid)), + }, + want: []check{ + hasExtraMode("test", os.ModeSetuid), + }, + }, + { + name: "dir_sgid", + in: []testutil.TarEntry{ + testutil.Dir("test/", testutil.WithDirMode(0755|os.ModeSetgid)), + }, + want: []check{ + hasExtraMode("test/", os.ModeSetgid), + }, + }, + { + name: "file_sticky", + in: []testutil.TarEntry{ + testutil.File("test", "test", testutil.WithFileMode(0644|os.ModeSticky)), + }, + want: []check{ + hasExtraMode("test", os.ModeSticky), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sgz, _, err := testutil.BuildEStargz(tt.in) + if err != nil { + t.Fatalf("failed to build sample eStargz: %v", err) + } + r, err := estargz.Open(sgz) + if err != nil { + t.Fatalf("stargz.Open: %v", err) + } + rootNode := getRootNode(t, r) + for _, want := range tt.want { + want(t, rootNode) + } + }) + } +} + +func getRootNode(t *testing.T, r *estargz.Reader) *node { + 
rootNode, err := newNode(testStateLayerDigest, &testReader{r}, &testBlobState{10, 5}) + if err != nil { + t.Fatalf("failed to get root node: %v", err) + } + fusefs.NewNodeFS(rootNode, &fusefs.Options{}) // initializes root node + return rootNode.(*node) +} + +type testReader struct { + r *estargz.Reader +} + +func (tr *testReader) OpenFile(name string) (io.ReaderAt, error) { return tr.r.OpenFile(name) } +func (tr *testReader) Lookup(name string) (*estargz.TOCEntry, bool) { return tr.r.Lookup(name) } +func (tr *testReader) Cache(opts ...reader.CacheOption) error { return nil } + +type testBlobState struct { + size int64 + fetchedSize int64 +} + +func (tb *testBlobState) Check() error { return nil } +func (tb *testBlobState) Size() int64 { return tb.size } +func (tb *testBlobState) FetchedSize() int64 { return tb.fetchedSize } +func (tb *testBlobState) ReadAt(p []byte, offset int64, opts ...remote.Option) (int, error) { + return 0, nil +} +func (tb *testBlobState) Cache(offset int64, size int64, opts ...remote.Option) error { return nil } +func (tb *testBlobState) Refresh(ctx context.Context, host docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error { + return nil +} + +type check func(*testing.T, *node) + +func fileNotExist(file string) check { + return func(t *testing.T, root *node) { + if _, _, err := getDirentAndNode(t, root, file); err == nil { + t.Errorf("Node %q exists", file) + } + } +} + +func hasFileDigest(file string, digest string) check { + return func(t *testing.T, root *node) { + _, n, err := getDirentAndNode(t, root, file) + if err != nil { + t.Fatalf("failed to get node %q: %v", file, err) + } + if ndgst := n.Operations().(*node).e.Digest; ndgst != digest { + t.Fatalf("Digest(%q) = %q, want %q", file, ndgst, digest) + } + } +} + +func hasExtraMode(name string, mode os.FileMode) check { + return func(t *testing.T, root *node) { + _, n, err := getDirentAndNode(t, root, name) + if err != nil { + t.Fatalf("failed to get node %q: 
%v", name, err) + } + var ao fuse.AttrOut + if errno := n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao); errno != 0 { + t.Fatalf("failed to get attributes of node %q: %v", name, errno) + } + a := ao.Attr + gotMode := a.Mode & (syscall.S_ISUID | syscall.S_ISGID | syscall.S_ISVTX) + wantMode := extraModeToTarMode(mode) + if gotMode != uint32(wantMode) { + t.Fatalf("got mode = %b, want %b", gotMode, wantMode) + } + } +} + +func hasValidWhiteout(name string) check { + return func(t *testing.T, root *node) { + ent, n, err := getDirentAndNode(t, root, name) + if err != nil { + t.Fatalf("failed to get node %q: %v", name, err) + } + var ao fuse.AttrOut + if errno := n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao); errno != 0 { + t.Fatalf("failed to get attributes of file %q: %v", name, errno) + } + a := ao.Attr + if a.Ino != ent.Ino { + t.Errorf("inconsistent inodes %d(Node) != %d(Dirent)", a.Ino, ent.Ino) + return + } + + // validate the direntry + if ent.Mode != syscall.S_IFCHR { + t.Errorf("whiteout entry %q isn't a char device", name) + return + } + + // validate the node + if a.Mode != syscall.S_IFCHR { + t.Errorf("whiteout %q has an invalid mode %o; want %o", + name, a.Mode, syscall.S_IFCHR) + return + } + if a.Rdev != uint32(unix.Mkdev(0, 0)) { + t.Errorf("whiteout %q has invalid device numbers (%d, %d); want (0, 0)", + name, unix.Major(uint64(a.Rdev)), unix.Minor(uint64(a.Rdev))) + return + } + } +} + +func hasNodeXattrs(entry, name, value string) check { + return func(t *testing.T, root *node) { + _, n, err := getDirentAndNode(t, root, entry) + if err != nil { + t.Fatalf("failed to get node %q: %v", entry, err) + } + + // check xattr exists in the xattrs list. 
+ buf := make([]byte, 1000) + nb, errno := n.Operations().(fusefs.NodeListxattrer).Listxattr(context.Background(), buf) + if errno != 0 { + t.Fatalf("failed to get xattrs list of node %q: %v", entry, err) + } + attrs := strings.Split(string(buf[:nb]), "\x00") + var found bool + for _, x := range attrs { + if x == name { + found = true + } + } + if !found { + t.Errorf("node %q doesn't have an opaque xattr %q", entry, value) + return + } + + // check the xattr has valid value. + v := make([]byte, len(value)) + nv, errno := n.Operations().(fusefs.NodeGetxattrer).Getxattr(context.Background(), name, v) + if errno != 0 { + t.Fatalf("failed to get xattr %q of node %q: %v", name, entry, err) + } + if int(nv) != len(value) { + t.Fatalf("invalid xattr size for file %q, value %q got %d; want %d", + name, value, nv, len(value)) + } + if string(v) != value { + t.Errorf("node %q has an invalid xattr %q; want %q", entry, v, value) + return + } + } +} + +func hasEntry(t *testing.T, name string, ents fusefs.DirStream) (fuse.DirEntry, bool) { + for ents.HasNext() { + de, errno := ents.Next() + if errno != 0 { + t.Fatalf("faield to read entries for %q", name) + } + if de.Name == name { + return de, true + } + } + return fuse.DirEntry{}, false +} + +func hasStateFile(t *testing.T, id string) check { + return func(t *testing.T, root *node) { + + // Check the state dir is hidden on OpenDir for "/" + ents, errno := root.Readdir(context.Background()) + if errno != 0 { + t.Errorf("failed to open root directory: %v", errno) + return + } + if _, ok := hasEntry(t, stateDirName, ents); ok { + t.Errorf("state direntry %q should not be listed", stateDirName) + return + } + + // Check existence of state dir + var eo fuse.EntryOut + sti, errno := root.Lookup(context.Background(), stateDirName, &eo) + if errno != 0 { + t.Errorf("failed to lookup directory %q: %v", stateDirName, errno) + return + } + st, ok := sti.Operations().(*state) + if !ok { + t.Errorf("directory %q isn't a state node", 
stateDirName) + return + } + + // Check existence of state file + ents, errno = st.Readdir(context.Background()) + if errno != 0 { + t.Errorf("failed to open directory %q: %v", stateDirName, errno) + return + } + if _, ok := hasEntry(t, id, ents); !ok { + t.Errorf("direntry %q not found in %q", id, stateDirName) + return + } + inode, errno := st.Lookup(context.Background(), id, &eo) + if errno != 0 { + t.Errorf("failed to lookup node %q in %q: %v", id, stateDirName, errno) + return + } + n, ok := inode.Operations().(*statFile) + if !ok { + t.Errorf("entry %q isn't a normal node", id) + return + } + + // wanted data + rand.Seed(time.Now().UnixNano()) + wantErr := fmt.Errorf("test-%d", rand.Int63()) + + // report the data + root.s.report(wantErr) + + // obtain file size (check later) + var ao fuse.AttrOut + errno = n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao) + if errno != 0 { + t.Errorf("failed to get attr of state file: %v", errno) + return + } + attr := ao.Attr + + // get data via state file + tmp := make([]byte, 4096) + res, errno := n.Read(context.Background(), nil, tmp, 0) + if errno != 0 { + t.Errorf("failed to read state file: %v", errno) + return + } + gotState, status := res.Bytes(nil) + if status != fuse.OK { + t.Errorf("failed to get result bytes of state file: %v", errno) + return + } + if attr.Size != uint64(len(string(gotState))) { + t.Errorf("size %d; want %d", attr.Size, len(string(gotState))) + return + } + + var j statJSON + if err := json.Unmarshal(gotState, &j); err != nil { + t.Errorf("failed to unmarshal %q: %v", string(gotState), err) + return + } + if wantErr.Error() != j.Error { + t.Errorf("expected error %q, got %q", wantErr.Error(), j.Error) + return + } + } +} + +// getDirentAndNode gets dirent and node at the specified path at once and makes +// sure that the both of them exist. 
+func getDirentAndNode(t *testing.T, root *node, path string) (ent fuse.DirEntry, n *fusefs.Inode, err error) { + dir, base := filepath.Split(filepath.Clean(path)) + + // get the target's parent directory. + var eo fuse.EntryOut + d := root + for _, name := range strings.Split(dir, "/") { + if len(name) == 0 { + continue + } + di, errno := d.Lookup(context.Background(), name, &eo) + if errno != 0 { + err = fmt.Errorf("failed to lookup directory %q: %v", name, errno) + return + } + var ok bool + if d, ok = di.Operations().(*node); !ok { + err = fmt.Errorf("directory %q isn't a normal node", name) + return + } + + } + + // get the target's direntry. + ents, errno := d.Readdir(context.Background()) + if errno != 0 { + err = fmt.Errorf("failed to open directory %q: %v", path, errno) + } + ent, ok := hasEntry(t, base, ents) + if !ok { + err = fmt.Errorf("direntry %q not found in the parent directory of %q", base, path) + } + + // get the target's node. + n, errno = d.Lookup(context.Background(), base, &eo) + if errno != 0 { + err = fmt.Errorf("failed to lookup node %q: %v", path, errno) + } + + return +} + +func digestFor(content string) string { + sum := sha256.Sum256([]byte(content)) + return fmt.Sprintf("sha256:%x", sum) +} + +// suid, guid, sticky bits for archive/tar +// https://github.com/golang/go/blob/release-branch.go1.13/src/archive/tar/common.go#L607-L609 +const ( + cISUID = 04000 // Set uid + cISGID = 02000 // Set gid + cISVTX = 01000 // Save text (sticky bit) +) + +func extraModeToTarMode(fm os.FileMode) (tm int64) { + if fm&os.ModeSetuid != 0 { + tm |= cISUID + } + if fm&os.ModeSetgid != 0 { + tm |= cISGID + } + if fm&os.ModeSticky != 0 { + tm |= cISVTX + } + return +} diff --git a/fs/reader/reader.go b/fs/reader/reader.go index 9e7c09a68..d3b5627fd 100644 --- a/fs/reader/reader.go +++ b/fs/reader/reader.go @@ -87,15 +87,10 @@ func (nv nopVerifier) Verified() bool { // NewReader creates a Reader based on the given stargz blob and cache implementation. 
// It returns VerifiableReader so the caller must provide a estargz.TOCEntryVerifier // to use for verifying file or chunk contained in this stargz blob. -func NewReader(sr *io.SectionReader, cache cache.BlobCache) (*VerifiableReader, *estargz.TOCEntry, error) { +func NewReader(sr *io.SectionReader, cache cache.BlobCache) (*VerifiableReader, error) { r, err := estargz.Open(sr) if err != nil { - return nil, nil, errors.Wrap(err, "failed to parse stargz") - } - - root, ok := r.Lookup("") - if !ok { - return nil, nil, fmt.Errorf("failed to get a TOCEntry of the root") + return nil, errors.Wrap(err, "failed to parse stargz") } vr := &reader{ @@ -109,7 +104,7 @@ func NewReader(sr *io.SectionReader, cache cache.BlobCache) (*VerifiableReader, }, } - return &VerifiableReader{vr}, root, nil + return &VerifiableReader{vr}, nil } type reader struct { diff --git a/fs/reader/reader_test.go b/fs/reader/reader_test.go index 23dc375ec..0cacfc81c 100644 --- a/fs/reader/reader_test.go +++ b/fs/reader/reader_test.go @@ -45,9 +45,12 @@ const ( // Tests Reader for failure cases. 
func TestFailReader(t *testing.T) { testFileName := "test" - stargzFile, _ := buildStargz(t, []testutil.TarEntry{ + stargzFile, _, err := testutil.BuildEStargz([]testutil.TarEntry{ testutil.File(testFileName, sampleData1), - }, chunkSizeInfo(sampleChunkSize)) + }, testutil.WithEStargzOptions(estargz.WithChunkSize(sampleChunkSize))) + if err != nil { + t.Fatalf("failed to build sample estargz") + } br := &breakReaderAt{ ReaderAt: stargzFile, success: true, @@ -288,11 +291,14 @@ func (er *exceptSectionReader) ReadAt(p []byte, offset int64) (int, error) { return er.ra.ReadAt(p, offset) } -func makeFile(t *testing.T, contents []byte, chunkSize int64) *file { +func makeFile(t *testing.T, contents []byte, chunkSize int) *file { testName := "test" - sr, dgst := buildStargz(t, []testutil.TarEntry{ + sr, dgst, err := testutil.BuildEStargz([]testutil.TarEntry{ testutil.File(testName, string(contents)), - }, chunkSizeInfo(chunkSize)) + }, testutil.WithEStargzOptions(estargz.WithChunkSize(chunkSize))) + if err != nil { + t.Fatalf("failed to build sample estargz") + } sgz, err := estargz.Open(sr) if err != nil { @@ -318,46 +324,16 @@ func makeFile(t *testing.T, contents []byte, chunkSize int64) *file { return f } -type chunkSizeInfo int - -func buildStargz(t *testing.T, ents []testutil.TarEntry, opts ...interface{}) (*io.SectionReader, digest.Digest) { - var chunkSize chunkSizeInfo - for _, opt := range opts { - if v, ok := opt.(chunkSizeInfo); ok { - chunkSize = v - } else { - t.Fatalf("unsupported opt") - } - } - - tarBuf := new(bytes.Buffer) - if _, err := io.Copy(tarBuf, testutil.BuildTar(ents)); err != nil { - t.Fatalf("failed to build tar: %v", err) - } - tarData := tarBuf.Bytes() - rc, err := estargz.Build( - io.NewSectionReader(bytes.NewReader(tarData), 0, int64(len(tarData))), - estargz.WithChunkSize(int(chunkSize)), - ) - if err != nil { - t.Fatalf("failed to build verifiable stargz: %v", err) - } - defer rc.Close() - vsb := new(bytes.Buffer) - if _, err := 
io.Copy(vsb, rc); err != nil { - t.Fatalf("failed to copy built stargz blob: %v", err) - } - vsbb := vsb.Bytes() - - return io.NewSectionReader(bytes.NewReader(vsbb), 0, int64(len(vsbb))), rc.TOCDigest() -} - func newReader(sr *io.SectionReader, cache cache.BlobCache, ev estargz.TOCEntryVerifier) (*reader, *estargz.TOCEntry, error) { var r *reader - vr, root, err := NewReader(sr, cache) + vr, err := NewReader(sr, cache) if vr != nil { r = vr.r r.verifier = ev } + root, ok := r.Lookup("") + if !ok { + return nil, nil, fmt.Errorf("failed to get root") + } return r, root, err } diff --git a/go.mod b/go.mod index 2df3d5d09..d5eb3c530 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/containerd/continuity v0.1.0 github.com/containerd/go-cni v1.0.2 github.com/containerd/stargz-snapshotter/estargz v0.5.0 + github.com/containers/storage v1.24.4 github.com/coreos/go-systemd/v22 v22.1.0 github.com/docker/cli v0.0.0-20191017083524-a8ff7f821017 github.com/docker/docker v17.12.0-ce-rc1.0.20200730172259-9f28837c1d93+incompatible // indirect diff --git a/go.sum b/go.sum index cb507542c..bf994fea1 100644 --- a/go.sum +++ b/go.sum @@ -89,11 +89,13 @@ github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/checkpoint-restore/go-criu/v4 v4.0.2/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod 
h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg= +github.com/cilium/ebpf v0.0.0-20200507155900-a9f01edf17e3/go.mod h1:XT+cAw5wfvsodedcijoh1l9cf7v1x9FlFB/3VmF/O8s= github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc= github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/cilium/ebpf v0.4.0 h1:QlHdikaxALkqWasW8hAC1mfR0jdmvbfaBdBPFmRSglA= @@ -120,6 +122,7 @@ github.com/containerd/cgroups v1.0.0/go.mod h1:sgGgnAnNasYdJ1ypnikP2SO7SM0Lfgkgw github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= +github.com/containerd/console v1.0.0/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= github.com/containerd/console v1.0.2 h1:Pi6D+aZXM+oUw1czuKgH5IJ+y0jhYcwBJfx5/Ghn9dE= github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ= @@ -194,6 +197,8 @@ github.com/containernetworking/plugins v0.9.1/go.mod h1:xP/idU2ldlzN6m4p5LmGiwRD github.com/containers/ocicrypt v1.0.1/go.mod h1:MeJDzk1RJHv89LjsH0Sp5KTY3ZYkjXO/C+bKAeWFIrc= github.com/containers/ocicrypt v1.1.0/go.mod h1:b8AOe0YR67uU8OqfVNcznfFpAzu3rdgUV4GP9qXPfu4= github.com/containers/ocicrypt v1.1.1/go.mod h1:Dm55fwWm1YZAjYRaJ94z2mfZikIyIN4B0oB3dj3jFxY= +github.com/containers/storage v1.24.4 h1:QJn/C/4eNbYNpxYdnIn1u4lElIB7V9IesRraLf68JjY= +github.com/containers/storage v1.24.4/go.mod h1:Y793GKrV3RVM1Jt4QejXtCJHGUPLrDvQ9LAbCyJ9OKs= github.com/coreos/bbolt v1.3.2/go.mod 
h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= @@ -417,9 +422,12 @@ github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQL github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.12.1 h1:/+xsCsk06wE38cyiqOR/o7U2fSftcH72xD+BQXmja/g= github.com/klauspost/compress v1.12.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= +github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -444,16 +452,19 @@ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaO github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= +github.com/mattn/go-shellwords v1.0.10/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= 
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= +github.com/mistifyio/go-zfs v2.1.1+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/osext v0.0.0-20151018003038-5e2d6d41470f/go.mod h1:OkQIRizQZAeMln+1tSwduZz7+Af5oFlKirV/MSYes2A= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= +github.com/moby/sys/mountinfo v0.1.3/go.mod h1:w2t2Avltqx8vE7gX5l+QiBKxODu2TX0+Syr3h52Tw4o= github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= github.com/moby/sys/mountinfo v0.4.1 h1:1O+1cHA1aujwEwwVMa2Xm2l+gIpUHyd3+D+d7LZh1kM= github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= @@ -466,6 +477,7 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/mrunalp/fileutils 
v0.0.0-20171103030105-7d4729fb3618/go.mod h1:x8F1gnqOkIEiO4rqoeEEEqQbo7HjGMTvyoq3gej4iT0= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= @@ -505,15 +517,18 @@ github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5X github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= +github.com/opencontainers/runc v1.0.0-rc91/go.mod h1:3Sm6Dt7OT8z88EbdQqqcRN2oCT54jbi72tT/HqgflT8= github.com/opencontainers/runc v1.0.0-rc93 h1:x2UMpOOVf3kQ8arv/EsDGwim8PTNqzL1/EYDr/+scOM= github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d h1:pNa8metDkwZjb9g4T8s+krQ+HRgZAkqnXml+wNir/+s= github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= 
github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= +github.com/opencontainers/selinux v1.5.1/go.mod h1:yTcKuYAh6R95iDpefGLQaPaRwJFwyzAJufJyiTt7s0g= github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE= github.com/opencontainers/selinux v1.8.0 h1:+77ba4ar4jsCbL1GLbFL8fFM57w6suPfSS9PDLDY7KM= github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= @@ -529,6 +544,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA= +github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7 h1:gGBSHPOU7g8YjTbhwn+lvFm2VDEhhA+PwDIlstkgSxE= +github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M= github.com/prometheus/client_golang v0.0.0-20180209125602-c332b6f63c06/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= @@ -616,11 +633,13 @@ github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= +github.com/tchap/go-patricia v2.3.0+incompatible/go.mod 
h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/urfave/cli v1.22.1 h1:+mkCCcOFKPnCmVYVcURKps1Xe+3zP90gSYGNfRkjoIY= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/vbatts/tar-split v0.11.1/go.mod h1:LEuURwDEiWjRjwu46yU3KVGuUdVv/dcnpcEPSzR8z6g= github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= @@ -794,6 +813,7 @@ golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/script/benchmark/config-containerd/etc/containerd-stargz-grpc/config.toml b/script/benchmark/config-containerd/etc/containerd-stargz-grpc/config.toml new file 
mode 100644 index 000000000..43243b385 --- /dev/null +++ b/script/benchmark/config-containerd/etc/containerd-stargz-grpc/config.toml @@ -0,0 +1,2 @@ +# the value of noprefetch will be replaced during benchmarking +noprefetch = true diff --git a/script/benchmark/config/config.containerd.toml b/script/benchmark/config-containerd/etc/containerd/config.toml similarity index 100% rename from script/benchmark/config/config.containerd.toml rename to script/benchmark/config-containerd/etc/containerd/config.toml diff --git a/script/benchmark/config-podman/etc/containers/policy.json b/script/benchmark/config-podman/etc/containers/policy.json new file mode 100644 index 000000000..bb26e57ff --- /dev/null +++ b/script/benchmark/config-podman/etc/containers/policy.json @@ -0,0 +1,7 @@ +{ + "default": [ + { + "type": "insecureAcceptAnything" + } + ] +} diff --git a/script/benchmark/config-podman/etc/stargz-store/config.toml b/script/benchmark/config-podman/etc/stargz-store/config.toml new file mode 100644 index 000000000..43243b385 --- /dev/null +++ b/script/benchmark/config-podman/etc/stargz-store/config.toml @@ -0,0 +1,2 @@ +# the value of noprefetch will be replaced during benchmarking +noprefetch = true diff --git a/script/benchmark/config/config.stargz.toml b/script/benchmark/config/config.stargz.toml deleted file mode 100644 index 3defb02db..000000000 --- a/script/benchmark/config/config.stargz.toml +++ /dev/null @@ -1 +0,0 @@ -noprefetch = true diff --git a/script/benchmark/hello-bench/prepare.sh b/script/benchmark/hello-bench/prepare.sh index 3156c5344..2ca16b8b4 100755 --- a/script/benchmark/hello-bench/prepare.sh +++ b/script/benchmark/hello-bench/prepare.sh @@ -20,7 +20,6 @@ CONTEXT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/" REPO="${CONTEXT}../../../" MEASURING_SCRIPT="${REPO}/script/benchmark/hello-bench/src/hello.py" REBOOT_CONTAINERD_SCRIPT="${REPO}/script/benchmark/hello-bench/reboot_containerd.sh" -NERDCTL_VERSION="0.7.3" if [ $# -lt 1 ] 
; then echo "Specify benchmark target." @@ -31,18 +30,5 @@ fi TARGET_REPOSITORY="${1}" TARGET_IMAGES=${@:2} -if ! which ctr-remote ; then - echo "ctr-remote not found, installing..." - mkdir -p /tmp/out - PREFIX=/tmp/out/ make clean && \ - PREFIX=/tmp/out/ make ctr-remote && \ - install /tmp/out/ctr-remote /usr/local/bin -fi - -if ! which nerdctl ; then - wget -O /tmp/nerdctl.tar.gz "https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-amd64.tar.gz" - tar zxvf /tmp/nerdctl.tar.gz -C /usr/local/bin/ -fi - -NO_STARGZ_SNAPSHOTTER="true" "${REBOOT_CONTAINERD_SCRIPT}" +DISABLE_ESTARGZ="true" "${REBOOT_CONTAINERD_SCRIPT}" "${MEASURING_SCRIPT}" --repository=${TARGET_REPOSITORY} --op=prepare ${TARGET_IMAGES} diff --git a/script/benchmark/hello-bench/reboot_containerd.sh b/script/benchmark/hello-bench/reboot_containerd.sh index e6ce83910..b980195df 100755 --- a/script/benchmark/hello-bench/reboot_containerd.sh +++ b/script/benchmark/hello-bench/reboot_containerd.sh @@ -45,6 +45,7 @@ function retry { function kill_all { if [ "${1}" != "" ] ; then ps aux | grep "${1}" \ + | grep -v "benchmark" \ | grep -v grep \ | grep -v "hello.py" \ | grep -v $(basename ${0}) \ @@ -68,7 +69,14 @@ echo "cleaning up the environment..." kill_all "containerd" kill_all "containerd-stargz-grpc" cleanup -if [ "${NO_STARGZ_SNAPSHOTTER:-}" == "true" ] ; then + +if [ "${DISABLE_PREFETCH:-}" == "true" ] ; then + sed -i 's/noprefetch = .*/noprefetch = true/g' "${REMOTE_SNAPSHOTTER_CONFIG_DIR}config.toml" +else + sed -i 's/noprefetch = .*/noprefetch = false/g' "${REMOTE_SNAPSHOTTER_CONFIG_DIR}config.toml" +fi + +if [ "${DISABLE_ESTARGZ:-}" == "true" ] ; then echo "DO NOT RUN remote snapshotter" else echo "running remote snaphsotter..." 
diff --git a/script/benchmark/hello-bench/reboot_store.sh b/script/benchmark/hello-bench/reboot_store.sh new file mode 100755 index 000000000..d3e107407 --- /dev/null +++ b/script/benchmark/hello-bench/reboot_store.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +PODMAN_CONFIG_DIR=/etc/containers/ +PODMAN_STORAGE_CONFIG_FILE="${PODMAN_CONFIG_DIR}storage.conf" +REG_STORAGE_CONFIG_FILE="/etc/stargz-store/config.toml" +REG_STORAGE_ROOT=/var/lib/stargz-store/ +REG_STORAGE_DIR="${REG_STORAGE_ROOT}store/" +REG_STORAGE_POOL_LINK="${REG_STORAGE_ROOT}store/pool" +REG_STORAGE_MOUNTPOINT="${REG_STORAGE_DIR}" + +RETRYNUM=30 +RETRYINTERVAL=1 +TIMEOUTSEC=180 +function retry { + local SUCCESS=false + for i in $(seq ${RETRYNUM}) ; do + if eval "timeout ${TIMEOUTSEC} ${@}" ; then + SUCCESS=true + break + fi + echo "Fail(${i}). Retrying..." 
+ sleep ${RETRYINTERVAL} + done + if [ "${SUCCESS}" == "true" ] ; then + return 0 + else + return 1 + fi +} + +function kill_all { + if [ "${1}" != "" ] ; then + ps aux | grep "${1}" \ + | grep -v grep \ + | grep -v "hello.py" \ + | grep -v $(basename ${0}) \ + | sed -E 's/ +/ /g' | cut -f 2 -d ' ' | xargs -I{} kill -9 {} || true + fi +} + +function cleanup { + umount "${REG_STORAGE_MOUNTPOINT}" || true + rm -rf "${REG_STORAGE_DIR}" || true + if [ -d "${REG_STORAGE_ROOT}pool/" ] ; then + for POOL in $(ls "${REG_STORAGE_ROOT}pool/") ; do + umount "${REG_STORAGE_ROOT}pool/${POOL}" || true + for MP in $(ls "${REG_STORAGE_ROOT}pool/${POOL}") ; do + umount "${REG_STORAGE_ROOT}pool/${POOL}/${MP}" || true + done + done + fi + rm -rf "${REG_STORAGE_ROOT}"* + rm "${PODMAN_STORAGE_CONFIG_FILE}" || true + podman system reset -f +} + +echo "cleaning up the environment..." +kill_all "stargz-store" +cleanup + +if [ "${DISABLE_PREFETCH:-}" == "true" ] ; then + sed -i 's/noprefetch = .*/noprefetch = true/g' "${REG_STORAGE_CONFIG_FILE}" +else + sed -i 's/noprefetch = .*/noprefetch = false/g' "${REG_STORAGE_CONFIG_FILE}" +fi + +mkdir -p "${PODMAN_CONFIG_DIR}" + +if [ "${DISABLE_ESTARGZ:-}" == "true" ] ; then + echo "DO NOT RUN additional storage" + cat < "${PODMAN_STORAGE_CONFIG_FILE}" +[storage] +driver = "overlay" +graphroot = "/var/lib/containers/storage" +runroot = "/run/containers/storage" +EOF +else + echo "running remote snaphsotter..." 
+ if [ "${LOG_FILE:-}" == "" ] ; then + LOG_FILE=/dev/null + fi + cat < "${PODMAN_STORAGE_CONFIG_FILE}" +[storage] +driver = "overlay" +graphroot = "/var/lib/containers/storage" +runroot = "/run/containers/storage" + +[storage.options] +additionallayerstores = ["${REG_STORAGE_MOUNTPOINT}:ref"] +EOF + mkdir -p "${REG_STORAGE_MOUNTPOINT}" + stargz-store --log-level=debug \ + --config="${REG_STORAGE_CONFIG_FILE}" \ + "${REG_STORAGE_MOUNTPOINT}" \ + 2>&1 | tee -a "${LOG_FILE}" & # Dump all log + retry ls "${REG_STORAGE_POOL_LINK}" > /dev/null +fi diff --git a/script/benchmark/hello-bench/run.sh b/script/benchmark/hello-bench/run.sh index 989952f57..52776a377 100755 --- a/script/benchmark/hello-bench/run.sh +++ b/script/benchmark/hello-bench/run.sh @@ -30,10 +30,6 @@ REPO="${CONTEXT}../../../" source "${REPO}/script/util/utils.sh" MEASURING_SCRIPT="${REPO}/script/benchmark/hello-bench/src/hello.py" -REBOOT_CONTAINERD_SCRIPT="${REPO}/script/benchmark/hello-bench/reboot_containerd.sh" -REPO_CONFIG_DIR="${REPO}/script/benchmark/hello-bench/config/" -CONTAINERD_CONFIG_DIR=/etc/containerd/ -REMOTE_SNAPSHOTTER_CONFIG_DIR=/etc/containerd-stargz-grpc/ BENCHMARKOUT_MARK_OUTPUT="BENCHMARK_OUTPUT: " if [ $# -lt 1 ] ; then @@ -46,6 +42,16 @@ TARGET_REPOSITORY="${1}" TARGET_IMAGES=${@:2} NUM_OF_SAMPLES="${BENCHMARK_SAMPLES_NUM:-1}" +REBOOT_SCRIPT= +if [ "${BENCHMARK_RUNTIME_MODE}" == "containerd" ] ; then + REBOOT_SCRIPT="${REPO}/script/benchmark/hello-bench/reboot_containerd.sh" +elif [ "${BENCHMARK_RUNTIME_MODE}" == "podman" ] ; then + REBOOT_SCRIPT="${REPO}/script/benchmark/hello-bench/reboot_store.sh" +else + echo "Unknown runtime: ${BENCHMARK_RUNTIME_MODE}" + exit 1 +fi + TMP_LOG_FILE=$(mktemp) WORKLOADS_LIST=$(mktemp) function cleanup { @@ -60,15 +66,10 @@ function output { echo "${BENCHMARKOUT_MARK_OUTPUT}${1}" } -function set_noprefetch { - local NOPREFETCH="${1}" - sed -i 's/noprefetch = .*/noprefetch = '"${NOPREFETCH}"'/g' "${REMOTE_SNAPSHOTTER_CONFIG_DIR}config.toml" -} - 
function measure { local OPTION="${1}" local REPOSITORY="${2}" - "${MEASURING_SCRIPT}" ${OPTION} --repository=${REPOSITORY} --op=run --experiments=1 ${@:3} + "${MEASURING_SCRIPT}" ${OPTION} --repository=${REPOSITORY} --op=run --experiments=1 --runtime="${BENCHMARK_RUNTIME_MODE}" ${@:3} } echo "=========" @@ -109,22 +110,22 @@ for SAMPLE_NO in $(seq ${NUM_OF_SAMPLES}) ; do echo "===== Measuring [${SAMPLE_NO}] ${IMAGE} (${MODE}) =====" if [ "${MODE}" == "${LEGACY_MODE}" ] ; then - NO_STARGZ_SNAPSHOTTER="true" "${REBOOT_CONTAINERD_SCRIPT}" + # disable lazy pulling + DISABLE_ESTARGZ="true" "${REBOOT_SCRIPT}" measure "--mode=legacy" ${TARGET_REPOSITORY} ${IMAGE} fi if [ "${MODE}" == "${ESTARGZ_NOOPT_MODE}" ] ; then echo -n "" > "${TMP_LOG_FILE}" - set_noprefetch "true" # disable prefetch - LOG_FILE="${TMP_LOG_FILE}" "${REBOOT_CONTAINERD_SCRIPT}" + # disable prefetch + DISABLE_PREFETCH="true" LOG_FILE="${TMP_LOG_FILE}" "${REBOOT_SCRIPT}" measure "--mode=estargz-noopt" ${TARGET_REPOSITORY} ${IMAGE} check_remote_snapshots "${TMP_LOG_FILE}" fi if [ "${MODE}" == "${ESTARGZ_MODE}" ] ; then echo -n "" > "${TMP_LOG_FILE}" - set_noprefetch "false" # enable prefetch - LOG_FILE="${TMP_LOG_FILE}" "${REBOOT_CONTAINERD_SCRIPT}" + LOG_FILE="${TMP_LOG_FILE}" "${REBOOT_SCRIPT}" measure "--mode=estargz" ${TARGET_REPOSITORY} ${IMAGE} check_remote_snapshots "${TMP_LOG_FILE}" fi diff --git a/script/benchmark/hello-bench/src/hello.py b/script/benchmark/hello-bench/src/hello.py index ed7b12aed..97430bf4f 100755 --- a/script/benchmark/hello-bench/src/hello.py +++ b/script/benchmark/hello-bench/src/hello.py @@ -47,6 +47,8 @@ DEFAULT_PUSHER = "nerdctl image push" DEFAULT_TAGGER = "nerdctl image tag" BENCHMARKOUT_MARK = "BENCHMARK_OUTPUT: " +CTR="ctr" +PODMAN="podman" def exit(status): # cleanup @@ -63,12 +65,20 @@ def tmp_copy(src): shutil.copytree(src, dst) return dst -def genargs(arg): +def genargs_for_optimization(arg): if arg == None or arg == "": return "" else: return '-args \'["%s"]\'' % 
arg.replace('"', '\\\"').replace('\'', '\'"\'"\'') +def format_repo(mode, repository, name): + if mode == ESTARGZ_MODE: + return "%s/%s-esgz" % (repository, name) + elif mode == ESTARGZ_NOOPT_MODE: + return "%s/%s-esgz-noopt" % (repository, name) + else: + return "%s/%s-org" % (repository, name) + class RunArgs: def __init__(self, env={}, arg='', stdin='', stdin_sh='sh', waitline='', mount=[]): self.env = env @@ -81,7 +91,6 @@ def __init__(self, env={}, arg='', stdin='', stdin_sh='sh', waitline='', mount=[ class Bench: def __init__(self, name, category='other'): self.name = name - self.repo = name # TODO: maybe we'll eventually have multiple benches per repo self.category = category def __str__(self): @@ -142,8 +151,14 @@ class BenchRunner: Bench('wordpress:5.7', 'web-server'), ]]) - def __init__(self, repository='docker.io/library', srcrepository='docker.io/library', mode=LEGACY_MODE, optimizer=DEFAULT_OPTIMIZER, puller=DEFAULT_PULLER, pusher=DEFAULT_PUSHER): - self.docker = 'ctr' + def __init__(self, repository='docker.io/library', srcrepository='docker.io/library', mode=LEGACY_MODE, optimizer=DEFAULT_OPTIMIZER, puller=DEFAULT_PULLER, pusher=DEFAULT_PUSHER, runtime="containerd"): + if runtime == "containerd": + self.controller = ContainerdController(mode == ESTARGZ_NOOPT_MODE or mode == ESTARGZ_MODE) + elif runtime == "podman": + self.controller = PodmanController() + else: + print 'Unknown runtime mode: '+runtime + exit(1) self.repository = repository self.srcrepository = srcrepository self.mode = mode @@ -151,98 +166,45 @@ def __init__(self, repository='docker.io/library', srcrepository='docker.io/libr self.puller = puller self.pusher = pusher - def lazypull(self): - if self.mode == ESTARGZ_NOOPT_MODE or self.mode == ESTARGZ_MODE: - return True - else: - return False + def cleanup(self, cid, bench): + self.controller.cleanup(cid, self.fully_qualify(bench.name)) - def cleanup(self, name, image): - print "Cleaning up environment..." 
- cmd = '%s t kill -s 9 %s' % (self.docker, name) - print cmd - rc = os.system(cmd) # sometimes containers already exit. we ignore the failure. - cmd = '%s c rm %s' % (self.docker, name) - print cmd - rc = os.system(cmd) - assert(rc == 0) - cmd = '%s image rm %s' % (self.docker, image) - print cmd - rc = os.system(cmd) - assert(rc == 0) - cmd = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../reboot_containerd.sh') # clear cache - print cmd - rc = os.system(cmd) - assert(rc == 0) - - def snapshotter_opt(self): - if self.lazypull(): - return "--snapshotter=stargz" - else: - return "" - - def add_suffix(self, repo): - if self.mode == ESTARGZ_MODE: - return "%s-esgz" % repo - elif self.mode == ESTARGZ_NOOPT_MODE: - return "%s-esgz-noopt" % repo - else: - return "%s-org" % repo - - def pull_subcmd(self): - if self.lazypull(): - return "rpull" - else: - return "pull" - - def docker_pullbin(self): - if self.lazypull(): - return "ctr-remote" - else: - return "ctr" + def fully_qualify(self, repo): + return format_repo(self.mode, self.repository, repo) def run_task(self, cid): - cmd = '%s t start %s' % (self.docker, cid) - print cmd + cmd = self.controller.task_start_cmd(cid) startrun = time.time() rc = os.system(cmd) runtime = time.time() - startrun assert(rc == 0) return runtime - def run_echo_hello(self, repo, cid): - cmd = ('%s c create --net-host %s -- %s/%s %s echo hello' % - (self.docker, self.snapshotter_opt(), self.repository, self.add_suffix(repo), cid)) - print cmd + def run_echo_hello(self, image, cid): + cmd = self.controller.create_echo_hello_cmd(image, cid) startcreate = time.time() rc = os.system(cmd) createtime = time.time() - startcreate assert(rc == 0) return createtime, self.run_task(cid) - def run_cmd_arg(self, repo, cid, runargs): + def run_cmd_arg(self, image, cid, runargs): assert(len(runargs.mount) == 0) - cmd = '%s c create --net-host %s ' % (self.docker, self.snapshotter_opt()) - cmd += '-- %s/%s %s ' % (self.repository, 
self.add_suffix(repo), cid) - cmd += runargs.arg - print cmd + cmd = self.controller.create_cmd_arg_cmd(image, cid, runargs) startcreate = time.time() rc = os.system(cmd) createtime = time.time() - startcreate assert(rc == 0) return createtime, self.run_task(cid) - def run_cmd_arg_wait(self, repo, cid, runargs): - env = ' '.join(['--env %s=%s' % (k,v) for k,v in runargs.env.iteritems()]) - cmd = ('%s c create --net-host %s %s -- %s/%s %s %s' % - (self.docker, self.snapshotter_opt(), env, self.repository, self.add_suffix(repo), cid, runargs.arg)) - print cmd + def run_cmd_arg_wait(self, image, cid, runargs): + cmd = self.controller.create_cmd_arg_wait_cmd(image, cid, runargs) startcreate = time.time() rc = os.system(cmd) createtime = time.time() - startcreate assert(rc == 0) - cmd = '%s t start %s' % (self.docker, cid) - print cmd + + cmd = self.controller.task_start_cmd(cid) runtime = 0 startrun = time.time() @@ -260,32 +222,22 @@ def run_cmd_arg_wait(self, repo, cid, runargs): runtime = time.time() - startrun # cleanup print 'DONE' - cmd = '%s t kill -s 9 %s' % (self.docker, cid) + cmd = self.controller.task_kill_cmd(cid) rc = os.system(cmd) assert(rc == 0) break p.wait() return createtime, runtime - def run_cmd_stdin(self, repo, cid, runargs): - cmd = '%s c create --net-host %s ' % (self.docker, self.snapshotter_opt()) - for a,b in runargs.mount: - a = os.path.join(os.path.dirname(os.path.abspath(__file__)), a) - a = tmp_copy(a) - cmd += '--mount type=bind,src=%s,dst=%s,options=rbind ' % (a,b) - cmd += '-- %s/%s %s ' % (self.repository, self.add_suffix(repo), cid) - if runargs.stdin_sh: - cmd += runargs.stdin_sh # e.g., sh -c - - print cmd + def run_cmd_stdin(self, image, cid, runargs): + cmd = self.controller.create_cmd_stdin_cmd(image, cid, runargs) startcreate = time.time() rc = os.system(cmd) createtime = time.time() - startcreate assert(rc == 0) - cmd = '%s t start %s' % (self.docker, cid) - print cmd - startrun = time.time() + cmd = 
self.controller.task_start_cmd(cid) + startrun = time.time() p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) print runargs.stdin out, _ = p.communicate(runargs.stdin) @@ -296,73 +248,69 @@ def run_cmd_stdin(self, repo, cid, runargs): def run(self, bench, cid): name = bench.name + image = self.fully_qualify(bench.name) + print "Pulling the image..." + pullcmd = self.controller.pull_cmd(image) startpull = time.time() - cmd = ('%s images %s %s/%s' % - (self.docker_pullbin(), self.pull_subcmd(), self.repository, self.add_suffix(name))) - print cmd - rc = os.system(cmd) + rc = os.system(pullcmd) assert(rc == 0) pulltime = time.time() - startpull runtime = 0 createtime = 0 if name in BenchRunner.ECHO_HELLO: - createtime, runtime = self.run_echo_hello(repo=name, cid=cid) + createtime, runtime = self.run_echo_hello(image=image, cid=cid) elif name in BenchRunner.CMD_ARG: - createtime, runtime = self.run_cmd_arg(repo=name, cid=cid, runargs=BenchRunner.CMD_ARG[name]) + createtime, runtime = self.run_cmd_arg(image=image, cid=cid, runargs=BenchRunner.CMD_ARG[name]) elif name in BenchRunner.CMD_ARG_WAIT: - createtime, runtime = self.run_cmd_arg_wait(repo=name, cid=cid, runargs=BenchRunner.CMD_ARG_WAIT[name]) + createtime, runtime = self.run_cmd_arg_wait(image=image, cid=cid, runargs=BenchRunner.CMD_ARG_WAIT[name]) elif name in BenchRunner.CMD_STDIN: - createtime, runtime = self.run_cmd_stdin(repo=name, cid=cid, runargs=BenchRunner.CMD_STDIN[name]) + createtime, runtime = self.run_cmd_stdin(image=image, cid=cid, runargs=BenchRunner.CMD_STDIN[name]) else: print 'Unknown bench: '+name exit(1) return pulltime, createtime, runtime - def convert_echo_hello(self, repo): - self.mode = ESTARGZ_MODE + def convert_echo_hello(self, src, dest): period=10 - cmd = ('%s -cni -period %s -entrypoint \'["/bin/sh", "-c"]\' -args \'["echo hello"]\' %s/%s %s/%s' % - (self.optimizer, period, self.srcrepository, repo, self.repository, 
self.add_suffix(repo))) + cmd = ('%s -cni -period %s -entrypoint \'["/bin/sh", "-c"]\' -args \'["echo hello"]\' %s %s' % + (self.optimizer, period, src, dest)) print cmd rc = os.system(cmd) assert(rc == 0) - def convert_cmd_arg(self, repo, runargs): - self.mode = ESTARGZ_MODE + def convert_cmd_arg(self, src, dest, runargs): period = 30 assert(len(runargs.mount) == 0) entry = "" if runargs.arg != "": # FIXME: this is naive... entry = '-entrypoint \'["/bin/sh", "-c"]\'' - cmd = ('%s -cni -period %s %s %s %s/%s %s/%s' % - (self.optimizer, period, entry, genargs(runargs.arg), self.srcrepository, repo, self.repository, self.add_suffix(repo))) + cmd = ('%s -cni -period %s %s %s %s %s' % + (self.optimizer, period, entry, genargs_for_optimization(runargs.arg), src, dest)) print cmd rc = os.system(cmd) assert(rc == 0) - def convert_cmd_arg_wait(self, repo, runargs): - self.mode = ESTARGZ_MODE + def convert_cmd_arg_wait(self, src, dest, runargs): period = 90 env = ' '.join(['-env %s=%s' % (k,v) for k,v in runargs.env.iteritems()]) - cmd = ('%s -cni -period %s %s %s %s/%s %s/%s' % - (self.optimizer, period, env, genargs(runargs.arg), self.srcrepository, repo, self.repository, self.add_suffix(repo))) + cmd = ('%s -cni -period %s %s %s %s %s' % + (self.optimizer, period, env, genargs_for_optimization(runargs.arg), src, dest)) print cmd rc = os.system(cmd) assert(rc == 0) - def convert_cmd_stdin(self, repo, runargs): - self.mode = ESTARGZ_MODE + def convert_cmd_stdin(self, src, dest, runargs): mounts = '' for a,b in runargs.mount: a = os.path.join(os.path.dirname(os.path.abspath(__file__)), a) a = tmp_copy(a) mounts += '--mount type=bind,src=%s,dst=%s,options=rbind ' % (a,b) period = 60 - cmd = ('%s -i -cni -period %s %s -entrypoint \'["/bin/sh", "-c"]\' %s %s/%s %s/%s' % - (self.optimizer, period, mounts, genargs(runargs.stdin_sh), self.srcrepository, repo, self.repository, self.add_suffix(repo))) + cmd = ('%s -i -cni -period %s %s -entrypoint \'["/bin/sh", "-c"]\' %s %s %s' % 
+ (self.optimizer, period, mounts, genargs_for_optimization(runargs.stdin_sh), src, dest)) print cmd p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE) print runargs.stdin @@ -371,55 +319,53 @@ def convert_cmd_stdin(self, repo, runargs): p.wait() assert(p.returncode == 0) - def copy_img(self, repo): - self.mode = LEGACY_MODE - cmd = 'crane copy %s/%s %s/%s' % (self.srcrepository, repo, self.repository, self.add_suffix(repo)) + def push_img(self, dest): + cmd = '%s %s' % (self.pusher, dest) print cmd rc = os.system(cmd) assert(rc == 0) - def convert_and_push_img(self, repo): - self.mode = ESTARGZ_NOOPT_MODE - self.pull_img(repo) - cmd = '%s --no-optimize %s/%s %s/%s' % (self.optimizer, self.srcrepository, repo, self.repository, self.add_suffix(repo)) + def pull_img(self, src): + cmd = '%s %s' % (self.puller, src) print cmd rc = os.system(cmd) assert(rc == 0) - self.push_img(repo) - - def optimize_img(self, name): - self.mode = ESTARGZ_MODE - self.pull_img(name) - if name in BenchRunner.ECHO_HELLO: - self.convert_echo_hello(repo=name) - elif name in BenchRunner.CMD_ARG: - self.convert_cmd_arg(repo=name, runargs=BenchRunner.CMD_ARG[name]) - elif name in BenchRunner.CMD_ARG_WAIT: - self.convert_cmd_arg_wait(repo=name, runargs=BenchRunner.CMD_ARG_WAIT[name]) - elif name in BenchRunner.CMD_STDIN: - self.convert_cmd_stdin(repo=name, runargs=BenchRunner.CMD_STDIN[name]) - else: - print 'Unknown bench: '+name - exit(1) - self.push_img(name) - def push_img(self, repo): - cmd = '%s %s/%s' % (self.pusher, self.repository, self.add_suffix(repo)) + def copy_img(self, src, dest): + cmd = 'crane copy %s %s' % (src, dest) print cmd rc = os.system(cmd) assert(rc == 0) - def pull_img(self, name): - cmd = '%s %s/%s' % (self.puller, self.srcrepository, name) + def convert_and_push_img(self, src, dest): + self.pull_img(src) + cmd = '%s --no-optimize %s %s' % (self.optimizer, src, dest) print cmd rc = os.system(cmd) assert(rc == 0) + self.push_img(dest) 
+ + def optimize_img(self, name, src, dest): + self.pull_img(src) + if name in BenchRunner.ECHO_HELLO: + self.convert_echo_hello(src=src, dest=dest) + elif name in BenchRunner.CMD_ARG: + self.convert_cmd_arg(src=src, dest=dest, runargs=BenchRunner.CMD_ARG[name]) + elif name in BenchRunner.CMD_ARG_WAIT: + self.convert_cmd_arg_wait(src=src, dest=dest, runargs=BenchRunner.CMD_ARG_WAIT[name]) + elif name in BenchRunner.CMD_STDIN: + self.convert_cmd_stdin(src=src, dest=dest, runargs=BenchRunner.CMD_STDIN[name]) + else: + print 'Unknown bench: '+name + exit(1) + self.push_img(dest) def prepare(self, bench): name = bench.name - self.optimize_img(name) - self.copy_img(name) - self.convert_and_push_img(name) + src = '%s/%s' % (self.srcrepository, name) + self.copy_img(src=src, dest=format_repo(LEGACY_MODE, self.repository, name)) + self.convert_and_push_img(src=src, dest=format_repo(ESTARGZ_NOOPT_MODE, self.repository, name)) + self.optimize_img(name=name, src=src, dest=format_repo(ESTARGZ_MODE, self.repository, name)) def operation(self, op, bench, cid): if op == 'run': @@ -431,6 +377,155 @@ def operation(self, op, bench, cid): print 'Unknown operation: '+op exit(1) +class ContainerdController: + def __init__(self, is_lazypull=False): + self.is_lazypull = is_lazypull + + def pull_cmd(self, image): + base_cmd = "%s i pull" % CTR + if self.is_lazypull: + base_cmd = "ctr-remote i rpull" + cmd = '%s %s' % (base_cmd, image) + print cmd + return cmd + + def create_echo_hello_cmd(self, image, cid): + snapshotter_opt = "" + if self.is_lazypull: + snapshotter_opt = "--snapshotter=stargz" + cmd = '%s c create --net-host %s -- %s %s echo hello' % (CTR, snapshotter_opt, image, cid) + print cmd + return cmd + + def create_cmd_arg_cmd(self, image, cid, runargs): + snapshotter_opt = "" + if self.is_lazypull: + snapshotter_opt = "--snapshotter=stargz" + cmd = '%s c create --net-host %s ' % (CTR, snapshotter_opt) + cmd += '-- %s %s ' % (image, cid) + cmd += runargs.arg + print cmd + return 
cmd + + def create_cmd_arg_wait_cmd(self, image, cid, runargs): + snapshotter_opt = "" + if self.is_lazypull: + snapshotter_opt = "--snapshotter=stargz" + env = ' '.join(['--env %s=%s' % (k,v) for k,v in runargs.env.iteritems()]) + cmd = ('%s c create --net-host %s %s -- %s %s %s' % + (CTR, snapshotter_opt, env, image, cid, runargs.arg)) + print cmd + return cmd + + def create_cmd_stdin_cmd(self, image, cid, runargs): + snapshotter_opt = "" + if self.is_lazypull: + snapshotter_opt = "--snapshotter=stargz" + cmd = '%s c create --net-host %s ' % (CTR, snapshotter_opt) + for a,b in runargs.mount: + a = os.path.join(os.path.dirname(os.path.abspath(__file__)), a) + a = tmp_copy(a) + cmd += '--mount type=bind,src=%s,dst=%s,options=rbind ' % (a,b) + cmd += '-- %s %s ' % (image, cid) + if runargs.stdin_sh: + cmd += runargs.stdin_sh # e.g., sh -c + + print cmd + return cmd + + def task_start_cmd(self, cid): + cmd = '%s t start %s' % (CTR, cid) + print cmd + return cmd + + def task_kill_cmd(self, cid): + cmd = '%s t kill -s 9 %s' % (CTR, cid) + print cmd + return cmd + + def cleanup(self, name, image): + print "Cleaning up environment..." + cmd = '%s t kill -s 9 %s' % (CTR, name) + print cmd + rc = os.system(cmd) # sometimes containers already exit. we ignore the failure. 
+ cmd = '%s c rm %s' % (CTR, name) + print cmd + rc = os.system(cmd) + assert(rc == 0) + cmd = '%s image rm %s' % (CTR, image) + print cmd + rc = os.system(cmd) + assert(rc == 0) + cmd = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../reboot_containerd.sh') + print cmd + rc = os.system(cmd) + assert(rc == 0) + +class PodmanController: + def pull_cmd(self, image): + cmd = '%s pull %s' % (PODMAN, image) + print cmd + return cmd + + def create_echo_hello_cmd(self, image, cid): + cmd = '%s create --name %s %s echo hello' % (PODMAN, cid, image) + print cmd + return cmd + + def create_cmd_arg_cmd(self, image, cid, runargs): + cmd = '%s create --name %s %s ' % (PODMAN, cid, image) + cmd += runargs.arg + print cmd + return cmd + + def create_cmd_arg_wait_cmd(self, image, cid, runargs): + env = ' '.join(['--env %s=%s' % (k,v) for k,v in runargs.env.iteritems()]) + cmd = ('%s create %s --name %s %s %s ' % + (PODMAN, env, cid, image, runargs.arg)) + print cmd + return cmd + + def create_cmd_stdin_cmd(self, image, cid, runargs): + cmd = '%s create -i ' % PODMAN + for a,b in runargs.mount: + a = os.path.join(os.path.dirname(os.path.abspath(__file__)), a) + a = tmp_copy(a) + cmd += '--mount type=bind,src=%s,dst=%s ' % (a,b) + cmd += '--name %s %s ' % (cid, image) + if runargs.stdin_sh: + cmd += runargs.stdin_sh # e.g., sh -c + + print cmd + return cmd + + def task_start_cmd(self, cid): + cmd = '%s start -a %s' % (PODMAN, cid) + print cmd + return cmd + + def task_kill_cmd(self, cid): + cmd = '%s kill -s 9 %s' % (PODMAN, cid) + print cmd + return cmd + + def cleanup(self, name, image): + print "Cleaning up environment..." + cmd = '%s kill -s 9 %s' % (PODMAN, name) + print cmd + rc = os.system(cmd) # sometimes containers already exit. we ignore the failure. 
+ cmd = '%s rm %s' % (PODMAN, name) + print cmd + rc = os.system(cmd) + assert(rc == 0) + cmd = '%s image rm %s' % (PODMAN, image) + print cmd + rc = os.system(cmd) + assert(rc == 0) + cmd = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../reboot_store.sh') + print cmd + rc = os.system(cmd) + assert(rc == 0) + def main(): if len(sys.argv) == 1: print 'Usage: bench.py [OPTIONS] [BENCHMARKS]' @@ -441,6 +536,7 @@ def main(): print '--list' print '--list-json' print '--experiments' + print '--runtime' print '--op=(prepare|run)' print '--mode=(%s|%s|%s)' % (LEGACY_MODE, ESTARGZ_NOOPT_MODE, ESTARGZ_MODE) exit(1) @@ -473,7 +569,7 @@ def main(): # run benchmarks runner = BenchRunner(**kvargs) for bench in benches: - cid = '%s_bench_%d' % (bench.repo.replace(':', '-').replace('/', '-'), random.randint(1,1000000)) + cid = '%s_bench_%d' % (bench.name.replace(':', '-').replace('/', '-'), random.randint(1,1000000)) elapsed_times = [] pull_times = [] @@ -485,7 +581,7 @@ def main(): pulltime, createtime, runtime = runner.operation(op, bench, cid) elapsed = time.time() - start if op == "run": - runner.cleanup(cid, '%s/%s' % (runner.repository, runner.add_suffix(bench.repo))) + runner.cleanup(cid, bench) elapsed_times.append(elapsed) pull_times.append(pulltime) create_times.append(createtime) @@ -496,7 +592,7 @@ def main(): print 'create %s' % createtime print 'run %s' % runtime - row = {'mode':'%s' % runner.mode, 'repo':bench.repo, 'bench':bench.name, 'elapsed':sum(elapsed_times) / len(elapsed_times), 'elapsed_pull':sum(pull_times) / len(pull_times), 'elapsed_create':sum(create_times) / len(create_times), 'elapsed_run':sum(run_times) / len(run_times)} + row = {'mode':'%s' % runner.mode, 'repo':bench.name, 'bench':bench.name, 'elapsed':sum(elapsed_times) / len(elapsed_times), 'elapsed_pull':sum(pull_times) / len(pull_times), 'elapsed_create':sum(create_times) / len(create_times), 'elapsed_run':sum(run_times) / len(run_times)} js = json.dumps(row) print '%s%s,' % 
(BENCHMARKOUT_MARK, js) sys.stdout.flush() diff --git a/script/benchmark/test.sh b/script/benchmark/test.sh index 6a2c25892..e8c58d65d 100755 --- a/script/benchmark/test.sh +++ b/script/benchmark/test.sh @@ -24,10 +24,23 @@ BENCHMARKING_NODE_IMAGE_NAME="benchmark-image-test" BENCHMARKING_NODE=hello-bench BENCHMARKING_CONTAINER=hello-bench-container +BENCHMARKING_TARGET_BASE_IMAGE= +BENCHMARKING_TARGET_CONFIG_DIR= +if [ "${BENCHMARK_RUNTIME_MODE}" == "containerd" ] ; then + BENCHMARKING_TARGET_BASE_IMAGE=snapshotter-base + BENCHMARKING_TARGET_CONFIG_DIR="${CONTEXT}/config-containerd" +elif [ "${BENCHMARK_RUNTIME_MODE}" == "podman" ] ; then + BENCHMARKING_TARGET_BASE_IMAGE=podman-base + BENCHMARKING_TARGET_CONFIG_DIR="${CONTEXT}/config-podman" +else + echo "Unknown runtime: ${BENCHMARK_RUNTIME_MODE}" + exit 1 +fi + if [ "${BENCHMARKING_NO_RECREATE:-}" != "true" ] ; then echo "Preparing node image..." docker build ${DOCKER_BUILD_ARGS:-} -t "${BENCHMARKING_BASE_IMAGE_NAME}" \ - --target snapshotter-base "${REPO}" + --target "${BENCHMARKING_TARGET_BASE_IMAGE}" "${REPO}" fi DOCKER_COMPOSE_YAML=$(mktemp) @@ -40,7 +53,7 @@ function cleanup { } trap 'cleanup "$?"' EXIT SIGHUP SIGINT SIGQUIT SIGTERM -cp -R "${CONTEXT}/config" "${TMP_CONTEXT}" +cp -R "${BENCHMARKING_TARGET_CONFIG_DIR}" "${TMP_CONTEXT}/config" cat < "${TMP_CONTEXT}/Dockerfile" FROM ${BENCHMARKING_BASE_IMAGE_NAME} @@ -53,8 +66,7 @@ RUN apt-get update -y && \ git checkout 4b1985e5ea2104672636879e1694808f735fd214 && \ GO111MODULE=on go get github.com/google/go-containerregistry/cmd/crane -COPY ./config/config.containerd.toml /etc/containerd/config.toml -COPY ./config/config.stargz.toml /etc/containerd-stargz-grpc/config.toml +COPY ./config / ENV CONTAINERD_SNAPSHOTTER="" @@ -85,9 +97,13 @@ services: - "/dev/fuse:/dev/fuse" - "containerd-data:/var/lib/containerd:delegated" - "containerd-stargz-grpc-data:/var/lib/containerd-stargz-grpc:delegated" + - "containers-data:/var/lib/containers:delegated" + - 
"additional-store-data:/var/lib/stargz-store:delegated" volumes: containerd-data: containerd-stargz-grpc-data: + containers-data: + additional-store-data: EOF echo "Preparing for benchmark..." @@ -110,7 +126,8 @@ if ! ( cd "${CONTEXT}" && \ docker-compose -f "${DOCKER_COMPOSE_YAML}" build ${DOCKER_BUILD_ARGS:-} \ "${BENCHMARKING_NODE}" && \ docker-compose -f "${DOCKER_COMPOSE_YAML}" up -d --force-recreate && \ - docker exec -e BENCHMARK_SAMPLES_NUM -i "${BENCHMARKING_CONTAINER}" \ + docker exec -e BENCHMARK_RUNTIME_MODE -e BENCHMARK_SAMPLES_NUM \ + -i "${BENCHMARKING_CONTAINER}" \ script/benchmark/hello-bench/run.sh \ "${BENCHMARK_REGISTRY:-docker.io}/${BENCHMARK_USER}" \ ${BENCHMARK_TARGETS} &> "${LOG_FILE}" ) ; then diff --git a/script/cri-containerd/const.sh b/script/cri-containerd/const.sh new file mode 100644 index 000000000..3caf73008 --- /dev/null +++ b/script/cri-containerd/const.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -euo pipefail + +NODE_BASE_IMAGE_NAME="cri-containerd-image-base" +NODE_TEST_IMAGE_NAME="cri-containerd-image-test" +PREPARE_NODE_IMAGE="cri-containerd-prepare-image" diff --git a/script/cri/mirror.sh b/script/cri-containerd/mirror.sh similarity index 100% rename from script/cri/mirror.sh rename to script/cri-containerd/mirror.sh diff --git a/script/cri/test-legacy.sh b/script/cri-containerd/test-legacy.sh similarity index 100% rename from script/cri/test-legacy.sh rename to script/cri-containerd/test-legacy.sh diff --git a/script/cri/test-stargz.sh b/script/cri-containerd/test-stargz.sh similarity index 99% rename from script/cri/test-stargz.sh rename to script/cri-containerd/test-stargz.sh index 7f17ec64c..1e9a4c608 100755 --- a/script/cri/test-stargz.sh +++ b/script/cri-containerd/test-stargz.sh @@ -108,7 +108,7 @@ fi # Mirror and optimize all images used in tests echo "${REGISTRY_HOST}:5000" > "${MIRROR_TMP}/host" cp "${IMAGE_LIST}" "${MIRROR_TMP}/list" -cp "${REPO}/script/cri/mirror.sh" "${MIRROR_TMP}/mirror.sh" +cp "${REPO}/script/cri-containerd/mirror.sh" "${MIRROR_TMP}/mirror.sh" docker exec "${PREPARE_NODE_NAME}" /bin/bash /tools/mirror.sh # Configure mirror registries for containerd and snapshotter diff --git a/script/cri/test.sh b/script/cri-containerd/test.sh similarity index 100% rename from script/cri/test.sh rename to script/cri-containerd/test.sh diff --git a/script/cri-o/config/etc/cni/net.d/10-crio-bridge.conflist b/script/cri-o/config/etc/cni/net.d/10-crio-bridge.conflist new file mode 100644 index 000000000..d55834f40 --- /dev/null +++ b/script/cri-o/config/etc/cni/net.d/10-crio-bridge.conflist @@ -0,0 +1,25 @@ +{ + "cniVersion": "0.4.0", + "name": "crio", + "plugins": [ + { + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": true, + "promiscMode": true, + "ipam": { + "type": "host-local", + "routes": [{ "dst": "0.0.0.0/0" }], + "ranges": [ + [{ "subnet": "10.85.0.0/16" }] + ] + } + }, + { + "type": "portmap", + 
"capabilities": { + "portMappings": true + } + }] +} \ No newline at end of file diff --git a/script/cri-o/config/etc/containers/policy.json b/script/cri-o/config/etc/containers/policy.json new file mode 100644 index 000000000..bb26e57ff --- /dev/null +++ b/script/cri-o/config/etc/containers/policy.json @@ -0,0 +1,7 @@ +{ + "default": [ + { + "type": "insecureAcceptAnything" + } + ] +} diff --git a/script/cri-o/config/etc/containers/registries.conf b/script/cri-o/config/etc/containers/registries.conf new file mode 100644 index 000000000..e72b9471c --- /dev/null +++ b/script/cri-o/config/etc/containers/registries.conf @@ -0,0 +1 @@ +unqualified-search-registries = ['docker.io'] diff --git a/script/cri-o/config/etc/containers/storage.conf b/script/cri-o/config/etc/containers/storage.conf new file mode 100644 index 000000000..6f0c342b8 --- /dev/null +++ b/script/cri-o/config/etc/containers/storage.conf @@ -0,0 +1,7 @@ +[storage] +driver = "overlay" +graphroot = "/var/lib/containers/storage" +runroot = "/run/containers/storage" + +[storage.options] +additionallayerstores = ["/var/lib/stargz-store/store:ref"] diff --git a/script/cri-o/config/etc/crio/crio.conf b/script/cri-o/config/etc/crio/crio.conf new file mode 100644 index 000000000..c6848aaa9 --- /dev/null +++ b/script/cri-o/config/etc/crio/crio.conf @@ -0,0 +1,3 @@ +[crio.runtime] +cgroup_manager = "cgroupfs" +conmon_cgroup = "pod" diff --git a/script/cri-o/config/etc/stargz-store/config.toml b/script/cri-o/config/etc/stargz-store/config.toml new file mode 100644 index 000000000..81dbbc70f --- /dev/null +++ b/script/cri-o/config/etc/stargz-store/config.toml @@ -0,0 +1 @@ +# Add config of stargz store here. diff --git a/script/cri-o/config/usr/local/bin/entrypoint b/script/cri-o/config/usr/local/bin/entrypoint new file mode 100755 index 000000000..72fccd53b --- /dev/null +++ b/script/cri-o/config/usr/local/bin/entrypoint @@ -0,0 +1,54 @@ +#!/bin/bash + +# Copyright The containerd Authors. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +RETRYNUM=100 +RETRYINTERVAL=1 +TIMEOUTSEC=180 +function retry { + local SUCCESS=false + for i in $(seq ${RETRYNUM}) ; do + if eval "timeout ${TIMEOUTSEC} ${@}" ; then + SUCCESS=true + break + fi + echo "Fail(${i}). Retrying..." + sleep ${RETRYINTERVAL} + done + if [ "${SUCCESS}" == "true" ] ; then + return 0 + else + return 1 + fi +} + +# necessary host configuration for CRI-O +# https://github.com/kubernetes-sigs/cri-tools/blob/ec9e336fd8c21c4bab89a6aed2c4a138c8cfae75/.github/workflows/crio.yml#L36-L45 +sysctl -w net.ipv4.conf.all.route_localnet=1 +iptables -t nat -I POSTROUTING -s 127.0.0.0/8 ! 
-d 127.0.0.0/8 -j MASQUERADE + +PAUSE_IMAGE="$(cat /pause_name)" + +stargz-store --log-level=debug --config=/etc/stargz-store/config.toml /var/lib/stargz-store/store 2>&1 \ + | tee -a /var/log/stargz-store.log & +retry ls -al /var/lib/stargz-store/store/pool/ + +crio --pause-image="${PAUSE_IMAGE}" 2>&1 | tee -a /var/log/crio.log & + +sleep infinity + +# $@ diff --git a/script/cri/const.sh b/script/cri-o/const.sh similarity index 83% rename from script/cri/const.sh rename to script/cri-o/const.sh index 46ce3f89e..7bc0197dd 100644 --- a/script/cri/const.sh +++ b/script/cri-o/const.sh @@ -16,6 +16,6 @@ set -euo pipefail -NODE_BASE_IMAGE_NAME="cri-image-base" -NODE_TEST_IMAGE_NAME="cri-image-test" -PREPARE_NODE_IMAGE="cri-prepare-image" +NODE_BASE_IMAGE_NAME="cri-o-image-base" +NODE_TEST_IMAGE_NAME="cri-o-image-test" +PREPARE_NODE_IMAGE="cri-o-prepare-image" diff --git a/script/cri-o/mirror.sh b/script/cri-o/mirror.sh new file mode 100644 index 000000000..cc602daf8 --- /dev/null +++ b/script/cri-o/mirror.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -euo pipefail + +if [ "${TOOLS_DIR}" == "" ] ; then + echo "tools dir must be provided" + exit 1 +fi +LIST_FILE="${TOOLS_DIR}/list" +HOST_FILE="${TOOLS_DIR}/host" +SS_REPO="/go/src/github.com/containerd/stargz-snapshotter" + +RETRYNUM=30 +RETRYINTERVAL=1 +TIMEOUTSEC=180 +function retry { + local SUCCESS=false + for i in $(seq ${RETRYNUM}) ; do + if eval "timeout ${TIMEOUTSEC} ${@}" ; then + SUCCESS=true + break + fi + echo "Fail(${i}). Retrying..." + sleep ${RETRYINTERVAL} + done + if [ "${SUCCESS}" == "true" ] ; then + return 0 + else + return 1 + fi +} + +cd "${SS_REPO}" +PREFIX=/out/ make ctr-remote +mv /out/ctr-remote /bin/ctr-remote + +containerd & +retry ctr-remote version + +HOST=$(cat "${HOST_FILE}") +cat "${LIST_FILE}" | sort | uniq | while read IMAGE ; do + MIRROR_URL="${HOST}"$(echo "${IMAGE}" | sed -E 's/^[^/]*//g' | sed -E 's/@.*//g') + echo "Mirroring: ${IMAGE} to ${MIRROR_URL}" + ctr-remote images pull "${IMAGE}" + ctr-remote images optimize --oci --period=1 "${IMAGE}" "${MIRROR_URL}" + ctr-remote images push --plain-http "${MIRROR_URL}" +done diff --git a/script/cri-o/test-legacy.sh b/script/cri-o/test-legacy.sh new file mode 100755 index 000000000..0380884b1 --- /dev/null +++ b/script/cri-o/test-legacy.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -euo pipefail + +CONTEXT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/" +CRIO_SOCK=unix:///run/crio/crio.sock +PAUSE_IMAGE_NAME_PATH=/pause_name + +source "${CONTEXT}/const.sh" + +IMAGE_LIST="${1}" + +LOG_TMP=$(mktemp) +LIST_TMP=$(mktemp) +function cleanup { + ORG_EXIT_CODE="${1}" + rm "${LOG_TMP}" || true + rm "${LIST_TMP}" || true + exit "${ORG_EXIT_CODE}" +} + +RETRYNUM=100 +RETRYINTERVAL=1 +TIMEOUTSEC=180 +function retry { + local SUCCESS=false + for i in $(seq ${RETRYNUM}) ; do + if eval "timeout ${TIMEOUTSEC} ${@}" ; then + SUCCESS=true + break + fi + echo "Fail(${i}). Retrying..." + sleep ${RETRYINTERVAL} + done + if [ "${SUCCESS}" == "true" ] ; then + return 0 + else + return 1 + fi +} + +TEST_NODE_ID=$(docker run --rm -d --privileged \ + -v /dev/fuse:/dev/fuse \ + --tmpfs=/var/lib/containers:suid \ + --tmpfs=/var/lib/stargz-store:suid \ + "${NODE_TEST_IMAGE_NAME}") +echo "Running node on: ${TEST_NODE_ID}" +retry docker exec "${TEST_NODE_ID}" /go/bin/crictl stats + +# If container started successfully, validate the runtime through CRI +FAIL= +if ! ( + echo "===== VERSION INFORMATION =====" && \ + docker exec "${TEST_NODE_ID}" runc --version && \ + docker exec "${TEST_NODE_ID}" crio --version && \ + echo "===============================" && \ + docker exec -i "${TEST_NODE_ID}" /go/bin/critest --runtime-endpoint=${CRIO_SOCK} + ) ; then + FAIL=true +fi + +echo "Dump all names of images used in the test: ${IMAGE_LIST}" +docker exec -i "${TEST_NODE_ID}" cat /var/log/crio.log > "${LOG_TMP}" +cat "${LOG_TMP}" | grep "Pulling image: " | sed -E 's/.*Pulling image: ([^"]*)".*/\1/g' > "${LIST_TMP}" +docker exec -i "${TEST_NODE_ID}" cat "${PAUSE_IMAGE_NAME_PATH}" >> "${LIST_TMP}" +cat "${LIST_TMP}" | sort | uniq > "${IMAGE_LIST}" + +echo "Cleaning up..." 
+docker kill "${TEST_NODE_ID}" + +if [ "${FAIL}" != "" ] ; then + exit 1 +fi + +exit 0 diff --git a/script/cri-o/test-stargz.sh b/script/cri-o/test-stargz.sh new file mode 100755 index 000000000..b029aa211 --- /dev/null +++ b/script/cri-o/test-stargz.sh @@ -0,0 +1,182 @@ +#!/bin/bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +CONTEXT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/" +REPO="${CONTEXT}../../" + +REGISTRY_HOST="cri-registry" +TEST_NODE_NAME="cri-testenv-container" +CRIO_SOCK=unix:///run/crio/crio.sock +PREPARE_NODE_NAME="cri-prepare-node" + +source "${CONTEXT}/const.sh" +source "${REPO}/script/util/utils.sh" + +IMAGE_LIST="${1}" + +TMP_CONTEXT=$(mktemp -d) +DOCKER_COMPOSE_YAML=$(mktemp) +CRIO_CONFIG=$(mktemp) +STORE_CONFIG=$(mktemp) +TMPFILE=$(mktemp) +LOG_FILE=$(mktemp) +MIRROR_TMP=$(mktemp -d) +function cleanup { + ORG_EXIT_CODE="${1}" + docker-compose -f "${DOCKER_COMPOSE_YAML}" down -v || true + rm -rf "${TMP_CONTEXT}" || true + rm "${DOCKER_COMPOSE_YAML}" || true + rm "${CRIO_CONFIG}" || true + rm "${STORE_CONFIG}" || true + rm "${TMPFILE}" || true + rm "${LOG_FILE}" || true + rm -rf "${MIRROR_TMP}" || true + exit "${ORG_EXIT_CODE}" +} +trap 'cleanup "$?"' EXIT SIGHUP SIGINT SIGQUIT SIGTERM + +RETRYNUM=100 +RETRYINTERVAL=1 +TIMEOUTSEC=180 +function retry { + local SUCCESS=false + for i in $(seq ${RETRYNUM}) ; do + if eval "timeout ${TIMEOUTSEC} ${@}" ; then + 
SUCCESS=true + break + fi + echo "Fail(${i}). Retrying..." + sleep ${RETRYINTERVAL} + done + if [ "${SUCCESS}" == "true" ] ; then + return 0 + else + return 1 + fi +} + +# Prepare the testing node and registry +cat < "${DOCKER_COMPOSE_YAML}" +version: "3.3" +services: + cri-testenv-service: + image: ${NODE_TEST_IMAGE_NAME} + container_name: ${TEST_NODE_NAME} + privileged: true + tmpfs: + - /tmp:exec,mode=777 + volumes: + - /dev/fuse:/dev/fuse + - "${CRIO_CONFIG}:/etc/containers/registries.conf" + - "${STORE_CONFIG}:/etc/stargz-store/config.toml" + - "critest-crio-data:/var/lib/containers" + - "critest-crio-stargz-store-data:/var/lib/stargz-store" + image-prepare: + image: "${PREPARE_NODE_IMAGE}" + container_name: "${PREPARE_NODE_NAME}" + privileged: true + entrypoint: + - sleep + - infinity + tmpfs: + - /tmp:exec,mode=777 + environment: + - TOOLS_DIR=/tools/ + volumes: + - "critest-prepare-containerd-data:/var/lib/containerd" + - "critest-prepare-containerd-stargz-grpc-data:/var/lib/containerd-stargz-grpc" + - "${REPO}:/go/src/github.com/containerd/stargz-snapshotter:ro" + - "${MIRROR_TMP}:/tools/" + registry: + image: registry:2 + container_name: ${REGISTRY_HOST} +volumes: + critest-crio-data: + critest-crio-stargz-store-data: + critest-prepare-containerd-data: + critest-prepare-containerd-stargz-grpc-data: +EOF +docker-compose -f "${DOCKER_COMPOSE_YAML}" up -d --force-recreate + +retry docker exec "${PREPARE_NODE_NAME}" curl -k --head "http://${REGISTRY_HOST}:5000/v2/" + +# Mirror and optimize all images used in tests +echo "${REGISTRY_HOST}:5000" > "${MIRROR_TMP}/host" +cp "${IMAGE_LIST}" "${MIRROR_TMP}/list" +cp "${REPO}/script/cri-o/mirror.sh" "${MIRROR_TMP}/mirror.sh" +docker exec "${PREPARE_NODE_NAME}" /bin/bash /tools/mirror.sh + +# Configure mirror registries for CRI-O and stargz store +cat "${CONTEXT}config/etc/containers/registries.conf" > "${CRIO_CONFIG}" +cat "${CONTEXT}config/etc/stargz-store/config.toml" > "${STORE_CONFIG}" +cat "${IMAGE_LIST}" | sed 
-E 's/^([^/]*).*/\1/g' | sort | uniq | while read DOMAIN ; do + echo "Adding mirror config: ${DOMAIN}" + cat <> "${CRIO_CONFIG}" +[[registry]] +prefix = "${DOMAIN}" +insecure = true +blocked = false +location = "${REGISTRY_HOST}:5000" +EOF + cat <> "${STORE_CONFIG}" +[[resolver.host."${DOMAIN}".mirrors]] +host = "${REGISTRY_HOST}:5000" +insecure = true +EOF +done +echo "==== CRI-O (containers/image) config ====" +cat "${CRIO_CONFIG}" +echo "==== Snapshotter config ====" +cat "${STORE_CONFIG}" + +# Restart crio with the above config. +# Config files are bind mounted to the right path in the container +docker-compose -f "${DOCKER_COMPOSE_YAML}" restart cri-testenv-service +retry docker exec "${TEST_NODE_NAME}" /go/bin/crictl stats + +# Replace digests specified in testing tool to stargz-formatted one +docker exec "${PREPARE_NODE_NAME}" ctr-remote i ls +cat "${IMAGE_LIST}" | grep "@sha256:" | while read IMAGE ; do + URL_PATH=$(echo "${IMAGE}" | sed -E 's/^[^/]*//g' | sed -E 's/@.*//g') + MIRROR_TAG="${REGISTRY_HOST}:5000${URL_PATH}" + OLD_DIGEST=$(echo "${IMAGE}" | sed -E 's/.*(sha256:[a-z0-9]*).*/\1/g') + echo "Getting the digest of : ${MIRROR_TAG}" + NEW_DIGEST=$(docker exec "${PREPARE_NODE_NAME}" ctr-remote i ls name=="${MIRROR_TAG}" \ + | grep "sha256" | sed -E 's/.*(sha256:[a-z0-9]*).*/\1/g') + echo "Converting: ${OLD_DIGEST} => ${NEW_DIGEST}" + docker exec "${TEST_NODE_NAME}" \ + find /go/src/github.com/kubernetes-sigs/cri-tools/pkg -type f -exec \ + sed -i -e "s|${OLD_DIGEST}|${NEW_DIGEST}|g" {} \; +done + +# Rebuild cri testing tool +docker exec "${TEST_NODE_NAME}" /bin/bash -c \ + "cd /go/src/github.com/kubernetes-sigs/cri-tools && make && make install -e BINDIR=/go/bin" + +# Validate the runtime through CRI +echo "===== VERSION INFORMATION =====" +docker exec "${TEST_NODE_NAME}" runc --version +docker exec "${TEST_NODE_NAME}" crio --version +echo "===============================" +docker exec "${TEST_NODE_NAME}" /go/bin/critest 
--runtime-endpoint=${CRIO_SOCK} + +echo "Check all remote snapshots are created successfully" +docker exec "${TEST_NODE_NAME}" cat /var/log/stargz-store.log > "${LOG_FILE}" +check_remote_snapshots "${LOG_FILE}" + +exit 0 diff --git a/script/cri-o/test.sh b/script/cri-o/test.sh new file mode 100755 index 000000000..9d118c7fb --- /dev/null +++ b/script/cri-o/test.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +CONTEXT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/" +REPO="${CONTEXT}../../" +CRI_TOOLS_VERSION=53ad8bb7f97e1b1d1c0c0634e43a3c2b8b07b718 + +source "${CONTEXT}/const.sh" + +if [ "${CRI_NO_RECREATE:-}" != "true" ] ; then + echo "Preparing node image..." + + docker build ${DOCKER_BUILD_ARGS:-} -t "${NODE_BASE_IMAGE_NAME}" --target crio-stargz-store "${REPO}" + docker build ${DOCKER_BUILD_ARGS:-} -t "${PREPARE_NODE_IMAGE}" --target containerd-base "${REPO}" +fi + +TMP_CONTEXT=$(mktemp -d) +IMAGE_LIST=$(mktemp) +function cleanup { + local ORG_EXIT_CODE="${1}" + rm -rf "${TMP_CONTEXT}" || true + rm "${IMAGE_LIST}" || true + exit "${ORG_EXIT_CODE}" +} +trap 'cleanup "$?"' EXIT SIGHUP SIGINT SIGQUIT SIGTERM + +# Prepare the testing node +cat <<EOF > "${TMP_CONTEXT}/Dockerfile" +# Legacy builder that doesn't support TARGETARCH should set this explicitly using --build-arg. +# If TARGETARCH isn't supported by the builder, the default value is "amd64".
+ +FROM ${NODE_BASE_IMAGE_NAME} +ARG TARGETARCH + +ENV PATH=$PATH:/usr/local/go/bin +ENV GOPATH=/go +RUN apt install -y --no-install-recommends git make gcc build-essential jq && \ + curl https://dl.google.com/go/go1.15.6.linux-\${TARGETARCH:-amd64}.tar.gz \ + | tar -C /usr/local -xz && \ + go get -u github.com/onsi/ginkgo/ginkgo && \ + git clone https://github.com/kubernetes-sigs/cri-tools \ + \${GOPATH}/src/github.com/kubernetes-sigs/cri-tools && \ + cd \${GOPATH}/src/github.com/kubernetes-sigs/cri-tools && \ + git checkout ${CRI_TOOLS_VERSION} && \ + make && make install -e BINDIR=\${GOPATH}/bin + +ENTRYPOINT [ "/usr/local/bin/entrypoint" ] +EOF +docker build -t "${NODE_TEST_IMAGE_NAME}" ${DOCKER_BUILD_ARGS:-} "${TMP_CONTEXT}" + +echo "Testing..." +"${CONTEXT}/test-legacy.sh" "${IMAGE_LIST}" +"${CONTEXT}/test-stargz.sh" "${IMAGE_LIST}" diff --git a/util/testutil/estargz.go b/util/testutil/estargz.go new file mode 100644 index 000000000..c59069761 --- /dev/null +++ b/util/testutil/estargz.go @@ -0,0 +1,65 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package testutil + +import ( + "bytes" + "io" + + "github.com/containerd/stargz-snapshotter/estargz" + digest "github.com/opencontainers/go-digest" +) + +type buildEStargzOptions struct { + estargzOptions []estargz.Option +} + +type BuildEStargzOption func(o *buildEStargzOptions) error + +// WithEStargzOptions specifies additional estargz.Option values to pass through when building the eStargz blob.
+func WithEStargzOptions(eo ...estargz.Option) BuildEStargzOption { + return func(o *buildEStargzOptions) error { + o.estargzOptions = eo + return nil + } +} + +func BuildEStargz(ents []TarEntry, opts ...BuildEStargzOption) (*io.SectionReader, digest.Digest, error) { + var beOpts buildEStargzOptions + for _, o := range opts { + o(&beOpts) + } + tarBuf := new(bytes.Buffer) + if _, err := io.Copy(tarBuf, BuildTar(ents)); err != nil { + return nil, "", err + } + tarData := tarBuf.Bytes() + rc, err := estargz.Build( + io.NewSectionReader(bytes.NewReader(tarData), 0, int64(len(tarData))), + beOpts.estargzOptions...) + if err != nil { + return nil, "", err + } + defer rc.Close() + vsb := new(bytes.Buffer) + if _, err := io.Copy(vsb, rc); err != nil { + return nil, "", err + } + vsbb := vsb.Bytes() + + return io.NewSectionReader(bytes.NewReader(vsbb), 0, int64(len(vsbb))), rc.TOCDigest(), nil +} diff --git a/util/testutil/tar.go b/util/testutil/tar.go index 26d7db35c..0d6946938 100644 --- a/util/testutil/tar.go +++ b/util/testutil/tar.go @@ -29,29 +29,29 @@ import ( // TarEntry is an entry of tar. type TarEntry interface { - AppendTar(tw *tar.Writer, opts Options) error + AppendTar(tw *tar.Writer, opts BuildTarOptions) error } -// Option is a set of options used during building blob. -type Options struct { +// BuildTarOption is a set of options used during building blob. +type BuildTarOptions struct { // Prefix is the prefix string need to be added to each file name (e.g. "./", "/", etc.) Prefix string } -// Options is an option used during building blob. -type Option func(o *Options) +// BuildTarOptions is an option used during building blob. +type BuildTarOption func(o *BuildTarOptions) // WithPrefix is an option to add a prefix string to each file name (e.g. "./", "/", etc.) 
-func WithPrefix(prefix string) Option { - return func(o *Options) { +func WithPrefix(prefix string) BuildTarOption { + return func(o *BuildTarOptions) { o.Prefix = prefix } } // BuildTar builds a tar blob -func BuildTar(ents []TarEntry, opts ...Option) io.Reader { - var bo Options +func BuildTar(ents []TarEntry, opts ...BuildTarOption) io.Reader { + var bo BuildTarOptions for _, o := range opts { o(&bo) } @@ -73,12 +73,12 @@ func BuildTar(ents []TarEntry, opts ...Option) io.Reader { return pr } -type tarEntryFunc func(*tar.Writer, Options) error +type tarEntryFunc func(*tar.Writer, BuildTarOptions) error -func (f tarEntryFunc) AppendTar(tw *tar.Writer, opts Options) error { return f(tw, opts) } +func (f tarEntryFunc) AppendTar(tw *tar.Writer, opts BuildTarOptions) error { return f(tw, opts) } -// DirecoryOption is an option for a directory entry. -type DirectoryOption func(o *dirOpts) +// DirecoryBuildTarOption is an option for a directory entry. +type DirectoryBuildTarOption func(o *dirOpts) type dirOpts struct { uid int @@ -88,7 +88,7 @@ type dirOpts struct { } // WithDirOwner specifies the owner of the directory. -func WithDirOwner(uid, gid int) DirectoryOption { +func WithDirOwner(uid, gid int) DirectoryBuildTarOption { return func(o *dirOpts) { o.uid = uid o.gid = gid @@ -96,22 +96,22 @@ func WithDirOwner(uid, gid int) DirectoryOption { } // WithDirXattrs specifies the extended attributes of the directory. -func WithDirXattrs(xattrs map[string]string) DirectoryOption { +func WithDirXattrs(xattrs map[string]string) DirectoryBuildTarOption { return func(o *dirOpts) { o.xattrs = xattrs } } // WithFileMode specifies the mode of the directory. 
-func WithDirMode(mode os.FileMode) DirectoryOption { +func WithDirMode(mode os.FileMode) DirectoryBuildTarOption { return func(o *dirOpts) { o.mode = &mode } } // Dir is a directory entry -func Dir(name string, opts ...DirectoryOption) TarEntry { - return tarEntryFunc(func(tw *tar.Writer, buildOpts Options) error { +func Dir(name string, opts ...DirectoryBuildTarOption) TarEntry { + return tarEntryFunc(func(tw *tar.Writer, buildOpts BuildTarOptions) error { var dOpts dirOpts for _, o := range opts { o(&dOpts) @@ -134,8 +134,8 @@ func Dir(name string, opts ...DirectoryOption) TarEntry { }) } -// FileOption is an option for a file entry. -type FileOption func(o *fileOpts) +// FileBuildTarOption is an option for a file entry. +type FileBuildTarOption func(o *fileOpts) type fileOpts struct { uid int @@ -145,7 +145,7 @@ type fileOpts struct { } // WithFileOwner specifies the owner of the file. -func WithFileOwner(uid, gid int) FileOption { +func WithFileOwner(uid, gid int) FileBuildTarOption { return func(o *fileOpts) { o.uid = uid o.gid = gid @@ -153,22 +153,22 @@ func WithFileOwner(uid, gid int) FileOption { } // WithFileXattrs specifies the extended attributes of the file. -func WithFileXattrs(xattrs map[string]string) FileOption { +func WithFileXattrs(xattrs map[string]string) FileBuildTarOption { return func(o *fileOpts) { o.xattrs = xattrs } } // WithFileMode specifies the mode of the file. 
-func WithFileMode(mode os.FileMode) FileOption { +func WithFileMode(mode os.FileMode) FileBuildTarOption { return func(o *fileOpts) { o.mode = &mode } } // File is a regilar file entry -func File(name, contents string, opts ...FileOption) TarEntry { - return tarEntryFunc(func(tw *tar.Writer, buildOpts Options) error { +func File(name, contents string, opts ...FileBuildTarOption) TarEntry { + return tarEntryFunc(func(tw *tar.Writer, buildOpts BuildTarOptions) error { var fOpts fileOpts for _, o := range opts { o(&fOpts) @@ -198,7 +198,7 @@ func File(name, contents string, opts ...FileOption) TarEntry { // Symlink is a symlink entry func Symlink(name, target string) TarEntry { - return tarEntryFunc(func(tw *tar.Writer, buildOpts Options) error { + return tarEntryFunc(func(tw *tar.Writer, buildOpts BuildTarOptions) error { return tw.WriteHeader(&tar.Header{ Typeflag: tar.TypeSymlink, Name: buildOpts.Prefix + name, @@ -211,7 +211,7 @@ func Symlink(name, target string) TarEntry { // Link is a hard-link entry func Link(name, linkname string) TarEntry { now := time.Now() - return tarEntryFunc(func(w *tar.Writer, buildOpts Options) error { + return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error { return w.WriteHeader(&tar.Header{ Typeflag: tar.TypeLink, Name: buildOpts.Prefix + name, @@ -226,7 +226,7 @@ func Link(name, linkname string) TarEntry { // Chardev is a character device entry func Chardev(name string, major, minor int64) TarEntry { now := time.Now() - return tarEntryFunc(func(w *tar.Writer, buildOpts Options) error { + return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error { return w.WriteHeader(&tar.Header{ Typeflag: tar.TypeChar, Name: buildOpts.Prefix + name, @@ -242,7 +242,7 @@ func Chardev(name string, major, minor int64) TarEntry { // Blockdev is a block device entry func Blockdev(name string, major, minor int64) TarEntry { now := time.Now() - return tarEntryFunc(func(w *tar.Writer, buildOpts Options) error { + return 
tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error { return w.WriteHeader(&tar.Header{ Typeflag: tar.TypeBlock, Name: buildOpts.Prefix + name, @@ -258,7 +258,7 @@ func Blockdev(name string, major, minor int64) TarEntry { // Fifo is a fifo entry func Fifo(name string) TarEntry { now := time.Now() - return tarEntryFunc(func(w *tar.Writer, buildOpts Options) error { + return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error { return w.WriteHeader(&tar.Header{ Typeflag: tar.TypeFifo, Name: buildOpts.Prefix + name,