Skip to content

Commit

Permalink
[wip] implement persistent storage for services
Browse files Browse the repository at this point in the history
Signed-off-by: Serge Hallyn <[email protected]>
  • Loading branch information
hallyn committed Dec 23, 2023
1 parent 340af0a commit 0e2c5b7
Show file tree
Hide file tree
Showing 12 changed files with 372 additions and 45 deletions.
8 changes: 6 additions & 2 deletions cmd/trust/launch.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ var launchCmd = cli.Command{
Usage: "Serial number UUID to assign to the machine, empty to use a random UUID",
Value: "",
},
cli.BoolFlag{
Name: "debug",
Usage: "show console during provision and install",
},
cli.BoolFlag{
Name: "skip-provisioning",
Usage: "Skip provisioning the machine",
Expand Down Expand Up @@ -142,7 +146,7 @@ func doLaunch(ctx *cli.Context) error {
}
}()

if err := m.RunProvision(); err != nil {
if err := m.RunProvision(ctx.Bool("debug")); err != nil {
return errors.Wrapf(err, "Failed to run provisioning ISO")
}

Expand All @@ -151,7 +155,7 @@ func doLaunch(ctx *cli.Context) error {
return nil
}

if err := m.RunInstall(); err != nil {
if err := m.RunInstall(ctx.Bool("debug")); err != nil {
return errors.Wrapf(err, "Failed to run install ISO")
}

Expand Down
74 changes: 74 additions & 0 deletions docs/storage.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Storage for targets

Following is an example manifest.yaml showing how to specify storage
for targets:

```
storage:
  - label: zot-data
    persistent: true
    nsgroup: "zot"
    size: 30G
  - label: zot-config
    persistent: true
    nsgroup: "zot"
    size: 1G
  - label: zot-tmp
    persistent: false
    nsgroup: "zot"
    size: 1G
  - label: nginx-data
    persistent: true
    nsgroup: "zot"
    size: 1G
targets:
  - service_name: zot
    source: docker://zothub.io/machine/bootkit/demo-zot:0.0.4-squashfs
    version: 1.0.0
    nsgroup: zot
    storage:
      - dest: /zot
        label: zot-data
      - dest: /etc/zot
        label: zot-config
      - dest: /tmp
        label: zot-tmp
  - service_name: nginx
    source: docker://zothub.io/machine/bootkit/demo-nginx:0.0.4-squashfs
    version: 1.0.0
    nsgroup: zot
    storage:
      - dest: /data/zot
        label: zot-data
      - dest: /var/lib/www
        label: nginx-data
```

When a target starts up, its rootfs is an overlay of a writeable tmpfs
over the source OCI image (which itself is an overlay of dmverity-protected
squashfs images). The writeable overlays are all in a shared partition
mounted at /scratch-writes. In order to provide persistent storage
across boots, shared storage between containers, or a larger private
ephemeral storage which does not risk filling up /scratch-writes,
extra storage can be requested.

In the above example, four additional storage volumes are requested. The
30G volume called zot-data will be persistent, so its contents will be
saved across boots. In contrast, zot-tmp is not persistent, so its contents
will be deleted on each reboot. All four are in the nsgroup 'zot', which
both of the targets, zot and nginx, run in. The nsgroup is a named
user namespace mapping, so uid 0 will be represented by the same host
uid (for instance 100000) for all of them.

Note that if nginx were not placed into nsgroup 'zot', it would still
be able to mount zot-data; however, all files would appear to be
owned by nobody:nogroup, and nginx would have only world access rights.

Each target now has an optional storage section, where it can
specify which volumes it should mount, and where.

On boot, the machine will first create the storage volumes, and uid-shift
them if needed. If a non-persistent volume already exists, it will be
deleted and recreated.

All storage volumes are created as ext4 filesystems.
164 changes: 145 additions & 19 deletions pkg/mosconfig/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,17 @@ import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"syscall"

"github.com/apex/log"
ispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/project-machine/mos/pkg/trust"
"github.com/project-machine/mos/pkg/utils"

"machinerun.io/disko"
"machinerun.io/disko/partid"
)

// Update can be full, meaning all existing Targets are replaced, or
Expand Down Expand Up @@ -81,30 +88,146 @@ const (
FsService ServiceType = "fs-only"
)

// TargetStorage maps one storage volume, identified by its label,
// to a mount destination inside a target's filesystem.
type TargetStorage struct {
	Dest  string `json:"dest" yaml:"dest"`   // absolute path inside the target where the volume is mounted
	Label string `json:"label" yaml:"label"` // label of the storage volume to mount (matches StorageItem.Label)
}

// TargetStorageList is the set of volume mounts requested by one target.
type TargetStorageList []TargetStorage

// Target defines a single service. This includes the rootfs
// and every container and fs-only service.
// NSGroup is a user namespace group. Two services both in
// NSGroup 'ran' will have the same uid mapping. A service
// in NSGroup "none" (or "") runs in the host uid network.
type Target struct {
ServiceName string `json:"service_name"` // name of target
Version string `json:"version"` // docker or oci version tag
ServiceType ServiceType `json:"service_type"`
Network TargetNetwork `json:"network"`
NSGroup string `json:"nsgroup"`
Digest string `json:"digest"`
Size int64 `json:"size"`
ServiceName string `json:"service_name"` // name of target
Version string `json:"version"` // docker or oci version tag
ServiceType ServiceType `json:"service_type"`
Network TargetNetwork `json:"network"`
NSGroup string `json:"nsgroup"`
Digest string `json:"digest"`
Storage TargetStorageList `json:"storage"`
Size int64 `json:"size"`
}
type InstallTargets []Target

func (t *Target) NeedsIdmap() bool {
return t.NSGroup != "" && t.NSGroup != "none"
return needsIdmap(t.NSGroup)
}
// needsIdmap reports whether the named nsgroup requires a uid
// mapping: every name except the empty string and "none" does.
func needsIdmap(nsgroup string) bool {
	switch nsgroup {
	case "", "none":
		return false
	default:
		return true
	}
}

// Note - Storage is an interface, an implementation detail
// to abstract atomfs vs puzzlefs etc. So for the 'storage'
// information in manifest.yaml, we use StorageItem and
// StorageList.

// StorageItem is a request for a volume to be mounted into
// a Target.
type StorageItem struct {
	Label      string `json:"label" yaml:"label"`           // unique volume name; must not collide with reserved labels (see IsReserved)
	Persistent bool   `json:"persistent" yaml:"persistent"` // if false, the volume's contents are discarded and recreated on each boot
	NSGroup    string `json:"nsgroup" yaml:"nsgroup"`       // named uid-mapping group the volume belongs to
	Size       uint64 `json:"size" yaml:"size"`             // size in MiB. NOTE(review): docs/storage.md shows sizes like "30G" — confirm which format the manifest accepts
}

// Delete removes the partition whose name matches the item's label.
// All disks are scanned; only the first match is deleted. A label
// with no matching partition is not an error.
func (i *StorageItem) Delete(allDisks disko.DiskSet, mysys disko.System) error {
	for _, disk := range allDisks {
		for _, part := range disk.Partitions {
			if part.Name != i.Label {
				continue
			}
			return mysys.DeletePartition(disk, part.Number)
		}
	}
	return nil
}

// IsReserved reports whether the item's label collides with one of
// the partition names the system reserves for its own use.
func (i *StorageItem) IsReserved() bool {
	switch i.Label {
	case "esp", "machine-config", "machine-store", "machine-scratch":
		return true
	default:
		return false
	}
}

// Create a user-requested storage item if it does not yet exist.
// We accept the mos struct so we can find its nsgroup (uid mapping)
const mib, gib = disko.Mebibyte, disko.Mebibyte * 1024

func (i *StorageItem) Create(mos *Mos, allDisks disko.DiskSet, mysys disko.System) error {
size := i.Size * disko.Mebibyte
for _, d := range allDisks {
fslist := d.FreeSpacesWithMin(size)
if len(fslist) == 0 {
continue
}
num := uint(0)
for i := uint(1); i <= 128; i++ {
if _, ok := d.Partitions[i]; !ok {
num = i
break
}
}
if num == 0 {
return errors.Errorf("No free partition numbers")
}
freespace := fslist[0]
start := freespace.Start
p := disko.Partition{
Start: start,
Last: start + size - 1,
Number: num,
ID: disko.GenGUID(),
Type: partid.LinuxFS,
Name: i.Label,
}
if err := mysys.CreatePartition(d, p); err != nil {
return errors.Wrapf(err, "Failed creating storage %#v", i)
}
dev := filepath.Join("/dev", pathForPartition(d.Name, p.Number))
cmd := []string{"mkfs.ext4", "-F", dev}
if err := utils.RunCommand(cmd...); err != nil {
return errors.Wrapf(err, "Failed creating fs on %#v", i)
}

// mount
dest := filepath.Join("/storage", i.Label)
if err := utils.EnsureDir(dest); err != nil {
return errors.Wrapf(err, "Failed creating mount dir %q", dest)
}
if err := syscall.Mount(dev, dest, "ext4", 0, ""); err != nil {
return errors.Wrapf(err, "Failed mounting %#v", i)
}

// XXX - still have to chown the fs root to the i.NSGroup root uid
log.Infof("Created and mounted %#v onto %q", i, dest)
return nil
}
return errors.Errorf("Failed to find free space for %#v", i)
}

// StorageList is the full set of storage volumes declared in a manifest.
type StorageList []StorageItem

// Contains reports whether the list already holds an item with n's
// label. Only labels are compared; differing size or persistence on
// a matching label is not detected here.
func (s StorageList) Contains(n StorageItem) bool {
	for idx := range s {
		if s[idx].Label == n.Label {
			return true
		}
	}
	return false
}

// This describes an install manifest
type InstallFile struct {
	Version    int            `json:"version"`     // manifest format version
	Product    string         `json:"product"`     // product this manifest belongs to
	Storage    StorageList    `json:"storage"`     // storage volumes to create/mount at boot
	Targets    InstallTargets `json:"targets"`     // services to install and run
	UpdateType UpdateType     `json:"update_type"` // full or partial update
}
Expand All @@ -128,19 +251,20 @@ type SysTarget struct {
}
type SysTargets []SysTarget

func (s *SysTargets) Contains(needle SysTarget) (SysTarget, bool) {
func (s *SysTargets) Contains(needle SysTarget) bool {
for _, t := range *s {
if t.Name == needle.Name {
return t, true
return true
}
}
return SysTarget{}, false
return false
}

type SysManifest struct {
// Persistent stored information
UidMaps []IdmapSet `json:"uidmaps"`
SysTargets []SysTarget `json:"targets"`
Storage StorageList `json:"storage"`

// Runtime information
DefaultNic string
Expand Down Expand Up @@ -270,6 +394,7 @@ func (ts InstallTargets) Validate() error {
// ImportFile is the user-authored manifest.yaml that gets imported
// and converted into an InstallFile when the manifest is published.
type ImportFile struct {
	Version    int         `yaml:"version"`
	Product    string      `yaml:"product"`
	Storage    StorageList `yaml:"storage"` // requested storage volumes; reserved labels are rejected at publish time
	Targets    UserTargets `yaml:"targets"`
	UpdateType UpdateType  `yaml:"update_type"`
}
Expand Down Expand Up @@ -313,13 +438,14 @@ func (i *ImportFile) CompleteTargets(keyProject string) (UserTargets, error) {
}

type UserTarget struct {
ServiceName string `yaml:"service_name"` // name of target
Source string `yaml:"source"` // docker url from which to fetch
Version string `yaml:"version"` // A version for internal use.
ServiceType ServiceType `yaml:"service_type"`
Network TargetNetwork `yaml:"network"`
NSGroup string `yaml:"nsgroup"`
Digest string `yaml:"digest"`
Size int64 `yaml:"size"`
ServiceName string `yaml:"service_name"` // name of target
Source string `yaml:"source"` // docker url from which to fetch
Version string `yaml:"version"` // A version for internal use.
Storage TargetStorageList `yaml:"storage"`
ServiceType ServiceType `yaml:"service_type"`
Network TargetNetwork `yaml:"network"`
NSGroup string `yaml:"nsgroup"`
Digest string `yaml:"digest"`
Size int64 `yaml:"size"`
}
type UserTargets []UserTarget
9 changes: 9 additions & 0 deletions pkg/mosconfig/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,13 @@ func PublishManifest(project, repo, destpath, manifestpath string, skipBootkit b
UpdateType: imports.UpdateType,
}

for _, s := range imports.Storage {
if s.IsReserved() {
return errors.Errorf("Invalid storage name %q", s.Label)
}
install.Storage = append(install.Storage, s)
}

// Copy each of the targets to specified oci repo,
// verify digest and size, and append them to the install
// manifest's list.
Expand Down Expand Up @@ -361,9 +368,11 @@ func PublishManifest(project, repo, destpath, manifestpath string, skipBootkit b
ServiceType: t.ServiceType,
Network: t.Network,
NSGroup: t.NSGroup,
Storage: t.Storage,
Digest: digest,
Size: size},
)
log.Infof("appending storage item %#v", t.Storage)
}

workdir, err := os.MkdirTemp("", "manifest")
Expand Down
8 changes: 7 additions & 1 deletion pkg/mosconfig/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func (mos *Mos) initManifest(manifestPath, manifestCert, manifestCA, configPath
dest = filepath.Join(dir, "manifest.json")
targets := SysTargets{}
uidmaps := []IdmapSet{}
s := StorageList{}

for _, t := range cf.Targets {
newT := SysTarget{
Expand All @@ -95,14 +96,19 @@ func (mos *Mos) initManifest(manifestPath, manifestCert, manifestCA, configPath
}
targets = append(targets, newT)

uidmaps = addUIDMap([]IdmapSet{}, uidmaps, t)
uidmaps = addUIDMap([]IdmapSet{}, uidmaps, t.NSGroup)
}

for _, n := range cf.Storage {
s = append(s, n)
}

sysmanifest := SysManifest{
UidMaps: uidmaps,
SysTargets: targets,
UsedPorts: make(map[uint]string),
IpAddrs: make(map[string]string),
Storage: s,
}

bytes, err := json.Marshal(&sysmanifest)
Expand Down
Loading

0 comments on commit 0e2c5b7

Please sign in to comment.