Skip to content

Commit

Permalink
Establish lock ordering for vfs.FilesystemImpl.PrependPath.
Browse files Browse the repository at this point in the history
- Add type parameter Filesystem to vfs/genericfstree, which is required to
  provide `ancestryMu sync.RWMutex`.

- Modify genericfstree.PrependPath() and genericfstree.IsDescendant() to use
  ancestryMu to ensure atomicity. For callers of genericfstree.PrependPath(),
  this means that (broader) FSImpl locks no longer need to be held during the
  call. For callers of genericfstree.IsDescendant(), this means that
  IsDescendant() now has the same atomicity as Linux's fs/dcache.c:is_subdir(),
  so remove documentation warnings about its non-atomicity.

PiperOrigin-RevId: 696273175
  • Loading branch information
nixprime authored and gvisor-bot committed Nov 14, 2024
1 parent 4f92c28 commit 73db556
Show file tree
Hide file tree
Showing 30 changed files with 235 additions and 185 deletions.
2 changes: 2 additions & 0 deletions pkg/sentry/fsimpl/erofs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ go_template_instance(
template = "//pkg/sentry/vfs/genericfstree:generic_fstree",
types = {
"Dentry": "dentry",
"Filesystem": "filesystem",
},
)

Expand Down Expand Up @@ -70,6 +71,7 @@ go_library(
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/usermem",
],
)
5 changes: 4 additions & 1 deletion pkg/sentry/fsimpl/erofs/erofs.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"os"
"runtime"
"strconv"
"sync"
"sync/atomic"

"gvisor.dev/gvisor/pkg/abi/linux"
Expand All @@ -30,6 +29,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
)

// Name is the filesystem name. It is part of the interface used by users,
Expand Down Expand Up @@ -72,6 +72,9 @@ type filesystem struct {
// reduce the lock contention. Bucket is chosen based on the hash calculation
// on nid in filesystem.inodeBucket.
inodeBuckets []inodeBucket

// ancestryMu is required by genericfstree.
ancestryMu sync.RWMutex `state:"nosave"`
}

// InternalFilesystemOptions may be passed as
Expand Down
12 changes: 6 additions & 6 deletions pkg/sentry/fsimpl/erofs/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,15 +430,15 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath,

// PrependPath implements vfs.FilesystemImpl.PrependPath.
func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
}

// IsDescendant implements vfs.FilesystemImpl.IsDescendant.
func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*dentry))
}

// MountOptions implements vfs.FilesystemImpl.MountOptions.
func (fs *filesystem) MountOptions() string {
return fs.mopts
}

// IsDescendant implements vfs.FilesystemImpl.IsDescendant.
func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry))
}
1 change: 1 addition & 0 deletions pkg/sentry/fsimpl/gofer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ go_template_instance(
template = "//pkg/sentry/vfs/genericfstree:generic_fstree",
types = {
"Dentry": "dentry",
"Filesystem": "filesystem",
},
)

Expand Down
10 changes: 5 additions & 5 deletions pkg/sentry/fsimpl/gofer/dentry_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -538,14 +538,14 @@ func (d *dentry) restoreFile(ctx context.Context, opts *vfs.CompleteRestoreOptio
inode, err := controlFD.Walk(ctx, d.name)
if err != nil {
if !dt.isDir() || !dt.forMountpoint {
return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d), dt.fileType(), err)
return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d.fs, d), dt.fileType(), err)
}

// Recreate directories that were created during volume mounting, since
// during restore we don't attempt to remount them.
inode, err = controlFD.MkdirAt(ctx, d.name, linux.FileMode(d.mode.Load()), lisafs.UID(d.uid.Load()), lisafs.GID(d.gid.Load()))
if err != nil {
return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d), err)
return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d.fs, d), err)
}
}
return dt.restoreFile(ctx, &inode, opts)
Expand All @@ -558,21 +558,21 @@ func (d *dentry) restoreFile(ctx context.Context, opts *vfs.CompleteRestoreOptio
})
if err != nil {
if !dt.isDir() || !dt.forMountpoint {
return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d), dt.fileType(), err)
return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d.fs, d), dt.fileType(), err)
}

// Recreate directories that were created during volume mounting, since
// during restore we don't attempt to remount them.
if err := unix.Mkdirat(controlFD, d.name, d.mode.Load()); err != nil {
return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d), err)
return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d.fs, d), err)
}

// Try again...
childFD, err = tryOpen(func(flags int) (int, error) {
return unix.Openat(controlFD, d.name, flags, 0)
})
if err != nil {
return fmt.Errorf("failed to open %q: %w", genericDebugPathname(d), err)
return fmt.Errorf("failed to open %q: %w", genericDebugPathname(d.fs, d), err)
}
}
return dt.restoreFile(ctx, childFD, opts)
Expand Down
14 changes: 7 additions & 7 deletions pkg/sentry/fsimpl/gofer/directfs_dentry.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ func (d *directfsDentry) getCreatedChild(name string, uid, gid int, isDir bool)
deleteChild := func() {
// Best effort attempt to remove the newly created child on failure.
if err := unix.Unlinkat(d.controlFD, name, unlinkFlags); err != nil {
log.Warningf("error unlinking newly created child %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err)
log.Warningf("error unlinking newly created child %q after failure: %v", filepath.Join(genericDebugPathname(d.fs, &d.dentry), name), err)
}
}

Expand Down Expand Up @@ -518,7 +518,7 @@ func (d *directfsDentry) bindAt(ctx context.Context, name string, creds *auth.Cr
hbep := opts.Endpoint.(transport.HostBoundEndpoint)
if err := hbep.SetBoundSocketFD(ctx, boundSocketFD); err != nil {
if err := unix.Unlinkat(d.controlFD, name, 0); err != nil {
log.Warningf("error unlinking newly created socket %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err)
log.Warningf("error unlinking newly created socket %q after failure: %v", filepath.Join(genericDebugPathname(d.fs, &d.dentry), name), err)
}
return nil, err
}
Expand Down Expand Up @@ -593,7 +593,7 @@ func (d *directfsDentry) getDirentsLocked(recordDirent func(name string, key ino
// TODO(gvisor.dev/issue/6665): Get rid of per-dirent stat.
stat, err := fsutil.StatAt(d.controlFD, name)
if err != nil {
log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(genericDebugPathname(&d.dentry), name), err)
log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(genericDebugPathname(d.fs, &d.dentry), name), err)
return
}
recordDirent(name, inoKeyFromStat(&stat), ftype)
Expand Down Expand Up @@ -650,7 +650,7 @@ func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *v
var stat unix.Stat_t
if err := unix.Fstat(controlFD, &stat); err != nil {
_ = unix.Close(controlFD)
return fmt.Errorf("failed to stat %q: %w", genericDebugPathname(&d.dentry), err)
return fmt.Errorf("failed to stat %q: %w", genericDebugPathname(d.fs, &d.dentry), err)
}

d.controlFD = controlFD
Expand All @@ -672,12 +672,12 @@ func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *v
if d.isRegularFile() {
if opts.ValidateFileSizes {
if d.size.RacyLoad() != uint64(stat.Size) {
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(&d.dentry), d.size.Load(), stat.Size)}
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(d.fs, &d.dentry), d.size.Load(), stat.Size)}
}
}
if opts.ValidateFileModificationTimestamps {
if want := dentryTimestampFromUnix(stat.Mtim); d.mtime.RacyLoad() != want {
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(&d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))}
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(d.fs, &d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))}
}
}
}
Expand All @@ -687,7 +687,7 @@ func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *v

if rw, ok := d.fs.savedDentryRW[&d.dentry]; ok {
if err := d.ensureSharedHandle(ctx, rw.read, rw.write, false /* trunc */); err != nil {
return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(&d.dentry), err)
return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(d.fs, &d.dentry), err)
}
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/sentry/fsimpl/gofer/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ func (d *dentry) isDir() bool {
// +checklocks:d.childrenMu
func (d *dentry) cacheNewChildLocked(child *dentry, name string) {
d.IncRef() // reference held by child on its parent
child.parent.Store(d)
child.name = name
genericSetParentAndName(d.fs, child, d, name)
if d.children == nil {
d.children = make(map[string]*dentry)
} else if c, ok := d.children[name]; ok {
Expand Down
18 changes: 8 additions & 10 deletions pkg/sentry/fsimpl/gofer/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -1418,7 +1418,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
return err
}
if renamed.isDir() {
if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
if renamed == newParent || genericIsAncestorDentry(fs, renamed, newParent) {
return linuxerr.EINVAL
}
if oldParent != newParent {
Expand Down Expand Up @@ -1456,7 +1456,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if !renamed.isDir() {
return linuxerr.EISDIR
}
if genericIsAncestorDentry(replaced, renamed) {
if genericIsAncestorDentry(fs, replaced, renamed) {
return linuxerr.ENOTEMPTY
}
} else {
Expand Down Expand Up @@ -1738,9 +1738,12 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath,

// PrependPath implements vfs.FilesystemImpl.PrependPath.
func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
fs.renameMu.RLock()
defer fs.renameMu.RUnlock()
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
}

// IsDescendant implements vfs.FilesystemImpl.IsDescendant.
func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*dentry))
}

type mopt struct {
Expand Down Expand Up @@ -1804,8 +1807,3 @@ func (fs *filesystem) MountOptions() string {
}
return strings.Join(opts, ",")
}

// IsDescendant implements vfs.FilesystemImpl.IsDescendant.
func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry))
}
7 changes: 6 additions & 1 deletion pkg/sentry/fsimpl/gofer/gofer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
// dentry.childrenMu
// filesystem.syncMu
// dentry.metadataMu
// *** "memmap.Mappable locks" below this point
// *** "memmap.Mappable/MappingIdentity locks" below this point
// dentry.mapsMu
// *** "memmap.Mappable locks taken by Translate" below this point
// dentry.handleMu
// dentry.dataMu
// filesystem.ancestryMu
// filesystem.inoMu
// specialFileFD.mu
// specialFileFD.bufMu
Expand Down Expand Up @@ -218,6 +219,10 @@ type filesystem struct {
// it is reachable from its parent).
renameMu sync.RWMutex `state:"nosave"`

// ancestryMu additionally protects dentry.parent and dentry.name as
// required by genericfstree.
ancestryMu sync.RWMutex `state:"nosave"`

dentryCache *dentryCache

// syncableDentries contains all non-synthetic dentries. specialFileFDs
Expand Down
10 changes: 5 additions & 5 deletions pkg/sentry/fsimpl/gofer/lisafs_dentry.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,18 +547,18 @@ func (d *lisafsDentry) restoreFile(ctx context.Context, inode *lisafs.Inode, opt
if d.isRegularFile() {
if opts.ValidateFileSizes {
if inode.Stat.Mask&linux.STATX_SIZE == 0 {
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: file size not available", genericDebugPathname(&d.dentry))}
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: file size not available", genericDebugPathname(d.fs, &d.dentry))}
}
if d.size.RacyLoad() != inode.Stat.Size {
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(&d.dentry), d.size.Load(), inode.Stat.Size)}
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(d.fs, &d.dentry), d.size.Load(), inode.Stat.Size)}
}
}
if opts.ValidateFileModificationTimestamps {
if inode.Stat.Mask&linux.STATX_MTIME == 0 {
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime not available", genericDebugPathname(&d.dentry))}
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime not available", genericDebugPathname(d.fs, &d.dentry))}
}
if want := dentryTimestamp(inode.Stat.Mtime); d.mtime.RacyLoad() != want {
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(&d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))}
return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(d.fs, &d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))}
}
}
}
Expand All @@ -568,7 +568,7 @@ func (d *lisafsDentry) restoreFile(ctx context.Context, inode *lisafs.Inode, opt

if rw, ok := d.fs.savedDentryRW[&d.dentry]; ok {
if err := d.ensureSharedHandle(ctx, rw.read, rw.write, false /* trunc */); err != nil {
return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(&d.dentry), err)
return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(d.fs, &d.dentry), err)
}
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/sentry/fsimpl/gofer/save_restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func (d *dentry) prepareSaveRecursive(ctx context.Context) error {
// beforeSave is invoked by stateify.
func (d *dentry) beforeSave() {
if d.vfsd.IsDead() {
panic(fmt.Sprintf("gofer.dentry(%q).beforeSave: deleted and invalidated dentries can't be restored", genericDebugPathname(d)))
panic(fmt.Sprintf("gofer.dentry(%q).beforeSave: deleted and invalidated dentries can't be restored", genericDebugPathname(d.fs, d)))
}
}

Expand Down Expand Up @@ -260,15 +260,15 @@ func (fd *specialFileFD) completeRestore(ctx context.Context) error {
d := fd.dentry()
h, err := d.openHandle(ctx, fd.vfsfd.IsReadable(), fd.vfsfd.IsWritable(), false /* trunc */)
if err != nil {
return fmt.Errorf("failed to open handle for specialFileFD for %q: %w", genericDebugPathname(d), err)
return fmt.Errorf("failed to open handle for specialFileFD for %q: %w", genericDebugPathname(d.fs, d), err)
}
fd.handle = h

ftype := d.fileType()
fd.haveQueue = (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK) && fd.handle.fd >= 0
if fd.haveQueue {
if err := fdnotifier.AddFD(fd.handle.fd, &fd.queue); err != nil {
return fmt.Errorf("failed to add FD to fdnotified for %q: %w", genericDebugPathname(d), err)
return fmt.Errorf("failed to add FD to fdnotified for %q: %w", genericDebugPathname(d.fs, d), err)
}
}

Expand Down
15 changes: 12 additions & 3 deletions pkg/sentry/fsimpl/kernfs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ go_template_instance(
template = "//pkg/sentry/vfs/genericfstree:generic_fstree",
types = {
"Dentry": "Dentry",
"Filesystem": "Filesystem",
},
)

Expand Down Expand Up @@ -86,10 +87,10 @@ go_template_instance(
)

declare_rwmutex(
name = "filesystem_mutex",
out = "filesystem_mutex.go",
name = "ancestry_mutex",
out = "ancestry_mutex.go",
package = "kernfs",
prefix = "filesystem",
prefix = "ancestry",
)

declare_mutex(
Expand All @@ -99,9 +100,17 @@ declare_mutex(
prefix = "deferredDecRefs",
)

declare_rwmutex(
name = "filesystem_mutex",
out = "filesystem_mutex.go",
package = "kernfs",
prefix = "filesystem",
)

go_library(
name = "kernfs",
srcs = [
"ancestry_mutex.go",
"deferred_dec_refs_mutex.go",
"dentry_list.go",
"dynamic_bytes_file.go",
Expand Down
14 changes: 6 additions & 8 deletions pkg/sentry/fsimpl/kernfs/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -1089,9 +1089,12 @@ func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath,

// PrependPath implements vfs.FilesystemImpl.PrependPath.
func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
fs.mu.RLock()
defer fs.mu.RUnlock()
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b)
return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b)
}

// IsDescendant implements vfs.FilesystemImpl.IsDescendant.
func (fs *Filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*Dentry))
}

func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) {
Expand All @@ -1106,8 +1109,3 @@ func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) {
vd.DecRef(ctx)
}
}

// IsDescendant implements vfs.FilesystemImpl.IsDescendant.
func (fs *Filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*Dentry))
}
6 changes: 5 additions & 1 deletion pkg/sentry/fsimpl/kernfs/kernfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ type Filesystem struct {
// fs.deferDecRef(dentry)
mu filesystemRWMutex `state:"nosave"`

// ancestryMu additionally protects dentry.parent and dentry.name as
// required by genericfstree.
ancestryMu ancestryRWMutex `state:"nosave"`

// nextInoMinusOne is used to to allocate inode numbers on this
// filesystem. Must be accessed by atomic operations.
nextInoMinusOne atomicbitops.Uint64
Expand Down Expand Up @@ -597,7 +601,7 @@ func (d *Dentry) Inode() Inode {
// filesystem.
func (d *Dentry) FSLocalPath() string {
var b fspath.Builder
_ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b)
_ = genericPrependPath(d.fs, vfs.VirtualDentry{}, nil, d, &b)
b.PrependByte('/')
return b.String()
}
Expand Down
Loading

0 comments on commit 73db556

Please sign in to comment.