diff --git a/pkg/sentry/fsimpl/erofs/BUILD b/pkg/sentry/fsimpl/erofs/BUILD index f4808f4a94..6628a65b03 100644 --- a/pkg/sentry/fsimpl/erofs/BUILD +++ b/pkg/sentry/fsimpl/erofs/BUILD @@ -14,6 +14,7 @@ go_template_instance( template = "//pkg/sentry/vfs/genericfstree:generic_fstree", types = { "Dentry": "dentry", + "Filesystem": "filesystem", }, ) @@ -70,6 +71,7 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/erofs/erofs.go b/pkg/sentry/fsimpl/erofs/erofs.go index ef51b5db7a..670cb5e940 100644 --- a/pkg/sentry/fsimpl/erofs/erofs.go +++ b/pkg/sentry/fsimpl/erofs/erofs.go @@ -19,7 +19,6 @@ import ( "os" "runtime" "strconv" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -30,6 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" ) // Name is the filesystem name. It is part of the interface used by users, @@ -72,6 +72,9 @@ type filesystem struct { // reduce the lock contention. Bucket is chosen based on the hash calculation // on nid in filesystem.inodeBucket. inodeBuckets []inodeBucket + + // ancestryMu is required by genericfstree. + ancestryMu sync.RWMutex `state:"nosave"` } // InternalFilesystemOptions may be passed as diff --git a/pkg/sentry/fsimpl/erofs/filesystem.go b/pkg/sentry/fsimpl/erofs/filesystem.go index e2441faf34..14b5ac5bf5 100644 --- a/pkg/sentry/fsimpl/erofs/filesystem.go +++ b/pkg/sentry/fsimpl/erofs/filesystem.go @@ -430,15 +430,15 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, // PrependPath implements vfs.FilesystemImpl.PrependPath. func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) + return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) +} + +// IsDescendant implements vfs.FilesystemImpl.IsDescendant. +func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { + return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) } // MountOptions implements vfs.FilesystemImpl.MountOptions. func (fs *filesystem) MountOptions() string { return fs.mopts } - -// IsDescendant implements vfs.FilesystemImpl.IsDescendant. -func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { - return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) -} diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 8ea15112b9..faeeb0ee2b 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -49,6 +49,7 @@ go_template_instance( template = "//pkg/sentry/vfs/genericfstree:generic_fstree", types = { "Dentry": "dentry", + "Filesystem": "filesystem", }, ) diff --git a/pkg/sentry/fsimpl/gofer/dentry_impl.go b/pkg/sentry/fsimpl/gofer/dentry_impl.go index 1d5c3c02d3..8a494353e6 100644 --- a/pkg/sentry/fsimpl/gofer/dentry_impl.go +++ b/pkg/sentry/fsimpl/gofer/dentry_impl.go @@ -538,14 +538,14 @@ func (d *dentry) restoreFile(ctx context.Context, opts *vfs.CompleteRestoreOptio inode, err := controlFD.Walk(ctx, d.name) if err != nil { if !dt.isDir() || !dt.forMountpoint { - return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d), dt.fileType(), err) + return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d.fs, d), dt.fileType(), err) } // Recreate directories that were created during volume mounting, since // during restore we don't attempt to remount them. inode, err = controlFD.MkdirAt(ctx, d.name, linux.FileMode(d.mode.Load()), lisafs.UID(d.uid.Load()), lisafs.GID(d.gid.Load())) if err != nil { - return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d), err) + return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d.fs, d), err) } } return dt.restoreFile(ctx, &inode, opts) @@ -558,13 +558,13 @@ func (d *dentry) restoreFile(ctx context.Context, opts *vfs.CompleteRestoreOptio }) if err != nil { if !dt.isDir() || !dt.forMountpoint { - return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d), dt.fileType(), err) + return fmt.Errorf("failed to walk %q of type %x: %w", genericDebugPathname(d.fs, d), dt.fileType(), err) } // Recreate directories that were created during volume mounting, since // during restore we don't attempt to remount them. if err := unix.Mkdirat(controlFD, d.name, d.mode.Load()); err != nil { - return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d), err) + return fmt.Errorf("failed to create mountpoint directory at %q: %w", genericDebugPathname(d.fs, d), err) } // Try again... @@ -572,7 +572,7 @@ func (d *dentry) restoreFile(ctx context.Context, opts *vfs.CompleteRestoreOptio return unix.Openat(controlFD, d.name, flags, 0) }) if err != nil { - return fmt.Errorf("failed to open %q: %w", genericDebugPathname(d), err) + return fmt.Errorf("failed to open %q: %w", genericDebugPathname(d.fs, d), err) } } return dt.restoreFile(ctx, childFD, opts) diff --git a/pkg/sentry/fsimpl/gofer/directfs_dentry.go b/pkg/sentry/fsimpl/gofer/directfs_dentry.go index 7cc2682892..9d2e8847a8 100644 --- a/pkg/sentry/fsimpl/gofer/directfs_dentry.go +++ b/pkg/sentry/fsimpl/gofer/directfs_dentry.go @@ -452,7 +452,7 @@ func (d *directfsDentry) getCreatedChild(name string, uid, gid int, isDir bool) deleteChild := func() { // Best effort attempt to remove the newly created child on failure. if err := unix.Unlinkat(d.controlFD, name, unlinkFlags); err != nil { - log.Warningf("error unlinking newly created child %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err) + log.Warningf("error unlinking newly created child %q after failure: %v", filepath.Join(genericDebugPathname(d.fs, &d.dentry), name), err) } } @@ -518,7 +518,7 @@ func (d *directfsDentry) bindAt(ctx context.Context, name string, creds *auth.Cr hbep := opts.Endpoint.(transport.HostBoundEndpoint) if err := hbep.SetBoundSocketFD(ctx, boundSocketFD); err != nil { if err := unix.Unlinkat(d.controlFD, name, 0); err != nil { - log.Warningf("error unlinking newly created socket %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err) + log.Warningf("error unlinking newly created socket %q after failure: %v", filepath.Join(genericDebugPathname(d.fs, &d.dentry), name), err) } return nil, err } @@ -593,7 +593,7 @@ func (d *directfsDentry) getDirentsLocked(recordDirent func(name string, key ino // TODO(gvisor.dev/issue/6665): Get rid of per-dirent stat. stat, err := fsutil.StatAt(d.controlFD, name) if err != nil { - log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(genericDebugPathname(&d.dentry), name), err) + log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(genericDebugPathname(d.fs, &d.dentry), name), err) return } recordDirent(name, inoKeyFromStat(&stat), ftype) @@ -650,7 +650,7 @@ func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *v var stat unix.Stat_t if err := unix.Fstat(controlFD, &stat); err != nil { _ = unix.Close(controlFD) - return fmt.Errorf("failed to stat %q: %w", genericDebugPathname(&d.dentry), err) + return fmt.Errorf("failed to stat %q: %w", genericDebugPathname(d.fs, &d.dentry), err) } d.controlFD = controlFD @@ -672,12 +672,12 @@ func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *v if d.isRegularFile() { if opts.ValidateFileSizes { if d.size.RacyLoad() != uint64(stat.Size) { - return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(&d.dentry), d.size.Load(), stat.Size)} + return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(d.fs, &d.dentry), d.size.Load(), stat.Size)} } } if opts.ValidateFileModificationTimestamps { if want := dentryTimestampFromUnix(stat.Mtim); d.mtime.RacyLoad() != want { - return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(&d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))} + return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(d.fs, &d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))} } } } @@ -687,7 +687,7 @@ func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *v if rw, ok := d.fs.savedDentryRW[&d.dentry]; ok { if err := d.ensureSharedHandle(ctx, rw.read, rw.write, false /* trunc */); err != nil { - return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(&d.dentry), err) + return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(d.fs, &d.dentry), err) } } diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 1531fe3a01..31c7d9c62a 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -51,8 +51,7 @@ func (d *dentry) isDir() bool { // +checklocks:d.childrenMu func (d *dentry) cacheNewChildLocked(child *dentry, name string) { d.IncRef() // reference held by child on its parent - child.parent.Store(d) - child.name = name + genericSetParentAndName(d.fs, child, d, name) if d.children == nil { d.children = make(map[string]*dentry) } else if c, ok := d.children[name]; ok { diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index af81a4570b..87a6f37b4c 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1418,7 +1418,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return err } if renamed.isDir() { - if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { + if renamed == newParent || genericIsAncestorDentry(fs, renamed, newParent) { return linuxerr.EINVAL } if oldParent != newParent { @@ -1456,7 +1456,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if !renamed.isDir() { return linuxerr.EISDIR } - if genericIsAncestorDentry(replaced, renamed) { + if genericIsAncestorDentry(fs, replaced, renamed) { return linuxerr.ENOTEMPTY } } else { @@ -1738,9 +1738,12 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, // PrependPath implements vfs.FilesystemImpl.PrependPath. func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.renameMu.RLock() - defer fs.renameMu.RUnlock() - return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) + return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) +} + +// IsDescendant implements vfs.FilesystemImpl.IsDescendant. +func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { + return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) } type mopt struct { @@ -1804,8 +1807,3 @@ func (fs *filesystem) MountOptions() string { } return strings.Join(opts, ",") } - -// IsDescendant implements vfs.FilesystemImpl.IsDescendant. -func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { - return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) -} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 297e983ba3..d3faea5f7a 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -25,11 +25,12 @@ // dentry.childrenMu // filesystem.syncMu // dentry.metadataMu -// *** "memmap.Mappable locks" below this point +// *** "memmap.Mappable/MappingIdentity locks" below this point // dentry.mapsMu // *** "memmap.Mappable locks taken by Translate" below this point // dentry.handleMu // dentry.dataMu +// filesystem.ancestryMu // filesystem.inoMu // specialFileFD.mu // specialFileFD.bufMu @@ -218,6 +219,10 @@ type filesystem struct { // it is reachable from its parent). renameMu sync.RWMutex `state:"nosave"` + // ancestryMu additionally protects dentry.parent and dentry.name as + // required by genericfstree. + ancestryMu sync.RWMutex `state:"nosave"` + dentryCache *dentryCache // syncableDentries contains all non-synthetic dentries. specialFileFDs diff --git a/pkg/sentry/fsimpl/gofer/lisafs_dentry.go b/pkg/sentry/fsimpl/gofer/lisafs_dentry.go index 28bf85add4..e9a5a1dd28 100644 --- a/pkg/sentry/fsimpl/gofer/lisafs_dentry.go +++ b/pkg/sentry/fsimpl/gofer/lisafs_dentry.go @@ -547,18 +547,18 @@ func (d *lisafsDentry) restoreFile(ctx context.Context, inode *lisafs.Inode, opt if d.isRegularFile() { if opts.ValidateFileSizes { if inode.Stat.Mask&linux.STATX_SIZE == 0 { - return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: file size not available", genericDebugPathname(&d.dentry))} + return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: file size not available", genericDebugPathname(d.fs, &d.dentry))} } if d.size.RacyLoad() != inode.Stat.Size { - return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(&d.dentry), d.size.Load(), inode.Stat.Size)} + return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(d.fs, &d.dentry), d.size.Load(), inode.Stat.Size)} } } if opts.ValidateFileModificationTimestamps { if inode.Stat.Mask&linux.STATX_MTIME == 0 { - return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime not available", genericDebugPathname(&d.dentry))} + return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime not available", genericDebugPathname(d.fs, &d.dentry))} } if want := dentryTimestamp(inode.Stat.Mtime); d.mtime.RacyLoad() != want { - return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(&d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))} + return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(d.fs, &d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))} } } } @@ -568,7 +568,7 @@ func (d *lisafsDentry) restoreFile(ctx context.Context, inode *lisafs.Inode, opt if rw, ok := d.fs.savedDentryRW[&d.dentry]; ok { if err := d.ensureSharedHandle(ctx, rw.read, rw.write, false /* trunc */); err != nil { - return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(&d.dentry), err) + return fmt.Errorf("failed to restore file handles (read=%t, write=%t) for %q: %w", rw.read, rw.write, genericDebugPathname(d.fs, &d.dentry), err) } } diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go index 459ac65fb8..66bde23d17 100644 --- a/pkg/sentry/fsimpl/gofer/save_restore.go +++ b/pkg/sentry/fsimpl/gofer/save_restore.go @@ -130,7 +130,7 @@ func (d *dentry) prepareSaveRecursive(ctx context.Context) error { // beforeSave is invoked by stateify. func (d *dentry) beforeSave() { if d.vfsd.IsDead() { - panic(fmt.Sprintf("gofer.dentry(%q).beforeSave: deleted and invalidated dentries can't be restored", genericDebugPathname(d))) + panic(fmt.Sprintf("gofer.dentry(%q).beforeSave: deleted and invalidated dentries can't be restored", genericDebugPathname(d.fs, d))) } } @@ -260,7 +260,7 @@ func (fd *specialFileFD) completeRestore(ctx context.Context) error { d := fd.dentry() h, err := d.openHandle(ctx, fd.vfsfd.IsReadable(), fd.vfsfd.IsWritable(), false /* trunc */) if err != nil { - return fmt.Errorf("failed to open handle for specialFileFD for %q: %w", genericDebugPathname(d), err) + return fmt.Errorf("failed to open handle for specialFileFD for %q: %w", genericDebugPathname(d.fs, d), err) } fd.handle = h @@ -268,7 +268,7 @@ func (fd *specialFileFD) completeRestore(ctx context.Context) error { fd.haveQueue = (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK) && fd.handle.fd >= 0 if fd.haveQueue { if err := fdnotifier.AddFD(fd.handle.fd, &fd.queue); err != nil { - return fmt.Errorf("failed to add FD to fdnotified for %q: %w", genericDebugPathname(d), err) + return fmt.Errorf("failed to add FD to fdnotified for %q: %w", genericDebugPathname(d.fs, d), err) } } diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 29f7453e41..fb936629a9 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -26,6 +26,7 @@ go_template_instance( template = "//pkg/sentry/vfs/genericfstree:generic_fstree", types = { "Dentry": "Dentry", + "Filesystem": "Filesystem", }, ) @@ -86,10 +87,10 @@ go_template_instance( ) declare_rwmutex( - name = "filesystem_mutex", - out = "filesystem_mutex.go", + name = "ancestry_mutex", + out = "ancestry_mutex.go", package = "kernfs", - prefix = "filesystem", + prefix = "ancestry", ) declare_mutex( @@ -99,9 +100,17 @@ declare_mutex( prefix = "deferredDecRefs", ) +declare_rwmutex( + name = "filesystem_mutex", + out = "filesystem_mutex.go", + package = "kernfs", + prefix = "filesystem", +) + go_library( name = "kernfs", srcs = [ + "ancestry_mutex.go", "deferred_dec_refs_mutex.go", "dentry_list.go", "dynamic_bytes_file.go", diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 30e5cc4e26..1297d0b3f7 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -1089,9 +1089,12 @@ func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, // PrependPath implements vfs.FilesystemImpl.PrependPath. func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b) + return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b) +} + +// IsDescendant implements vfs.FilesystemImpl.IsDescendant. +func (fs *Filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { + return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*Dentry)) } func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) { @@ -1106,8 +1109,3 @@ func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) { vd.DecRef(ctx) } } - -// IsDescendant implements vfs.FilesystemImpl.IsDescendant. -func (fs *Filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { - return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*Dentry)) -} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 9059d05849..8512411cf9 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -111,6 +111,10 @@ type Filesystem struct { // fs.deferDecRef(dentry) mu filesystemRWMutex `state:"nosave"` + // ancestryMu additionally protects dentry.parent and dentry.name as + // required by genericfstree. + ancestryMu ancestryRWMutex `state:"nosave"` + // nextInoMinusOne is used to to allocate inode numbers on this // filesystem. Must be accessed by atomic operations. nextInoMinusOne atomicbitops.Uint64 @@ -597,7 +601,7 @@ func (d *Dentry) Inode() Inode { // filesystem. func (d *Dentry) FSLocalPath() string { var b fspath.Builder - _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) + _ = genericPrependPath(d.fs, vfs.VirtualDentry{}, nil, d, &b) b.PrependByte('/') return b.String() } diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD index 2353d0b43a..2b54607867 100644 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ b/pkg/sentry/fsimpl/overlay/BUILD @@ -6,6 +6,13 @@ package(default_applicable_licenses = ["//:license"]) licenses(["notice"]) +declare_rwmutex( + name = "ancestry_rwmutex", + out = "ancestry_rwmutex.go", + package = "overlay", + prefix = "ancestry", +) + declare_mutex( name = "dir_mutex", out = "dir_mutex.go", @@ -75,12 +82,14 @@ go_template_instance( template = "//pkg/sentry/vfs/genericfstree:generic_fstree", types = { "Dentry": "dentry", + "Filesystem": "filesystem", }, ) go_library( name = "overlay", srcs = [ + "ancestry_rwmutex.go", "copy_up.go", "data_rwmutex.go", "dev_mutex.go", diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index aa440b40aa..21407e95ba 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -1152,7 +1152,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return err } if renamed.isDir() { - if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { + if renamed == newParent || genericIsAncestorDentry(fs, renamed, newParent) { return linuxerr.EINVAL } if oldParent != newParent { @@ -1195,7 +1195,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if !renamed.isDir() { return linuxerr.EISDIR } - if genericIsAncestorDentry(replaced, renamed) { + if genericIsAncestorDentry(fs, replaced, renamed) { return linuxerr.ENOTEMPTY } replaced.dirMu.NestedLock(dirLockReplaced) @@ -1336,9 +1336,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa oldParent.DecRef(ctx) ds = appendDentry(ds, oldParent) newParent.IncRef() - renamed.parent.Store(newParent) } - renamed.name = newName + genericSetParentAndName(fs, renamed, newParent, newName) if newParent.children == nil { newParent.children = make(map[string]*dentry) } @@ -1879,9 +1878,12 @@ func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs // PrependPath implements vfs.FilesystemImpl.PrependPath. func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.renameMu.RLock() - defer fs.renameMu.RUnlock() - return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) + return genericPrependPath(fs, vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) +} + +// IsDescendant implements vfs.FilesystemImpl.IsDescendant. +func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { + return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) } // MountOptions implements vfs.FilesystemImpl.MountOptions. diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index a99277267b..d4d0e7735c 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -23,10 +23,11 @@ // dentry.dirMu // dentry.copyMu // filesystem.devMu -// *** "memmap.Mappable locks" below this point +// *** "memmap.Mappable/MappingIdentity locks" below this point // dentry.mapsMu // *** "memmap.Mappable locks taken by Translate" below this point // dentry.dataMu +// filesystem.ancestryMu // // Locking dentry.dirMu in multiple dentries requires that parent dentries are // locked before child dentries, and that filesystem.renameMu is locked to @@ -117,6 +118,10 @@ type filesystem struct { // dentries. renameMu renameRWMutex `state:"nosave"` + // ancestryMu additionally protects dentry.parent and dentry.name as + // required by genericfstree. + ancestryMu ancestryRWMutex `state:"nosave"` + // dirInoCache caches overlay-private directory inode numbers by mapped // bottommost device numbers and inode number. dirInoCache is protected by // dirInoCacheMu. @@ -482,11 +487,6 @@ func (fs *filesystem) getLowerDevMinor(layerMajor, layerMinor uint32) (uint32, e return minor, nil } -// IsDescendant implements vfs.FilesystemImpl.IsDescendant. -func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { - return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) -} - // dentry implements vfs.DentryImpl. // // +stateify savable diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index b08cf3a156..38b955b9b7 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -26,6 +26,7 @@ go_template_instance( template = "//pkg/sentry/vfs/genericfstree:generic_fstree", types = { "Dentry": "dentry", + "Filesystem": "filesystem", }, ) @@ -40,18 +41,25 @@ go_template_instance( }, ) -declare_mutex( - name = "inode_mutex", - out = "inode_mutex.go", +declare_rwmutex( + name = "ancestry_mutex", + out = "ancestry_mutex.go", package = "tmpfs", - prefix = "inode", + prefix = "ancestry", +) + +declare_rwmutex( + name = "filesystem_mutex", + out = "filesystem_mutex.go", + package = "tmpfs", + prefix = "filesystem", ) declare_mutex( - name = "pages_used_mutex", - out = "pages_used_mutex.go", + name = "inode_mutex", + out = "inode_mutex.go", package = "tmpfs", - prefix = "pagesUsed", + prefix = "inode", ) declare_mutex( @@ -61,16 +69,17 @@ declare_mutex( prefix = "iter", ) -declare_rwmutex( - name = "filesystem_mutex", - out = "filesystem_mutex.go", +declare_mutex( + name = "pages_used_mutex", + out = "pages_used_mutex.go", package = "tmpfs", - prefix = "filesystem", + prefix = "pagesUsed", ) go_library( name = "tmpfs", srcs = [ + "ancestry_mutex.go", "dentry_list.go", "device_file.go", "directory.go", diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index c2ae5ebda9..2f26403de3 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -60,8 +60,7 @@ func (fs *filesystem) newDirectory(kuid auth.KUID, kgid auth.KGID, mode linux.Fi // - filesystem.mu must be locked for writing. // - dir must not already contain a child with the given name. func (dir *directory) insertChildLocked(child *dentry, name string) { - child.parent.Store(&dir.dentry) - child.name = name + genericSetParentAndName(dir.dentry.inode.fs, child, &dir.dentry, name) if dir.childMap == nil { dir.childMap = make(map[string]*dentry) } diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 52162d9ecf..eb71dab701 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -580,7 +580,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // mount point then we want to rename the mount point, not anything in the // mounted filesystem. if renamed.inode.isDir() { - if renamed == &newParentDir.dentry || genericIsAncestorDentry(renamed, &newParentDir.dentry) { + if renamed == &newParentDir.dentry || genericIsAncestorDentry(fs, renamed, &newParentDir.dentry) { return linuxerr.EINVAL } if oldParentDir != newParentDir { @@ -936,37 +936,31 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, // PrependPath implements vfs.FilesystemImpl.PrependPath. func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - mnt := vd.Mount() d := vd.Dentry().Impl().(*dentry) - for { - if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { - return vfs.PrependPathAtVFSRootError{} - } - if mnt != nil && &d.vfsd == mnt.Root() { - return nil + if d.parent.Load() == nil { + fs.ancestryMu.Lock() + name := d.name + fs.ancestryMu.Unlock() + if name != "" { + // This file must have been created by + // newUnlinkedRegularFileDescription(). In Linux, + // mm/shmem.c:__shmem_file_setup() => + // fs/file_table.c:alloc_file_pseudo() sets the created + // dentry's dentry_operations to anon_ops, for which d_dname == + // simple_dname. fs/d_path.c:simple_dname() defines the + // dentry's pathname to be its name, prefixed with "/" and + // suffixed with " (deleted)". + b.PrependComponent("/" + name) + b.AppendString(" (deleted)") + return vfs.PrependPathSyntheticError{} } - parent := d.parent.Load() - if parent == nil { - if d.name != "" { - // This file must have been created by - // newUnlinkedRegularFileDescription(). In Linux, - // mm/shmem.c:__shmem_file_setup() => - // fs/file_table.c:alloc_file_pseudo() sets the created - // dentry's dentry_operations to anon_ops, for which d_dname == - // simple_dname. fs/d_path.c:simple_dname() defines the - // dentry's pathname to be its name, prefixed with "/" and - // suffixed with " (deleted)". - b.PrependComponent("/" + d.name) - b.AppendString(" (deleted)") - return vfs.PrependPathSyntheticError{} - } - return vfs.PrependPathAtNonMountRootError{} - } - b.PrependComponent(d.name) - d = parent } + return genericPrependPath(fs, vfsroot, vd.Mount(), d, b) +} + +// IsDescendant implements vfs.FilesystemImpl.IsDescendant. +func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { + return genericIsDescendant(fs, vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) } // MountOptions implements vfs.FilesystemImpl.MountOptions. @@ -974,11 +968,6 @@ func (fs *filesystem) MountOptions() string { return fs.mopts } -// IsDescendant implements vfs.FilesystemImpl.IsDescendant. -func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { - return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry)) -} - // adjustPageAcct adjusts the accounting done against filesystem size limit in // case there is any discrepancy between the number of pages reserved vs the // number of pages actually allocated. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index b46456017f..8a75f27f4b 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -20,11 +20,12 @@ // filesystem.mu // inode.mu // regularFileFD.offMu -// *** "memmap.Mappable locks" below this point +// *** "memmap.Mappable/MappingIdentity locks" below this point // regularFile.mapsMu // *** "memmap.Mappable locks taken by Translate" below this point // regularFile.dataMu // fs.pagesUsedMu +// filesystem.ancestryMu // directory.iterMu package tmpfs @@ -46,6 +47,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/vfs/memxattr" + "gvisor.dev/gvisor/pkg/sync" ) // Name is the default filesystem name. @@ -83,6 +85,10 @@ type filesystem struct { // mu serializes changes to the Dentry tree. mu filesystemRWMutex `state:"nosave"` + // ancestryMu additionally protects dentry.parent and dentry.name as + // required by genericfstree. + ancestryMu sync.RWMutex `state:"nosave"` + nextInoMinusOne atomicbitops.Uint64 // accessed using atomic memory operations root *dentry diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go index 3fff52dafc..d927b17026 100644 --- a/pkg/sentry/mm/debug.go +++ b/pkg/sentry/mm/debug.go @@ -42,10 +42,8 @@ func (mm *MemoryManager) String() string { func (mm *MemoryManager) DebugString(ctx context.Context) string { var b bytes.Buffer - // FIXME(b/235153601): Need to replace RLockBypass with RLockBypass - // after fixing b/235153601. - mm.mappingMu.RLockBypass() - defer mm.mappingMu.RUnlockBypass() + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() b.WriteString("VMAs:\n") for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { b.Write(mm.vmaMapsEntryLocked(ctx, vseg)) diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go index 20f436ec90..5b23623b60 100644 --- a/pkg/sentry/mm/procfs.go +++ b/pkg/sentry/mm/procfs.go @@ -78,13 +78,10 @@ func (mm *MemoryManager) MapsCallbackFuncForBuffer(buf *bytes.Buffer) MapsCallba // ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to // implement /proc/[pid]/maps. func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, fn MapsCallbackFunc) { - // FIXME(b/235153601): Need to replace RLockBypass with RLockBypass - // after fixing b/235153601. - mm.mappingMu.RLockBypass() - defer mm.mappingMu.RUnlockBypass() - var start hostarch.Addr + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() - for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { + for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { mm.appendVMAMapsEntryLocked(ctx, vseg, fn) } @@ -95,11 +92,7 @@ func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, fn MapsCallbackFu // something is really mapped in the tiny ~10 MiB segment afterwards, we'll // get the sorting on the maps file wrong at worst; but that's not possible // on any current platform). - // - // Artificially adjust the seqfile handle so we only output vsyscall entry once. - if start != vsyscallEnd { - fn(hostarch.Addr(0xffffffffff600000), hostarch.Addr(0xffffffffff601000), hostarch.ReadExecute, "p", 0, 0, 0, 0, "[vsyscall]") - } + fn(hostarch.Addr(0xffffffffff600000), hostarch.Addr(0xffffffffff601000), hostarch.ReadExecute, "p", 0, 0, 0, 0, "[vsyscall]") } // vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by @@ -133,11 +126,6 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI if vma.hint != "" { path = vma.hint } else if vma.id != nil { - // FIXME(jamieliu): We are holding mm.mappingMu here, which is - // consistent with Linux's holding mmap_sem in - // fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path(). - // However, it's not clear that fs.File.MappedName() is actually - // consistent with this lock order. path = vma.id.MappedName(ctx) } fn(vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino, path) @@ -146,21 +134,16 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI // ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to // implement /proc/[pid]/maps. func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) { - // FIXME(b/235153601): Need to replace RLockBypass with RLockBypass - // after fixing b/235153601. - mm.mappingMu.RLockBypass() - defer mm.mappingMu.RUnlockBypass() - var start hostarch.Addr + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() - for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { + for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf) } // We always emulate vsyscall, so advertise it here. See // ReadMapsSeqFileData for additional commentary. - if start != vsyscallEnd { - buf.WriteString(vsyscallSmapsEntry) - } + buf.WriteString(vsyscallSmapsEntry) } // vmaSmapsEntryLocked returns a /proc/[pid]/smaps entry for the vma iterated diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 187770b2ad..f42b7f7464 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -290,6 +290,11 @@ func (fs *anonFilesystem) RemoveXattrAt(ctx context.Context, rp *ResolvingPath, return linuxerr.EPERM } +// IsDescendant implements FilesystemImpl.IsDescendant. +func (fs *anonFilesystem) IsDescendant(vfsroot, vd VirtualDentry) bool { + return vfsroot == vd +} + // PrependPath implements FilesystemImpl.PrependPath. func (fs *anonFilesystem) PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error { b.PrependComponent(fmt.Sprintf("anon_inode:%s", vd.dentry.impl.(*anonDentry).name)) @@ -301,9 +306,6 @@ func (fs *anonFilesystem) MountOptions() string { return "" } -// IsDescendant implements FilesystemImpl.IsDescendant. -func (fs *anonFilesystem) IsDescendant(vfsroot, vd VirtualDentry) bool { return vfsroot == vd } - // IncRef implements DentryImpl.IncRef. func (d *anonDentry) IncRef() { // no-op diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index f2647ea1ca..8a7c763a07 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -496,13 +496,19 @@ type FilesystemImpl interface { // an arbitrary descriptive string to b and then return a // PrependPathSyntheticError. // - // Most implementations can acquire the appropriate locks to ensure that - // Dentry.Name() and Dentry.Parent() are fixed for vd.Dentry() and all of - // its ancestors, then call GenericPrependPath. + // Most implementations can use genericfstree.PrependPath. // // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error + // IsDescendant returns true if vd is a descendant of vfsroot or if vd and + // vfsroot are the same dentry. + // + // Most implementations can use genericfstree.IsDescendant. + // + // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. + IsDescendant(vfsroot, vd VirtualDentry) bool + // MountOptions returns mount options for the current filesystem. This // should only return options specific to the filesystem (i.e. don't return // "ro", "rw", etc). Options should be returned as a comma-separated string, @@ -511,15 +517,6 @@ type FilesystemImpl interface { // If the implementation has no filesystem-specific options, it should // return the empty string. MountOptions() string - - // IsDescendant returns true if vd is a descendant of vfsroot or if vd and - // vfsroot are the same dentry. The method does not take filesystem locks when - // accessing the parents of each dentry, so it's possible for parents to be - // mutated concurrently during a call to IsDescendant. Callers should take - // appropriate caution when using this method. - // - // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. - IsDescendant(vfsroot, vd VirtualDentry) bool } // PrependPathAtVFSRootError is returned by implementations of diff --git a/pkg/sentry/vfs/genericfstree/BUILD b/pkg/sentry/vfs/genericfstree/BUILD index 5a8e55cc1d..73cc9100cf 100644 --- a/pkg/sentry/vfs/genericfstree/BUILD +++ b/pkg/sentry/vfs/genericfstree/BUILD @@ -13,5 +13,6 @@ go_template( ], types = [ "Dentry", + "Filesystem", ], ) diff --git a/pkg/sentry/vfs/genericfstree/genericfstree.go b/pkg/sentry/vfs/genericfstree/genericfstree.go index b4dd7c0886..2d4b52fd21 100644 --- a/pkg/sentry/vfs/genericfstree/genericfstree.go +++ b/pkg/sentry/vfs/genericfstree/genericfstree.go @@ -13,12 +13,18 @@ // limitations under the License. // Package genericfstree provides tools for implementing vfs.FilesystemImpls -// where a single statically-determined lock or set of locks is sufficient to -// ensure that a Dentry's name and parent are contextually immutable. +// that follow a standard pattern for synchronizing Dentry parent and name. // // Clients using this package must use the go_template_instance rule in // tools/go_generics/defs.bzl to create an instantiation of this template -// package, providing types to use in place of Dentry. +// package, providing types to use in place of Filesystem and Dentry. +// +// TODO: As of this writing, every filesystem implementation with its own +// dentry type uses at least part of genericfstree, suggesting that we may want +// to merge its functionality into vfs.Dentry. However, this will incur the +// cost of an extra (entirely predictable) branch per directory traversal, +// since Dentry.parent will need to be atomic.Pointer[vfs.Dentry] rather than a +// filesystem-specific dentry, requiring a type assertion to the latter. package genericfstree import ( @@ -29,61 +35,79 @@ import ( // We need to define an interface instead of using atomic.Pointer because // the Dentry type gets removed during code generation and the compiler // complains about the unused sync/atomic type. -type atomicptr interface { +type atomicptrDentry interface { Load() *Dentry + Store(*Dentry) +} + +// Filesystem is a required type parameter that is a struct with the given +// fields. +type Filesystem struct { + // ancestryMu makes parent and name writes atomic for all dentries in the + // filesystem. + ancestryMu sync.RWMutex } // Dentry is a required type parameter that is a struct with the given fields. -// -// +stateify savable type Dentry struct { // vfsd is the embedded vfs.Dentry corresponding to this vfs.DentryImpl. vfsd vfs.Dentry // parent is the parent of this Dentry in the filesystem's tree. If this // Dentry is a filesystem root, parent is nil. - parent atomicptr + parent atomicptrDentry // name is the name of this Dentry in its parent. If this Dentry is a // filesystem root, name is unspecified. name string } +// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. +func ParentOrSelf(d *Dentry) *Dentry { + if parent := d.parent.Load(); parent != nil { + return parent + } + return d +} + +// SetParentAndName atomically replaces a Dentry's parent and name. +// +// SetParentAndName must be used when changes to a Dentry's parent and name may +// race with observations of the same. If a Dentry is not visible to other +// goroutines (including concurrent calls to PrependPath or IsDescendant) when +// its parent and name are changed, it is safe to either call SetParentAndName +// or mutate d.parent and d.name directly. +func SetParentAndName(fs *Filesystem, d, newParent *Dentry, newName string) { + fs.ancestryMu.Lock() + defer fs.ancestryMu.Unlock() + d.parent.Store(newParent) + d.name = newName +} + // IsAncestorDentry returns true if d is an ancestor of d2; that is, d is // either d2's parent or an ancestor of d2's parent. -func IsAncestorDentry(d, d2 *Dentry) bool { - for d2 != nil { // Stop at root, where d2.parent == nil. - parent := d2.parent.Load() - if parent == d { - return true - } - if parent == d2 { - return false - } - d2 = parent +func IsAncestorDentry(fs *Filesystem, d, d2 *Dentry) bool { + if d == d2 { + return false } - return false + return IsDescendant(fs, &d.vfsd, d2) } // IsDescendant returns true if vd is a descendant of vfsroot or if vd and // vfsroot are the same dentry. -func IsDescendant(vfsroot *vfs.Dentry, d *Dentry) bool { +func IsDescendant(fs *Filesystem, vfsroot *vfs.Dentry, d *Dentry) bool { + fs.ancestryMu.RLock() + defer fs.ancestryMu.RUnlock() for d != nil && &d.vfsd != vfsroot { d = d.parent.Load() } return d != nil } -// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. -func ParentOrSelf(d *Dentry) *Dentry { - if parent := d.parent.Load(); parent != nil { - return parent - } - return d -} - // PrependPath is a generic implementation of FilesystemImpl.PrependPath(). -func PrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *Dentry, b *fspath.Builder) error { +func PrependPath(fs *Filesystem, vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *Dentry, b *fspath.Builder) error { + fs.ancestryMu.RLock() + defer fs.ancestryMu.RUnlock() for { if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { return vfs.PrependPathAtVFSRootError{} @@ -103,8 +127,8 @@ func PrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *Dentry, b *fspath // DebugPathname returns a pathname to d relative to its filesystem root. // DebugPathname does not correspond to any Linux function; it's used to // generate dentry pathnames for debugging. -func DebugPathname(d *Dentry) string { +func DebugPathname(fs *Filesystem, d *Dentry) string { var b fspath.Builder - _ = PrependPath(vfs.VirtualDentry{}, nil, d, &b) + _ = PrependPath(fs, vfs.VirtualDentry{}, nil, d, &b) return b.String() } diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go index a06a69c59f..e3dae89059 100644 --- a/pkg/sentry/vfs/pathname.go +++ b/pkg/sentry/vfs/pathname.go @@ -211,8 +211,4 @@ loop: // - d_absolute_path(), which returns EINVAL if (effectively) any call to // FilesystemImpl.PrependPath() would return PrependPathAtNonMountRootError. // -// - dentry_path(), which does not walk up mounts (and only returns the path -// relative to Filesystem root), but also appends "//deleted" for disowned -// Dentries. -// // These should be added as necessary. diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 7a0f3dbfa5..e7ec567a97 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -18,10 +18,11 @@ // // EpollInstance.interestMu // FileDescription.epollMu -// Locks acquired by FilesystemImpl/FileDescriptionImpl methods (except IsDescendant) +// Locks acquired by FilesystemImpl/FileDescriptionImpl methods (except FilesystemImpl.PrependPath and IsDescendant) // VirtualFilesystem.mountMu // Dentry.mu // Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry +// Locks acquired by FilesystemImpl.PrependPath and IsDescendant (typically genericfstree.Filesystem.ancestryMu) // VirtualFilesystem.filesystemsMu // fdnotifier.notifier.mu // EpollInstance.readyMu @@ -33,11 +34,6 @@ // Locking Dentry.mu in multiple Dentries requires holding // VirtualFilesystem.mountMu. Locking EpollInstance.interestMu in multiple // EpollInstances requires holding epollCycleMu. -// -// FilesystemImpl locks are not held during calls to FilesystemImpl.IsDescendant -// since it's called under mountMu. It's possible for concurrent mutation -// to dentry ancestors during calls IsDescendant. Callers should take -// appropriate caution when using this method. package vfs import (