Skip to content

Commit

Permalink
fs: clean up filesystem import state
Browse files Browse the repository at this point in the history
Clean up two aspects of importing filesystems:

First: instead of trying to avoid crossing mountpoints (by checking
`.st_dev`) in order to use `.st_ino` values as a "unique key" for
tracking hardlinks, just take the `(.st_dev, .st_ino)` pair.  This
resolves some issues with overlayfs where adjacent files can end up with
different `.st_dev`.

Second: we were abusing the first setting of `st_dev` as a flag field to
determine if we were `stat()`-ing the root inode or not, which was
always a bit evil.  We did this to avoid collecting the mtime of the
root directory for determining the newest file in the filesystem.  Let's
change our approach here: we can collect the mtime when adding items to
their parent directories (which is never done for the root directory).
Use a impl on `Inode` to make it easier to collect that information.

These changes together turn .stat() into a pure operation (with respect
to the state of the reader), so remove the `&mut self` reference.

Fixes #41

Signed-off-by: Allison Karlitskaya <[email protected]>
  • Loading branch information
allisonkarlitskaya committed Dec 4, 2024
1 parent 77b8b96 commit 73e9e0d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 36 deletions.
58 changes: 22 additions & 36 deletions src/fs.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{
cell::RefCell,
cmp::max,
collections::{BTreeMap, HashMap},
ffi::OsString,
ffi::{CStr, OsStr},
Expand All @@ -9,7 +10,7 @@ use std::{
rc::Rc,
};

use anyhow::{bail, ensure, Result};
use anyhow::{ensure, Result};
use rustix::{
fd::{AsFd, OwnedFd},
fs::{
Expand Down Expand Up @@ -119,14 +120,13 @@ pub fn write_to_path(repo: &Repository, dir: &Directory, output_dir: &Path) -> R
}

pub struct FilesystemReader<'repo> {
st_dev: u64,
repo: Option<&'repo Repository>,
inodes: HashMap<u64, Rc<Leaf>>,
root_mtime: i64,
inodes: HashMap<(u64, u64), Rc<Leaf>>,
root_mtime: Option<i64>,
}

impl<'repo> FilesystemReader<'repo> {
fn read_xattrs(&mut self, fd: &OwnedFd) -> Result<BTreeMap<Box<OsStr>, Box<[u8]>>> {
fn read_xattrs(fd: &OwnedFd) -> Result<BTreeMap<Box<OsStr>, Box<[u8]>>> {
// flistxattr() and fgetxattr() don't with with O_PATH fds, so go via /proc/self/fd. Note:
// we want the symlink-following version of this call, which produces the correct behaviour
// even when trying to read xattrs from symlinks themselves. See
Expand Down Expand Up @@ -159,7 +159,7 @@ impl<'repo> FilesystemReader<'repo> {
Ok(xattrs)
}

fn stat(&mut self, fd: &OwnedFd, ifmt: FileType) -> Result<(rustix::fs::Stat, Stat)> {
fn stat(fd: &OwnedFd, ifmt: FileType) -> Result<(rustix::fs::Stat, Stat)> {
let buf = fstat(fd)?;

ensure!(
Expand All @@ -168,31 +168,14 @@ impl<'repo> FilesystemReader<'repo> {
between readdir() and fstat()"
);

let mtime = buf.st_mtime as i64;

if buf.st_dev != self.st_dev {
if self.st_dev == u64::MAX {
self.st_dev = buf.st_dev;
} else {
bail!("Attempting to cross devices while importing filesystem");
}
} else {
// The root mtime is equal to the most recent mtime of any inode *except* the root
// directory. Because self.st_dev is unset at first, we know we're in this branch only
// if this is the second (or later) inode we process (ie: not the root directory).
if mtime > self.root_mtime {
self.root_mtime = mtime;
}
}

Ok((
buf,
Stat {
st_mode: buf.st_mode & 0o7777,
st_uid: buf.st_uid,
st_gid: buf.st_gid,
st_mtim_sec: mtime,
xattrs: RefCell::new(self.read_xattrs(fd)?),
st_mtim_sec: buf.st_mtime as i64,
xattrs: RefCell::new(FilesystemReader::read_xattrs(fd)?),
},
))
}
Expand Down Expand Up @@ -240,15 +223,16 @@ impl<'repo> FilesystemReader<'repo> {
Mode::empty(),
)?;

let (buf, stat) = self.stat(&fd, ifmt)?;
let (buf, stat) = FilesystemReader::stat(&fd, ifmt)?;

if let Some(leafref) = self.inodes.get(&buf.st_ino) {
if let Some(leafref) = self.inodes.get(&(buf.st_dev, buf.st_ino)) {
Ok(Rc::clone(leafref))
} else {
let content = self.read_leaf_content(fd, buf)?;
let leaf = Rc::new(Leaf { stat, content });
if buf.st_nlink > 1 {
self.inodes.insert(buf.st_ino, Rc::clone(&leaf));
self.inodes
.insert((buf.st_dev, buf.st_ino), Rc::clone(&leaf));
}
Ok(leaf)
}
Expand All @@ -262,7 +246,7 @@ impl<'repo> FilesystemReader<'repo> {
Mode::empty(),
)?;

let (_, stat) = self.stat(&fd, FileType::Directory)?;
let (_, stat) = FilesystemReader::stat(&fd, FileType::Directory)?;
let mut directory = Directory {
stat,
entries: vec![],
Expand All @@ -277,6 +261,7 @@ impl<'repo> FilesystemReader<'repo> {
}

let inode = self.read_inode(&fd, name, entry.file_type())?;
self.root_mtime = max(self.root_mtime, Some(inode.stat().st_mtim_sec));
directory.insert(name, inode);
}

Expand All @@ -285,11 +270,11 @@ impl<'repo> FilesystemReader<'repo> {

fn read_inode(&mut self, dirfd: &OwnedFd, name: &OsStr, ifmt: FileType) -> Result<Inode> {
if ifmt == FileType::Directory {
Ok(Inode::Directory(Box::new(
self.read_directory(dirfd, name)?,
)))
let dir = self.read_directory(dirfd, name)?;
Ok(Inode::Directory(Box::new(dir)))
} else {
Ok(Inode::Leaf(self.read_leaf(dirfd, name, ifmt)?))
let leaf = self.read_leaf(dirfd, name, ifmt)?;
Ok(Inode::Leaf(leaf))
}
}
}
Expand All @@ -298,13 +283,14 @@ pub fn read_from_path(path: &Path, repo: Option<&Repository>) -> Result<FileSyst
let mut reader = FilesystemReader {
repo,
inodes: HashMap::new(),
st_dev: u64::MAX,
root_mtime: 0,
root_mtime: None,
};
let mut fs = FileSystem {
root: reader.read_directory(CWD, path.as_os_str())?,
};
fs.root.stat.st_mtim_sec = reader.root_mtime;

// A filesystem with no files ends up in the 1970s...
fs.root.stat.st_mtim_sec = reader.root_mtime.unwrap_or(0);

// We can only relabel if we have the repo because we need to read the config and policy files
if let Some(repo) = repo {
Expand Down
9 changes: 9 additions & 0 deletions src/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ pub struct DirEnt {
pub inode: Inode,
}

impl Inode {
pub fn stat(&self) -> &Stat {
match self {
Inode::Directory(dir) => &dir.stat,
Inode::Leaf(leaf) => &leaf.stat,
}
}
}

impl Directory {
pub fn find_entry(&self, name: &OsStr) -> Result<usize, usize> {
// OCI layer tarballs are typically sorted, with the entries for a particular directory
Expand Down

0 comments on commit 73e9e0d

Please sign in to comment.