Skip to content

Commit

Permalink
avoid re-evaluating to know if a package has changed (#859)
Browse files Browse the repository at this point in the history
Summary:

Context: Adding a random file to `fbcode/` does some recomputation in the graph that leads to 40s build time in the incremental case.

When `fbcode/BUCK` is a dep, this is unavoidable since you need to traverse the entire graph to understand if you need to recompute anything. However, in the case of some targets like `//aps_models/ads/gmp:launcher_with_publish` `fbcode/BUCK` is not a dep, but since we check for nearby package files for the entire graph, we end up spending more time.

#thanks jakobdegen for helping me figure out what causes additional 35s computation in dice when we add files to fbcode/

Reviewed By: JakobDegen

Differential Revision: D69714496
  • Loading branch information
AishwaryaSivaraman authored and facebook-github-bot committed Feb 19, 2025
1 parent 14f2041 commit 3fdcc16
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 12 deletions.
74 changes: 73 additions & 1 deletion app/buck2_common/src/dice/file_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use async_trait::async_trait;
use buck2_core::cells::cell_path::CellPath;
use buck2_core::cells::cell_path::CellPathRef;
use buck2_core::cells::name::CellName;
use buck2_core::fs::paths::file_name::FileName;
use buck2_core::fs::paths::file_name::FileNameBuf;
use buck2_futures::cancellation::CancellationContext;
use cmp_any::PartialEqAny;
Expand All @@ -32,6 +33,7 @@ use futures::FutureExt;

use crate::buildfiles::HasBuildfiles;
use crate::dice::file_ops::delegate::get_delegated_file_ops;
use crate::file_ops::DirectorySubListingMatchingOutput;
use crate::file_ops::FileOps;
use crate::file_ops::FileOpsError;
use crate::file_ops::RawPathMetadata;
Expand Down Expand Up @@ -62,6 +64,22 @@ impl DiceFileComputations {
.await?
}

/// Returns the entries in the given directory with the provided filename, matching up to case.
///
/// This is intended to make it possible to reason about not-fully-case-sensitive file
/// systems without picking up a dependency on the whole `ReadDirKey`
pub async fn directory_sublisting_matching_any_case(
ctx: &mut DiceComputations<'_>,
directory_path: CellPathRef<'_>,
filename: &FileName,
) -> buck2_error::Result<DirectorySubListingMatchingOutput> {
ctx.compute(&DirectorySubListingMatchingInAnyCaseKey {
directory_path: directory_path.to_owned(),
filename: filename.to_owned(),
})
.await?
}

pub async fn read_dir_include_ignores(
ctx: &mut DiceComputations<'_>,
path: CellPathRef<'_>,
Expand Down Expand Up @@ -164,6 +182,8 @@ pub struct FileChangeTracker {
/// We cannot unconditionally respect directory modification events from the file watcher, as it
/// is not aware of our ignore rules.
maybe_modified_dirs: HashSet<CellPath>,

directory_sublisting_matching_any_case: HashSet<DirectorySubListingMatchingInAnyCaseKey>,
}

impl FileChangeTracker {
Expand All @@ -173,6 +193,7 @@ impl FileChangeTracker {
dirs_to_dirty: Default::default(),
paths_to_dirty: Default::default(),
maybe_modified_dirs: Default::default(),
directory_sublisting_matching_any_case: Default::default(),
}
}

Expand All @@ -182,21 +203,37 @@ impl FileChangeTracker {
if let Some(dir) = p.0.parent() {
if self.maybe_modified_dirs.contains(&dir.to_owned()) {
self.insert_dir_keys(dir.to_owned());
self.insert_sublisting_matching_any_case(p.0.to_owned());
}
}
}

ctx.changed(self.files_to_dirty)?;
ctx.changed(self.dirs_to_dirty)?;
ctx.changed(self.paths_to_dirty)?;
ctx.changed(self.directory_sublisting_matching_any_case)?;

Ok(())
}

fn file_contents_modify(&mut self, path: CellPath) {
self.files_to_dirty
.insert(ReadFileKey(Arc::new(path.clone())));
self.paths_to_dirty.insert(PathMetadataKey(path));
self.paths_to_dirty.insert(PathMetadataKey(path.clone()));
}

fn insert_sublisting_matching_any_case(&mut self, path: CellPath) {
if let Some(parent) = &path.parent() {
if let Some(file_name) = path.path().file_name() {
let lower_case_file_name = file_name.to_string().to_lowercase();
self.directory_sublisting_matching_any_case.insert(
DirectorySubListingMatchingInAnyCaseKey {
directory_path: parent.to_owned(),
filename: FileName::unchecked_new(&lower_case_file_name).to_owned(),
},
);
}
}
}

fn insert_dir_keys(&mut self, path: CellPath) {
Expand All @@ -214,6 +251,7 @@ impl FileChangeTracker {
let parent = path.parent();

self.file_contents_modify(path.clone());
self.insert_sublisting_matching_any_case(path.clone());
if let Some(parent) = parent {
// The above can be None (validly!) if we have a cell we either create or delete.
// That never happens in established repos, but if you are setting one up, it's not uncommon.
Expand All @@ -225,6 +263,7 @@ impl FileChangeTracker {

pub fn dir_added_or_removed(&mut self, path: CellPath) {
self.paths_to_dirty.insert(PathMetadataKey(path.clone()));
self.insert_sublisting_matching_any_case(path.clone());
if let Some(parent) = path.parent() {
let parent = parent.to_owned();
// The above can be None (validly!) if we have a cell we either create or delete.
Expand Down Expand Up @@ -321,6 +360,39 @@ impl Key for ReadDirKey {
}
}

#[derive(Clone, Display, Allocative, Debug, Eq, Hash, PartialEq)]
#[display("{}, {}", directory_path, filename)]
struct DirectorySubListingMatchingInAnyCaseKey {
directory_path: CellPath,
filename: FileNameBuf,
}

#[async_trait]
impl Key for DirectorySubListingMatchingInAnyCaseKey {
type Value = buck2_error::Result<DirectorySubListingMatchingOutput>;
async fn compute(
&self,
ctx: &mut DiceComputations,
_cancellations: &CancellationContext,
) -> Self::Value {
get_delegated_file_ops(ctx, self.directory_path.cell(), CheckIgnores::Yes)
.await?
.read_matching_files_from_dir(self.directory_path.path(), &self.filename)
.await
}

fn equality(x: &Self::Value, y: &Self::Value) -> bool {
match (x, y) {
(Ok(x), Ok(y)) => x == y,
_ => false,
}
}

fn validity(x: &Self::Value) -> bool {
x.is_ok()
}
}

#[derive(Clone, Display, Debug, Eq, Hash, PartialEq, Allocative)]
struct PathMetadataKey(CellPath);

Expand Down
18 changes: 18 additions & 0 deletions app/buck2_common/src/dice/file_ops/delegate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use crate::dice::file_ops::delegate::keys::FileOpsKey;
use crate::dice::file_ops::delegate::keys::FileOpsValue;
use crate::dice::file_ops::CheckIgnores;
use crate::external_cells::EXTERNAL_CELLS_IMPL;
use crate::file_ops::DirectorySubListingMatchingOutput;
use crate::file_ops::RawDirEntry;
use crate::file_ops::RawPathMetadata;
use crate::file_ops::ReadDirOutput;
Expand Down Expand Up @@ -314,6 +315,23 @@ impl FileOpsDelegateWithIgnores {
})
}

pub(crate) async fn read_matching_files_from_dir(
&self,
directory: &CellRelativePath,
file_name: &FileNameBuf,
) -> buck2_error::Result<DirectorySubListingMatchingOutput> {
let dir = self.read_dir(directory).await?;
let mut sublisting = Vec::new();
for entry in dir.included.iter() {
if entry.file_name.as_str().to_lowercase() == file_name.as_str() {
sublisting.push(entry.to_owned());
}
}
Ok(DirectorySubListingMatchingOutput {
included: sublisting.into(),
})
}

pub async fn read_path_metadata_if_exists(
&self,
path: &CellRelativePath,
Expand Down
5 changes: 5 additions & 0 deletions app/buck2_common/src/file_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ impl ReadDirOutput {
}
}

#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Dupe, Allocative)]
pub struct DirectorySubListingMatchingOutput {
pub included: Arc<[SimpleDirEntry]>,
}

#[derive(Allocative)]
pub struct FileDigestKind {
_private: (),
Expand Down
4 changes: 4 additions & 0 deletions app/buck2_interpreter/src/paths/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,8 @@ impl PackageFilePath {
pub fn path(&self) -> &CellPath {
&self.path
}

pub fn file_name(&self) -> &FileName {
self.path.path().file_name().unwrap()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use buck2_common::package_listing::listing::PackageListing;
use buck2_core::build_file_path::BuildFilePath;
use buck2_core::cells::build_file_cell::BuildFileCell;
use buck2_core::cells::name::CellName;
use buck2_core::fs::paths::file_name::FileName;
use buck2_core::package::PackageLabel;
use buck2_error::internal_error;
use buck2_error::BuckErrorContext;
Expand Down Expand Up @@ -286,8 +287,9 @@ impl<'c, 'd: 'c> DiceCalculationDelegate<'c, 'd> {
package: PackageLabel,
) -> buck2_error::Result<Option<(PackageFilePath, AstModule, ModuleDeps)>> {
// Note:
// * we are using `read_dir` instead of `read_path_metadata` because
// * it is an extra IO, and `read_dir` is likely already cached.
/// To avoid paying the cost of read_dir when computing if any specific file has changed (e.g. PACKAGE),
/// we depend on directory_sublisting_matching_any_case_key to invalidate all files that match (regardless of case).
/// We need to do this to make sure to work with case-sensitive file paths.
// * `read_path_metadata` would not tell us if the file name is `PACKAGE`
// and not `package` on case-insensitive filesystems.
// We do case-sensitive comparison for `BUCK` files, so we do the same here.
Expand All @@ -311,16 +313,23 @@ impl<'c, 'd: 'c> DiceCalculationDelegate<'c, 'd> {
ctx: &mut DiceComputations,
_cancellation: &CancellationContext,
) -> Self::Value {
// This is cached if evaluating a `PACKAGE` file next to a `BUCK` file.
let dir = DiceFileComputations::read_dir(ctx, self.0.as_cell_path()).await?;
for package_file_path in PackageFilePath::for_dir(self.0.as_cell_path()) {
if !dir.contains(
package_file_path
.path()
.path()
.file_name()
.internal_error("Must have name")?,
) {
let file_name = package_file_path.file_name();
let lower_case_file_name = file_name.as_str().to_lowercase();
let directory_sublisting_output =
DiceFileComputations::directory_sublisting_matching_any_case(
ctx,
self.0.as_cell_path(),
FileName::unchecked_new(&lower_case_file_name),
)
.await?;

let file_exists = directory_sublisting_output
.included
.iter()
.any(|f| f.file_name == file_name);

if !file_exists {
continue;
}
return Ok(Some(Arc::new(package_file_path)));
Expand Down

0 comments on commit 3fdcc16

Please sign in to comment.