Skip to content

Commit

Permalink
prefetch: ignore file paths
Browse files Browse the repository at this point in the history
Summary:
When fetching file contents, the file paths are used mainly for informational purposes. "prefetch" fetches a lot of files, so the file paths easily add up to a few GB of memory usage. I think it's worth omitting the paths for "prefetch" to save memory.

The downside is that file paths don't show up in certain log messages, and any future smarts that rely on file paths won't work for "prefetch".

Reviewed By: sggutier

Differential Revision: D68355874

fbshipit-source-id: 27867ee532e1806c6476a906ed59b11889e63035
  • Loading branch information
muirdm authored and facebook-github-bot committed Jan 29, 2025
1 parent a905f68 commit 70264f7
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ Prefetch should fail with corruption error
pulling from mono:repo

$ LOG=revisionstore=debug hg prefetch -r ":" 2>&1 | grep "Invalid hash"
* Errors = 1, Error = Some("005d992c5dcf32993668f7cede29d296c494a5d9 A: Invalid hash: 005d992c5dcf32993668f7cede29d296c494a5d9 (expected) != a2e456504a5e61f763f1a0b36a6c247c7541b2b3 (computed)") (glob)
* Errors = 1, Error = Some("005d992c5dcf32993668f7cede29d296c494a5d9 : Invalid hash: 005d992c5dcf32993668f7cede29d296c494a5d9 (expected) != a2e456504a5e61f763f1a0b36a6c247c7541b2b3 (computed)") (glob)
11 changes: 10 additions & 1 deletion eden/scm/sapling/ext/remotefilelog/shallowrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,17 @@ def _prefetch(self, revs, base=None, matcher=None):
if matcher is None:
matcher = self.maybesparsematch(rev)

# Don't store millions of file paths in memory unnecessarily. It maybe
# be useful to turn paths back on to get more info for file specific
# errors.
omit_paths = self.ui.configbool(
"experimental", "prefetch-omit-paths", True
)

with progress.spinner(self.ui, _("computing files")):
walked = walkfiles(repo, ctx, matcher, base)
walked = walkfiles(
repo, ctx, matcher, base, nodes_only=omit_paths
)
if type(files) is set:
files.update(walked)
elif type(files) is list:
Expand Down
11 changes: 7 additions & 4 deletions eden/scm/sapling/scmutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from .node import hex, nullid, short, wdirid, wdirrev
from .pycompat import basestring, isint


if pycompat.iswindows:
from . import scmwindows as scmplatform
else:
Expand Down Expand Up @@ -1567,15 +1566,19 @@ def rootrelpaths(ctx, paths):
return [rootrelpath(ctx, path) for path in paths]


def walkfiles(repo, walkctx, matcher, base=None):
def walkfiles(repo, walkctx, matcher, base=None, nodes_only=False):
"""Return a list (path, filenode) pairs that match the matcher in the given context."""
mf = walkctx.manifest()
if base is None and hasattr(mf, "walkfiles"):
# If there is no base, skip diff and use more efficient walk.
return mf.walkfiles(matcher)
return mf.walkfiles(matcher, nodes_only=nodes_only)
else:
basemf = repo[base or nullid].manifest()
return [(p, n[0]) for p, (n, _o) in mf.diff(basemf, matcher).items() if n[0]]
return [
(p, n[0])
for p, (n, _o) in mf.diff(basemf, matcher, nodes_only=nodes_only).items()
if n[0]
]


def publicbase(repo, ctx):
Expand Down
22 changes: 17 additions & 5 deletions eden/scm/saplingnative/bindings/modules/pymanifest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,16 +174,24 @@ py_class!(pub class treemanifest |py| {
}

/// Like walk(), but includes file node as well.
def walkfiles(&self, pymatcher: PyObject) -> PyResult<Vec<(PyPathBuf, PyBytes)>> {
def walkfiles(&self, pymatcher: PyObject, nodes_only: bool = false) -> PyResult<Vec<(PyPathBuf, PyBytes)>> {
let tree = self.underlying(py);

let (matcher, is_rust_matcher) = extract_matcher(py, pymatcher)?;

let make_path = |path: RepoPathBuf| -> PyPathBuf {
if nodes_only {
PyPathBuf::default()
} else {
path.into()
}
};

if is_rust_matcher {
tree.read().files(matcher)
.map(|file| {
let file = file?;
Ok((file.path.into(), PyBytes::new(py, file.meta.hgid.as_ref())))
Ok((make_path(file.path), PyBytes::new(py, file.meta.hgid.as_ref())))
})
.collect::<Result<Vec<_>>>().map_pyerr(py)
} else {
Expand All @@ -194,7 +202,7 @@ py_class!(pub class treemanifest |py| {
let mut result = Vec::new();
for entry in files.into_iter() {
let file = entry.map_pyerr(py)?;
result.push((file.path.into(), PyBytes::new(py, file.meta.hgid.as_ref())));
result.push((make_path(file.path), PyBytes::new(py, file.meta.hgid.as_ref())));
}
Ok(result)
}
Expand Down Expand Up @@ -300,7 +308,7 @@ py_class!(pub class treemanifest |py| {
/// Diff between two treemanifests.
///
/// Return a dict of {path: (left, right)}, where left and right are (file_hgid, file_type) tuple.
def diff(&self, other: &treemanifest, matcher: Option<PyObject> = None) -> PyResult<PyDict> {
def diff(&self, other: &treemanifest, matcher: Option<PyObject> = None, nodes_only: bool = false) -> PyResult<PyDict> {
fn convert_side_diff(
py: Python,
entry: Option<FileMetadata>
Expand All @@ -323,7 +331,11 @@ py_class!(pub class treemanifest |py| {
this_tree.read().diff(&other_tree.read(), matcher)?.collect()
}).map_pyerr(py)?;
for entry in results {
let path = PyPathBuf::from(entry.path);
let path = if nodes_only {
PyPathBuf::default()
} else {
PyPathBuf::from(entry.path)
};
let diff_left = convert_side_diff(py, entry.diff_type.left());
let diff_right = convert_side_diff(py, entry.diff_type.right());
result.set_item(py, path, (diff_left, diff_right))?;
Expand Down

0 comments on commit 70264f7

Please sign in to comment.