Skip to content

Commit

Permalink
use_database=True should only consider files in cache_files_urlpath (#134)
Browse files: browse the repository at this point in the history

* use_database=True should only consider files in cache_files_urlpath

* improve test
  • Loading branch information
malmans2 authored Sep 10, 2024
1 parent a23a830 commit e2955fb
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
3 changes: 2 additions & 1 deletion cacholote/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,9 @@ def known_files(self) -> dict[str, int]:
known_files: dict[str, int] = {}
with config.get().instantiated_sessionmaker() as session:
for cache_entry in session.scalars(sa.select(database.CacheEntry)):
files = _get_files_from_cache_entry(cache_entry, key="file:size")
known_files.update(
_get_files_from_cache_entry(cache_entry, key="file:size")
{k: v for k, v in files.items() if k.startswith(self.urldir)}
)
return known_files

Expand Down
23 changes: 23 additions & 0 deletions tests/test_60_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,26 @@ def test_multiple(tmp_path: pathlib.Path) -> None:
clean.clean_cache_files(0)
assert not cached_newpath.exists()
assert cached_oldpath.exists()


@pytest.mark.parametrize("use_database", [True, False])
def test_clean_multiple_urlpaths(tmp_path: pathlib.Path, use_database: bool) -> None:
    """Clean one cache urlpath and check a sibling urlpath is left alone.

    Two one-byte files are cached under ``cache_files/folder1/today`` and
    ``cache_files/folder2/today`` respectively. The cache is then cleaned
    with ``cache_files_urlpath`` pointing at ``folder1`` only; the file
    cached under ``folder1`` must be gone, while the one under ``folder2``
    must still exist. The check runs both with and without ``use_database``.
    """
    local_fs = fsspec.filesystem("file")
    cache_root = tmp_path / "cache_files"
    folder1 = cache_root / "folder1"
    folder2 = cache_root / "folder2"

    # Write the two source files that will be copied into the cache.
    source1 = tmp_path / "file1.txt"
    local_fs.pipe_file(source1, ONE_BYTE)
    source2 = tmp_path / "file2.txt"
    local_fs.pipe_file(source2, ONE_BYTE)

    # Cache each source file under its own urlpath.
    with config.set(cache_files_urlpath=str(folder1 / "today")):
        cached_in_folder1 = pathlib.Path(open_url(source1).path)
    with config.set(cache_files_urlpath=str(folder2 / "today")):
        cached_in_folder2 = pathlib.Path(open_url(source2).path)

    # Clean with the urlpath set to folder1 only.
    with config.set(cache_files_urlpath=str(folder1)):
        clean.clean_cache_files(maxsize=0, use_database=use_database, depth=2)

    assert not cached_in_folder1.exists()
    assert cached_in_folder2.exists()

0 comments on commit e2955fb

Please sign in to comment.