diff --git a/cacholote/clean.py b/cacholote/clean.py index 62cd7ec..173b49e 100644 --- a/cacholote/clean.py +++ b/cacholote/clean.py @@ -159,8 +159,9 @@ def known_files(self) -> dict[str, int]: known_files: dict[str, int] = {} with config.get().instantiated_sessionmaker() as session: for cache_entry in session.scalars(sa.select(database.CacheEntry)): + files = _get_files_from_cache_entry(cache_entry, key="file:size") known_files.update( - _get_files_from_cache_entry(cache_entry, key="file:size") + {k: v for k, v in files.items() if k.startswith(self.urldir)} ) return known_files diff --git a/tests/test_60_clean.py b/tests/test_60_clean.py index 3deca80..688b7b8 100644 --- a/tests/test_60_clean.py +++ b/tests/test_60_clean.py @@ -388,3 +388,26 @@ def test_multiple(tmp_path: pathlib.Path) -> None: clean.clean_cache_files(0) assert not cached_newpath.exists() assert cached_oldpath.exists() + + +@pytest.mark.parametrize("use_database", [True, False]) +def test_clean_multiple_urlpaths(tmp_path: pathlib.Path, use_database: bool) -> None: + # Create files + tmpfile1 = tmp_path / "file1.txt" + fsspec.filesystem("file").pipe_file(tmpfile1, ONE_BYTE) + tmpfile2 = tmp_path / "file2.txt" + fsspec.filesystem("file").pipe_file(tmpfile2, ONE_BYTE) + + # Copy to cache + path1 = tmp_path / "cache_files" / "folder1" + with config.set(cache_files_urlpath=str(path1 / "today")): + cached_file1 = pathlib.Path(open_url(tmpfile1).path) + path2 = tmp_path / "cache_files" / "folder2" + with config.set(cache_files_urlpath=str(path2 / "today")): + cached_file2 = pathlib.Path(open_url(tmpfile2).path) + + # Clean + with config.set(cache_files_urlpath=str(path1)): + clean.clean_cache_files(maxsize=0, use_database=use_database, depth=2) + assert not cached_file1.exists() + assert cached_file2.exists()