Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove packages based on dependencies #25

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion conda_subchannel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,25 @@ def configure_parser(parser: argparse.ArgumentParser):
action="append",
help="Remove packages matching this spec. Can be used several times.",
)
parser.add_argument(
"--remove-depends",
action="append",
help="Remove packages that have a dependency on this pattern. Can be used several times.",
)


def execute(args: argparse.Namespace) -> int:
if not any([args.after, args.before, args.keep, args.remove, args.keep_tree, args.prune]):
if not any(
[
args.after,
args.before,
args.keep,
args.remove,
args.keep_tree,
args.prune,
args.remove_depends,
]
):
raise ArgumentError("Please provide at least one filter.")

with Spinner("Syncing source channel"):
Expand All @@ -130,6 +145,7 @@ def execute(args: argparse.Namespace) -> int:
"subdir_datas": subdir_datas,
"specs_to_keep": args.keep,
"specs_to_remove": args.remove,
"specs_depends_to_remove": args.remove_depends,
"specs_to_prune": args.prune,
"trees_to_keep": args.keep_tree,
"after": args.after,
Expand Down
64 changes: 61 additions & 3 deletions conda_subchannel/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import hashlib
import json
import logging
import fnmatch as _fnmatch
import re
from functools import lru_cache
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -32,6 +35,47 @@
log = logging.getLogger(f"conda.{__name__}")


# Copied from https://github.com/conda-forge/conda-forge-repodata-patches-feedstock/blob/2daa1f620d750a16849d9bfe340885ac1ad39dc5/recipe/patch_yaml_utils.py#L47C1-L78C35
@lru_cache(maxsize=32768)
def fnmatch(name, pat):
    """Return whether ``name`` matches the shell-style pattern ``pat``.

    This behaves like the standard library's ``fnmatch.fnmatch`` with one
    extension: the token ``?( *)`` inside the pattern stands for "either
    nothing, or a space followed by anything". That makes it possible to
    match a package name with or without a version constraint while not
    matching other packages that merely share a prefix:

    - ``'numpy*'`` matches ``'numpy'``, ``'numpy >=1'`` and
      ``'numpy-blah >=10'``.
    - ``'numpy?( *)'`` matches only ``'numpy'`` and ``'numpy >=1'``.
    - ``'numpy'`` matches only ``'numpy'``.

    Everything else in the pattern follows Unix shell conventions::

        *       matches everything
        ?       matches any single character
        [seq]   matches any character in seq
        [!seq]  matches any character not in seq

    Both arguments are passed through ``os.path.normcase`` before matching,
    so the comparison is case-insensitive on platforms where ``normcase``
    lowercases its input.

    Results are memoized, so both arguments must be hashable (strings).

    :param name: The string to test, e.g. a dependency such as
        ``'numpy >=1'``.
    :param pat: The pattern to test ``name`` against.
    :return: ``True`` if the whole of ``name`` matches ``pat``, else ``False``.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import os`` being present alongside the other imports.
    import os

    name = os.path.normcase(name)
    pat = os.path.normcase(pat)
    return _fnmatch_build_re(pat)(name) is not None


@lru_cache(maxsize=32768)
def _fnmatch_build_re(pat):
    """Compile ``pat`` and return the ``match`` method of the resulting regex.

    The pattern is split on the custom ``?( *)`` token; each piece is
    translated with the standard library's ``fnmatch.translate`` and the
    pieces are re-joined with a regex fragment meaning "optionally a space
    followed by anything".
    """
    # ``translate`` terminates its output with a two-character end-of-string
    # anchor. It is sliced off every piece so the pieces can be concatenated,
    # and a single anchor is appended to the final expression instead.
    pieces = [_fnmatch.translate(piece)[:-2] for piece in pat.split("?( *)")]
    optional_tail = "(?s:\\ .*)?"
    return re.compile(optional_tail.join(pieces) + "\\Z").match


def _fetch_channel(channel, subdirs=None, repodata_fn=REPODATA_FN):
def fetch(url):
subdir_data = SubdirData(Channel(url), repodata_fn=repodata_fn)
Expand Down Expand Up @@ -74,13 +118,15 @@ def _reduce_index(
subdir_datas: Iterable[SubdirData],
specs_to_keep: Iterable[str | MatchSpec] | None = None,
specs_to_remove: Iterable[str | MatchSpec] | None = None,
specs_depends_to_remove: Iterable[str] | None = None,
specs_to_prune: Iterable[str | MatchSpec] | None = None,
trees_to_keep: Iterable[str | MatchSpec] | None = None,
after: int | None = None,
before: int | None = None,
) -> dict[tuple[str, str], PackageRecord]:
specs_to_keep = [MatchSpec(spec) for spec in (specs_to_keep or ())]
specs_to_remove = [MatchSpec(spec) for spec in (specs_to_remove or ())]
specs_depends_to_remove = specs_depends_to_remove or ()
specs_to_prune = [MatchSpec(spec) for spec in (specs_to_prune or ())]
trees_to_keep = [MatchSpec(spec) for spec in (trees_to_keep or ())]
if trees_to_keep or specs_to_keep or after is not None or before is not None:
Expand Down Expand Up @@ -153,7 +199,14 @@ def _reduce_index(
for key, record in records.items():
if spec.match(record):
to_remove.add(key)


for spec in specs_depends_to_remove:
for key, record in records.items():
for dep in record["depends"]:
if fnmatch(dep, spec):
to_remove.add(key)
break

for key in to_remove:
records.pop(key)

Expand Down Expand Up @@ -206,7 +259,12 @@ def _checksum(path, algorithm, buffersize=65536):
return hash_impl.hexdigest()


def _write_channel_index_html(source_channel: Channel, channel_path: Path, cli_flags: dict[str, Any], served_at: str | None = None):
def _write_channel_index_html(
source_channel: Channel,
channel_path: Path,
cli_flags: dict[str, Any],
served_at: str | None = None,
):
templates_dir = Path(__file__).parent / "templates"
environment = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
template = environment.get_template("channel.j2.html")
Expand Down Expand Up @@ -313,4 +371,4 @@ def _sortkey_package_filenames(fn: str):
if field.isdigit():
build_number = field
break
return name, VersionOrder(version), build_number, ext
return name, VersionOrder(version), build_number, ext
13 changes: 8 additions & 5 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

```
$ conda subchannel --help
usage: conda subchannel -c CHANNEL [--repodata-fn REPODATA_FN] [--base-url BASE_URL] [--output PATH] [--subdir PLATFORM] [--after TIME] [--before TIME] [--keep-tree SPEC] [--keep SPEC] [--remove SPEC] [-h]
usage: conda subchannel -c CHANNEL [--repodata-fn REPODATA_FN] [--base-url BASE_URL] [--output PATH] [--served-at URL] [--subdir PLATFORM] [--after TIME] [--before TIME] [--keep-tree SPEC] [--keep SPEC] [--prune SPEC] [--remove SPEC] [--remove-depends REMOVE_DEPENDS] [-h]

Create subsets of conda channels thanks to CEP-15 metadata

Expand All @@ -17,18 +17,20 @@ options:
'--channel'. Only needed if the user wants to mirror the required packages
separately.
--output PATH Directory where the subchannel repodata.json artifacts will be written to.
--served-at URL URL or location where the subchannel files will be eventually served. Used for the HTML output.
--subdir PLATFORM, --platform PLATFORM
Process records for this platform. Defaults to osx-arm64. noarch is always included. Can be used several times.
--after TIME Timestamp as ts:<float> or date as YYYY-[MM[-DD[-HH[-MM[-SS]]]]]
--before TIME Timestamp as ts:<float> or date as YYYY-[MM[-DD[-HH[-MM[-SS]]]]]
--keep-tree SPEC Keep packages matching this spec and their dependencies. Can be used
several times.
--keep SPEC Keep packages matching this spec only. Can be used several times.
--prune SPEC Remove the distributions of this package name that do not match the
given constraints.
--prune SPEC Remove the distributions of this package name that do not match this spec.
--remove SPEC Remove packages matching this spec. Can be used several times.
--remove-depends REMOVE_DEPENDS
Remove packages that have a dependency on this pattern. Can be used several times.
-h, --help Show this help message and exit.
```
```


## Filtering algorithm
Expand All @@ -40,7 +42,8 @@ In the first phase, we _select_ which records are going to be kept. Everything e
1. A selection list is built. Records in this list are added if:
- They match specs in `--keep-tree`, or any of the dependencies in their tree (assessed recursively).
- They match any of the specs in `--keep`.
- Their timestamp is within the limits marked by `--before` and `--after`, when applicable.
- Their timestamp is within the limits marked by `--before` and `--after`, when applicable.
2. At this point, records that didn't make it to the selection list are removed.
3. The specs defined in `--prune` are processed. Records that have the same name but don't match the spec are removed. Everything else is ignored.
4. Records matching any of the specs in `--remove` are filtered out.
5. Records that have dependencies matching any of the patterns in `--remove-depends` are filtered out.
48 changes: 47 additions & 1 deletion tests/test_subchannel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from conda.models.channel import Channel
from conda.models.match_spec import MatchSpec
from conda.exceptions import ArgumentError, DryRunExit, PackagesNotFoundError
from conda_libmamba_solver.exceptions import LibMambaUnsatisfiableError
from conda.testing import conda_cli # noqa


Expand Down Expand Up @@ -322,4 +323,49 @@ def test_pruned_python(conda_cli, tmp_path):
"--channel",
channel_path,
"nodejs",
)
)


# End-to-end check of ``--remove-depends``: build a subchannel from
# conda-forge with every record depending on ``python_abi 3.9.*`` removed,
# then dry-run two solves against the resulting channel.
def test_remove_depends(conda_cli, tmp_path):
# Dependency pattern handed to --remove-depends.
spec = "python_abi 3.9.*"
channel_path = tmp_path / "channel"
out, err, rc = conda_cli(
"subchannel",
"-c",
"conda-forge",
"--remove-depends",
spec,
"--output",
channel_path,
)
# Echo the CLI output so it is available when the test fails.
print(out)
print(err, file=sys.stderr)
assert rc == 0

# This should be solvable, we didn't remove anything for Python 3.10
# (a successful --dry-run solve surfaces as DryRunExit).
with pytest.raises(DryRunExit):
conda_cli(
"create",
"--dry-run",
"-n",
"unused",
"--override-channels",
"--channel",
channel_path,
"python=3.10",
"pyarrow",
)

# This is expected to fail: records depending on ``python_abi 3.9.*`` were
# removed, so the pyarrow request should be unsatisfiable for Python 3.9.
with pytest.raises(LibMambaUnsatisfiableError):
conda_cli(
"create",
"--dry-run",
"-n",
"unused",
"--override-channels",
"--channel",
channel_path,
"python=3.9",
"pyarrow",
)
Loading