diff --git a/RELEASE.md b/RELEASE.md
index fc0281cc1c..157b65088f 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -19,6 +19,7 @@
 * Upgraded `pip-tools`, which is used by `kedro build-reqs`, to 6.4. This `pip-tools` version requires `pip>=21.2` while [adding support for `pip>=21.3`](https://github.com/jazzband/pip-tools/pull/1501). To upgrade `pip`, please refer to [their documentation](https://pip.pypa.io/en/stable/installing/#upgrading-pip).
 * Relaxed the bounds on the `plotly` requirement for `plotly.PlotlyDataSet`.
 * `kedro pipeline package <pipeline_name>` now raises an error if the `<pipeline_name>` argument doesn't look like a valid Python module path (e.g. has `/` instead of `.`).
+* `kedro pipeline pull` now works when the project requirements contain entries such as `-r`, `--extra-index-url` and local wheel files ([Issue #913](https://github.com/quantumblacklabs/kedro/issues/913)).
 * Fixed slow startup because of catalog processing by reducing the exponential growth of extra processing during `_FrozenDatasets` creations.
 * Removed `.coveragerc` from the Kedro project template. `coverage` settings are now given in `pyproject.toml`.
 * Fixed a bug where packaging or pulling a modular pipeline with the same name as the project's package name would throw an error (or silently pass without including the pipeline source code in the wheel file).
diff --git a/kedro/framework/cli/pipeline.py b/kedro/framework/cli/pipeline.py
index e2b54529ce..672ef4e96e 100644
--- a/kedro/framework/cli/pipeline.py
+++ b/kedro/framework/cli/pipeline.py
@@ -9,7 +9,7 @@
 from importlib import import_module
 from pathlib import Path
 from textwrap import indent
-from typing import Any, List, NamedTuple, Optional, Tuple, Union
+from typing import Any, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
 from zipfile import ZipFile
 
 import click
@@ -320,9 +320,6 @@ def _pull_package(
     package_name = dist_info_file[0].stem.split("-")[0]
     package_metadata = dist_info_file[0] / "METADATA"
 
-    _clean_pycache(temp_dir_path)
-    _install_files(metadata, package_name, temp_dir_path, env, alias)
-
     req_pattern = r"Requires-Dist: (.*?)\n"
     package_reqs = re.findall(req_pattern, package_metadata.read_text())
     if package_reqs:
@@ -331,6 +328,9 @@
         )
         _append_package_reqs(requirements_in, package_reqs, package_name)
 
+    _clean_pycache(temp_dir_path)
+    _install_files(metadata, package_name, temp_dir_path, env, alias)
+
 
 def _pull_packages_from_manifest(metadata: ProjectMetadata) -> None:
     # pylint: disable=import-outside-toplevel
@@ -1103,9 +1103,9 @@ def _append_package_reqs(
     requirements_in: Path, package_reqs: List[str], pipeline_name: str
 ) -> None:
     """Appends modular pipeline requirements to project level requirements.in"""
-    existing_reqs = pkg_resources.parse_requirements(requirements_in.read_text())
-    new_reqs = pkg_resources.parse_requirements(package_reqs)
-    reqs_to_add = set(new_reqs) - set(existing_reqs)
+    existing_reqs = _safe_parse_requirements(requirements_in.read_text())
+    incoming_reqs = _safe_parse_requirements(package_reqs)
+    reqs_to_add = set(incoming_reqs) - set(existing_reqs)
     if not reqs_to_add:
         return
 
@@ -1124,3 +1124,21 @@
         "Use `kedro install --build-reqs` to compile and install the updated list of "
         "requirements."
     )
+
+
+def _safe_parse_requirements(
+    requirements: Union[str, Iterable[str]]
+) -> Set[pkg_resources.Requirement]:
+    """Safely parse a requirement or set of requirements. This effectively replaces
+    pkg_resources.parse_requirements, which blows up with a ValueError as soon as it
+    encounters a requirement it cannot parse (e.g. `-r requirements.txt`). This way
+    we can still extract all the parseable requirements out of a set containing some
+    unparseable requirements.
+    """
+    parseable_requirements = set()
+    for requirement in pkg_resources.yield_lines(requirements):
+        try:
+            parseable_requirements.add(pkg_resources.Requirement.parse(requirement))
+        except ValueError:
+            continue
+    return parseable_requirements
diff --git a/tests/framework/cli/pipeline/test_pipeline_requirements.py b/tests/framework/cli/pipeline/test_pipeline_requirements.py
index 70bbac477f..7a7f04f7ce 100644
--- a/tests/framework/cli/pipeline/test_pipeline_requirements.py
+++ b/tests/framework/cli/pipeline/test_pipeline_requirements.py
@@ -1,15 +1,14 @@
-import pkg_resources
 import pytest
 from click.testing import CliRunner
 
-from kedro.framework.cli.pipeline import _get_wheel_name
+from kedro.framework.cli.pipeline import _get_wheel_name, _safe_parse_requirements
 
 PIPELINE_NAME = "my_pipeline"
 
 # Inspired by test cases given in https://www.python.org/dev/peps/pep-0508/.
 # These are all valid requirement specifications that can be used in both
 # requirements.txt and in METADATA Requires-Dist.
-VALID_REQUIREMENTS = """A
+SIMPLE_REQUIREMENTS = """A
 A.B-C_D
 aa
 name
@@ -27,6 +26,14 @@
 pip @ https://github.com/pypa/pip/archive/1.3.1.zip#sha1=da9234ees
 """
 
+# These requirements can be used in requirements.txt but not in METADATA Requires-Dist.
+# They cannot be parsed by pkg_resources.
+COMPLEX_REQUIREMENTS = """--extra-index-url https://this.wont.work
+-r other_requirements.txt
+./path/to/package.whl
+http://some.website.com/package.whl
+"""
+
 
 @pytest.mark.usefixtures("chdir_to_dummy_project", "cleanup_dist")
 class TestPipelineRequirements:
@@ -73,30 +80,94 @@ def call_pipeline_pull(self, cli, metadata, repo_path):
         )
         assert result.exit_code == 0
 
+    def test_existing_complex_project_requirements_txt(
+        self, fake_project_cli, fake_metadata, fake_package_path, fake_repo_path
+    ):
+        """Pipeline requirements.txt and project requirements.txt, but no project
+        requirements.in."""
+        project_requirements_txt = fake_repo_path / "src" / "requirements.txt"
+        with open(project_requirements_txt, "a", encoding="utf-8") as file:
+            file.write(COMPLEX_REQUIREMENTS)
+        existing_requirements = _safe_parse_requirements(
+            project_requirements_txt.read_text()
+        )
+
+        self.call_pipeline_create(fake_project_cli, fake_metadata)
+        pipeline_requirements_txt = (
+            fake_package_path / "pipelines" / PIPELINE_NAME / "requirements.txt"
+        )
+        pipeline_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
+
+        self.call_pipeline_package(fake_project_cli, fake_metadata)
+        self.call_pipeline_delete(fake_project_cli, fake_metadata)
+        self.call_pipeline_pull(fake_project_cli, fake_metadata, fake_repo_path)
+
+        packaged_requirements = _safe_parse_requirements(SIMPLE_REQUIREMENTS)
+        project_requirements_in = fake_repo_path / "src" / "requirements.in"
+        pulled_requirements = _safe_parse_requirements(
+            project_requirements_in.read_text()
+        )
+        # requirements.in afterwards should be the requirements that already existed in
+        # project requirements.txt + those pulled in from pipeline requirements.txt.
+        # Unparseable COMPLEX_REQUIREMENTS should still be there.
+        assert pulled_requirements == existing_requirements | packaged_requirements
+        assert COMPLEX_REQUIREMENTS in project_requirements_in.read_text()
+
     def test_existing_project_requirements_txt(
         self, fake_project_cli, fake_metadata, fake_package_path, fake_repo_path
     ):
         """Pipeline requirements.txt and project requirements.txt, but no project
         requirements.in."""
+        project_requirements_txt = fake_repo_path / "src" / "requirements.txt"
+        existing_requirements = _safe_parse_requirements(
+            project_requirements_txt.read_text()
+        )
+
         self.call_pipeline_create(fake_project_cli, fake_metadata)
         pipeline_requirements_txt = (
             fake_package_path / "pipelines" / PIPELINE_NAME / "requirements.txt"
         )
-        pipeline_requirements_txt.write_text(VALID_REQUIREMENTS)
+        pipeline_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
 
         self.call_pipeline_package(fake_project_cli, fake_metadata)
         self.call_pipeline_delete(fake_project_cli, fake_metadata)
         self.call_pipeline_pull(fake_project_cli, fake_metadata, fake_repo_path)
 
-        packaged_requirements = pkg_resources.parse_requirements(VALID_REQUIREMENTS)
+        packaged_requirements = _safe_parse_requirements(SIMPLE_REQUIREMENTS)
         project_requirements_in = fake_repo_path / "src" / "requirements.in"
-        pulled_requirements = pkg_resources.parse_requirements(
+        pulled_requirements = _safe_parse_requirements(
             project_requirements_in.read_text()
         )
-        # Packaged requirements expected to be a subset of pulled requirements due to
-        # default project level requirements.txt (e.g. black, flake8), which should be
-        # preserved
-        assert set(packaged_requirements) <= set(pulled_requirements)
+        # requirements.in afterwards should be the requirements that already existed in
+        # project requirements.txt + those pulled in from pipeline requirements.txt.
+        assert pulled_requirements == existing_requirements | packaged_requirements
+
+    def test_existing_complex_project_requirements_in(
+        self, fake_project_cli, fake_metadata, fake_package_path, fake_repo_path
+    ):
+        """Pipeline requirements.txt and a pre-existing project requirements.in."""
+        project_requirements_in = fake_repo_path / "src" / "requirements.in"
+        project_requirements_in.write_text(COMPLEX_REQUIREMENTS)
+        self.call_pipeline_create(fake_project_cli, fake_metadata)
+        pipeline_requirements_txt = (
+            fake_package_path / "pipelines" / PIPELINE_NAME / "requirements.txt"
+        )
+        pipeline_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
+
+        self.call_pipeline_package(fake_project_cli, fake_metadata)
+        self.call_pipeline_delete(fake_project_cli, fake_metadata)
+        self.call_pipeline_pull(fake_project_cli, fake_metadata, fake_repo_path)
+
+        packaged_requirements = _safe_parse_requirements(SIMPLE_REQUIREMENTS)
+        existing_requirements = _safe_parse_requirements(COMPLEX_REQUIREMENTS)
+        pulled_requirements = _safe_parse_requirements(
+            project_requirements_in.read_text()
+        )
+        # requirements.in afterwards should be the requirements that already existed in
+        # project requirements.in + those pulled in from pipeline requirements.txt.
+        # Unparseable COMPLEX_REQUIREMENTS should still be there.
+        assert pulled_requirements == existing_requirements | packaged_requirements
+        assert COMPLEX_REQUIREMENTS in project_requirements_in.read_text()
 
     def test_existing_project_requirements_in(
         self, fake_project_cli, fake_metadata, fake_package_path, fake_repo_path
@@ -109,22 +180,20 @@ def test_existing_project_requirements_in(
         pipeline_requirements_txt = (
             fake_package_path / "pipelines" / PIPELINE_NAME / "requirements.txt"
         )
-        pipeline_requirements_txt.write_text(VALID_REQUIREMENTS)
+        pipeline_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
 
         self.call_pipeline_package(fake_project_cli, fake_metadata)
         self.call_pipeline_delete(fake_project_cli, fake_metadata)
         self.call_pipeline_pull(fake_project_cli, fake_metadata, fake_repo_path)
 
-        packaged_requirements = pkg_resources.parse_requirements(VALID_REQUIREMENTS)
-        existing_requirements = pkg_resources.parse_requirements(initial_dependency)
-        pulled_requirements = pkg_resources.parse_requirements(
+        packaged_requirements = _safe_parse_requirements(SIMPLE_REQUIREMENTS)
+        existing_requirements = _safe_parse_requirements(initial_dependency)
+        pulled_requirements = _safe_parse_requirements(
             project_requirements_in.read_text()
         )
-        # Requirements after pulling a pipeline expected to be the union of
-        # requirements packaged and requirements already existing at project level
-        assert set(pulled_requirements) == set(packaged_requirements) | set(
-            existing_requirements
-        )
+        # requirements.in afterwards should be the requirements that already existed in
+        # project requirements.in + those pulled in from pipeline requirements.txt.
+        assert pulled_requirements == existing_requirements | packaged_requirements
 
     def test_missing_project_requirements_in_and_txt(
         self,
@@ -144,8 +213,8 @@
             fake_package_path / "pipelines" / PIPELINE_NAME / "requirements.txt"
         )
 
-        pipeline_requirements_txt.write_text(VALID_REQUIREMENTS)
-        packaged_requirements = pkg_resources.parse_requirements(VALID_REQUIREMENTS)
+        pipeline_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
+        packaged_requirements = _safe_parse_requirements(SIMPLE_REQUIREMENTS)
 
         self.call_pipeline_package(fake_project_cli, fake_metadata)
         self.call_pipeline_delete(fake_project_cli, fake_metadata)
@@ -155,10 +224,10 @@
         assert not project_requirements_txt.exists()
         assert project_requirements_in.exists()
 
-        pulled_requirements = pkg_resources.parse_requirements(
+        pulled_requirements = _safe_parse_requirements(
             project_requirements_in.read_text()
         )
-        assert set(packaged_requirements) == set(pulled_requirements)
+        assert packaged_requirements == pulled_requirements
 
     def test_no_requirements(
         self,
@@ -192,8 +261,8 @@
             fake_package_path / "pipelines" / PIPELINE_NAME / "requirements.txt"
         )
         project_requirements_txt = fake_repo_path / "src" / "requirements.txt"
-        pipeline_requirements_txt.write_text(VALID_REQUIREMENTS)
-        project_requirements_txt.write_text(VALID_REQUIREMENTS)
+        pipeline_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
+        project_requirements_txt.write_text(SIMPLE_REQUIREMENTS)
 
         self.call_pipeline_package(fake_project_cli, fake_metadata)
         self.call_pipeline_delete(fake_project_cli, fake_metadata)
@@ -203,7 +272,7 @@
         # addition
         project_requirements_in = fake_repo_path / "src" / "requirements.in"
         assert project_requirements_in.exists()
-        assert project_requirements_in.read_text() == VALID_REQUIREMENTS
+        assert project_requirements_in.read_text() == SIMPLE_REQUIREMENTS
 
     def test_no_pipeline_requirements_txt(
         self, fake_project_cli, fake_metadata, fake_repo_path
@@ -235,16 +304,8 @@ def test_empty_pipeline_requirements_txt(
         project_requirements_in = fake_repo_path / "src" / "requirements.in"
         assert not project_requirements_in.exists()
 
-    @pytest.mark.parametrize(
-        "requirement",
-        [
-            "--extra-index-url https://this.wont.work",
-            "-r other_requirements.txt",
-            "./path/to/package.whl",
-            "http://some.website.com/package.whl",
-        ],
-    )
-    def test_invalid_requirements(
+    @pytest.mark.parametrize("requirement", COMPLEX_REQUIREMENTS.splitlines())
+    def test_complex_requirements(
         self, requirement, fake_project_cli, fake_metadata, fake_package_path
     ):
         """Options that are valid in requirements.txt but cannot be packaged using
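
For reviewers, a minimal standalone sketch of the failure mode this patch works around and the lenient parsing it introduces. The requirement strings below are illustrative examples only (not taken from the patch or any real project); the `pkg_resources` calls (`parse_requirements`, `yield_lines`, `Requirement.parse`) are the same setuptools APIs used in the diff:

```python
import pkg_resources

# A mix of entries that is legal in a requirements.txt: two plain specifiers
# plus a pip option and a file reference that pkg_resources cannot parse.
# (Illustrative strings, not from the patch.)
requirements = """black==21.5b1
-r other_requirements.txt
--extra-index-url https://example.com/simple
flake8>=3.7, <4.0
"""

# The stock parser is a generator that raises on the first unparseable line,
# so the two perfectly good specifiers are lost along with the bad ones.
try:
    list(pkg_resources.parse_requirements(requirements))
except ValueError as exc:  # RequirementParseError subclasses ValueError
    print(f"parse_requirements gave up: {exc}")

# The lenient approach _safe_parse_requirements takes in this diff:
# parse line by line and skip whatever fails to parse.
parseable = set()
for line in pkg_resources.yield_lines(requirements):
    try:
        parseable.add(pkg_resources.Requirement.parse(line))
    except ValueError:
        continue

print(sorted(str(req) for req in parseable))  # only the two plain specifiers survive
```

This is also why `_append_package_reqs` only deduplicates against the parseable requirements while leaving the unparseable lines in requirements.in untouched, as the new `test_existing_complex_*` tests assert.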