Skip to content

Commit

Permalink
parser additions (#2308)
Browse files Browse the repository at this point in the history
* change from ValueError to a better-formatted, Metaflow-specific error for parsers

* support Rye by skipping over non-requirement lines in lockfile

* add unit tests for parsers.

* fix yml parser regex to allow whitespace around the version specifier
  • Loading branch information
saikonen authored Feb 27, 2025
1 parent 744cba9 commit bdd83f7
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 14 deletions.
43 changes: 29 additions & 14 deletions metaflow/plugins/pypi/parsers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# this file can be overridden by extensions as is (e.g. metaflow-nflx-extensions)
from metaflow.exception import MetaflowException


class ParserValueError(MetaflowException):
    """Error raised by the dependency-file parsers for content they reject.

    The parsers in this module raise it in place of the built-in
    ``ValueError`` when a line is not a valid requirement, uses an
    unsupported feature, or specifies Python more than once.
    """

    # NOTE(review): presumably used by MetaflowException as the heading when
    # the error is reported to the user -- confirm in metaflow.exception.
    headline = "Value error"


def requirements_txt_parser(content: str):
Expand Down Expand Up @@ -28,7 +33,7 @@ def requirements_txt_parser(content: str):
Raises
------
ValueError
ParserValueError
If a requirement line is invalid PEP 508 or if environment markers are
detected, or if multiple Python constraints are specified.
"""
Expand All @@ -41,6 +46,10 @@ def requirements_txt_parser(content: str):
for line in content.splitlines():
line = line.strip()

# support Rye lockfiles by skipping lines not compliant with requirements
if line == "-e file:.":
continue

if not line or line.startswith("#"):
continue

Expand All @@ -51,10 +60,10 @@ def requirements_txt_parser(content: str):
try:
req = Requirement(line)
except InvalidRequirement:
raise ValueError(f"Not a valid PEP 508 requirement: '{line}'")
raise ParserValueError(f"Not a valid PEP 508 requirement: '{line}'")

if req.marker is not None:
raise ValueError(
raise ParserValueError(
"Environment markers (e.g. 'platform_system==\"Linux\"') "
f"are not supported for line: '{line}'"
)
Expand All @@ -69,7 +78,9 @@ def requirements_txt_parser(content: str):

if req.name.lower() == "python":
if parsed["python"] is not None and dep_spec:
raise ValueError(f"Multiple Python version specs not allowed: '{line}'")
raise ParserValueError(
f"Multiple Python version specs not allowed: '{line}'"
)
parsed["python"] = dep_spec or None
else:
parsed["packages"][dep_key] = dep_spec
Expand Down Expand Up @@ -104,7 +115,7 @@ def pyproject_toml_parser(content: str):
------
RuntimeError
If no TOML library (tomllib in Python 3.11+ or tomli in earlier versions) is found.
ValueError
ParserValueError
If a dependency is not valid PEP 508, if environment markers are used, or if
multiple Python constraints are specified.
"""
Expand Down Expand Up @@ -138,10 +149,12 @@ def pyproject_toml_parser(content: str):
try:
req = Requirement(dep_line_stripped)
except InvalidRequirement:
raise ValueError(f"Not a valid PEP 508 requirement: '{dep_line_stripped}'")
raise ParserValueError(
f"Not a valid PEP 508 requirement: '{dep_line_stripped}'"
)

if req.marker is not None:
raise ValueError(
raise ParserValueError(
f"Environment markers not supported for line: '{dep_line_stripped}'"
)

Expand All @@ -155,7 +168,7 @@ def pyproject_toml_parser(content: str):

if req.name.lower() == "python":
if parsed["python"] is not None and dep_spec:
raise ValueError(
raise ParserValueError(
f"Multiple Python version specs not allowed: '{dep_line_stripped}'"
)
parsed["python"] = dep_spec or None
Expand All @@ -171,7 +184,7 @@ def conda_environment_yml_parser(content: str):
The file must contain a 'dependencies:' line, after which each dependency line
appears with a '- ' prefix. Python can appear as 'python=3.9', etc.; other
packages as 'numpy=1.21.2' or simply 'numpy'. Non-compliant lines raise ValueError.
packages as 'numpy=1.21.2' or simply 'numpy'. Non-compliant lines raise ParserValueError.
Parameters
----------
Expand All @@ -189,7 +202,7 @@ def conda_environment_yml_parser(content: str):
Raises
------
ValueError
ParserValueError
If the file has malformed lines or unsupported sections.
"""
import re
Expand All @@ -202,7 +215,7 @@ def conda_environment_yml_parser(content: str):
# Basic pattern for lines like "numpy=1.21.2"
# Group 1: package name
# Group 2: optional operator + version (could be "=1.21.2", "==1.21.2", etc.)
line_regex = re.compile(r"^([A-Za-z0-9_\-\.]+)([=<>!~].+)?$")
line_regex = re.compile(r"^([A-Za-z0-9_\-\.]+)(\s*[=<>!~].+\s*)?$")
inline_comment_pattern = re.compile(r"\s+#.*$")

for line in content.splitlines():
Expand All @@ -227,11 +240,13 @@ def conda_environment_yml_parser(content: str):

dep_line = line.lstrip("-").strip()
if dep_line.endswith(":"):
raise ValueError(f"Unsupported subsection '{dep_line}' in environment.yml.")
raise ParserValueError(
f"Unsupported subsection '{dep_line}' in environment.yml."
)

match = line_regex.match(dep_line)
if not match:
raise ValueError(
raise ParserValueError(
f"Line '{dep_line}' is not a valid conda package specifier."
)

Expand All @@ -243,7 +258,7 @@ def conda_environment_yml_parser(content: str):

if pkg_name.lower() == "python":
if python_version is not None and version_spec:
raise ValueError(
raise ParserValueError(
f"Multiple Python version specs detected: '{dep_line}'"
)
python_version = version_spec
Expand Down
116 changes: 116 additions & 0 deletions test/unit/test_pypi_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from metaflow.plugins.pypi.parsers import (
requirements_txt_parser,
conda_environment_yml_parser,
pyproject_toml_parser,
ParserValueError,
)

# Minimal requirements.txt content: two pinned packages and no Python spec.
# Used as the success case in test_requirements_parser.
VALID_REQ = """
dummypkg==1.1.1
anotherpkg==0.0.1
"""

# requirements.txt content mixing pip option lines (`--no-index`, `-e ...`)
# with a regular requirement. test_requirements_parser expects parsing this
# to raise ParserValueError.
INVALID_REQ = """
--no-index
-e dummypkg==1.1.1
anotherpkg==0.0.1
"""

# Lockfile in the format written by Rye: a comment header, an `-e file:.`
# entry and "via" comments between the pinned requirements.
# test_requirements_parser expects requirements_txt_parser to accept it and
# return only the two pinned packages.
VALID_RYE_LOCK = """
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
# generate-hashes: false
# universal: false
-e file:.
dummypkg==1.1.1
# via some-dependency
anotherpkg==0.0.1
# another-dependency
"""

# pyproject.toml content for test_toml_parser, which checks only the
# `dependencies` and `requires-python` values. The remaining project metadata
# is presumably there to make the document resemble a realistic file.
# NOTE(review): the `email` values look like obfuscation residue rather than
# real addresses -- confirm against the repository copy of this test.
VALID_TOML = """
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "test-package"
version = "2020.0.0"
dependencies = [
"dummypkg==1.1.1",
"anotherpkg==0.0.1",
]
requires-python = ">=3.8"
authors = [
{name = "Tester", email = "[email protected]"},
]
maintainers = [
{name = "Tester", email = "[email protected]"}
]
description = "A lengthy project description"
readme = "README.rst"
license = "MIT"
license-files = ["LICEN[CS]E.*"]
keywords = ["parser", "testing"]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python"
]
[project.urls]
Homepage = "https://example.com"
"""

# environment.yml content for test_yml_parser.
# Keep the whitespace around `=` on the dummypkg and python lines: it makes
# sure that loosely written yml is also handled by the parser.
VALID_YML = """
dependencies:
- dummypkg = 1.1.1
- anotherpkg=0.0.1
- python = 3.10.*
"""


def test_yml_parser():
    """Parse VALID_YML and check the Python spec and both package versions."""
    parsed = conda_environment_yml_parser(VALID_YML)

    assert parsed["python"] == "3.10.*"
    # Same package checks as before, expressed as (name, version) pairs.
    for pkg_name, pkg_version in (("dummypkg", "1.1.1"), ("anotherpkg", "0.0.1")):
        assert parsed["packages"][pkg_name] == pkg_version


def test_requirements_parser():
    """Exercise requirements_txt_parser on valid, Rye-lockfile and invalid input.

    Both success cases must yield no Python spec and the two pinned packages;
    the invalid content must raise ParserValueError.
    """
    # success case
    result = requirements_txt_parser(VALID_REQ)

    assert result["python"] is None
    assert result["packages"]["dummypkg"] == "1.1.1"
    assert result["packages"]["anotherpkg"] == "0.0.1"

    # Rye lockfile success case
    result = requirements_txt_parser(VALID_RYE_LOCK)

    assert result["python"] is None
    assert result["packages"]["dummypkg"] == "1.1.1"
    assert result["packages"]["anotherpkg"] == "0.0.1"

    # failures
    try:
        requirements_txt_parser(INVALID_REQ)
    except ParserValueError:
        pass  # expected to raise
    else:
        # Only reached when the parser accepted the invalid content. Keeping
        # this out of the `try` body means it can never be mistaken for the
        # parser's own exception.
        raise AssertionError(
            "parsing invalid content did not raise an expected exception."
        )


def test_toml_parser():
    """Parse VALID_TOML and check requires-python and both dependencies."""
    parsed = pyproject_toml_parser(VALID_TOML)
    python_spec = parsed["python"]

    assert python_spec == ">=3.8"
    # Same package checks as before, expressed as (name, version) pairs.
    for pkg_name, pkg_version in (("dummypkg", "1.1.1"), ("anotherpkg", "0.0.1")):
        assert parsed["packages"][pkg_name] == pkg_version

0 comments on commit bdd83f7

Please sign in to comment.