Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise specification after section #121

Merged
merged 3 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions tests/parse/test_parse_vrplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,26 @@ def test_parse_vrplib_raises_data_specification_and_section():
parse_vrplib(instance)


def test_parse_vrplib_raises_when_specification_after_section():
    """
    Tests that parsing fails with a ValueError if a specification line shows
    up after a data section has already started.
    """
    lines = [
        "NODE_COORD_SECTION",
        "1 20 20",
        "NAME: Test",
        "EDGE_WEIGHT_TYPE: EUC_2D",
        "EOF",
    ]
    text = "\n".join(lines)

    # The "NAME" and "EDGE_WEIGHT_TYPE" specifications follow the node
    # coordinate section here, which the parser must reject.
    with assert_raises(ValueError):
        parse_vrplib(text)


def test_empty_text():
"""
Tests if an empty text file is still read correctly.
Expand Down
59 changes: 26 additions & 33 deletions vrplib/parse/parse_vrplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,20 @@ def parse_vrplib(text: str, compute_edge_weights: bool = True) -> Instance:
The instance data.
"""
instance = {}

specs, sections = group_specifications_and_sections(text2lines(text))

for spec in specs:
key, value = parse_specification(spec)
instance[key] = value

for section in sections:
section_name, data = parse_section(section, instance)
name, data = parse_section(section, instance)

if section_name in instance:
name = section_name.upper()
msg = f"'{name}' is used both as a specification and a section."
if name in instance:
msg = f"{name.upper()} is used both as specification and section."
raise ValueError(msg)

instance[section_name] = data # type: ignore
instance[name] = data # type: ignore

if instance and compute_edge_weights and "edge_weight" not in instance:
# Compute edge weights if there was no explicit edge weight section
Expand Down Expand Up @@ -79,6 +77,9 @@ def group_specifications_and_sections(lines: list[str]):
end_section = start + 1

for next_line in lines[start + 1 :]:
if ":" in next_line:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a sufficient test for a specification? (probably yes but can there ever be data in a section with a colon in it?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot think of any data section that must include colons. The only thing I can think of is strings, but we should probably avoid strings in data sections, as they can often be mapped to numerical values. I think we're OK if we define the VRPLIB standard to reserve `:` as a specification identifier.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am excited about having a grammar for this so we can definitively say what's OK and what's not 😆.

raise ValueError("Specification presented after section.")

# The current section ends when a next section or an EOF token
# is found.
if "_SECTION" in next_line or "EOF" in next_line:
Expand Down Expand Up @@ -107,34 +108,26 @@ def parse_section(
lines: list, instance: dict
) -> tuple[str, Union[list, np.ndarray]]:
"""
Parses the data section into numpy arrays.
Parses the data section lines.
"""
section = _remove_suffix(lines[0].strip(), "_SECTION").lower()
data_ = [[infer_type(n) for n in line.split()] for line in lines[1:]]

if section == "edge_weight":
# Parse separately because it may require additional processing
return section, parse_distances(data_, **instance) # type: ignore

if any(len(row) != len(data_[0]) for row in data_):
# This is a ragged array, so we shortcut to avoid casting to np.array.
return section, [row[1:] for row in data_]

data = np.array(data_)

if section == "depot":
# Remove -1 end token and renormalize depots to start at zero
data = data[data != -1] - 1
name = lines[0].strip().removesuffix("_SECTION").lower()
values = [[infer_type(n) for n in line.split()] for line in lines[1:]]

if name == "edge_weight":
# Parse edge weights separately as it involves extra processing.
data = parse_distances(values, **instance) # type: ignore
elif name == "depot":
# Remove -1 end token and renormalize depots to start at zero.
data = np.array(values[0]) - 1
elif any(len(row) != len(values[0]) for row in values):
# This is a ragged array, so we keep it as a nested list, but we
# remove the indices column.
data = [row[1:] for row in values]
else:
# We remove the customer indices column from non-depot section
data = data[:, 1:]

if data.ndim > 1 and data.shape[-1] == 1:
# Squeeze data sections that contain only one column.
data = data.squeeze(-1)

return section, data
data = np.array([row[1:] for row in values])

if data.ndim > 1 and data.shape[-1] == 1:
# Squeeze data lines that contain only one column.
data = data.squeeze(-1)

def _remove_suffix(name: str, suffix: str):
return name[: -len(suffix)] if name.endswith(suffix) else name
return name, data
Loading