diff --git a/tests/parse/test_parse_vrplib.py b/tests/parse/test_parse_vrplib.py index 01dae71a..f7473666 100644 --- a/tests/parse/test_parse_vrplib.py +++ b/tests/parse/test_parse_vrplib.py @@ -250,6 +250,26 @@ def test_parse_vrplib_raises_data_specification_and_section(): parse_vrplib(instance) +def test_parse_vrplib_raises_when_specification_after_section(): + """ + Tests that a ValueError is raised when a specification is presented after + a data section. + """ + instance = "\n".join( + [ + "NODE_COORD_SECTION", + "1 20 20", + "NAME: Test", + "EDGE_WEIGHT_TYPE: EUC_2D", + "EOF", + ] + ) + + # Specification after a section is not allowed. + with assert_raises(ValueError): + parse_vrplib(instance) + + def test_empty_text(): """ Tests if an empty text file is still read correctly. diff --git a/vrplib/parse/parse_vrplib.py b/vrplib/parse/parse_vrplib.py index 546b2244..d3533529 100644 --- a/vrplib/parse/parse_vrplib.py +++ b/vrplib/parse/parse_vrplib.py @@ -34,7 +34,6 @@ def parse_vrplib(text: str, compute_edge_weights: bool = True) -> Instance: The instance data. """ instance = {} - specs, sections = group_specifications_and_sections(text2lines(text)) for spec in specs: @@ -42,14 +41,13 @@ def parse_vrplib(text: str, compute_edge_weights: bool = True) -> Instance: instance[key] = value for section in sections: - section_name, data = parse_section(section, instance) + name, data = parse_section(section, instance) - if section_name in instance: - name = section_name.upper() - msg = f"'{name}' is used both as a specification and a section." + if name in instance: + msg = f"{name.upper()} is used both as specification and section." raise ValueError(msg) - instance[section_name] = data # type: ignore + instance[name] = data # type: ignore if instance and compute_edge_weights and "edge_weight" not in instance: # Compute edge weights if there was no explicit edge weight section @@ -79,6 +77,9 @@ def group_specifications_and_sections(lines: list[str]): end_section = start + 1 for next_line in lines[start + 1 :]: + if ":" in next_line: + raise ValueError("Specification presented after section.") + # The current section ends when a next section or an EOF token # is found. if "_SECTION" in next_line or "EOF" in next_line: @@ -107,34 +108,26 @@ def parse_section( lines: list, instance: dict ) -> tuple[str, Union[list, np.ndarray]]: """ - Parses the data section into numpy arrays. + Parses the data section lines. """ - section = _remove_suffix(lines[0].strip(), "_SECTION").lower() - data_ = [[infer_type(n) for n in line.split()] for line in lines[1:]] - - if section == "edge_weight": - # Parse separately because it may require additional processing - return section, parse_distances(data_, **instance) # type: ignore - - if any(len(row) != len(data_[0]) for row in data_): - # This is a ragged array, so we shortcut to avoid casting to np.array. - return section, [row[1:] for row in data_] - - data = np.array(data_) - - if section == "depot": - # Remove -1 end token and renormalize depots to start at zero - data = data[data != -1] - 1 + name = lines[0].strip().removesuffix("_SECTION").lower() + values = [[infer_type(n) for n in line.split()] for line in lines[1:]] + + if name == "edge_weight": + # Parse edge weights separately as it involves extra processing. + data = parse_distances(values, **instance) # type: ignore + elif name == "depot": + # Remove -1 end token and renormalize depots to start at zero. + data = np.array(values[0]) - 1 + elif any(len(row) != len(values[0]) for row in values): + # This is a ragged array, so we keep it as a nested list, but we + # remove the indices column. + data = [row[1:] for row in values] else: - # We remove the customer indices column from non-depot section - data = data[:, 1:] - - if data.ndim > 1 and data.shape[-1] == 1: - # Squeeze data sections that contain only one column. - data = data.squeeze(-1) - - return section, data + data = np.array([row[1:] for row in values]) + if data.ndim > 1 and data.shape[-1] == 1: + # Squeeze data lines that contain only one column. + data = data.squeeze(-1) -def _remove_suffix(name: str, suffix: str): - return name[: -len(suffix)] if name.endswith(suffix) else name + return name, data