From 0722cd88ec8b9c81c1c61c1d49237d084215e851 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Thu, 17 Jul 2025 17:52:47 +0200 Subject: [PATCH 1/2] feat(processing): add module to process inlining Trying to address issue 2813 [1] inconsistencies have been identified WRT how object inlining is behaving depending on the values of `inlined`, `inlined_as_list` and the presence/absence of an identifier in the range class. These inconsistencies appear from the fact that no normalization is happening on schema loading (not only in SchemaLoader, but also in SchemaView) and some consumers apply their own logic. This patch provides a module that should be used on any schema loading (as of now SchemaLoader and SchemaView) to have a common behavior. The code is structured in a way that it covers all potential combinations in an easy to understand manner. Unit testing is also provided for the module. [1]: https://github.com/linkml/linkml/issues/2813 Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/processing/inlining.py | 159 ++++++++++++++++ .../test_inlining_processor.py | 180 ++++++++++++++++++ 2 files changed, 339 insertions(+) create mode 100644 linkml_runtime/processing/inlining.py create mode 100644 tests/test_processing/test_inlining_processor.py diff --git a/linkml_runtime/processing/inlining.py b/linkml_runtime/processing/inlining.py new file mode 100644 index 00000000..6fd7f486 --- /dev/null +++ b/linkml_runtime/processing/inlining.py @@ -0,0 +1,159 @@ +from copy import deepcopy +from logging import Logger +from typing import cast + +from linkml_runtime.linkml_model.meta import ( + ClassDefinitionName, + SchemaDefinition, + SlotDefinition, +) + + +def _create_function_dispatcher(slot: SlotDefinition, logger: Logger) -> None: + """Function dispatcher for slot inlining processing""" + + def set_inlined_and_warn(range_class_has_identifier: bool) -> None: + slot.inlined = True + text_identifier = "with an identifier" if range_class_has_identifier else "without an identifier" + msg = ( + f"Slot '{slot.name}' is requesting for an object {text_identifier} inlining as a " + + "list, but no inlining requested! Forcing `inlined: true`!!" + ) + logger.warning(msg) + + def set_inlined_and_report(range_class_has_identifier: bool) -> None: + slot.inlined = True + text_identifier = "with an identifier" if range_class_has_identifier else "without an identifier" + msg = ( + f"Slot '{slot.name}' is requesting for an object {text_identifier} inlining as a " + + "list, but no inlining requested! Forcing `inlined: true`!!" + ) + logger.info(msg) + + def debug_output(range_class_has_identifier: bool) -> None: + msg = ( + f"Slot '{slot.name}' has a complete inlining specification: " + + f"range class has identifier: {range_class_has_identifier}," + ) + if slot.inlined_as_list is None: + msg += f"`inlined: {slot.inlined}`, `inlined_as_list` unspecified." + else: + msg += f"`inlined: {slot.inlined}` and `inlined_as_list: {slot.inlined_as_list}`" + logger.debug(msg) + + def info(range_class_has_identifier: bool) -> None: + text_identifier = "with an identifier" if range_class_has_identifier else "without an identifier" + msg = ( + f"Slot '{slot.name}' has following illogic or incomplete inlining " + + f"specification: `inlined: {slot.inlined}` and `inlined_as_list: " + + f"{slot.inlined_as_list}` for objects {text_identifier}" + ) + logger.info(msg) + + function_map = { + # OK + (True, True, True): debug_output, + # OK + (True, True, False): debug_output, + # what type of inlining to use? + (True, True, None): info, + # overriding specified value!! + (True, False, True): set_inlined_and_warn, + # why specifying inlining type if no inlining? + (True, False, False): info, + # OK + (True, False, None): debug_output, + # applying implicit default!! + (True, None, True): set_inlined_and_report, + # why specifying inlining type if inlining not requested? + (True, None, False): info, + # no defaults, in-code implicit defaults will apply + (True, None, None): info, + # OK + (False, True, True): debug_output, + # how to select a key for an object without an identifier? + (False, True, False): info, + # no defaults, in-code implicit defaults will apply + (False, True, None): info, + # how to add a reference to an object without an identifier? + (False, False, True): info, + # how to add a reference to an object without an identifier? + (False, False, False): info, + # how to add a reference to an object without an identifier? + (False, False, None): info, + # applying implicit default!! + (False, None, True): set_inlined_and_report, + # why specifying inlining type if inlining not requested? + (False, None, False): info, + # no defaults, in-code implicit defaults will apply + (False, None, None): info, + } + + def dispatch(range_class_has_identifier, inlined, inlined_as_list): + # func = function_map.get((range_class_has_identifier, inlined, inlined_as_list), default_function) + func = function_map.get((range_class_has_identifier, inlined, inlined_as_list)) + return func(range_class_has_identifier) + + return dispatch + + +def process(slot: SlotDefinition, schema_map: dict[str, SchemaDefinition], logger: Logger) -> (bool, bool): + """ + Processing the inlining behavior of a slot, including the type of inlining + (as a list or as a dictionary). + + Processing encompasses analyzing the combination of elements relevant for + object inlining (reporting the result of the analysis with different logging + levels) and enforcing certain values. + + It is important to take into account following: + - slot.inlined and slot.inlined_as_list can have three different values: + True, False or None (if nothing specified in the schema) + - if a class has an identifier is a pure boolean + + Changes to `inlined` are applied directly on the provided slot object. + + :param slot: the slot to process + :param schema: the schema in which the slot is contained + :param logger: the logger to use + """ + + fixed_slot = deepcopy(slot) + # first of all, validate that the values of `inlined` and `inlined_as_list` are legal + # either `True` or `False` (if specified) or `None` (if nothing specified) + for value in ("inlined", "inlined_as_list"): + if getattr(fixed_slot, value) not in (True, False, None): + raise ValueError( + f"Invalid value for '{value}' in the schema for slot " + + f"'{fixed_slot.name}': '{getattr(fixed_slot, value)}'" + ) + range_class = None + for schema in schema_map.values(): + if cast(ClassDefinitionName, fixed_slot.range) in schema.classes: + range_class = schema.classes[cast(ClassDefinitionName, fixed_slot.range)] + break + # range is a type + if range_class is None: + return (None, None) + range_has_identifier = False + for sn in range_class.slots: + for schema in schema_map.values(): + if sn in schema.slots: + range_has_identifier = bool(schema.slots[sn].identifier or schema.slots[sn].key) + break + else: + continue + break + + dispatcher = _create_function_dispatcher(fixed_slot, logger) + dispatcher(range_has_identifier, fixed_slot.inlined, fixed_slot.inlined_as_list) + + return (fixed_slot.inlined, fixed_slot.inlined_as_list) + + +def is_inlined(slot: SlotDefinition, schema_map: dict[str, SchemaDefinition], logger: Logger) -> (bool, bool): + return bool(process(slot, schema_map, logger)[0]) + + +def is_inlined_as_list(slot: SlotDefinition, schema_map: dict[str, SchemaDefinition], logger: Logger) -> (bool, bool): + return bool(process(slot, schema_map, logger)[1]) diff --git a/tests/test_processing/test_inlining_processor.py b/tests/test_processing/test_inlining_processor.py new file mode 100644 index 00000000..8231a349 --- /dev/null +++ b/tests/test_processing/test_inlining_processor.py @@ -0,0 +1,180 @@ +import logging + +import pytest + +from linkml_runtime.linkml_model.meta import ( + ClassDefinition, + SlotDefinition, +) +from linkml_runtime.processing import inlining +from linkml_runtime.utils.schema_builder import SchemaBuilder + + +def prepare_schema(with_identifier, inlined, inlined_as_list): + builder = SchemaBuilder() + + id = SlotDefinition(name="id", identifier=True) + builder.add_slot(id) + + range_class = ClassDefinition(name="RangeClass") + if with_identifier: + range_class.slots = ["id"] + builder.add_class(range_class) + + slot = SlotDefinition(name="slot_under_test", range=range_class.name) + if isinstance(inlined, bool): + slot.inlined = inlined + if isinstance(inlined_as_list, bool): + slot.inlined_as_list = inlined_as_list + builder.add_slot(slot) + + slot_type = SlotDefinition(name="slot_with_type", range="int") + if isinstance(inlined, bool): + slot_type.inlined = inlined + if isinstance(inlined_as_list, bool): + slot_type.inlined_as_list = inlined_as_list + builder.add_slot(slot_type) + + return (slot, slot_type, {"schema": builder.schema}) + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined", "expected_inlined_as_list"), + [ + (True, True, True, True, True), + (True, True, False, True, False), + (True, False, None, False, None), + (False, True, True, True, True), + ], +) +def test_report_ok(with_identifier, inlined, inlined_as_list, expected_inlined, expected_inlined_as_list, caplog): + """Test that combinations that are clear an unproblematic only generate debug output.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.DEBUG) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined == expected_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "DEBUG" + assert " complete inlining specification" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined_as_list"), + [ + # overriding specified `inlined: false` with `inlined: true`!! + (True, False, True, True), + ], +) +def test_force_inlined(with_identifier, inlined, inlined_as_list, expected_inlined_as_list, caplog): + """Test that combinations that end up forcing `inlined: true` does so and generate a warning.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.WARNING) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "WARNING" + assert "Forcing `inlined: true`!!" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined_as_list"), + [ + # applying implicit default!! + (True, None, True, True), + # applying implicit default!! + (False, None, True, True), + ], +) +def test_default_inlined(with_identifier, inlined, inlined_as_list, expected_inlined_as_list, caplog): + """Test that combinations that end up forcing `inlined: true` does so and generate a warning.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.INFO) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "INFO" + assert "Forcing `inlined: true`!!" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined", "expected_inlined_as_list"), + [ + # what type of inlining to use? + (True, True, None, True, None), + # why specifying inlining type if no inlining? + (True, False, False, False, False), + # why specifying inlining type if inlining not requested? + (True, None, False, None, False), + # no defaults, in-code implicit defaults will apply + (True, None, None, None, None), + # how to select a key for an object without an identifier? + (False, True, False, True, False), + # no defaults, in-code implicit defaults will apply + (False, True, None, True, None), + # how to add a reference to an object without an identifier? + (False, False, True, False, True), + # how to add a reference to an object without an identifier? + (False, False, False, False, False), + # how to add a reference to an object without an identifier? + (False, False, None, False, None), + # why specifying inlining type if inlining not requested? + (False, None, False, None, False), + # no defaults, in-code implicit defaults will apply + (False, None, None, None, None), + ], +) +def test_info_inconsistencies( + with_identifier, inlined, inlined_as_list, expected_inlined, expected_inlined_as_list, caplog +): + """Test that combinations that are somehow illogical or incomplete are reported.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.INFO) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined == expected_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "INFO" + assert "illogic or incomplete inlining specification" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list"), + [ + (True, True, True), + (True, True, False), + (True, True, None), + (True, False, True), + (True, False, False), + (True, False, None), + (True, None, True), + (True, None, False), + (True, None, None), + (False, True, True), + (False, True, False), + (False, True, None), + (False, False, True), + (False, False, False), + (False, False, None), + (False, None, True), + (False, None, False), + (False, None, None), + ], +) +def test_slot_type(with_identifier, inlined, inlined_as_list): + """Test that slots with a type as range returns (None, None).""" + logger = logging.getLogger("Test") + _, slot, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined is None + assert fixed_inlined_as_list is None From 7c2a6ec9f0caf934c6d2ad5a831a3d03a1d193c0 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Thu, 17 Jul 2025 22:03:55 +0200 Subject: [PATCH 2/2] feat(schemaview): use inlining processor Use created module to process object inlining. Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/schemaview.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/linkml_runtime/utils/schemaview.py b/linkml_runtime/utils/schemaview.py index 70ee1c00..e2941c78 100644 --- a/linkml_runtime/utils/schemaview.py +++ b/linkml_runtime/utils/schemaview.py @@ -39,6 +39,7 @@ TypeDefinition, TypeDefinitionName, ) +from linkml_runtime.processing import inlining from linkml_runtime.utils.context_utils import map_import, parse_import_map from linkml_runtime.utils.formatutils import camelcase, is_empty, sfx, underscore from linkml_runtime.utils.namespaces import Namespaces @@ -1474,6 +1475,7 @@ def induced_slot( :param imports: include imports closure :return: dynamic slot constructed by inference """ + # print(f"inducing {slot_name}") if class_name: cls = self.get_class(class_name, imports, strict=True) else: @@ -1549,8 +1551,7 @@ def induced_slot( v = self.schema.default_range if v is not None: setattr(induced_slot, metaslot_name, v) - if slot.inlined_as_list: - slot.inlined = True + slot.inlined, slot.inlined_as_list = inlining.process(induced_slot, self.schema_map, logger) if slot.identifier or slot.key: slot.required = True if mangle_name: @@ -1679,16 +1680,27 @@ def is_inlined(self, slot: SlotDefinition, imports: bool = True) -> bool: range = slot.range if range in self.all_classes(): if slot.inlined or slot.inlined_as_list: + # print(f"is_inlined({slot.name}) -> True") return True id_slot = self.get_identifier_slot(range, imports=imports) if id_slot is None: # must be inlined as has no identifier + # print(f"is_inlined({slot.name}) -> True") return True # not explicitly declared inline and has an identifier: assume is ref, not inlined + # print(f"is_inlined({slot.name}) -> False") return False + # print(f"is_inlined({slot.name}) -> False") return False + result = inlining.is_inlined(slot, self.schema_map, logger) + print(f"is_inlined({slot.name}) -> {result}") + # if slot.name == "a_thing_without_id": + # result = True + return result + return inlining.is_inlined(slot, self.schema_map, logger) + def slot_applicable_range_elements(self, slot: SlotDefinition) -> list[ClassDefinitionName]: """Retrieve all applicable metamodel elements for a slot range. @@ -2043,13 +2055,7 @@ def materialize_derived_schema(self) -> SchemaDefinition: if metaslot_val is not None: setattr(slot, metaslot, metaslot_val) slot_range_pk_slot_name = None - if isinstance(slot_range_element, ClassDefinition): - slot_range_pk_slot_name = self.get_identifier_slot(slot_range_element.name, use_key=True) - if not slot_range_pk_slot_name: - slot.inlined = True - slot.inlined_as_list = True - if slot.inlined_as_list: - slot.inlined = True + slot.inlined, slot.inlined_as_list = inlining.process(slot, self.schema_map, logger) if slot.identifier or slot.key: slot.required = True cls.attributes[slot.name] = slot