diff --git a/linkml_runtime/processing/inlining.py b/linkml_runtime/processing/inlining.py new file mode 100644 index 00000000..6fd7f486 --- /dev/null +++ b/linkml_runtime/processing/inlining.py @@ -0,0 +1,159 @@ +from copy import deepcopy +from logging import Logger +from typing import cast + +from linkml_runtime.linkml_model.meta import ( + ClassDefinitionName, + SchemaDefinition, + SlotDefinition, +) + + +def _create_function_dispatcher(slot: SlotDefinition, logger: Logger) -> None: + """Function dispatcher for slot inlining processing""" + + def set_inlined_and_warn(range_class_has_identifier: bool) -> None: + slot.inlined = True + text_identifier = "with an identifier" if range_class_has_identifier else "without an identifier" + msg = ( + f"Slot '{slot.name}' is requesting for an object {text_identifier} inlining as a " + + "list, but no inlining requested! Forcing `inlined: true`!!" + ) + logger.warning(msg) + + def set_inlined_and_report(range_class_has_identifier: bool) -> None: + slot.inlined = True + text_identifier = "with an identifier" if range_class_has_identifier else "without an identifier" + msg = ( + f"Slot '{slot.name}' is requesting for an object {text_identifier} inlining as a " + + "list, but no inlining requested! Forcing `inlined: true`!!" + ) + logger.info(msg) + + def debug_output(range_class_has_identifier: bool) -> None: + msg = ( + f"Slot '{slot.name}' has a complete inlining specification: " + + f"range class has identifier: {range_class_has_identifier}," + ) + if slot.inlined_as_list is None: + msg += f"`inlined: {slot.inlined}`, `inlined_as_list` unspecified." + else: + msg += f"`inlined: {slot.inlined}` and `inlined_as_list: {slot.inlined_as_list}`" + logger.debug(msg) + + def info(range_class_has_identifier: bool) -> None: + text_identifier = "with an identifier" if range_class_has_identifier else "without an identifier" + msg = ( + f"Slot '{slot.name}' has following illogic or incomplete inlining " + + f"specification: `inlined: {slot.inlined}` and `inlined_as_list: " + + f"{slot.inlined_as_list}` for objects {text_identifier}" + ) + logger.info(msg) + + function_map = { + # OK + (True, True, True): debug_output, + # OK + (True, True, False): debug_output, + # what type of inlining to use? + (True, True, None): info, + # overriding specified value!! + (True, False, True): set_inlined_and_warn, + # why specifying inlining type if no inlining? + (True, False, False): info, + # OK + (True, False, None): debug_output, + # applying implicit default!! + (True, None, True): set_inlined_and_report, + # why specifying inlining type if inlining not requested? + (True, None, False): info, + # no defaults, in-code implicit defaults will apply + (True, None, None): info, + # OK + (False, True, True): debug_output, + # how to select a key for an object without an identifier? + (False, True, False): info, + # no defaults, in-code implicit defaults will apply + (False, True, None): info, + # how to add a reference to an object without an identifier? + (False, False, True): info, + # how to add a reference to an object without an identifier? + (False, False, False): info, + # how to add a reference to an object without an identifier? + (False, False, None): info, + # applying implicit default!! + (False, None, True): set_inlined_and_report, + # why specifying inlining type if inlining not requested? + (False, None, False): info, + # no defaults, in-code implicit defaults will apply + (False, None, None): info, + } + + def dispatch(range_class_has_identifier, inlined, inlined_as_list): + # func = function_map.get((range_class_has_identifier, inlined, inlined_as_list), default_function) + func = function_map.get((range_class_has_identifier, inlined, inlined_as_list)) + return func(range_class_has_identifier) + + return dispatch + + +def process(slot: SlotDefinition, schema_map: dict[str, SchemaDefinition], logger: Logger) -> (bool, bool): + """ + Processing the inlining behavior of a slot, including the type of inlining + (as a list or as a dictionary). + + Processing encompasses analyzing the combination of elements relevant for + object inlining (reporting the result of the analysis with different logging + levels) and enforcing certain values. + + It is important to take into account following: + - slot.inlined and slot.inlined_as_list can have three different values: + True, False or None (if nothing specified in the schema) + - if a class has an identifier is a pure boolean + + Changes to `inlined` are applied directly on the provided slot object. + + :param slot: the slot to process + :param schema: the schema in which the slot is contained + :param logger: the logger to use + """ + + fixed_slot = deepcopy(slot) + # first of all, validate that the values of `inlined` and `inlined_as_list` are legal + # either `True` or `False` (if specified) or `None` (if nothing specified) + for value in ("inlined", "inlined_as_list"): + if getattr(fixed_slot, value) not in (True, False, None): + raise ValueError( + f"Invalid value for '{value}' in the schema for slot " + + f"'{fixed_slot.name}': '{getattr(fixed_slot, value)}'" + ) + range_class = None + for schema in schema_map.values(): + if cast(ClassDefinitionName, fixed_slot.range) in schema.classes: + range_class = schema.classes[cast(ClassDefinitionName, fixed_slot.range)] + break + # range is a type + if range_class is None: + return (None, None) + range_has_identifier = False + for sn in range_class.slots: + for schema in schema_map.values(): + if sn in schema.slots: + range_has_identifier = bool(schema.slots[sn].identifier or schema.slots[sn].key) + break + else: + continue + break + + dispatcher = _create_function_dispatcher(fixed_slot, logger) + dispatcher(range_has_identifier, fixed_slot.inlined, fixed_slot.inlined_as_list) + + return (fixed_slot.inlined, fixed_slot.inlined_as_list) + + +def is_inlined(slot: SlotDefinition, schema_map: dict[str, SchemaDefinition], logger: Logger) -> (bool, bool): + return bool(process(slot, schema_map, logger)[0]) + + +def is_inlined_as_list(slot: SlotDefinition, schema_map: dict[str, SchemaDefinition], logger: Logger) -> (bool, bool): + return bool(process(slot, schema_map, logger)[1]) diff --git a/linkml_runtime/utils/schemaview.py b/linkml_runtime/utils/schemaview.py index 70ee1c00..e2941c78 100644 --- a/linkml_runtime/utils/schemaview.py +++ b/linkml_runtime/utils/schemaview.py @@ -39,6 +39,7 @@ TypeDefinition, TypeDefinitionName, ) +from linkml_runtime.processing import inlining from linkml_runtime.utils.context_utils import map_import, parse_import_map from linkml_runtime.utils.formatutils import camelcase, is_empty, sfx, underscore from linkml_runtime.utils.namespaces import Namespaces @@ -1474,6 +1475,7 @@ def induced_slot( :param imports: include imports closure :return: dynamic slot constructed by inference """ + # print(f"inducing {slot_name}") if class_name: cls = self.get_class(class_name, imports, strict=True) else: @@ -1549,8 +1551,7 @@ def induced_slot( v = self.schema.default_range if v is not None: setattr(induced_slot, metaslot_name, v) - if slot.inlined_as_list: - slot.inlined = True + slot.inlined, slot.inlined_as_list = inlining.process(induced_slot, self.schema_map, logger) if slot.identifier or slot.key: slot.required = True if mangle_name: @@ -1679,16 +1680,27 @@ def is_inlined(self, slot: SlotDefinition, imports: bool = True) -> bool: range = slot.range if range in self.all_classes(): if slot.inlined or slot.inlined_as_list: + # print(f"is_inlined({slot.name}) -> True") return True id_slot = self.get_identifier_slot(range, imports=imports) if id_slot is None: # must be inlined as has no identifier + # print(f"is_inlined({slot.name}) -> True") return True # not explicitly declared inline and has an identifier: assume is ref, not inlined + # print(f"is_inlined({slot.name}) -> False") return False + # print(f"is_inlined({slot.name}) -> False") return False + result = inlining.is_inlined(slot, self.schema_map, logger) + print(f"is_inlined({slot.name}) -> {result}") + # if slot.name == "a_thing_without_id": + # result = True + return result + return inlining.is_inlined(slot, self.schema_map, logger) + def slot_applicable_range_elements(self, slot: SlotDefinition) -> list[ClassDefinitionName]: """Retrieve all applicable metamodel elements for a slot range. @@ -2043,13 +2055,7 @@ def materialize_derived_schema(self) -> SchemaDefinition: if metaslot_val is not None: setattr(slot, metaslot, metaslot_val) slot_range_pk_slot_name = None - if isinstance(slot_range_element, ClassDefinition): - slot_range_pk_slot_name = self.get_identifier_slot(slot_range_element.name, use_key=True) - if not slot_range_pk_slot_name: - slot.inlined = True - slot.inlined_as_list = True - if slot.inlined_as_list: - slot.inlined = True + slot.inlined, slot.inlined_as_list = inlining.process(slot, self.schema_map, logger) if slot.identifier or slot.key: slot.required = True cls.attributes[slot.name] = slot diff --git a/tests/test_processing/test_inlining_processor.py b/tests/test_processing/test_inlining_processor.py new file mode 100644 index 00000000..8231a349 --- /dev/null +++ b/tests/test_processing/test_inlining_processor.py @@ -0,0 +1,180 @@ +import logging + +import pytest + +from linkml_runtime.linkml_model.meta import ( + ClassDefinition, + SlotDefinition, +) +from linkml_runtime.processing import inlining +from linkml_runtime.utils.schema_builder import SchemaBuilder + + +def prepare_schema(with_identifier, inlined, inlined_as_list): + builder = SchemaBuilder() + + id = SlotDefinition(name="id", identifier=True) + builder.add_slot(id) + + range_class = ClassDefinition(name="RangeClass") + if with_identifier: + range_class.slots = ["id"] + builder.add_class(range_class) + + slot = SlotDefinition(name="slot_under_test", range=range_class.name) + if isinstance(inlined, bool): + slot.inlined = inlined + if isinstance(inlined_as_list, bool): + slot.inlined_as_list = inlined_as_list + builder.add_slot(slot) + + slot_type = SlotDefinition(name="slot_with_type", range="int") + if isinstance(inlined, bool): + slot_type.inlined = inlined + if isinstance(inlined_as_list, bool): + slot_type.inlined_as_list = inlined_as_list + builder.add_slot(slot_type) + + return (slot, slot_type, {"schema": builder.schema}) + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined", "expected_inlined_as_list"), + [ + (True, True, True, True, True), + (True, True, False, True, False), + (True, False, None, False, None), + (False, True, True, True, True), + ], +) +def test_report_ok(with_identifier, inlined, inlined_as_list, expected_inlined, expected_inlined_as_list, caplog): + """Test that combinations that are clear an unproblematic only generate debug output.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.DEBUG) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined == expected_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "DEBUG" + assert " complete inlining specification" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined_as_list"), + [ + # overriding specified `inlined: false` with `inlined: true`!! + (True, False, True, True), + ], +) +def test_force_inlined(with_identifier, inlined, inlined_as_list, expected_inlined_as_list, caplog): + """Test that combinations that end up forcing `inlined: true` does so and generate a warning.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.WARNING) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "WARNING" + assert "Forcing `inlined: true`!!" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined_as_list"), + [ + # applying implicit default!! + (True, None, True, True), + # applying implicit default!! + (False, None, True, True), + ], +) +def test_default_inlined(with_identifier, inlined, inlined_as_list, expected_inlined_as_list, caplog): + """Test that combinations that end up forcing `inlined: true` does so and generate a warning.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.INFO) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "INFO" + assert "Forcing `inlined: true`!!" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list", "expected_inlined", "expected_inlined_as_list"), + [ + # what type of inlining to use? + (True, True, None, True, None), + # why specifying inlining type if no inlining? + (True, False, False, False, False), + # why specifying inlining type if inlining not requested? + (True, None, False, None, False), + # no defaults, in-code implicit defaults will apply + (True, None, None, None, None), + # how to select a key for an object without an identifier? + (False, True, False, True, False), + # no defaults, in-code implicit defaults will apply + (False, True, None, True, None), + # how to add a reference to an object without an identifier? + (False, False, True, False, True), + # how to add a reference to an object without an identifier? + (False, False, False, False, False), + # how to add a reference to an object without an identifier? + (False, False, None, False, None), + # why specifying inlining type if inlining not requested? + (False, None, False, None, False), + # no defaults, in-code implicit defaults will apply + (False, None, None, None, None), + ], +) +def test_info_inconsistencies( + with_identifier, inlined, inlined_as_list, expected_inlined, expected_inlined_as_list, caplog +): + """Test that combinations that are somehow illogical or incomplete are reported.""" + logger = logging.getLogger("Test") + caplog.set_level(logging.INFO) + + slot, _, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined == expected_inlined + assert fixed_inlined_as_list == expected_inlined_as_list + for logrecord in caplog.records: + assert logrecord.levelname == "INFO" + assert "illogic or incomplete inlining specification" in logrecord.message + + +@pytest.mark.parametrize( + ("with_identifier", "inlined", "inlined_as_list"), + [ + (True, True, True), + (True, True, False), + (True, True, None), + (True, False, True), + (True, False, False), + (True, False, None), + (True, None, True), + (True, None, False), + (True, None, None), + (False, True, True), + (False, True, False), + (False, True, None), + (False, False, True), + (False, False, False), + (False, False, None), + (False, None, True), + (False, None, False), + (False, None, None), + ], +) +def test_slot_type(with_identifier, inlined, inlined_as_list): + """Test that slots with a type as range returns (None, None).""" + logger = logging.getLogger("Test") + _, slot, schema_map = prepare_schema(with_identifier, inlined, inlined_as_list) + fixed_inlined, fixed_inlined_as_list = inlining.process(slot, schema_map, logger) + assert fixed_inlined is None + assert fixed_inlined_as_list is None