|
| 1 | +from schema_automator.importers.import_engine import ImportEngine |
| 2 | +from pydbml import PyDBML |
| 3 | +from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition |
| 4 | +from dataclasses import dataclass |
| 5 | + |
| 6 | + |
| 7 | +def _map_dbml_type_to_linkml(dbml_type: str) -> str: |
| 8 | + """ |
| 9 | + Maps DBML data types to LinkML types. |
| 10 | +
|
| 11 | + :param dbml_type: The DBML column type. |
| 12 | + :return: Corresponding LinkML type. |
| 13 | + """ |
| 14 | + type_mapping = { |
| 15 | + "int": "integer", |
| 16 | + "varchar": "string", |
| 17 | + "text": "string", |
| 18 | + "float": "float", |
| 19 | + "boolean": "boolean", |
| 20 | + "date": "date", |
| 21 | + "datetime": "datetime", |
| 22 | + } |
| 23 | + return type_mapping.get(dbml_type.lower(), "string") |
| 24 | + |
| 25 | + |
@dataclass
class DbmlImportEngine(ImportEngine):
    """
    An ImportEngine that introspects a DBML schema to determine a corresponding LinkML schema.
    """

    def convert(
        self,
        file: str,
        name: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs
    ) -> SchemaDefinition:
        """
        Converts a DBML schema file into a LinkML SchemaDefinition.

        Each DBML table becomes a LinkML class and each column becomes a slot
        registered on the schema and referenced from the class. Key handling:

        * a single primary key column becomes the class identifier;
        * a composite primary key is recorded as a unique key on the class,
          because a LinkML class may have at most one identifier slot;
        * if a table has no primary key but exactly one unique column, that
          column is promoted to identifier.

        A slot is required when its column is a primary key column, is
        declared ``not null``, or was promoted to identifier. A ``unique``
        constraint on its own does not make a slot required.

        :param file: Path to the DBML schema file.
        :param name: Optional name for the generated LinkML schema.
        :param model_uri: Optional URI for the schema.
        :param identifier: Identifier field for the schema (currently not used by this method).
        :return: SchemaDefinition object representing the DBML schema.
        """
        # Imported locally so this method stays self-contained with respect
        # to the module-level import list.
        from linkml_runtime.linkml_model.meta import UniqueKey

        # Initialize the schema definition
        schema_name = name or "GeneratedSchema"
        schema = SchemaDefinition(name=schema_name, id=model_uri or f"https://example.org/{schema_name}")

        # Parse the DBML file
        with open(file, 'r', encoding='utf-8') as f:
            dbml_content = f.read()
        parsed_dbml = PyDBML(dbml_content)

        # Process tables
        for table in parsed_dbml.tables:
            class_def = ClassDefinition(
                name=table.name,
                description=table.note or f"Auto-generated class for table '{table.name}'",
                slots=[],
            )

            # Work out which column (if any) identifies a row of this table.
            pk_names = [col.name for col in table.columns if col.pk]
            unique_names = [col.name for col in table.columns if col.unique and not col.pk]

            if len(pk_names) == 1:
                identifier_name = pk_names[0]
            elif not pk_names and len(unique_names) == 1:
                # No explicit primary key: a lone unique column stands in for it.
                identifier_name = unique_names[0]
            else:
                identifier_name = None

            # Process columns
            # NOTE(review): slots live in the schema-wide namespace, so a column
            # name reused by several tables overwrites the earlier definition.
            for column in table.columns:
                slot_name = column.name
                is_identifier = slot_name == identifier_name
                schema.slots[slot_name] = SlotDefinition(
                    name=slot_name,
                    range=_map_dbml_type_to_linkml(column.type),
                    description=column.note or f"Column '{slot_name}'",
                    required=bool(column.pk or column.not_null or is_identifier),
                    identifier=is_identifier,
                )
                class_def.slots.append(slot_name)

            # A composite primary key cannot be expressed with identifier
            # slots, so it is captured as a unique key over its columns.
            if len(pk_names) > 1:
                key_name = f"{table.name}_primary_key"
                class_def.unique_keys[key_name] = UniqueKey(
                    unique_key_name=key_name,
                    unique_key_slots=pk_names,
                )

            schema.classes[table.name] = class_def

        return schema
0 commit comments