Skip to content

Commit 3d1ecfd

Browse files
authored
Merge pull request linkml#155 from linkml/dmbl_importer
add stub of a dbml importer
2 parents 2ae9ba2 + 3b952f0 commit 3d1ecfd

File tree

5 files changed

+193
-8
lines changed

5 files changed

+193
-8
lines changed

docs/packages/importers.rst

+7
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ NCI implements a JSON serialization of ISO-11197. You can import this JSON and c
7777
schemauto import-cadsr "cdes/*.json"
7878
7979
80+
Importing from DBML
81+
--------------------
82+
83+
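DBML (Database Markup Language) is a simple DSL for defining database schemas. It is not SQL itself, but it describes the same concepts as SQL DDL: tables, columns, keys, and references.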
84+
85+
86+
8087
Packages for importing
8188
----------------------
8289

poetry.lock

+18-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ click-default-group = "^1.2.4"
5353
linkml-runtime = "^1.7.2"
5454
duckdb = "^0.10.1"
5555
numpy = "<2.0"
56+
pydbml = "^1.1.2"
5657

5758
[tool.poetry.dev-dependencies]
5859
pytest = ">=7.1.1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from schema_automator.importers.import_engine import ImportEngine
2+
from pydbml import PyDBML
3+
from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition
4+
from dataclasses import dataclass
5+
6+
7+
def _map_dbml_type_to_linkml(dbml_type: str) -> str:
8+
"""
9+
Maps DBML data types to LinkML types.
10+
11+
:param dbml_type: The DBML column type.
12+
:return: Corresponding LinkML type.
13+
"""
14+
type_mapping = {
15+
"int": "integer",
16+
"varchar": "string",
17+
"text": "string",
18+
"float": "float",
19+
"boolean": "boolean",
20+
"date": "date",
21+
"datetime": "datetime",
22+
}
23+
return type_mapping.get(dbml_type.lower(), "string")
24+
25+
26+
@dataclass
class DbmlImportEngine(ImportEngine):
    """
    An ImportEngine that introspects a DBML schema to determine a corresponding LinkML schema.

    Every DBML table becomes a LinkML class and every column becomes a slot.
    """

    def convert(
        self,
        file: str,
        name: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs
    ) -> SchemaDefinition:
        """
        Converts a DBML schema file into a LinkML SchemaDefinition.

        Slots live in the schema-wide ``schema.slots`` namespace. When a later
        table reuses a column name that is already registered, the earlier
        definition is kept and the table-specific definition is attached to
        that table's class through ``slot_usage`` instead of overwriting it.

        :param file: Path to the DBML schema file (read as UTF-8).
        :param name: Optional name for the generated LinkML schema;
            defaults to ``"GeneratedSchema"``.
        :param model_uri: Optional URI used as the schema id; defaults to
            ``https://example.org/<schema name>``.
        :param identifier: Identifier field for the schema. Currently unused
            by this implementation.
        :param kwargs: Accepted but not used by this implementation.
        :return: SchemaDefinition object representing the DBML schema.
        """
        # Initialize the schema definition
        schema_name = name or "GeneratedSchema"
        schema = SchemaDefinition(name=schema_name, id=model_uri or f"https://example.org/{schema_name}")

        # Parse the DBML file
        with open(file, 'r', encoding='utf-8') as f:
            parsed_dbml = PyDBML(f.read())

        # Process tables
        for table in parsed_dbml.tables:
            class_def = ClassDefinition(
                name=table.name,
                description=table.note or f"Auto-generated class for table '{table.name}'",
                slots=[],
                unique_keys=[],  # Initialize unique keys property
            )

            has_primary_key = any(col.pk for col in table.columns)
            unique_columns = [col for col in table.columns if col.unique and not col.pk]

            # Build this table's slots first, so the identifier promotion
            # below edits this table's own definitions and never a same-named
            # slot registered by another table.
            table_slots = {}
            for column in table.columns:
                slot_name = column.name
                is_primary_key = bool(column.pk)
                table_slots[slot_name] = SlotDefinition(
                    name=slot_name,
                    range=_map_dbml_type_to_linkml(column.type),
                    description=column.note or f"Column '{slot_name}'",
                    # Primary-key, unique and NOT NULL columns are all required.
                    required=is_primary_key or bool(column.unique) or bool(column.not_null),
                    identifier=is_primary_key,  # Mark primary key columns as identifiers
                )
                class_def.slots.append(slot_name)

            # Handle single unique column as primary key if no explicit primary key exists
            if not has_primary_key and len(unique_columns) == 1:
                promoted = table_slots[unique_columns[0].name]
                promoted.identifier = True
                promoted.required = True

            for slot_name, slot_def in table_slots.items():
                if slot_name in schema.slots:
                    # Name already taken by an earlier table: keep that global
                    # definition and record this table's variant locally.
                    class_def.slot_usage[slot_name] = slot_def
                else:
                    schema.slots[slot_name] = slot_def

            schema.classes[table.name] = class_def

        return schema
+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import pytest
2+
from linkml_runtime.linkml_model import SchemaDefinition
3+
from schema_automator.importers.dbml_import_engine import DbmlImportEngine
4+
5+
# Sample DBML content for testing: one table with a primary key plus a unique
# column, one table without any key, and one table with a non-integer key.
DBML_SAMPLE = """
Table Users {
  id int [primary key, not null]
  email varchar [unique, not null]
  username varchar
}

Table Orders {
  order_id int [not null]
  user_id int [not null]
  product_id int [not null]
  quantity int
}

Table Countries {
  code varchar [primary key, not null]
  name varchar [not null]
}
"""
25+
26+
@pytest.fixture
def dbml_file(tmp_path):
    """
    Fixture to create a temporary DBML file.

    Writes ``DBML_SAMPLE`` to ``test.dbml`` inside pytest's ``tmp_path``
    directory and returns the path to that file.
    """
    dbml_path = tmp_path / "test.dbml"
    # Written as UTF-8 because the import engine opens the file as UTF-8.
    dbml_path.write_text(DBML_SAMPLE, encoding="utf-8")
    return dbml_path
35+
36+
@pytest.fixture
def importer():
    """
    Fixture that builds a DbmlImportEngine with its default settings.
    """
    engine = DbmlImportEngine()
    return engine
42+
43+
def test_dbml_to_linkml_conversion(dbml_file, importer):
    """
    Test the basic conversion of DBML to a LinkML schema.
    """
    schema = importer.convert(file=str(dbml_file), name="TestSchema")

    # Assert the schema object is created and carries the requested name
    assert isinstance(schema, SchemaDefinition)
    assert schema.name == "TestSchema"

    # Check that every table in the sample became a class
    for table_name in ("Users", "Orders", "Countries"):
        assert table_name in schema.classes

    # Check that expected slots are present and typed
    assert "id" in schema.slots
    assert schema.slots["id"].identifier
    assert schema.slots["id"].required
    assert schema.slots["id"].range == "integer"
60+
61+
62+
def test_primary_key_handling(dbml_file, importer):
    """
    Test correct handling of primary keys and required attributes.
    """
    schema = importer.convert(file=str(dbml_file), name="TestSchema")

    # Check that primary keys are marked as required and identifiers
    users_class = schema.classes["Users"]
    assert "id" in users_class.slots
    assert schema.slots["id"].identifier
    assert schema.slots["id"].required

    # A non-integer primary key is handled the same way
    countries_class = schema.classes["Countries"]
    assert "code" in countries_class.slots
    assert schema.slots["code"].identifier
    assert schema.slots["code"].required

    # A unique column next to an explicit primary key is required, but it is
    # not promoted to identifier
    assert schema.slots["email"].required
    assert not schema.slots["email"].identifier

    # A plain column is neither required nor an identifier
    assert not schema.slots["username"].required
    assert not schema.slots["username"].identifier

    # A table without primary key or unique columns gets no identifier
    assert not schema.slots["order_id"].identifier

0 commit comments

Comments
 (0)