-
Notifications
You must be signed in to change notification settings - Fork 1
/
README.html
5923 lines (5381 loc) · 290 KB
/
README.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Semantic bSDD</title>
<meta name="author" content="Vladimir Alexiev, Mihail Radkov, Nataliya Keberle" />
<meta name="keywords" content="Linked building data, LBD, buildingSMART Data Dictionary, bSDD, FAIR data, data quality" />
<meta name="generator" content="Org Mode" />
<style>
#content { max-width: 60em; margin: auto; }
.title { text-align: center;
margin-bottom: .2em; }
.subtitle { text-align: center;
font-size: medium;
font-weight: bold;
margin-top:0; }
.todo { font-family: monospace; color: red; }
.done { font-family: monospace; color: green; }
.priority { font-family: monospace; color: orange; }
.tag { background-color: #eee; font-family: monospace;
padding: 2px; font-size: 80%; font-weight: normal; }
.timestamp { color: #bebebe; }
.timestamp-kwd { color: #5f9ea0; }
.org-right { margin-left: auto; margin-right: 0px; text-align: right; }
.org-left { margin-left: 0px; margin-right: auto; text-align: left; }
.org-center { margin-left: auto; margin-right: auto; text-align: center; }
.underline { text-decoration: underline; }
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
p.verse { margin-left: 3%; }
pre {
border: 1px solid #e6e6e6;
border-radius: 3px;
background-color: #f2f2f2;
padding: 8pt;
font-family: monospace;
overflow: auto;
margin: 1.2em;
}
pre.src {
position: relative;
overflow: auto;
}
pre.src:before {
display: none;
position: absolute;
top: -8px;
right: 12px;
padding: 3px;
color: #555;
background-color: #f2f2f299;
}
pre.src:hover:before { display: inline; margin-top: 14px;}
/* Languages per Org manual */
pre.src-asymptote:before { content: 'Asymptote'; }
pre.src-awk:before { content: 'Awk'; }
pre.src-authinfo::before { content: 'Authinfo'; }
pre.src-C:before { content: 'C'; }
/* pre.src-C++ doesn't work in CSS */
pre.src-clojure:before { content: 'Clojure'; }
pre.src-css:before { content: 'CSS'; }
pre.src-D:before { content: 'D'; }
pre.src-ditaa:before { content: 'ditaa'; }
pre.src-dot:before { content: 'Graphviz'; }
pre.src-calc:before { content: 'Emacs Calc'; }
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
pre.src-fortran:before { content: 'Fortran'; }
pre.src-gnuplot:before { content: 'gnuplot'; }
pre.src-haskell:before { content: 'Haskell'; }
pre.src-hledger:before { content: 'hledger'; }
pre.src-java:before { content: 'Java'; }
pre.src-js:before { content: 'Javascript'; }
pre.src-latex:before { content: 'LaTeX'; }
pre.src-ledger:before { content: 'Ledger'; }
pre.src-lisp:before { content: 'Lisp'; }
pre.src-lilypond:before { content: 'Lilypond'; }
pre.src-lua:before { content: 'Lua'; }
pre.src-matlab:before { content: 'MATLAB'; }
pre.src-mscgen:before { content: 'Mscgen'; }
pre.src-ocaml:before { content: 'Objective Caml'; }
pre.src-octave:before { content: 'Octave'; }
pre.src-org:before { content: 'Org mode'; }
pre.src-oz:before { content: 'OZ'; }
pre.src-plantuml:before { content: 'Plantuml'; }
pre.src-processing:before { content: 'Processing.js'; }
pre.src-python:before { content: 'Python'; }
pre.src-R:before { content: 'R'; }
pre.src-ruby:before { content: 'Ruby'; }
pre.src-sass:before { content: 'Sass'; }
pre.src-scheme:before { content: 'Scheme'; }
pre.src-screen:before { content: 'Gnu Screen'; }
pre.src-sed:before { content: 'Sed'; }
pre.src-sh:before { content: 'shell'; }
pre.src-sql:before { content: 'SQL'; }
pre.src-sqlite:before { content: 'SQLite'; }
/* additional languages in org.el's org-babel-load-languages alist */
pre.src-forth:before { content: 'Forth'; }
pre.src-io:before { content: 'IO'; }
pre.src-J:before { content: 'J'; }
pre.src-makefile:before { content: 'Makefile'; }
pre.src-maxima:before { content: 'Maxima'; }
pre.src-perl:before { content: 'Perl'; }
pre.src-picolisp:before { content: 'Pico Lisp'; }
pre.src-scala:before { content: 'Scala'; }
pre.src-shell:before { content: 'Shell Script'; }
/* Hover label for ebnf2ps source blocks (label text previously misspelled 'ebfn2ps'). */
pre.src-ebnf2ps:before { content: 'ebnf2ps'; }
/* additional language identifiers per "defun org-babel-execute"
in ob-*.el */
pre.src-cpp:before { content: 'C++'; }
pre.src-abc:before { content: 'ABC'; }
pre.src-coq:before { content: 'Coq'; }
pre.src-groovy:before { content: 'Groovy'; }
/* additional language identifiers from org-babel-shell-names in
ob-shell.el: ob-shell is the only babel language using a lambda to put
the execution function name together. */
pre.src-bash:before { content: 'bash'; }
pre.src-csh:before { content: 'csh'; }
pre.src-ash:before { content: 'ash'; }
pre.src-dash:before { content: 'dash'; }
pre.src-ksh:before { content: 'ksh'; }
pre.src-mksh:before { content: 'mksh'; }
pre.src-posh:before { content: 'posh'; }
/* Additional Emacs modes also supported by the LaTeX listings package */
pre.src-ada:before { content: 'Ada'; }
pre.src-asm:before { content: 'Assembler'; }
pre.src-caml:before { content: 'Caml'; }
pre.src-delphi:before { content: 'Delphi'; }
pre.src-html:before { content: 'HTML'; }
pre.src-idl:before { content: 'IDL'; }
pre.src-mercury:before { content: 'Mercury'; }
pre.src-metapost:before { content: 'MetaPost'; }
pre.src-modula-2:before { content: 'Modula-2'; }
pre.src-pascal:before { content: 'Pascal'; }
pre.src-ps:before { content: 'PostScript'; }
pre.src-prolog:before { content: 'Prolog'; }
pre.src-simula:before { content: 'Simula'; }
pre.src-tcl:before { content: 'tcl'; }
pre.src-tex:before { content: 'TeX'; }
pre.src-plain-tex:before { content: 'Plain TeX'; }
pre.src-verilog:before { content: 'Verilog'; }
pre.src-vhdl:before { content: 'VHDL'; }
pre.src-xml:before { content: 'XML'; }
pre.src-nxml:before { content: 'XML'; }
/* add a generic configuration mode; LaTeX export needs an additional
(add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
pre.src-conf:before { content: 'Configuration File'; }
table { border-collapse:collapse; }
caption.t-above { caption-side: top; }
caption.t-bottom { caption-side: bottom; }
td, th { vertical-align:top; }
th.org-right { text-align: center; }
th.org-left { text-align: center; }
th.org-center { text-align: center; }
td.org-right { text-align: right; }
td.org-left { text-align: left; }
td.org-center { text-align: center; }
dt { font-weight: bold; }
.footpara { display: inline; }
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
.equation-container {
display: table;
text-align: center;
width: 100%;
}
.equation {
vertical-align: middle;
}
.equation-label {
display: table-cell;
text-align: right;
vertical-align: middle;
}
.inlinetask {
padding: 10px;
border: 2px solid gray;
margin: 10px;
background: #ffffcc;
}
#org-div-home-and-up
{ text-align: right; font-size: 70%; white-space: nowrap; }
textarea { overflow-x: auto; }
.linenr { font-size: smaller }
.code-highlighted { background-color: #ffff00; }
.org-info-js_info-navigation { border-style: none; }
#org-info-js_console-label
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
.org-info-js_search-highlight
{ background-color: #ffff00; color: #000000; font-weight: bold; }
.org-svg { }
</style>
<style>
/* Project-specific overrides; these come after the default stylesheet above and so win on equal specificity. */

/* Wider content column than the default 60em. */
#content {max-width: 120em; margin: auto}
h1,h2,h3,h4,h5,h6,h7 {font-family: Arial}

/* Remove the vertical gap between the author/date/creator lines (WebKit-prefixed properties only). */
.author, .date, .creator {-webkit-margin-before: 0em; -webkit-margin-after: 0em}

/* Boxed abstract with an automatic "Abstract: " lead-in. */
.abstract {margin: 1em; padding: 1em; border: 1px solid black}
.abstract:before {content: 'Abstract: '; font-weight: bold}

/* Centered preamble text.
   The font size was written as `110%%`, which is not a valid CSS value, so the
   declaration was discarded and the 90% from the default stylesheet stayed in effect.
   NOTE(review): the doubled percent sign looks like a format-string escape that leaked
   into the output; confirm in the Org export settings that generate this block. */
#preamble p {font-size: 110%; margin-left: auto; margin-right: auto; text-align: center}

/* Header cells follow their column alignment (the default stylesheet centers them all). */
th.org-left {text-align:left}
th.org-right {text-align:right}
th.org-center {text-align:center}

/* Compact, bordered table cells. */
th, td {line-height: 1em; border-width: 1px; border-style: solid solid; border-spacing: 2px 2px; padding: 2px 1px}

/* Colors for the custom keyword classes. */
.CANCELED {color: blue}
.MAYBE {color: blue}
.POSTPONED {color: blue}
.SAME {color: blue}
.APPLIED {color: orange}
.FOLLOW {color: orange}
.INPROGRESS {color: orange}
.NEXT {color: orange}
.IER {color: orange}
.underline {text-decoration:underline}
</style>
<link rel="icon" type="image/x-icon" href="img/favicon.ico" />
</head>
<body>
<div id="preamble" class="status">
<p class='author'>Author: Vladimir Alexiev, Mihail Radkov, Nataliya Keberle<br/>(<a href="mailto:[email protected]">[email protected]</a>)</p><p class='date'>Date: 2023-05-25</p><p><img src='./img/SemBSDD-Logo-400px.png' alt='Semantic bSDD logo'/></p>
</div>
<div id="content" class="content">
<h1 class="title">Semantic bSDD
<br />
<span class="subtitle">Improving the GraphQL, JSON and RDF Representations of buildingSmart Data Dictionary</span>
</h1>
<div id="table-of-contents" role="doc-toc">
<h2>Table of Contents</h2>
<div id="text-table-of-contents" role="doc-toc">
<ul>
<li><a href="#abstract">1. Abstract</a></li>
<li><a href="#introduction">2. Introduction</a>
<ul>
<li><a href="#bsdd">2.1. bSDD</a></li>
<li><a href="#graphql-benefits">2.2. GraphQL Benefits</a></li>
<li><a href="#original-graphql-bsdd-schema-voyager">2.3. Original GraphQL bSDD Schema: Voyager</a></li>
<li><a href="#original-graphql-bsdd-schema-problems">2.4. Original GraphQL bSDD Schema: Problems</a></li>
<li><a href="#refactored-graphql-bsdd-schema-voyager">2.5. Refactored GraphQL bSDD Schema: Voyager</a></li>
<li><a href="#refactored-bsdd-schema-plantuml-overview-diagram">2.6. Refactored bSDD Schema: PlantUML Overview Diagram</a></li>
<li><a href="#refactored-bsdd-schema-plantuml-full-diagram">2.7. Refactored bSDD Schema: PlantUML Full Diagram</a></li>
<li><a href="#graphiql-querying-of-original-endpoint">2.8. GraphiQL Querying of Original Endpoint</a></li>
<li><a href="#graphiql-querying-of-refactored-endpoint">2.9. GraphiQL Querying of Refactored Endpoint</a></li>
<li><a href="#files">2.10. Files</a></li>
<li><a href="#endpoints-and-pages">2.11. Endpoints and Pages</a></li>
</ul>
</li>
<li><a href="#original-bsdd-data">3. Original bSDD Data</a>
<ul>
<li><a href="#getting-bsdd-data-dumps">3.1. Getting bSDD Data Dumps</a></li>
<li><a href="#statistics">3.2. Statistics</a>
<ul>
<li><a href="#total-entities">3.2.1. Total Entities</a></li>
<li><a href="#classifications-per-domain">3.2.2. Classifications Per Domain</a></li>
<li><a href="#fields-used">3.2.3. Fields Used</a></li>
<li><a href="#fields-used-in-classification">3.2.4. Fields Used in Classification</a></li>
<li><a href="#fields-used-in-classificationproperty">3.2.5. Fields Used in ClassificationProperty</a></li>
<li><a href="#fields-used-in-property">3.2.6. Fields Used in Property</a></li>
<li><a href="#fields-used-in-propertyvalue">3.2.7. Fields Used in PropertyValue</a></li>
</ul>
</li>
<li><a href="#key-fields">3.3. Key Fields</a>
<ul>
<li><a href="#classificationtype">3.3.1. ClassificationType</a></li>
<li><a href="#propertyvaluekind-of-classificationproperty">3.3.2. PropertyValueKind of ClassificationProperty</a></li>
<li><a href="#propertyvaluekind-of-property">3.3.3. PropertyValueKind of Property</a></li>
<li><a href="#status">3.3.4. Status</a></li>
<li><a href="#dynamic-properties">3.3.5. Dynamic Properties</a></li>
<li><a href="#iswritable-property">3.3.6. isWritable Property</a></li>
<li><a href="#isrequired-property">3.3.7. isRequired Property</a></li>
<li><a href="#domains-with-iswritable-and-isrequired-properties">3.3.8. Domains with isWritable and isRequired Properties</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#suggested-improvements">4. Suggested Improvements</a>
<ul>
<li><a href="#return-the-same-data-across-apis">4.1. Return the Same Data Across APIs</a></li>
<li><a href="#improve-property-names">4.2. Improve Property Names</a></li>
<li><a href="#use-the-same-url-for-data-and-for-web-pages">4.3. Use the Same URL for Data and for Web Pages</a></li>
<li><a href="#time-dependent-search-uris">4.4. Time-Dependent search URIs</a></li>
<li><a href="#improve-url-structure-and-consistency">4.5. Improve URL Structure and Consistency</a>
<ul>
<li><a href="#explicate-domain-versions">4.5.1. Explicate Domain Versions</a></li>
<li><a href="#declare-urls-to-be-id-and-use-a-mandatory-field-id">4.5.2. Declare URLs to be <code>ID</code> and Use a Mandatory Field <code>id</code></a></li>
<li><a href="#overlap-of-entity-classes-with-classificationtype">4.5.3. Overlap of Entity Classes with <code>classificationType</code></a></li>
<li><a href="#property-vs-classificationproperty-use-distinct-urls">4.5.4. Property vs ClassificationProperty: Use Distinct URLs</a></li>
<li><a href="#all-entities-should-have-url">4.5.5. All Entities Should Have URL</a></li>
</ul>
</li>
<li><a href="#modeling-issues">4.6. Modeling Issues</a>
<ul>
<li><a href="#modeling-of-complex-properties">4.6.1. Modeling of Complex Properties</a></li>
<li><a href="#modeling-of-dynamic-properties">4.6.2. Modeling of Dynamic Properties</a></li>
<li><a href="#improve-relations-between-entities">4.6.3. Improve Relations Between Entities</a></li>
<li><a href="#add-more-entities">4.6.4. Add More Entities</a></li>
<li><a href="#use-class-inheritance">4.6.5. Use Class Inheritance</a></li>
<li><a href="#improve-description-of-classificationproperties">4.6.6. Improve Description of ClassificationProperties</a></li>
<li><a href="#improve-representation-of-propertyvalues">4.6.7. Improve Representation of PropertyValues</a></li>
<li><a href="#improve-representation-of-predefinedvalue">4.6.8. Improve Representation of predefinedValue</a></li>
</ul>
</li>
<li><a href="#improve-multilingual-support">4.7. Improve Multilingual Support</a>
<ul>
<li><a href="#self-describing-langstrings">4.7.1. Self-Describing langStrings</a></li>
<li><a href="#language-fallback">4.7.2. Language Fallback</a></li>
<li><a href="#use-language-content-negotiation">4.7.3. Use Language Content Negotiation</a></li>
<li><a href="#improve-lang-tags">4.7.4. Improve Lang Tags</a></li>
</ul>
</li>
<li><a href="#improve-rdf-structure">4.8. Improve RDF Structure</a></li>
<li><a href="#graphql-improvements">4.9. GraphQL Improvements</a>
<ul>
<li><a href="#searchability-and-pagination">4.9.1. Searchability and Pagination</a></li>
<li><a href="#eliminate-parallel-links-between-entities">4.9.2. Eliminate Parallel Links Between Entities</a></li>
<li><a href="#graphql-arrays-and-nullability">4.9.3. GraphQL Arrays and Nullability</a></li>
<li><a href="#null-classifications-error">4.9.4. Null Classifications Error</a></li>
<li><a href="#null-classification-childs-error">4.9.5. Null Classification Childs Error</a></li>
<li><a href="#null-classificationproperty-name-error">4.9.6. Null ClassificationProperty Name Error</a></li>
<li><a href="#missing-domains">4.9.7. Missing Domains</a></li>
<li><a href="#unexpected-multiple-values">4.9.8. Unexpected Multiple Values</a></li>
<li><a href="#deprecated-properties">4.9.9. Deprecated Properties</a></li>
</ul>
</li>
<li><a href="#data-quality-problems">4.10. Data Quality Problems</a>
<ul>
<li><a href="#trim-leading-trailing-consecutive-whitespace">4.10.1. Trim Leading, Trailing, Consecutive Whitespace</a></li>
<li><a href="#improve-physical-quantities-and-units">4.10.2. Improve Physical Quantities and Units</a></li>
<li><a href="#rules-about-missing-data">4.10.3. Rules About Missing Data</a></li>
<li><a href="#unicode-problems">4.10.4. Unicode Problems</a></li>
<li><a href="#unresolved-html-entities">4.10.5. Unresolved HTML Entities</a></li>
<li><a href="#bad-classification-relations">4.10.6. Bad Classification Relations</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#implementing-improvements">5. Implementing Improvements</a>
<ul>
<li><a href="#converting-json-to-raw-rdf-using-sparql-anything">5.1. Converting JSON to Raw RDF using SPARQL Anything</a>
<ul>
<li><a href="#raw-json-example">5.1.1. Raw JSON Example</a></li>
<li><a href="#raw-rdf-example">5.1.2. Raw RDF Example</a></li>
</ul>
</li>
<li><a href="#refactoring-rdf-using-sparql-update">5.2. Refactoring RDF using SPARQL Update</a>
<ul>
<li><a href="#original-rdf-example">5.2.1. Original RDF Example</a></li>
<li><a href="#refactored-rdf-example">5.2.2. Refactored RDF Example</a></li>
</ul>
</li>
<li><a href="#graphql-to-soml-and-back">5.3. GraphQL to SOML and Back</a></li>
<li><a href="#sample-queries">5.4. Sample Queries</a>
<ul>
<li><a href="#domains-with-lang-en-and-their-classifications">5.4.1. Domains with lang=EN and their Classifications</a></li>
<li><a href="#classificationproperties-in-ifc-class-ifcwall">5.4.2. ClassificationProperties in IFC class IfcWall</a></li>
<li><a href="#classifications-of-type-composed-property-and-their-constituent-properties">5.4.3. Classifications of Type COMPOSED_PROPERTY and their Constituent Properties</a></li>
<li><a href="#properties-that-have-connected-properties">5.4.4. Properties that Have Connected Properties</a></li>
<li><a href="#pagination">5.4.5. Pagination</a></li>
<li><a href="#classifications-with-relations">5.4.6. Classifications with Relations</a></li>
<li><a href="#relations-of-classifications">5.4.7. Relations of Classifications</a></li>
<li><a href="#length-properties">5.4.8. Length Properties</a></li>
</ul>
</li>
<li><a href="#graph-visualizations">5.5. Graph Visualizations</a>
<ul>
<li><a href="#composed-property-classifications">5.5.1. "COMPOSED_PROPERTY" Classifications</a></li>
<li><a href="#domain-classifications">5.5.2. "DOMAIN" Classifications</a></li>
<li><a href="#classification-relations">5.5.3. Classification Relations</a></li>
<li><a href="#multivalued-propsets">5.5.4. Multivalued propSets</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#conclusions">6. Conclusions</a>
<ul>
<li><a href="#future-work">6.1. Future Work</a></li>
<li><a href="#acknowledgements">6.2. Acknowledgements</a></li>
</ul>
</li>
</ul>
</div>
</div>
<div id="outline-container-abstract" class="outline-2">
<h2 id="abstract"><span class="section-number-2">1.</span> Abstract</h2>
<div class="outline-text-2" id="text-abstract">
<p>
The buildingSMART Data Dictionary (bSDD) is an important shared resource in the Architecture, Engineering, Construction, and Operations (AECO) domain.
It is a collection of datasets ("domains") that define various classifications (objects representing building components, products, and materials),
their properties, allowed values, etc.
bSDD defines a GraphQL API, as well as REST APIs that return JSON and RDF representations.
This improves the interoperability of bSDD and eases its deployment in architectural Computer Aided Design (CAD) and other AECO software.
</p>
<p>
However, bSDD data is not structured as well as possible, and data retrieved via different APIs is not identical in content and structure.
This lowers bSDD data quality, usability and trust.
</p>
<p>
We conduct a thorough comparison and analysis of bSDD data related to fulfillment of FAIR (findable, accessible, interoperable, and reusable) principles.
Based on this analysis, we suggest enhancements to make bSDD data better structured and more FAIR.
</p>
<p>
We implement many of the suggestions by refactoring the original data to make it better structured/interconnected, and more "semantic".
We provide a SPARQL endpoint using <a href="https://graphdb.ontotext.com/">Ontotext GraphDB</a>, and GraphQL endpoint using <a href="https://platform.ontotext.com/semantic-objects/">Ontotext Platform Semantic Objects</a>.
Our detailed work is available at <a href="https://github.com/Accord-Project/bsdd">https://github.com/Accord-Project/bsdd</a> (open source) and <a href="https://bsdd.ontotext.com">https://bsdd.ontotext.com</a> (home page, schemas, data, sample queries).
</p>
</div>
</div>
<div id="outline-container-introduction" class="outline-2">
<h2 id="introduction"><span class="section-number-2">2.</span> Introduction</h2>
<div class="outline-text-2" id="text-introduction">
</div>
<div id="outline-container-bsdd" class="outline-3">
<h3 id="bsdd"><span class="section-number-3">2.1.</span> bSDD</h3>
<div class="outline-text-3" id="text-bsdd">
<p>
Reusable data dictionaries are widely used for the electronic exchange of product and component information across industries, improving interoperation between systems.
Examples include:
</p>
<ul class="org-ul">
<li><a href="https://cdd.iec.ch/">IEC Common Data Dictionary</a> (IEC CDD): electrical components, units of measure, documents and certificates, etc.</li>
<li><a href="https://eclass.eu/en/">eCl@ss</a>: a product classification and parts description for a variety of industries.</li>
<li>ISO 15926 part 4 <a href="https://rds.posccaesar.org/">Reference Data and Services</a>: for digital information across process plant industries (oil &amp; gas).</li>
<li><a href="https://bsdd.buildingsmart.org/">buildingSMART Data Dictionary (bSDD)</a>: for materials and components in the AECO industry.</li>
</ul>
<p>
The bSDD is a hierarchical dictionary of object concepts (Classifications), their Properties and allowed values used in Building Information Models (BIM).
Property sets are predefined by regulation agencies and vendors and extend common property sets of the Industry Foundation Classes (IFC).
This allows us to describe specific domains (e.g. transportation) and building elements (e.g. doors, windows, stairs).
bSDD is organized according to the ISO 23386 (2020) Methodology to describe, author and maintain properties in interconnected data dictionaries.
This is a language-independent model used for the development of dictionaries according to ISO 12006-3 (2022) Framework for object-oriented information.
</p>
<p>
bSDD was initiated to improve interoperability in the building and construction industry.
bSDD is a comprehensive solution that provisions open product data definitions, identification, and distribution methods.
</p>
<p>
As of February 2023, bSDD keeps descriptions of nearly 80,000 Classifications in 108 domains,
ranging from roads and rails to DIN, Omniclass, Uniclass, IFC extensions, etc.
It is a widely accepted source of BIM reference data.
bSDD uses URLs for nearly all defined entities to enable globalized data use in a variety of AECO applications and structured documents.
</p>
</div>
</div>
<div id="outline-container-graphql-benefits" class="outline-3">
<h3 id="graphql-benefits"><span class="section-number-3">2.2.</span> GraphQL Benefits</h3>
<div class="outline-text-3" id="text-graphql-benefits">
<p>
<a href="https://graphql.org/">GraphQL</a> is an approach to create simplified "facades" over various storages,
and to provide schema, uniform query language, API and runtime
for handling queries, mutations and subscriptions.
It has many benefits over traditional REST APIs:
</p>
<ul class="org-ul">
<li>Avoid over-fetching by specifying exactly which data and in what nested structure should be returned by the server</li>
<li>Data is returned in JSON that is precisely congruent to the shape of the query</li>
<li>Retrieve many resources in a single request; even across storages by using GraphQL Federation</li>
<li>Schema introspection that allows IDEs and query helpers to offer contextual auto-completion at any point in the query</li>
<li>Data validation (for both input through mutations and output through queries) that guarantees type and cardinality conformance (optional/mandatory, single/multi-valued)</li>
</ul>
<p>
bSDD does offer GraphQL access:
</p>
<ul class="org-ul">
<li>Test: <a href="https://test.bsdd.buildingsmart.org/graphiql/">https://test.bsdd.buildingsmart.org/graphiql/</a></li>
<li>Production: <a href="https://api.bsdd.buildingsmart.org/graphqls/">https://api.bsdd.buildingsmart.org/graphqls/</a> (secured endpoint).</li>
<li>NOTE: we worked with bSI to get access to the production endpoint, but due to delays all our analysis is done on data from the test endpoint.
Nevertheless, we believe that most of our findings also apply to the production data.</li>
</ul>
<p>
Both endpoints use <code>GraphiQL</code> - a graphical interactive in-browser <a href="https://github.com/graphql/graphiql/tree/main/packages/graphiql">GraphQL IDE</a>.
</p>
</div>
</div>
<div id="outline-container-original-graphql-bsdd-schema-voyager" class="outline-3">
<h3 id="original-graphql-bsdd-schema-voyager"><span class="section-number-3">2.3.</span> Original GraphQL bSDD Schema: Voyager</h3>
<div class="outline-text-3" id="text-original-graphql-bsdd-schema-voyager">
<p>
<a href="https://ivangoncharov.github.io/graphql-voyager/">GraphQL Voyager</a> (see <a href="https://github.com/IvanGoncharov/graphql-voyager">source</a>) is a visual app
that uses a Schema Introspection query (<code>schemaIntrospection.graphql</code>) to explore a GraphQL endpoint
and displays the schema of the endpoint, allowing the user to search and browse the available types and queries.
</p>
<p>
We wrote a page <a href="https://rawgit2.com/Accord-Project/bsdd/main/bsdd-graphql-voyager-orig.html">bsdd-graphql-voyager-orig</a> (see <code>bsdd-graphql-voyager-orig.html</code>) that deploys Voyager over the bSDD GraphQL endpoint.
We used it to investigate the original bSDD schema:
</p>
<div id="org56ff3c1" class="figure">
<p><img src="./img/bsdd-graphql-voyager-overview.png" alt="GraphQL Voyager overview diagram of the original bSDD schema" />
</p>
<p><span class="figure-number">Figure 1: </span>Original bSDD GraphQL Schema: Overview (uncheck "Show leaf fields")</p>
</div>
<p>
As we can see, bSDD has 12 entities (object types):
</p>
<ul class="org-ul">
<li>Reference entities:
<ul class="org-ul">
<li><code>Country</code></li>
<li><code>Language</code></li>
<li><code>ReferenceDocument</code>, such as a standard</li>
<li><code>Unit</code>: unit of measure</li>
</ul></li>
<li><code>Domain</code>: dataset by a single data provider</li>
<li><code>Property</code>: global property definition
<ul class="org-ul">
<li><code>PropertyRelation</code>: relation between properties</li>
<li><code>PropertyValue</code>: allowed property value for enumerated properties</li>
</ul></li>
<li><code>Classification</code>: object, material, component
<ul class="org-ul">
<li><code>ClassificationRelation</code>: relation between classifications</li>
</ul></li>
<li><code>ClassificationProperty</code>: property that is localized to a classification
<ul class="org-ul">
<li><code>ClassificationPropertyValue</code>: allowed property value for enumerated properties</li>
</ul></li>
</ul>
<p>
We can also look at details of the schema:
</p>
<div id="org24042b6" class="figure">
<p><img src="./img/bsdd-graphql-voyager-Classification-ClassificationProperty.png" alt="GraphQL Voyager detail of Classification and ClassificationProperty in the original bSDD schema" />
</p>
<p><span class="figure-number">Figure 2: </span>Original bSDD GraphQL Schema: Detail of Classification and ClassificationProperty</p>
</div>
<p>
Last but not least, Voyager presents detailed and searchable documentation about the schema;
the same is available in the GraphiQL query tool.
</p>
</div>
</div>
<div id="outline-container-original-graphql-bsdd-schema-problems" class="outline-3">
<h3 id="original-graphql-bsdd-schema-problems"><span class="section-number-3">2.4.</span> Original GraphQL bSDD Schema: Problems</h3>
<div class="outline-text-3" id="text-original-graphql-bsdd-schema-problems">
<p>
Even in the Schema Overview (at a low level of detail) we can notice some defects:
</p>
<ul class="org-ul">
<li>The reference entities (<code>Country, Language, ReferenceDocument, Unit</code>)
are disconnected from the rest of the schema, i.e. not used by the other entities</li>
<li>Relation entities have only an incoming link but no outgoing link.
This means that if you want to get some data of a <code>Classification</code>
and all its related <code>Classifications</code>, you need to issue two queries
because you cannot navigate past <code>ClassificationRelation</code>.</li>
<li>Many entities cannot be queried directly from the root, but have to be reached through their respective "parent" entity.</li>
<li>There are no backward relations (arrows) to get from a lower-level entity back to its "parent" entity.</li>
<li>There are a number of parallel relations (arrows).
This is not needed in GraphQL because the schema can use parameters to distinguish between the different uses.</li>
</ul>
<p>
At the high level of detail we can notice more defects:
</p>
<ul class="org-ul">
<li><code>Property</code> and <code>ClassificationProperty</code> are very similar, but there's no inheritance/relation between them</li>
<li><code>PropertyValue</code> and <code>ClassificationPropertyValue</code> are exactly the same, so they can be reduced to one entity</li>
</ul>
<p>
We'll have a lot more to say about this in further sections.
But first let's look at a refactored (improved) schema.
</p>
</div>
</div>
<div id="outline-container-refactored-graphql-bsdd-schema-voyager" class="outline-3">
<h3 id="refactored-graphql-bsdd-schema-voyager"><span class="section-number-3">2.5.</span> Refactored GraphQL bSDD Schema: Voyager</h3>
<div class="outline-text-3" id="text-refactored-graphql-bsdd-schema-voyager">
<p>
The main purpose of this work is to refactor the bSDD data and schema in order to improve them.
We explain the refactoring process in the last section.
</p>
<p>
But we show the refactored schema here in order to compare it to the original schema.
We wrote the web page <a href="https://rawgit2.com/Accord-Project/bsdd/main/bsdd-graphql-voyager-refact.html">bsdd-graphql-voyager-refact.html</a> (see <code>bsdd-graphql-voyager-refact.html</code>)
that allows you to explore the refactored schema.
</p>
<div id="org690ffde" class="figure">
<p><img src="./img/bsdd-graphql-voyager-refact-overview.png" alt="bsdd-graphql-voyager-refact-overview.png" />
</p>
<p><span class="figure-number">Figure 3: </span>Refactored bSDD GraphQL Schema: Overview (uncheck "Show leaf fields")</p>
</div>
<p>
Improvements:
</p>
<ul class="org-ul">
<li>All entities are queryable directly from the root.
Note: There's a common interface <code>Object</code> that provides functionality common to all entities: the dashed arrows show that each entity implements it.
This creates some clutter in the diagram, but doesn't complicate querying and navigation.</li>
<li>There are no parallel arrows (relations) between entities;
each relation is named the same as the target entity, improving predictability and consistency.</li>
<li>Navigation between entities is bidirectional (e.g. <code>Domain.classification</code> but also <code>Classification.domain</code>),
which is a feature expected of a Knowledge Graph.
<ul class="org-ul">
<li>In particular, the <code>Classification</code> hierarchy can be navigated both up and down (<code>parentClassification, childClassification</code>)</li>
</ul></li>
<li>A query can traverse a <code>Relation</code> entity to get data about the related entity:
<ul class="org-ul">
<li><code>Classification.relation -> ClassificationRelation.related -> Classification</code></li>
<li><code>Property.relation -> PropertyRelation.related -> Property</code></li>
</ul></li>
<li>A single entity <code>PropertyValue</code> is used by both <code>Property</code> and <code>ClassificationProperty</code></li>
</ul>
<p>
This does not fix all defects noted with the original diagram.
The reference entities are still not used by the main entities.
To fix that would require data cleaning work
(e.g., to ensure that Unit code strings used in all Properties and ClassificationProperties are in the reference list).
</p>
<p>
We can also take a look at a detail of the refactored schema. It looks pretty similar,
but all fields are normalized to singular names,
and string fields like <code>propertyNamespaceUri</code> are converted to object fields like <code>Property</code>.
</p>
<div id="org882df5d" class="figure">
<p><img src="./img/bsdd-graphql-voyager-refact-Classification-ClassificationProperty.png" alt="bsdd-graphql-voyager-refact-Classification-ClassificationProperty.png" />
</p>
<p><span class="figure-number">Figure 4: </span>Refactored bSDD GraphQL Schema: Detail of Classification and ClassificationProperty</p>
</div>
</div>
</div>
<div id="outline-container-refactored-bsdd-schema-plantuml-overview-diagram" class="outline-3">
<h3 id="refactored-bsdd-schema-plantuml-overview-diagram"><span class="section-number-3">2.6.</span> Refactored bSDD Schema: PlantUML Overview Diagram</h3>
<div class="outline-text-3" id="text-refactored-bsdd-schema-plantuml-overview-diagram">
<p>
<a href="https://github.com/VladimirAlexiev/soml/tree/master/soml2puml">soml2puml</a> is a new open source tool that uses PlantUML to generate diagrams from SOML schemas.
</p>
<ul class="org-ul">
<li>They include nice visualizations of classes and attribute types using emoji</li>
<li>Unlike Voyager, these are static diagrams. They can still be rendered as SVG, to make them searchable</li>
</ul>
<p>
We used a slightly modified SOML schema <code>bsdd-graphql-soml-withDiagram-noLabel.yaml</code>
that adds <code>diagram</code> keys (node <code>rank</code> and <code>emoji</code>).
</p>
<p>
The overview diagram shows classes but not attributes:
</p>
<div id="orga9ac035" class="figure">
<p><img src="./img/bsdd-graphql-soml-diagram-overview.png" alt="bsdd-graphql-soml-diagram-overview.png" />
</p>
<p><span class="figure-number">Figure 5: </span>Refactored bSDD Schema: PlantUML Overview Diagram</p>
</div>
</div>
</div>
<div id="outline-container-refactored-bsdd-schema-plantuml-full-diagram" class="outline-3">
<h3 id="refactored-bsdd-schema-plantuml-full-diagram"><span class="section-number-3">2.7.</span> Refactored bSDD Schema: PlantUML Full Diagram</h3>
<div class="outline-text-3" id="text-refactored-bsdd-schema-plantuml-full-diagram">
<p>
The full schema uses the same layout, but also adds attributes:
</p>
<div id="orgf1291f4" class="figure">
<p><img src="./img/bsdd-graphql-soml-diagram.png" alt="bsdd-graphql-soml-diagram.png" style="width:1200px" />
</p>
<p><span class="figure-number">Figure 6: </span>Refactored bSDD Schema: PlantUML Full Diagram</p>
</div>
</div>
</div>
<div id="outline-container-graphiql-querying-of-original-endpoint" class="outline-3">
<h3 id="graphiql-querying-of-original-endpoint"><span class="section-number-3">2.8.</span> GraphiQL Querying of Original Endpoint</h3>
<div class="outline-text-3" id="text-graphiql-querying-of-original-endpoint">
<p>
<a href="https://test.bsdd.buildingsmart.org/graphiql">https://test.bsdd.buildingsmart.org/graphiql</a> is the original GraphQL endpoint.
</p>
<div id="org364de7c" class="figure">
<p><img src="./img/graphiql-orig.png" alt="graphiql-orig.png" />
</p>
<p><span class="figure-number">Figure 7: </span>GraphiQL Querying of Original bSDD Endpoint</p>
</div>
<p>
It provides a number of useful features:
</p>
<ul class="org-ul">
<li>Online searchable documentation of the GraphQL schema</li>
<li>Auto-completion of field names and parameters at any point in the query: queries practically "write themselves"!</li>
<li>Ability to parameterize queries through Query Variables</li>
<li>Code formatting (Prettifying) of the query</li>
<li>Syntax highlighting</li>
<li>History of previous queries</li>
<li>JSON results that conform exactly to the form of the query</li>
</ul>
</div>
</div>
<div id="outline-container-graphiql-querying-of-refactored-endpoint" class="outline-3">
<h3 id="graphiql-querying-of-refactored-endpoint"><span class="section-number-3">2.9.</span> GraphiQL Querying of Refactored Endpoint</h3>
<div class="outline-text-3" id="text-graphiql-querying-of-refactored-endpoint">
<p>
<a href="https://bsdd.ontotext.com/graphiql/">https://bsdd.ontotext.com/graphiql/</a> is the refactored GraphQL endpoint:
</p>
<div id="org59d2b28" class="figure">
<p><img src="./img/graphiql-refact.png" alt="graphiql-refact.png" />
</p>
<p><span class="figure-number">Figure 8: </span>GraphiQL Querying of Refactored bSDD Endpoint</p>
</div>
<p>
We have deployed a newer version of GraphiQL that has all the benefits described in the previous section, and adds some more:
</p>
<ul class="org-ul">
<li>A hierarchical Explorer pane that shows the total schema structure and allows you to select fields by clicking rather than typing.
The History and Documentation panes are still present (see toggles at the left edge)</li>
<li>Useful keyboard shortcuts</li>
<li>Search in the query text (in addition to search in the Documentation)</li>
<li>Improved syntax highlighting</li>
<li>Multiple query tabs so you can easily access several queries at once</li>
<li>The query response reports errors in addition to returning data
(this comes from our GraphQL server implementation, not from the GraphiQL version)</li>
</ul>
</div>
</div>
<div id="outline-container-files" class="outline-3">
<h3 id="files"><span class="section-number-3">2.10.</span> Files</h3>
<div class="outline-text-3" id="text-files">
<p>
Description of all files in <a href="https://github.com/Accord-Project/bsdd">https://github.com/Accord-Project/bsdd</a>:
</p>
<ul class="org-ul">
<li><code>bsdd-graphql-schema-orig.json</code>, 116k: original GraphQL schema, obtained with <code>schemaIntrospection.graphql</code></li>
<li><code>bsdd-graphql-schema-refact.json</code>, 867k: refactored GraphQL schema, obtained with <code>schemaIntrospection.graphql</code>.
The endpoint is generated with <a href="https://platform.ontotext.com/semantic-objects/">Ontotext Platform Semantic Objects</a>.
The reason it is so much bigger is that it includes a comprehensive <code>where</code> query language</li>
<li><code>bsdd-graphql-soml-template.yaml</code>: template file for the GraphQL-SOML generator</li>
<li><code>bsdd-graphql-soml-orig.yaml</code>: draft <a href="https://platform.ontotext.com/semantic-objects/soml/index.html">SOML</a> generated from the original GraphQL schema</li>
<li><code>bsdd-graphql-soml-refact.yaml</code>: SOML refactored by hand to make structural improvements. We use this with <a href="https://platform.ontotext.com/semantic-objects/">Ontotext Platform Semantic Objects</a> to generate the refactored GraphQL endpoint</li>
<li><code>bsdd-graphql-soml.patch</code>: difference between the two SOML schemas</li>
<li><code>bsdd-graphql-voyager-orig.html</code>: HTML page that displays the original GraphQL schema with Voyager</li>
<li><code>bsdd-graphql-voyager-refact.html</code>: HTML page that displays the refactored GraphQL schema with Voyager</li>
<li><code>bsdd-ontology.ttl</code>: start of a bSDD ontology, very incomplete</li>
<li><code>index.html</code>: home page source</li>
<li><code>README.org</code>: detailed description of the work we did in Emacs <code>orgmode</code> (this file)</li>
<li><code>README.md</code>: detailed description of the work we did, exported to <code>markdown</code></li>
<li><code>README.html</code>: HTML rendition of the work we did</li>
<li><code>paper</code>: paper submitted to LDAC 2023
<ul class="org-ul">
<li><code>bsdd.bib</code>: bibtex file for the paper, made from the <a href="https://www.zotero.org/groups/3007408/semantic_bim">Zotero semantic BIM library</a> (we don't use it)</li>
<li><code>bsdd.biblatex</code>: biblatex file for the paper, made from the same library (we use this one)</li>
<li><code>Makefile</code>: update the bibliography files</li>
<li><code>paper.md</code>: paper as markdown</li>
<li><code>paper.tex</code>: paper as latex, generated with Scholarly Pandoc</li>
<li><code>paper.pdf</code>: paper as PDF</li>
</ul></li>
<li><code>graphql</code>: GraphQL queries to get the 9 kinds of entities from the original GraphQL endpoint
<ul class="org-ul">
<li>Also <code>schemaIntrospection.graphql</code> that gets the GraphQL schema of an endpoint
(you can find more at the <a href="https://bsdd.ontotext.com/graphiql/">Refactored GraphiQL Endpoint</a>)</li>
</ul></li>
<li><code>scripts</code>: all necessary scripts to export data in various formats, convert bSDD schema to SOML, convert JSON to RDF, refactor RDF, etc
<ul class="org-ul">
<li><code>bsdd2json.py</code>: connects to the original GraphQL API and exports all objects of the GraphQL schema as JSON (domains, classifications, classification properties, etc).
There is no way to get more than 5000 classifications per domain (a limitation of the <code>classificationSearch</code> field)</li>
<li><code>bsdd_export.py</code>: export one kind of bSDD entity using a specific GraphQL query</li>
<li><code>bsdd_graphql_api.py</code>: helper module for working with the bSDD GraphQL endpoint</li>
<li><code>graphql2soml.py</code>: generates a draft SOML from a GraphQL endpoint</li>
<li><code>list-zip.sparql</code>: list all files in a zip using SPARQL Anything</li>
<li><code>rdfize.sparql</code>: RDFize a bSDD JSON file using SPARQL Anything</li>
<li><code>rdfize-folder.sparql</code>: RDFize a folder of bSDD JSON files using SPARQL Anything</li>
<li><code>rdfize-zip.sparql</code>: RDFize a zip of bSDD JSON files using SPARQL Anything (doesn't work: <a href="https://github.com/SPARQL-Anything/sparql.anything/issues/335">https://github.com/SPARQL-Anything/sparql.anything/issues/335</a>)</li>
<li><code>transform.ru</code>: transform (refactor) RDF data. Described in detail below</li>
</ul></li>
<li><code>dump</code>: scripts to get the complete original bSDD data from the original GraphQL endpoint
<ul class="org-ul">
<li><code>docker-compose.yaml, Dockerfile</code>: docker files</li>
<li><code>dump.sh</code>: perform the dump</li>
<li><code>sparql-anything.bat</code>: batch file to invoke SPARQL Anything</li>
</ul></li>
<li><code>samples</code>: sample bSDD entities:
whole sets (e.g. <code>units</code>)
or selected "interesting" entities with most fields filled (e.g. <code>class-IfcWall, prop-Ifc-ACResistance</code>)
<ul class="org-ul">
<li><code>*-orig.json</code>: original files from GraphQL endpoint or JSON API</li>
<li><code>*-orig.ttl</code>: original files from RDF API (not all can be obtained this way, e.g. Domains cannot)</li>
<li><code>*-refact.ttl</code>: refactored RDF files</li>
<li><code>Makefile</code> to export and convert data</li>
</ul></li>
</ul>
</div>
</div>
<div id="outline-container-endpoints-and-pages" class="outline-3">
<h3 id="endpoints-and-pages"><span class="section-number-3">2.11.</span> Endpoints and Pages</h3>
<div class="outline-text-3" id="text-endpoints-and-pages">
<p>
Description of all endpoints that this project worked with and produced:
</p>
<ul class="org-ul">
<li><a href="https://bsdd.ontotext.com">https://bsdd.ontotext.com</a>: home page, includes all of these links</li>
<li><a href="https://bsdd.ontotext.com/README.html">https://bsdd.ontotext.com/README.html</a>: detailed description of the work we did (<a href="https://github.com/Accord-Project/bsdd/issues/14">issue #14</a>: make better version from .md instead of .org)</li>
<li><a href="https://bsdd.ontotext.com/paper/paper.pdf">https://bsdd.ontotext.com/paper/paper.pdf</a>: accepted to LDAC 2023</li>
<li><a href="https://bsdd.ontotext.com/paper/presentation.html">https://bsdd.ontotext.com/paper/presentation.html</a>: presentation for LDAC 2023</li>
<li><a href="https://test.bsdd.buildingsmart.org/graphql/">https://test.bsdd.buildingsmart.org/graphql/</a>: original GraphQL endpoint (protected)</li>
<li><a href="https://test.bsdd.buildingsmart.org/graphiql/">https://test.bsdd.buildingsmart.org/graphiql/</a>: original GraphQL query editor</li>
<li><a href="https://rawgit2.com/Accord-Project/bsdd/main/bsdd-graphql-voyager-orig.html">https://rawgit2.com/Accord-Project/bsdd/main/bsdd-graphql-voyager-orig.html</a>: original GraphQL schema visualization with Voyager</li>
<li><a href="https://bsdd.ontotext.com/platform">https://bsdd.ontotext.com/platform</a>: Semantic Objects workbench: administrative interface for the Ontotext Platform implementing GraphQL (protected)</li>
<li><a href="https://bsdd.ontotext.com/graphql/">https://bsdd.ontotext.com/graphql/</a>: refactored GraphQL endpoint (protected)</li>
<li><a href="https://bsdd.ontotext.com/graphiql/">https://bsdd.ontotext.com/graphiql/</a>: refactored GraphQL query editor</li>
<li><a href="https://rawgit2.com/Accord-Project/bsdd/main/bsdd-graphql-voyager-refact.html">https://rawgit2.com/Accord-Project/bsdd/main/bsdd-graphql-voyager-refact.html</a>: refactored GraphQL schema visualization with Voyager</li>
<li><a href="https://bsdd.ontotext.com/graphdb">https://bsdd.ontotext.com/graphdb</a>: GraphDB Workbench: administrative interface for our semantic database (protected)</li>
<li><a href="https://bsdd.ontotext.com/graphdb/repositories/bsdd">https://bsdd.ontotext.com/graphdb/repositories/bsdd</a>: GraphDB SPARQL endpoint</li>
<li><a href="https://bsdd.ontotext.com/graphdb/sparql">https://bsdd.ontotext.com/graphdb/sparql</a>: GraphDB SPARQL editor</li>
</ul>
</div>
</div>
</div>
<div id="outline-container-original-bsdd-data" class="outline-2">
<h2 id="original-bsdd-data"><span class="section-number-2">3.</span> Original bSDD Data</h2>
<div class="outline-text-2" id="text-original-bsdd-data">
</div>
<div id="outline-container-getting-bsdd-data-dumps" class="outline-3">
<h3 id="getting-bsdd-data-dumps"><span class="section-number-3">3.1.</span> Getting bSDD Data Dumps</h3>
<div class="outline-text-3" id="text-getting-bsdd-data-dumps">
<p>
Although bSDD is available in RDF, we decided to fetch all data in GraphQL JSON due to RDF defects described below.
</p>
<p>
We use the script <code>bsdd2json.py</code> to connect to the original GraphQL API
and export all objects of the GraphQL schema as JSON (domains, classifications, classification properties, etc).
Running this script takes around 10 hours due to the large number of objects.
After it finishes, the exported data will be located in the <code>data/</code> directory (not included in GitHub).
</p>
<p>
To work with the script, use a virtual Python environment:
</p>
<ul class="org-ul">
<li>Create a virtual environment: <code>python3.9 -m venv .venv</code></li>
<li>Activate it: <code>source .venv/bin/activate</code></li>
<li>Install the required libraries: <code>pip install -r scripts/requirements.txt</code></li>
</ul>
<p>
The overall process of the script is:
</p>
<ul class="org-ul">
<li>Export all "root" types with their attributes, but not relations (object properties)
<ul class="org-ul">
<li>Domains in <code>data/domains.json</code> (<code>Domain</code> in GraphQL)</li>
<li>Countries in <code>data/countries.json</code> (<code>Country</code> in GraphQL)</li>
<li>Languages in <code>data/languages.json</code> (<code>Language</code> in GraphQL)</li>
<li>Measurement units in <code>data/units.json</code> (<code>Unit</code> in GraphQL)</li>
<li>Reference documents in <code>data/reference_documents.json</code> (<code>ReferenceDocument</code> in GraphQL)</li>
</ul></li>
<li>For each exported Domain, create a subdirectory and export all Classifications in that domain in separate JSONs (<code>Classification</code> in GraphQL).
This includes nested objects: <code>ClassificationProperty, ClassificationRelation, ClassificationPropertyValue</code></li>
<li>Collect all unique global Properties and export them in <code>properties.json</code> (<code>Property</code> in GraphQL)</li>
</ul>
<p>
On the other hand, <code>bsdd_export.py</code> invokes a given query (with or without a variable, which is the <code>namespaceUri</code> of the entity sought), and returns a JSON response.
Examples:
</p>
<ul class="org-ul">
<li>Get all domains:</li>
</ul>
<div class="org-src-container">
<pre class="src src-sh">python scripts/bsdd_export.py getDomains -o domains.json
</pre>
</div>
<ul class="org-ul">
<li>Get a particular domain:</li>
</ul>
<div class="org-src-container">
<pre class="src src-sh">python scripts/bsdd_export.py getDomain -v <span style="color: #8b2252;">"URI=https://identifier.buildingsmart.org/uri/buildingsmart/ifc-4.3"</span> -o ifc-4.3.json
</pre>
</div>
</div>
</div>
<div id="outline-container-statistics" class="outline-3">
<h3 id="statistics"><span class="section-number-3">3.2.</span> Statistics</h3>
<div class="outline-text-3" id="text-statistics">
<p>
Here we provide various statistics about bSDD data.
Although the bSDD schema is rich, we find that some features are rarely used.
</p>
<ul class="org-ul">
<li>It is possible that the production bSDD endpoint will have higher use of features
(our counts were done on the test endpoint)</li>
<li>The counts were performed on refactored RDF data, but we guarantee that we have not lost data during refactoring</li>
</ul>
</div>
<div id="outline-container-total-entities" class="outline-4">
<h4 id="total-entities"><span class="section-number-4">3.2.1.</span> Total Entities</h4>
<div class="outline-text-4" id="text-total-entities">
<table border="1" cellspacing="0" cellpadding="2" rules="all" frame="box">
<colgroup>
<col class="org-left" />
<col class="org-right" />
<col class="org-left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">type</th>
<th scope="col" class="org-right">c</th>
<th scope="col" class="org-left">Comment</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left">Classification</td>
<td class="org-right">31720</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">ClassificationProperty</td>
<td class="org-right">111566</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">ClassificationRelation</td>
<td class="org-right">6420</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">Country</td>
<td class="org-right">246</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">Domain</td>
<td class="org-right">108</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">Language</td>
<td class="org-right">39</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">Property</td>
<td class="org-right">36069</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">PropertyValue</td>
<td class="org-right">214121</td>
<td class="org-left">We merged <code>PropertyValue</code> and <code>ClassificationPropertyValue</code> because they have the same structure</td>
</tr>
<tr>
<td class="org-left">ReferenceDocument</td>
<td class="org-right">484</td>
<td class="org-left"> </td>
</tr>
<tr>
<td class="org-left">Unit</td>
<td class="org-right">603</td>
<td class="org-left"> </td>
</tr>
</tbody>
</table>
<p>
We used this query but then removed uninteresting RDF classes:
</p>
<div class="org-src-container">
<pre class="src src-sparql"><span style="color: #a020f0;">select</span> <span style="color: #a0522d;">?type</span> (count(*) <span style="color: #a020f0;">as</span> <span style="color: #a0522d;">?c</span>) {
<span style="color: #a0522d;">?x</span> a <span style="color: #a0522d;">?type</span>
} <span style="color: #a020f0;">group</span> <span style="color: #a020f0;">by</span> <span style="color: #a0522d;">?type</span> <span style="color: #a020f0;">order</span> <span style="color: #a020f0;">by</span> <span style="color: #a020f0;">desc</span>(<span style="color: #a0522d;">?c</span>)
</pre>
</div>
</div>
</div>