-
Notifications
You must be signed in to change notification settings - Fork 0
/
presentation.tex
867 lines (713 loc) · 35.6 KB
/
presentation.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
% SSSOM/T and Uberon presentation
% © 2024 Damien Goutte-Gattat
%
% This work is released under the Creative Commons Attribution 4.0
% International License. To view a copy of this license, visit
% https://creativecommons.org/licenses/by/4.0/.
\usepackage{fontspec}
\usepackage{polyglossia}
\usepackage{tikz}
\setmainlanguage{english}
\usetheme{cambridge}
\graphicspath{{imgs/}{svgs/}}
% TikZ libraries and styles
\usetikzlibrary{positioning,shapes.geometric,graphs,graphdrawing,quotes,fit,overlay-beamer-styles}
\usegdlibrary{layered,trees}
\colorlet{GenericItem}{yellow!20!white}
\colorlet{GenericHighlight}{red!20!white}
\tikzset{Term/.style={draw,fill=yellow!20!white,font=\tiny\bfseries}}
\tikzset{Relation/.style={draw=blue!40!white,thick,arrows=<-}}
\tikzset{RelationLabel/.style={shape=circle,fill=white,draw=black,thin,font=\tiny\bfseries,inner sep=1pt}}
\tikzset{File/.style={draw,fill=yellow!20!white,font=\tiny\ttfamily}}
\tikzset{Entity/.style={draw,fill=yellow!20!white,font=\scriptsize}}
\tikzset{Process/.style={draw,shape=ellipse,fill=blue!20!white,font=\scriptsize}}
\title{(Re-)bridging the anatomy ontologies with SSSOM}
\author{Damien~Goutte-Gattat\inst{1}}
\institute{\inst{1}FlyBase group, Department of Physiology, Development and Neuroscience}
\date{International Conference on Biomedical Ontologies 2024, July 2024}
\hypersetup{pdfpagemode=UseNone,
pdftitle={(Re-)bridging the anatomy ontologies with SSSOM},
pdfauthor={Damien Goutte-Gattat}}
\AtBeginSection{
\begin{frame}{Outline}{}
\tableofcontents[currentsection]
\end{frame}
}
\AtBeginSubsection{
\begin{frame}{Outline}{}
\tableofcontents[currentsection,currentsubsection]
\end{frame}
}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\section{Introduction}
\begin{frame}
\frametitle{An approach to a multi-species anatomy ontology}
\begin{columns}
\begin{column}{.5\textwidth}
\begin{tikzpicture}[nodes={shape=circle,draw,fill=GenericItem}]
\node (core) [alt=<6| handout:0>{fill=GenericHighlight}{}] {Uberon};
\node (fbbt) [alt=<{1,7| handout:0}>{fill=GenericHighlight}{}] at (150:2cm) {FBbt};
\node (zfa) [alt=<{2,7| handout:0}>{fill=GenericHighlight}{}] at (90:2cm) {ZFA};
\node (xao) [alt=<{3,7| handout:0}>{fill=GenericHighlight}{}] at (30:2cm) {XAO};
\node (ma) [alt=<{4,7| handout:0}>{fill=GenericHighlight}{}] at (210:2cm) {MA};
\node (wbbt) [alt=<{5,7| handout:0}>{fill=GenericHighlight}{}] at (270:2cm) {WBbt};
\node (tx1) at (310:2cm) {...};
\node (tx2) at (330:2cm) {...};
\node (tx3) at (350:2cm) {...};
\draw (core) -- (fbbt) (core) -- (zfa) (core) -- (xao)
(core) -- (ma) (core) -- (wbbt) (core) -- (tx1)
(core) -- (tx2) (core) -- (tx3);
\end{tikzpicture}
\end{column}
\begin{column}{.45\textwidth}
Many species-specific anatomy ontologies available:
\smallskip
\begin{overlayarea}{\textwidth}{1cm}\color{red}
\only<1>{\emph{Drosophila} Anatomy Ontology}
\only<2| handout:0>{Zebrafish Anatomy Ontology}
\only<3| handout:0>{Xenopus Anatomy Ontology}
\only<4| handout:0>{Mouse Anatomy Ontology}
\only<5| handout:0>{\emph{C. elegans} Anatomy Ontology}
\end{overlayarea}
Strategy for a multi-species ontology:
\begin{itemize}
\item Provide a \textcolor<6>{red}{taxon-neutral core framework}
\item Integrate \textcolor<7>{red}{existing taxon-specific ontologies}
\end{itemize}
\end{column}
\end{columns}
\note<1>{Anatomy ontologies are used in many aspects of biocuration, for
example to capture the organ of origin of a biological sample, the tissue
where a gene is expressed, or the anatomical location where a disease
manifests itself.
Many species-specific curation projects have, therefore, their own ontology
to describe the anatomy of their favourite organism. For example, at FlyBase
we have our own \emph{Drosophila} Anatomy Ontology (FBbt), ZFIN has its
Zebrafish Anatomy Ontology (ZFA), XenBase has its Xenopus Anatomy Ontology
(XAO), and so on.
But to enable \emph{cross-species} curation and analysis of data (especially
high-throughput data, such as scRNAseq), we ideally need a single,
multi-species ontology that covers all our model organisms. For the past 12
years or so, this has been the goal of Uberon, the Uber Anatomy Ontology.
Uberon's strategy to achieve that goal is to provide a taxon-neutral core
framework, describing common anatomical structures in broadly generic terms,
and to plug the species-specific ontologies into that core.}
\end{frame}
\begin{frame}
\frametitle{Example: integrating FBbt into Uberon}
\only<1| handout:1>{
\begin{columns}[t]
\begin{column}{.45\textwidth}
Uberon hierarchy
\begin{tikzpicture}
\graph[layered layout,orient=down,nodes={Term},edge quotes mid,edges={Relation,nodes={RelationLabel}}] {
anatomical entity <-
anatomical structure <-
circulatory organ <- {
primary circulatory organ <- { heart, dorsal vessel heart[fill=GenericHighlight] },
accessory circulatory organ <- lymph heart
};
};
\end{tikzpicture}
\end{column}
\begin{column}{.45\textwidth}
FBbt hierarchy
\begin{tikzpicture}
\graph[layered layout,orient=down,nodes={Term},edge quotes mid,edges={Relation,nodes={RelationLabel}}] {
anatomical entity <-
anatomical structure <-
heart[fill=GenericHighlight] <-["P"] {
aortic funnel, heart chamber, dorsal diaphragm
};
};
\end{tikzpicture}
FBbt's ``heart'' is the fly-specific equivalent of Uberon's ``dorsal vessel heart''
\end{column}
\end{columns}
}
\only<2| handout:2>{
\begin{columns}
\begin{column}{.3\textwidth}
Integrated hierarchy:
\smallskip
FBbt's ``heart'' classified under Uberon's ``dorsal vessel heart''
\end{column}
\begin{column}{.6\textwidth}
\begin{tikzpicture}
\graph[layered layout,orient=down,nodes={Term},edge quotes mid,edges={Relation,nodes={RelationLabel}}] {
anatomical entity <-
anatomical structure <-
circulatory organ <- {
primary circulatory organ <- {
heart,
dorsal vessel heart[fill=GenericHighlight]
<- "heart (drosophila)"[fill=GenericHighlight] <-["P"] {
aortic funnel, heart chamber, dorsal diaphragm
}
},
accessory circulatory organ <- lymph heart
};
};
\end{tikzpicture}
\end{column}
\end{columns}
}
\note<1>{To illustrate this strategy, here is (on the left) a simplified
excerpt of Uberon's hierarchy around the concept of ``heart'', which in
Uberon parlance is called, more generically, the ``primary circulatory
organ'' (to make room for the \emph{accessory} circulatory organs that can
be found in some species).
Below ``primary circulatory organ'', Uberon defines both ``heart'', which is
intended to refer to the \emph{vertebrate} heart, and ``dorsal vessel
heart'', which refers to the heart of arthropods such as fruit flies. Uberon
itself does not describe the arthropod heart in any more detail -- there
are no terms under ``dorsal vessel heart''. That term is in fact merely
intended as a ``branching point'', where more specific terms from
arthropod-specific ontologies can be attached.
In the \emph{Drosophila} Anatomy Ontology, we have a single term ``heart''
to refer to the dorsal vessel heart (we don't need to call it ``dorsal
vessel heart'' since it is the only heart that exists in flies), and below
that term, we have a handful of terms that describe more precisely the
different parts that make up the fly heart.}
\note<2>{When FBbt is integrated into Uberon, its ``heart'' term is
automatically treated as a subclass of the more generic ``dorsal vessel
heart'', thereby creating a unified hierarchy where the
\emph{Drosophila}-specific terms are appropriately attached to Uberon's
taxon-neutral core.
Using this strategy, Uberon is able to accurately and precisely describe the
anatomy of the most used model organisms, by benefitting from all the
existing species-specific ontologies without having to duplicate their
efforts.
This talk is about how this integration process works, and how we recently
overhauled it to make it easier to maintain, more robust, and FAIRer.}
\end{frame}
\begin{frame}
\frametitle{Integration requires cross-ontology mappings}
\begin{block}{}
Integrating the taxon-specific ontologies with the Uberon core framework
requires knowing where taxon-specific terms should be ``plugged'' into the
core framework.
E.g., we need to know that FBbt's \emph{heart} must be attached to Uberon's
\emph{dorsal vessel heart}.
\end{block}
\note{The key component of the integration process is the knowledge of which
species-specific terms, from each of the species-specific ontologies such as
FBbt, should be ``attached'' to which taxon-neutral term in Uberon's core
framework. For example, knowing that FBbt's ``heart'' is the fly-specific
term for what Uberon calls the ``dorsal vessel heart''.
This is the role of cross-ontology \emph{mappings}.}
\end{frame}
\section{Existing system and its limitations}
\begin{frame}
\frametitle{Integration pipeline overview}
\begin{columns}
\begin{column}{.46\textwidth}
\begin{tikzpicture}[node distance=.3cm]
\node [Entity,text width=1.3cm,alt=<1>{fill=GenericHighlight}{}] (uberon) {Uberon\\(incl. xrefs)};
\node [Process,below right=of uberon] (script) {Custom script};
\node [Entity,above right=of script,align=left] (ssao) {Taxon-specific\\ontologies};
\node [Entity,below=of script,alt=<2>{fill=GenericHighlight}{}] (cob) {Bridge files};
\node [Process,below=of cob] (merge) {OWL merge};
\node [Entity,below=of merge] (cm) {Multi-species ontology};
\draw [->] (uberon.east) -| (script.north) (script.south) -- (cob.north);
\draw [->] (cob) -- (merge) (merge) -- (cm);
\draw [->] (uberon.south) |- (merge.west);
\draw [->] (ssao.south) |- (merge.east);
\end{tikzpicture}
\end{column}
\begin{column}{.53\textwidth}
\begin{block}{Mappings maintained as \textcolor<1>{red}{xrefs}}\scriptsize\ttfamily
[Term]
id: UBERON:0015230
name: dorsal vessel heart
is\_a: UBERON:0007100 ! primary circulatory organ
\textcolor<1>{red}{xref: FBbt:00003154}
\end{block}
\begin{block}{Xrefs transformed to \textcolor<2>{red}{bridge files}}\scriptsize\ttfamily
[Term]
id: FBbt:00003154
intersection\_of: UBERON:0015230
intersection\_of: part\_of NCBITaxon:7227
\end{block}
\end{column}
\end{columns}
\note<1>{Here is a brief overview of the integration pipeline.
Mappings between Uberon and the taxon-specific ontologies are maintained as
cross-references that are carried by the taxon-neutral terms in Uberon and
that point to the taxon-specific terms in other ontologies -- as shown here,
where Uberon's ``dorsal vessel heart'' term carries a ``xref'' to
FBbt:00003154, which is FBbt's term for the fly heart.
At build time, xrefs are extracted by a custom script that turns them into
bridge files, which contain OBO materialisations of the mappings, in the
form of equivalence axioms (such as an axiom stating that FBbt's ``heart''
is equivalent to a Uberon ``dorsal vessel heart'' that is part of the
\emph{Drosophila melanogaster} taxon).
While the process looks superficially simple (though it is, in fact, quite
complex, it's just that most of the complexity is hidden behind that
script), it is also severely limited.
The limitations stem both from the way the mappings are represented, and the
way they are transformed into bridge files.}
\end{frame}
\begin{frame}
\frametitle{Issues with mappings maintained as cross-references}
\begin{block}{Mappings embedded within the ontology}
\begin{itemize}
\item Not available separately
\end{itemize}
\end{block}
\begin{block}{No additional data}
\begin{itemize}
\item All the cross-references say is that the mappings exist
\end{itemize}
\end{block}
\begin{block}{Meaningless mapping predicate}
\begin{itemize}
\item Overloading of the \emph{oboInOwl:hasDbXref} property
\item Workaround: {\ttfamily\scriptsize treat-xrefs-as-reverse-genus-differentia FBbt NCBITaxon:7227}
\begin{itemize}
\item Assigns a meaning to \emph{all} cross-references between FBbt and Uberon
\end{itemize}
\end{itemize}
\end{block}
\note{The one benefit of using xrefs to represent mappings is that it is a
very simple method, but that simplicity comes at a high cost.
First, the mappings are embedded within the ontology and as such are not
independently available -- they need to be forcefully extracted whenever
someone needs them. This makes the mappings more difficult to find, access,
and reuse than they should be.
Second, the cross-reference is telling us that there is a mapping, but we
can't know anything else about that mapping. Why does that mapping exist?
Who asserted it? When? On what basis? This is simply not recorded.
Third, the \emph{oboInOwl:hasDbXref} annotation has been used for a lot of
things since its introduction, and no longer has a clear and unambiguous
meaning -- what it means depends on where it is used. At best, it indicates
that two terms are mapped \emph{somehow}, but the nature of the relation is
unspecified.
In Uberon we can circumvent that last issue by using supplementary
ontology-level annotations to provide meaning to some cross-references. But
this is more a workaround than a proper solution, and it comes with problems
of its own.}
\end{frame}
\begin{frame}
\frametitle{Issues with the bridge generation process}
\begin{columns}
\begin{column}{.4\textwidth}
\begin{center}
\begin{tikzpicture}
\node [File] (uberon) at (0,0) {uberon.obo};
\node [File] (script) at (0,-1) {make-bridge-ontology-from-xrefs.pl};
\node [File] (bridge) at (0,-2) {uberon-bridge-to-fbbt.obo};
\node [File] at (0,-2.5) {uberon-bridge-to-wbbt.obo};
\node [File] at (0,-3) {uberon-bridge-to-ma.obo};
\node [File] at (0,-3.5) {uberon-bridge-to-....obo};
\draw[->] (uberon) -- (script) (script) -- (bridge);
\end{tikzpicture}
\end{center}
\end{column}
\begin{column}{.55\textwidth}
\begin{itemize}
\item Lack of flexibility
\item Maintenance nightmare
\item Dependent on the OBO format
\item Highly Uberon-specific
\end{itemize}
\end{column}
\end{columns}
\note{The bridge files are generated by this custom Perl script, which reads
the Uberon ontology file and generates one bridge file for each
taxon-specific ontology. That script is plagued with several issues.
The process is very rigid. For example, it is impossible to generate
only one bridge file on-demand. Almost everything is hardcoded in the
script, making it difficult to change anything whenever needed.
The script is difficult to maintain for most Uberon editors and developers,
in no small part because it conflates the technical aspects of extracting
the cross-references and generating the bridges, and the logical aspects of
determining which bridging axioms should be produced.
It's a smaller issue, but the script also only works with the OBO files,
both for its input and its output.
But the main problem is that the bridge generation script is highly specific
to Uberon. It is almost impossible to reuse its logic elsewhere. Yet the
need for integrating several ontologies together, using mappings as glue to
connect terms between those ontologies, is certainly not a Uberon-specific
need.}
\end{frame}
\begin{frame}
\frametitle{Goals for a new approach}
\begin{enumerate}
\item Treat cross-ontology mappings as proper entities
\begin{itemize}
\item Mapping set made available as an independent artefact
\end{itemize}
\item Represent mappings using a standard format intended for that purpose
\begin{itemize}
\item allowing for mapping-specific metadata
\item allowing for meaningful mapping predicates
\end{itemize}
\item Make it easy to derive bridging axioms from mappings
\begin{itemize}
\item without requiring Uberon-specific tooling
\end{itemize}
\end{enumerate}
\note{In response to the limits of the previous system, we set out to design
a new system, with the following aims:
First, we want the cross-ontology mappings to be treated as first-class
objects, similarly to the terms of the ontology, rather than merely
annotations on the terms. They should in fact be treated as the source
artefact from which the bridge files are derived. As with other source
artefacts, they must be published on their own and be independently findable.
Second, we want to store the mappings using a format that is both standard
and specifically intended for representing mappings. That format should make
it possible to store mapping-specific metadata, and to give precise meaning
to every individual mapping (everything that is not possible with the
cross-reference format).
Lastly, we want a bridge generation process that is much simpler than the
current one, and that is not tied to Uberon in any way, so that it can be
reused by other projects that have a similar need for integrating several
ontologies together.}
\end{frame}
\section{Implementation of a new approach}
\subsection{New representation of mappings}
\begin{frame}
\frametitle{The SSSOM standard}
``Simple Standard for Sharing Ontological Mappings''
\begin{itemize}
\item A data model to represent semantic mappings and associated metadata
\item A simple TSV-based format to store and exchange them
\end{itemize}
\begin{block}{SSSOM/TSV format}\tiny\ttfamily
\begin{tabular}{lllll}
\hline
subject\_id & predicate\_id & object\_id & mapping\_justification & mapping\_date\\
\hline
FBbt:00000001 & semapv:crossSpeciesExactMatch & UBERON:0000468 & semapv:ManualMappingCuration & 2024-06-01\\
FBbt:00000002 & semapv:crossSpeciesExactMatch & UBERON:6000002 & semapv:ManualMappingCuration & 2024-06-01\\
... & ... & ... & ... & ...\\
\textcolor<2>{red}{FBbt:00003154} & \textcolor<2>{red}{semapv:crossSpeciesExactMatch} & \textcolor<2>{red}{UBERON:0015230} & \textcolor<3>{red}{semapv:ManualMappingCuration} & \textcolor<3>{red}{2024-06-01}\\
... & ... & ... & ... & ...\\
\hline
\end{tabular}
\end{block}
\begin{overlayarea}{\textwidth}{1cm}
\only<2>{Mapping proper: subject -- predicate -- object triple}
\only<3>{Mapping metadata}
\end{overlayarea}
\note<1>{Over the past three years, independently of Uberon, a new standard
has been devised to work with semantic mappings: the Simple Standard for
Sharing Ontological Mappings.
The SSSOM standard defines a common data model to represent and manipulate
mappings in programming languages. The model notably makes it possible to capture a
rich set of metadata about each mapping.
The standard also defines several serialisation formats to store and
exchange mappings, the most important of which being the ``SSSOM/TSV''
format, intended to make reusing mappings across resources as easy as
possible.
You see here a very basic example of what a mapping set in SSSOM/TSV format
looks like: one mapping per line, each mapping consisting, at its core, of a
triple made of a subject, an object, and a mapping predicate. That core is
associated with several pieces of metadata.}
\end{frame}
\begin{frame}
\frametitle{Meaningful predicates for cross-species mappings}
Additions to the \emph{Semantic Mapping Vocabulary} (SEMAPV)
\begin{center}
\begin{tikzpicture}
\graph[tree layout,nodes={Term},edges={Relation}] {
"skos:mappingRelation" <- {
"semapv:isomorphicMatch" <- {
"skos:exactMatch",
"semapv:crossSpeciesExactMatch" [fill=GenericHighlight]
},
"semapv:nonIsomorphicMatch" <- {
"skos:broadMatch" <- "semapv:crossSpeciesBroadMatch" [fill=GenericHighlight],
"skos:narrowMatch" <- "semapv:crossSpeciesNarrowMatch" [fill=GenericHighlight]
}
};
};
\end{tikzpicture}
\end{center}
\emph{crossSpeciesExactMatch}: ``a match where the subject is considered analogous
to the object in a different taxonomic grouping''
\note{One of the benefits of the SSSOM model is that it allows one to specify
an explicit mapping predicate, so as to give a precise meaning to each
mapping. It is then possible to distinguish, for example, between mappings of
terms that represent exactly the same concept in two different vocabularies,
and mappings of terms that represent \emph{similar}, but not exactly
identical, concepts.
The authors of the SSSOM standard have compiled a dedicated vocabulary, the
\emph{Semantic Mapping Vocabulary}, to host, among other things, various
mapping predicates that can be used to qualify mappings. In that vocabulary,
as part of this project, we have added a handful of predicates to
specifically represent \emph{cross-species mappings}.
The most important one is \emph{crossSpeciesExactMatch}, which is intended
for mappings between terms that represent the same concept, modulo a
taxonomic restriction. We can use that predicate to explicitly say that the
FBbt term for the fly heart represents exactly the same concept as the
Uberon term for dorsal vessel heart, \emph{except} for the fact that the
former is specific to flies.}
\end{frame}
\subsection{New bridge generation process}
\begin{frame}[t]
\frametitle{From SSSOM sets to bridge files: SSSOM/T-OWL}
A domain-specific language to transform SSSOM mappings into OWL axioms
\begin{block}{Model of a mapping processing rule}
\begin{center}
\begin{tikzpicture}
\node [text width=5cm,fill=GenericItem,draw] (filter) at (0,0)
{\textbf{Filter}\\
Mapping $\rightarrow$ boolean\\
Decide whether the rule applies to a mapping};
\node [text width=5cm,fill=GenericItem,draw] (preprocessor) at (6,1)
{\textbf{Preprocessor}\\
Mapping $\rightarrow$ mapping\\
Modify a mapping on the fly};
\node [text width=5cm,fill=GenericItem,draw] (generator) at (6,-1)
{\textbf{Generator}\\
Mapping $\rightarrow$ OWL axiom\\
Derive an axiom from a mapping};
\draw[->] (filter) -- (preprocessor);
\draw[->] (filter) -- (generator);
\end{tikzpicture}
\end{center}
\end{block}
\note{Now that we can store mappings in a dedicated format, along with their
metadata and with meaningful mapping predicates, we need a way to use those
mappings when we want to integrate several ontologies together. And, as per
our third goal, we want to do that in a way that is, as much as possible,
not specific to Uberon, but reusable by other projects as well.
For that, we have designed, and we propose, a small domain-specific language
specifically intended to process SSSOM mappings and turn them into arbitrary
OWL axioms. We call that language \emph{SSSOM/T-OWL}, or ``SSSOM Transform
to OWL''.
The core concept of SSSOM/T-OWL is the mapping processing rule, which is
made of two elements.
First, a filter that takes a mapping and returns a boolean value, indicating
whether the rule should apply to that mapping or not; the filter can examine
all the metadata of a mapping to make its decision.
Second, either a preprocessor that takes a mapping and returns a modified
version of that mapping (allowing a mapping to be modified on the fly), or a
generator that takes a mapping and returns an OWL axiom.}
\end{frame}
\begin{frame}
\frametitle{From SSSOM sets to bridge files: SSSOM/T-OWL}
Examples of SSSOM/T-OWL rules
\begin{overlayarea}{\textwidth}{3cm}
\only<1-2| handout:1>{
\begin{block}{Inverting a mapping}\ttfamily\scriptsize
\textcolor<1>{red}{subject==UBERON:*} -> \textcolor<2>{red}{invert()};
\end{block}
}
\only<3-4| handout:2>{
\begin{block}{Generating a simple axiom}\ttfamily\scriptsize
\textcolor<3>{red}{predicate==skos:broadMatch} -> \textcolor<4>{red}{create\_axiom("\%subject\_id SubClassOf: \%object\_id")};
\end{block}
}
\only<5-10| handout:3>{
\begin{block}{Generating a more complex axiom}\ttfamily\scriptsize
\textcolor<5>{red}{subject==FBbt:*}\\
\hspace{1cm} \&\& \textcolor<6>{red}{predicate==semapv:crossSpeciesExactMatch}\\
\hspace{1cm} \&\&
(\textcolor<7>{red}{mapping\_justification==semapv:ManualMappingCuration}
\textcolor<8>{red}{||} \textcolor<9>{red}{confidence>=0.8})\\
-> \textcolor<10>{red}{create\_axiom("\%subject\_id EquivalentTo: \%object\_id and\\
\hspace{2cm} (BFO:0000050 some NCBITaxon:7227)")};
\end{block}
}
\end{overlayarea}
\begin{overlayarea}{\textwidth}{2.5cm}
\only<1-2| handout:1>{\textcolor<1>{red}{Select mappings with a UBERON subject}
and \textcolor<2>{red}{flip them around the predicate}}
\only<3-4| handout:2>{\textcolor<3>{red}{Select mappings with a \emph{skos:broadMatch} predicate}
and \textcolor<4>{red}{create a \emph{SubClassOf} axiom}}
\only<5-10| handout:3>{Select mappings
\begin{itemize}
\item \textcolor<5>{red}{with a FBbt subject}
\item \textcolor<6>{red}{with a \emph{semapv:crossSpeciesExactMatch} predicate}
\item that have been \textcolor<7>{red}{manually curated} \textcolor<8>{red}{or} have a \textcolor<9>{red}{confidence of at least 80\%}
\end{itemize}
and \textcolor<10>{red}{create an equivalence axiom with a taxonomic restriction}}
\end{overlayarea}
\note<1>{To illustrate, here are some examples of SSSOM/T-OWL rules.
First, a simple rule with a simple filter that selects all mappings where
the subject is a Uberon term, and applies an inversion preprocessor to them.
Inversion flips a mapping around the predicate, so that the subject becomes
the object and the other way round. So after applying this rule, all
mappings with a Uberon subject in the set will become mappings with a Uberon
object.
Second, a rule with an equally simple filter that selects all mappings that
use a \emph{skos:broadMatch} predicate, and generates from them a SubClassOf
axiom that states that the subject term is a subclass of the object term
(which represents a broader concept).
Third, a more complex rule, with a compound filter that selects mappings
that: (1) have a FBbt subject, (2) use a \emph{crossSpeciesExactMatch}
predicate, (3) have either been manually curated, or have a confidence level
higher than 80\%. When such a mapping is found, the rule creates an
equivalence axiom stating that the subject (the FBbt term) is equivalent to
the object that is part of the \emph{Drosophila melanogaster} taxon.}
\end{frame}
\begin{frame}
\frametitle{Adding SSSOM/T-OWL support to ROBOT}
\begin{block}{Prerequisite step}
Support for ``pluggable commands'' in ROBOT (since version 1.9.5)
\end{block}
\begin{block}{sssom:inject command}
Apply \textcolor<2>{red}{SSSOM/T-OWL} rules to \textcolor<3>{red}{a
mapping set} and inject the resulting axioms into the
\textcolor<4>{red}{current ontology}
\medskip
{\ttfamily\scriptsize
robot sssom:inject -i \textcolor<4>{red}{input.owl} --sssom \textcolor<3>{red}{mappings.sssom.tsv} --ruleset \textcolor<2>{red}{bridge.rules}
}
\end{block}
\note<1>{With the SSSOM/T-OWL language fully designed, we then needed to
actually implement it. We chose to add support for SSSOM/T-OWL to ROBOT,
since this is the main tool we use for most of our pipelines.
The first step was to add plugin support for ROBOT -- that is, the
possibility to add third-party commands to ROBOT. This is so that we could
quickly add new commands, such as SSSOM-related commands, to ROBOT without
being tied to ROBOT's release cycle. This was done with version 1.9.5 of
ROBOT, released at the end of Summer 2023.
We then developed a SSSOM plugin for ROBOT, which, among other things,
provides a command that implements our SSSOM/T-OWL language. That command
will read a set of SSSOM/T-OWL rules and apply them to a set of mappings,
thereby producing bridging axioms which will then be injected into the
ontology that is currently being manipulated by ROBOT.}
\end{frame}
\begin{frame}
\frametitle{New SSSOM-based integration pipeline}
\begin{columns}
\begin{column}{.45\textwidth}
\begin{center}
\begin{tikzpicture}[node distance=.5cm]
% Top row, left to right: Uberon, the mapping sets and the ruleset.
% Each alt=<n>{fill=GenericHighlight}{} key applies the highlight fill on
% slide n, matching the \textcolor<n> item of the list in the other column
% (assumes `alt' is the overlay-aware TikZ style set up in the preamble --
% confirm).
\node [Entity] (uberon) {Uberon};
\node [Entity,right=of uberon,alt=<2>{fill=GenericHighlight}{}] (com) {Mapping sets};
\node [Entity,right=of com,align=left,alt=<3>{fill=GenericHighlight}{}] (rules) {SSSOM/T-OWL\\
ruleset};
% Central column, top to bottom: mapping sets, sssom:inject, merge and the
% multi-species ontology; the taxon-specific ontologies sit right of merge.
\node [Process,below=of com,alt=<4>{fill=GenericHighlight}{}] (inject) {sssom:inject};
\node [Process,below=of inject,alt=<5>{fill=GenericHighlight}{}] (merge) {merge};
\node [Entity,right=of merge,align=left] (ssao) {Taxon-specific\\
ontologies};
\node [Entity,below=of merge] (cm) {Multi-species ontology};
% Straight arrows down the central column.
\draw [->] (com) -- (inject) (inject) -- (merge) (merge) -- (cm);
% Side inputs: |- goes vertically first, then horizontally, so the ruleset
% and Uberon drop down and enter sssom:inject from its east and west sides.
\draw [->] (rules.south) |- (inject.east);
\draw [->] (uberon.south) |- (inject.west);
% The taxon-specific ontologies feed the merge step from the right.
\draw [->] (ssao.west) -- (merge.east);
\end{tikzpicture}
\end{center}
\end{column}
\begin{column}{.45\textwidth}
\begin{enumerate}
\item Maintaining (and collecting) mappings as \textcolor<2>{red}{SSSOM sets}
\item Maintaining the integration logic as \textcolor<3>{red}{SSSOM/T-OWL rules}
\item Generating the bridging axioms through \textcolor<4>{red}{SSSOM/T-OWL support in ROBOT}
\item \textcolor<5>{red}{Merging in} the taxon-specific ontologies
\end{enumerate}
\end{column}
\end{columns}
\note<1>{This leads us to our new, SSSOM-based integration pipeline for
Uberon.
The mappings are now maintained separately from Uberon itself, as SSSOM
mapping sets.
The integration logic, which decides how the bridging axioms should be
generated, is also maintained separately, as a large ruleset written in the
SSSOM/T-OWL language. It contains rules similar to the ``complex'' example
rule shown previously.
At build time, we use our SSSOM ROBOT plugin to apply that ruleset to our
mappings, and inject the resulting bridging axioms into Uberon.
Then, we just need to merge the taxon-specific ontologies into the result of
the previous step, producing the multi-species ontology that we want.
Overall, that pipeline is both more robust, as most of the code is
consolidated in the ROBOT plugin, and more flexible, as the decision logic
is easily modifiable in the SSSOM/T-OWL ruleset.}
\end{frame}
\section{Conclusion}
\begin{frame}
\frametitle{Benefits of the new approach}
\begin{block}{Cross-ontology mappings as first-class artefacts}
\begin{itemize}
\item With their own metadata
\item Can be maintained separately
\item Available as a separate release product in a standard format
\end{itemize}
\end{block}
\begin{block}{Generic framework for SSSOM-based integration of multiple ontologies}
\begin{itemize}
\item SSSOM/T-OWL is completely independent of Uberon
\item Only the ruleset is specific
\end{itemize}
\end{block}
\note{Beyond the more robust and more flexible pipeline, which is mostly of
interest for Uberon editors and developers, the new approach has broader
benefits.
The mappings between Uberon and the taxon-specific ontologies are now
first-class artefacts. They can have their own metadata, allowing for
traceability and accountability of each individual mapping statement, and
their own mapping predicate. They can be maintained separately from the
Uberon ontology itself, and we can even delegate the maintenance of some of
them to the taxon-specific ontologies themselves (something we have already
started doing for the mappings with FBbt, which are now maintained by
FlyBase rather than by Uberon). They are available as a distinct release
product, which is findable on its own, in a format that is still relatively
new but that we hope will become the standard format for semantic mappings.
And beyond Uberon, we now have a generic framework to integrate multiple
ontologies with SSSOM-maintained mappings. The SSSOM/T-OWL language, and its
implementation in the SSSOM ROBOT plugin, has no ties whatsoever to Uberon
and can be reused by any project. Only the SSSOM/T-OWL rules that we use are
tailored to the needs of Uberon.}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example of reuse: FBbt/Uberon integration test}
A single ROBOT command to \textcolor<1>{red}{merge FBbt with Uberon},
\textcolor<2>{red}{inject the mappings-derived bridging axioms}, and
\textcolor<3>{red}{check for possible inconsistencies} that would reveal
incorrect mappings:
% The command line is typeset in semiverbatim so that the \textcolor<n>
% overlays still work inside verbatim-like text; this verbatim content is
% why the enclosing frame is declared [fragile].
% NOTE(review): the trailing \\ on the first three lines is presumably meant
% to print a literal backslash (shell line continuation), as beamer documents
% for semiverbatim -- confirm on the rendered slide.
\begin{block}{}
\begin{semiverbatim}
robot \textcolor<1>{red}{merge} -i \textcolor<1>{red}{fbbt.owl} -i \textcolor<1>{red}{uberon.owl} \\
\textcolor<2>{red}{sssom:inject} --sssom \textcolor<2>{red}{mappings.sssom.tsv} \\
--ruleset \textcolor<2>{red}{bridging.rules} \\
\textcolor<3>{red}{reason} -r ELK
\end{semiverbatim}
\end{block}
\note<1>{Lastly, here is an example to illustrate some of the benefits of
reusable integration logic.
Because FBbt and Uberon are developed separately, there is always a risk
that, whenever something changes in either FBbt or Uberon, an inter-ontology
inconsistency is introduced. Because the logic to integrate FBbt into
Uberon was impossible to reuse outside of Uberon, it was difficult for FBbt
editors to detect and prevent such inconsistencies whenever we edited the
ontology.
But with the SSSOM approach, such testing can be done with a single ROBOT
command in three steps, as shown here: (1) we merge the current versions of
FBbt and Uberon; (2) we derive OWL axioms from the FBbt-to-Uberon mappings
and inject them into the merged ontology; (3) we reason over the merged
ontology to detect inconsistencies. This takes but a couple of minutes, and
that check is now part of our test suite in FBbt. Thanks to this approach,
over the past two years we have rooted out all existing inconsistencies
between FBbt and Uberon.}
\end{frame}
% Closing slide: acknowledgements, licence/author notice and revision stamp.
\begin{frame}
\frametitle{About}
\begin{block}{Acknowledgements}
\begin{itemize}
\item Grant BB/T014008 from BBSRC (UK) and NSF/BIO (US)
\item Mapping Commons project for the SSSOM standard
\end{itemize}
\end{block}
% Licence block: the block title itself carries the CC BY logo, the year and
% the author; the % after \hspace keeps the line break from adding a space.
% NOTE(review): the address in the title reads `[email protected]', which looks
% like an e-mail obfuscation placeholder rather than a real address --
% confirm and restore the intended address.
\begin{block}{\includegraphics[scale=.26]{ccby}\hspace{.2em}%
2024 Damien Goutte-Gattat \texttt{<[email protected]>}}
This presentation is released under the Creative Commons Attribution
4.0 International License (CC BY 4.0).
\end{block}
% revision.tex is not part of this file; presumably it is generated at build
% time and holds the revision identifier -- TODO confirm.
\begin{block}{Revision}\ttfamily\scriptsize
\input{revision.tex}
\end{block}
\end{frame}
\appendix
\begin{frame}[noframenumbering]
\frametitle{Questions?}
\end{frame}
\end{document}
% vim: tw=78 et ai ts=2 st=2 sw=2