Update terms and template docs (#90)

* Remove another deprecated modelSystemStrainNomenclature
* Update schema csv and jsonld: add to modelSystemName, make dataType required, reword some descriptions, add backend class notation, range and editor note (mainly for docs)
* Update ext_classes
* Add documentation functions and sections, regenerate
* Revise new extended classes
nf-osi · Dec 8, 2021 · 9f2c485 · 9f2c485
1 parent fe90b8e
commit 9f2c485
Show file tree

Hide file tree

Showing 14 changed files with 1,568 additions and 769 deletions.
diff --git a/NF.csv b/NF.csv
diff --git a/NF.jsonld b/NF.jsonld
@@ -5137,7 +5137,7 @@
             "rdfs:label": "SporadicSchwannoma",
             "rdfs:subClassOf": [
                 {
-                    "@id": "schema:Thing"
+                    "@id": "bts:Diagnosis"
                 }
             ],
             "schema:isPartOf": {
@@ -5991,7 +5991,7 @@
                 }
             ],
             "sms:displayName": "dataType",
-            "sms:required": "sms:false",
+            "sms:required": "sms:true",
             "sms:validationRules": []
         },
         {
@@ -9817,6 +9817,74 @@
             "sms:required": "sms:false",
             "sms:validationRules": []
         },
+        {
+            "@id": "bts:Sc93.1",
+            "@type": "rdfs:Class",
+            "rdfs:comment": "TBD",
+            "rdfs:label": "Sc93.1",
+            "rdfs:subClassOf": [
+                {
+                    "@id": "bts:ModelSystemName"
+                }
+            ],
+            "schema:isPartOf": {
+                "@id": "http://schema.biothings.io"
+            },
+            "sms:displayName": "Sc93.1",
+            "sms:required": "sms:false",
+            "sms:validationRules": []
+        },
+        {
+            "@id": "bts:JH-2-002-CL",
+            "@type": "rdfs:Class",
+            "rdfs:comment": "Cell line derived from JHU biobank individual",
+            "rdfs:label": "JH-2-002-CL",
+            "rdfs:subClassOf": [
+                {
+                    "@id": "bts:ModelSystemName"
+                }
+            ],
+            "schema:isPartOf": {
+                "@id": "http://schema.biothings.io"
+            },
+            "sms:displayName": "JH-2-002-CL",
+            "sms:required": "sms:false",
+            "sms:validationRules": []
+        },
+        {
+            "@id": "bts:JH-2-079-CL",
+            "@type": "rdfs:Class",
+            "rdfs:comment": "Cell line derived from JHU biobank individual",
+            "rdfs:label": "JH-2-079-CL",
+            "rdfs:subClassOf": [
+                {
+                    "@id": "bts:ModelSystemName"
+                }
+            ],
+            "schema:isPartOf": {
+                "@id": "http://schema.biothings.io"
+            },
+            "sms:displayName": "JH-2-079-CL",
+            "sms:required": "sms:false",
+            "sms:validationRules": []
+        },
+        {
+            "@id": "bts:JH-2-103-CL",
+            "@type": "rdfs:Class",
+            "rdfs:comment": "Cell line derived from JHU biobank individual",
+            "rdfs:label": "JH-2-103-CL",
+            "rdfs:subClassOf": [
+                {
+                    "@id": "bts:ModelSystemName"
+                }
+            ],
+            "schema:isPartOf": {
+                "@id": "http://schema.biothings.io"
+            },
+            "sms:displayName": "JH-2-103-CL",
+            "sms:required": "sms:false",
+            "sms:validationRules": []
+        },
         {
             "@id": "bts:ModelSystemName",
             "@type": "rdfs:Class",
@@ -9967,10 +10035,10 @@
                     "@id": "bts:KCL025"
                 },
                 {
-                    "@id": "bts:5PNFTDiPSsvPM6"
+                    "@id": "bts:5PNFTdiPSsvPM6"
                 },
                 {
-                    "@id": "bts:5PNFTDiPSsvMM4"
+                    "@id": "bts:5PNFTdiPSsvMM4"
                 },
                 {
                     "@id": "bts:7PNFSiPSrvPM12"
@@ -10016,6 +10084,18 @@
                 },
                 {
                     "@id": "bts:S462.TY"
+                },
+                {
+                    "@id": "bts:Sc93.1"
+                },
+                {
+                    "@id": "bts:JH-2-002-CL"
+                },
+                {
+                    "@id": "bts:JH-2-079-CL"
+                },
+                {
+                    "@id": "bts:JH-2-103-CL"
                 }
             ],
             "sms:displayName": "modelSystemName",
@@ -10201,9 +10281,6 @@
                 {
                     "@id": "bts:ModelSystemName"
                 },
-                {
-                    "@id": "bts:ModelSystemStrainNomenclature"
-                },
                 {
                     "@id": "bts:GenePerturbationType"
                 },
@@ -11472,7 +11549,7 @@
         {
             "@id": "bts:ReadLength",
             "@type": "rdfs:Class",
-            "rdfs:comment": "TBD",
+            "rdfs:comment": "Number of base pairs (bp) sequenced for a read",
             "rdfs:label": "ReadLength",
             "rdfs:subClassOf": [
                 {
@@ -11486,6 +11563,23 @@
             "sms:required": "sms:false",
             "sms:validationRules": []
         },
+        {
+            "@id": "bts:ReadDepth",
+            "@type": "rdfs:Class",
+            "rdfs:comment": "Average number of reads obtained",
+            "rdfs:label": "ReadDepth",
+            "rdfs:subClassOf": [
+                {
+                    "@id": "bts:Ngs"
+                }
+            ],
+            "schema:isPartOf": {
+                "@id": "http://schema.biothings.io"
+            },
+            "sms:displayName": "readDepth",
+            "sms:required": "sms:false",
+            "sms:validationRules": []
+        },
         {
             "@id": "bts:Bulkcell",
             "@type": "rdfs:Class",
@@ -14661,7 +14755,7 @@
         {
             "@id": "bts:ProgressReportNumber",
             "@type": "rdfs:Class",
-            "rdfs:comment": "Indicates which milestone you're annotating data files for. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the \"progress report\" timeline. An example: if submitting data for your 6-month milestone report for NTAP, progressReportNumber=1.  Also if you are submitting data associated with your first milestone, progressReportNumber =1",
+            "rdfs:comment": "Indicates the milestone the data is associated with. Currently only required for projects funded by NTAP, GFF, and NFRI. For GFF studies, this is the ‘progress report’ timeline. Example: if submitting data for the 6-month milestone report for NTAP, progressReportNumber=1. Also, if submitting data associated with the first milestone, progressReportNumber=1",
             "rdfs:label": "ProgressReportNumber",
             "rdfs:subClassOf": [
                 {
@@ -14861,7 +14955,7 @@
         {
             "@id": "bts:SpecimenPreparationMethod",
             "@type": "rdfs:Class",
-            "rdfs:comment": "Term that represents the method used to preserve the sample before preparing the sample for sequencing",
+            "rdfs:comment": "Term that represents preservation of the sample before usage in, e.g. sequencing",
             "rdfs:label": "SpecimenPreparationMethod",
             "rdfs:subClassOf": [
                 {
@@ -15984,70 +16078,70 @@
             "sms:validationRules": []
         },
         {
-            "@id": "bts:+/?",
+            "@id": "bts:5PNFTdiPSsvPM6",
             "@type": "rdfs:Class",
             "rdfs:comment": "TBD",
-            "rdfs:label": "+/?",
+            "rdfs:label": "5PNFTdiPSsvPM6",
             "rdfs:subClassOf": [
                 {
-                    "@id": "bts:Nf2Genotype"
+                    "@id": "bts:ModelSystemName"
                 }
             ],
             "schema:isPartOf": {
                 "@id": "http://schema.biothings.io"
             },
-            "sms:displayName": "+/?",
+            "sms:displayName": "5PNF_TdiPSsv_PM_6",
             "sms:required": "sms:false",
             "sms:validationRules": []
         },
         {
-            "@id": "bts:-/?",
+            "@id": "bts:5PNFTdiPSsvMM4",
             "@type": "rdfs:Class",
             "rdfs:comment": "TBD",
-            "rdfs:label": "-/?",
+            "rdfs:label": "5PNFTdiPSsvMM4",
             "rdfs:subClassOf": [
                 {
-                    "@id": "bts:Nf2Genotype"
+                    "@id": "bts:ModelSystemName"
                 }
             ],
             "schema:isPartOf": {
                 "@id": "http://schema.biothings.io"
             },
-            "sms:displayName": "-/?",
+            "sms:displayName": "5PNF_TdiPSsv_MM_4",
             "sms:required": "sms:false",
             "sms:validationRules": []
         },
         {
-            "@id": "bts:ModelSystemStrainNomenclature",
+            "@id": "bts:+/?",
             "@type": "rdfs:Class",
             "rdfs:comment": "TBD",
-            "rdfs:label": "ModelSystemStrainNomenclature",
+            "rdfs:label": "+/?",
             "rdfs:subClassOf": [
                 {
-                    "@id": "bts:GenomicsAssay"
+                    "@id": "bts:Nf2Genotype"
                 }
             ],
             "schema:isPartOf": {
                 "@id": "http://schema.biothings.io"
             },
-            "sms:displayName": "modelSystemStrainNomenclature",
+            "sms:displayName": "+/?",
             "sms:required": "sms:false",
             "sms:validationRules": []
         },
         {
-            "@id": "bts:ReadDepth",
+            "@id": "bts:-/?",
             "@type": "rdfs:Class",
             "rdfs:comment": "TBD",
-            "rdfs:label": "ReadDepth",
+            "rdfs:label": "-/?",
             "rdfs:subClassOf": [
                 {
-                    "@id": "bts:GenomicsAssay"
+                    "@id": "bts:Nf2Genotype"
                 }
             ],
             "schema:isPartOf": {
                 "@id": "http://schema.biothings.io"
             },
-            "sms:displayName": "readDepth",
+            "sms:displayName": "-/?",
             "sms:required": "sms:false",
             "sms:validationRules": []
         },

diff --git a/docs/docTemplate.R b/docs/docTemplate.R
@@ -0,0 +1,40 @@
+#' Generate template documentation
+#'
+#' Writes one CSV file per template found in `schema`. Each file has one row
+#' per property (field) of the template and selected informational columns for:
+#' - marginality (required vs. recommended vs. optional; in our case,
+#'   recommended/optional collapsed to optional)
+#' - controlled values / constraints on fields
+#' - cardinality (one or many values allowed) *currently omitted, see
+#'   additional notes
+#'
+#' Example related resources for what this can look like:
+#' 1. https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE
+#' 2. https://fairplus.github.io/the-fair-cookbook/content/recipes/interoperability/transcriptomics-metadata.html#assay-metadata
+#' 3. https://www.immport.org/shared/templateDocumentation?tab=1&template=bioSamples.txt
+#' 4. https://lincsproject.org/LINCS/files//2020_exp_meta_stand/General_Proteomics.pdf
+#'
+#' Marginality is mentioned in all examples.
+#' CV is mentioned for #1,2,3.
+#' Cardinality is mentioned in #1 only, so it's not prioritized.
+#'
+#' @param schema Schema table (data.frame or tibble) with the columns `ID`,
+#'   `Root`, `SubclassOf`, `DependsOn`, `Attribute`, `Description`,
+#'   `Required`, `Range`, `Valid.Values` and `EditorNote`.
+#' @param savedir Prefix prepended to every output file name; each table is
+#'   written to `paste0(savedir, <template ID>, ".csv")`.
+#' @return `NULL`, invisibly. Called for its side effect of writing CSV files.
+docTemplate <- function(schema, savedir = "templates/") {
+  # Templates are the rows rooted at "Template" that have a parent class
+  templates <- schema %>%
+    filter(Root == "Template" & SubclassOf != "") %>%
+    select(ID, DependsOn)
+  for (template in templates$ID) {
+    # DependsOn lists the template's fields, separated by "," or ", "
+    fields <- schema %>%
+      filter(template == ID) %>%
+      pull(DependsOn) %>%
+      strsplit(split = ", ?") %>%
+      unlist()
+    # A missing or empty DependsOn entry is not a real field
+    fields <- fields[!is.na(fields) & nzchar(fields)]
+    index <- match(fields, schema$Attribute)
+    unknown <- fields[is.na(index)]
+    if (length(unknown) > 0) {
+      # Unmatched fields stay in the table (with NA details) but are reported
+      warning(
+        "Template '", template, "' lists fields not found in schema$Attribute: ",
+        paste(unknown, collapse = ", "),
+        call. = FALSE
+      )
+    }
+    # ControlledVocab col is handled specially and is derived from the Range col
+    # Range is either filled with a class or blank (or NA), where blank means
+    # free text or Boolean values
+    # Bools are "controlled vocabulary" vs. true ontology terms
+    # `[[` extraction yields plain vectors for both data.frames and tibbles;
+    # as.character() keeps both if_else() branches the same type
+    range_class <- as.character(schema[["Range"]][index])
+    valid_values <- as.character(schema[["Valid.Values"]][index])
+    has_range <- !is.na(range_class) & range_class != ""
+    controlled_vocab <- dplyr::if_else(
+      has_range,
+      paste0("#", range_class),
+      valid_values
+    )
+    template_tab <- data.frame(
+      Field = fields,
+      Description = schema[["Description"]][index],
+      Required = ifelse(schema[["Required"]][index], "required", "optional"),
+      ControlledVocab = controlled_vocab,
+      # Cardinality = schema[["Cardinality"]][index],
+      Note = schema[["EditorNote"]][index]
+    )
+    write.csv(
+      template_tab,
+      file = paste0(savedir, template, ".csv"),
+      row.names = FALSE
+    )
+  }
+  invisible(NULL)
+}
+
+