Patch/docs (#278)
* Update template docs

* Refactor docs code and update templates

* Rename a module file

* Add Dockerfile for doc-builder image

* Clean up some dependencies and imports

* Add workflow for docs

* Add workflow comment

* Build jsonld

---------

Co-authored-by: nf-osi[bot] <[email protected]>
anngvu and nfosi-service authored Apr 12, 2023
1 parent f3ced33 commit f0fad92
Showing 28 changed files with 4,278 additions and 1,360 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/publish-docs.yml
@@ -0,0 +1,45 @@
name: Build and publish docs to GH Pages

on:
  push:
    branches:
      - main

  # TODO setup conditional to build but not push
  #pull_request:
  #  branches:
  #    - main

jobs:

  build-and-publish:
    runs-on: ubuntu-latest

    permissions:
      contents: read
      pages: write
      id-token: write

    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup GH Pages
        uses: actions/configure-pages@v3

      - name: Build using docker
        run: |
          docker run -v $(pwd):/app ghcr.io/nf-osi/data-model-docs

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v1
        with:
          path: docs

      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v2
64 changes: 32 additions & 32 deletions NF.csv

Large diffs are not rendered by default.

39 changes: 22 additions & 17 deletions NF.jsonld
@@ -7558,6 +7558,28 @@
       ],
       "sms:validationRules": []
     },
+    {
+      "@id": "bts:Particlecharacterization",
+      "@type": "rdfs:Class",
+      "rdfs:comment": "A series of analytical methods that provide information about entities such as composition, structure and defects.",
+      "rdfs:label": "Particlecharacterization",
+      "rdfs:subClassOf": [
+        {
+          "@id": "bts:DataType"
+        }
+      ],
+      "schema:isPartOf": {
+        "@id": "http://schema.biothings.io"
+      },
+      "sms:displayName": "particle characterization",
+      "sms:required": "sms:false",
+      "sms:requiresDependency": [
+        {
+          "@id": "bts:Assay"
+        }
+      ],
+      "sms:validationRules": []
+    },
     {
       "@id": "bts:Kinomics",
       "@type": "rdfs:Class",
@@ -21519,23 +21541,6 @@
       "sms:required": "sms:false",
       "sms:validationRules": []
     },
-    {
-      "@id": "bts:Particlecharacterization",
-      "@type": "rdfs:Class",
-      "rdfs:comment": "TBD",
-      "rdfs:label": "Particlecharacterization",
-      "rdfs:subClassOf": [
-        {
-          "@id": "bts:DataType"
-        }
-      ],
-      "schema:isPartOf": {
-        "@id": "http://schema.biothings.io"
-      },
-      "sms:displayName": "particle characterization",
-      "sms:required": "sms:false",
-      "sms:validationRules": []
-    },
     {
       "@id": "bts:TPM",
       "@type": "rdfs:Class",
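Side note for readers: the refactored docs/docTemplate.R below consumes this jsonld pre-parsed as a list object rather than as a file path. A minimal sketch of that loading step, assuming jsonlite (the loader actually used by the docs build may differ):

library(jsonlite)

# Parse the schematic jsonld; simplifyVector = FALSE keeps each node as a
# named list so fields like `@id` stay addressable
schema <- fromJSON("NF.jsonld", simplifyVector = FALSE)[["@graph"]]

# e.g., locate the class added in this commit
Filter(function(x) identical(x$`@id`, "bts:Particlecharacterization"), schema)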
9 changes: 9 additions & 0 deletions docs/Dockerfile
@@ -0,0 +1,9 @@
FROM ghcr.io/nf-osi/nfportalutils:develop

WORKDIR /app

RUN apt-get update && apt-get -yq install pandoc

RUN R -e "install.packages(c('rmarkdown', 'reactable', 'visNetwork'), repos='http://cran.rstudio.com/')"

ENTRYPOINT ["R", "-e", "rmarkdown::render('docs/index.Rmd')"]
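The entrypoint boils down to a single render call, which can also be run locally when iterating on the docs (assuming rmarkdown is installed and the repo root is the working directory):

# Equivalent local invocation of the container entrypoint
rmarkdown::render("docs/index.Rmd")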
74 changes: 49 additions & 25 deletions docs/docTemplate.R
@@ -15,48 +15,72 @@
 #'
 #' Currently, schematic templates allow modeling more on the simplistic side and
 #' don't formally express all these, so only a few are checked.
-#' Moreover, the jsonld version encodes much less information than the csv version
-#' (jsonld conversion loses custom metadata in the csv), which is why this currently depends on both formats.
+#' Currently, the jsonld version loses some information when translated from the csv source
+#' (mainly the summary Range definition corresponding to https://www.w3.org/TR/rdf-schema/#ch_range and EditorNote).
 #'
 #' @param templates Named vector of templates to process,
 #' where names correspond to the id without prefix (currently whatever follows "bts:"),
 #' and values are the real internal IDs (in .ID).
-#' @param schema_csv Schema representation read from `.csv`.
-#' @param schema_jsonld Schema path to jsonld file.
+#' @param schema Schema list object parsed from a schematic jsonld.
+#' @param prefix Namespace prefix.
 #' @param savedir Directory where template representations will be outputted.
+#' @param verbose Whether to be verbose about what's going on.
 docTemplate <- function(templates,
-                        schema_csv,
-                        schema_jsonld = "../NF.jsonld",
-                        savedir = "templates/") {
+                        schema,
+                        prefix = "bts:",
+                        savedir = "templates/",
+                        verbose = TRUE) {


   for(x in names(templates)) { # e.g. x <- "GenomicsAssayTemplate"
     # For template, parse DependsOn to get all props present in manifest
-    props <- nfportalutils::get_dependency_from_json_schema(paste0("bts:", x),
-                                                            schema = schema_jsonld)
+    prop_ids <- nfportalutils::get_dependency_from_json_schema(paste0(prefix, x),
+                                                               schema = schema,
+                                                               return_labels = FALSE)

-    # Create the ControlledVocab aka Range col for each prop
-    # ControlledVocab col is handled specially and uses a custom Range col defined in csv
-    # For CV col we create a link to a class if the term editor has referenced a class in Range,
-    # else we simply fall back to enumerating the valid values
-    index <- match(props, schema_csv$Attribute)
-    range <- dplyr::if_else(schema_csv[index, "Range"] != "",
-                            paste0("#", schema_csv[index, "Range"]),
-                            schema_csv[index, "Valid.Values"])
+    # The range of prop `assay` is anything of class `Assay` --
+    # however, the json-ld does not make this as conceptually concise for props, instead listing all possible values.
+    # In the docs, we don't want to enumerate all values and instead want to create a _link_ to a class that defines the range.
+    # To do this, we can infer the class by looking up the class of the first listed enum for that prop.
+    # The range could also be inferred to be a boolean or string/integer rather than a class.
+    summarize_range <- function(prop_id, schema, return_labels = FALSE) {
+
+      enums <- nfportalutils::get_by_prop_from_json_schema(id = prop_id,
+                                                           prop = "schema:rangeIncludes",
+                                                           schema = schema,
+                                                           return_labels = FALSE)
+      if(is.null(enums)) return("")
+      if(length(enums) < 5) return(paste(gsub("bts:", "", enums), collapse = ","))
+      if("bts:Yes" %in% enums) return("Y/N")
+      enum1 <- enums[1]
+      # additional lookup of parent class
+      class <- nfportalutils::get_by_prop_from_json_schema(enum1,
+                                                           prop = "rdfs:subClassOf",
+                                                           schema = schema,
+                                                           return_labels = FALSE)[[1]]
+      if(length(class) > 1) warning(enum1, " has multiple parent classes")
+      class <- sub("bts:", "", class[1]) # use first but warn
+      class <- paste0("#", class)
+      class
+    }

-    template_tab <- data.table(Field = props,
-                               Description = schema_csv[index, "Description"],
-                               Required = ifelse(schema_csv[index, "Required"], "required", "optional"),
-                               ControlledVocab = range,
-                               # Cardinality = schema_csv[index, "Cardinality"],
-                               Note = schema_csv[index, "EditorNote"])
+    # Because of the way schematic imports biothings without us having much control over it, some ids can be duplicated (!)
+    schema <- schema[!duplicated(sapply(schema, function(x) x$`@id`))]
+    sms <- Filter(function(x) x$`@id` %in% prop_ids, schema)
+    sms <- lapply(sms, function(x) {
+      list(Field = x$`sms:displayName`,
+           Description = if(!is.null(x$`rdfs:comment`)) x$`rdfs:comment` else " ",
+           Required = if(!is.null(x$`sms:required`)) sub("sms:", "", x$`sms:required`) else "?",
+           ValidRange = summarize_range(x$`@id`, schema))
+    })
+    tt <- rbindlist(sms)

     # Sort to show by required, then alphabetically
-    template_tab <- template_tab[order(-Required, Field), ]
+    tt <- tt[order(-Required, Field), ]

     template_id <- templates[x]
     filepath <- paste0(savedir, template_id, ".csv")
-    write.csv(template_tab, file = filepath, row.names = F)
+    write.csv(tt, file = filepath, row.names = F)
   }
 }

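For orientation, a sketch of how the refactored docTemplate() might be invoked. The template mapping here is hypothetical (real values come from the .ID column in NF.csv), and schema is the parsed jsonld list from the earlier sketch:

library(data.table)  # docTemplate() relies on data.table::rbindlist()

templates <- c(GenomicsAssayTemplate = "genomics_assay")  # hypothetical .ID value

docTemplate(templates,
            schema = schema,     # list parsed from NF.jsonld
            prefix = "bts:",
            savedir = "templates/")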
7 changes: 3 additions & 4 deletions docs/graph.R
@@ -1,5 +1,4 @@
 library(visNetwork)
-library(tidyverse)

 #-------------------------------------------------------------------------------#

@@ -10,7 +9,7 @@
 # schema <- readExtSchema("NF.csv")
 readExtSchema <- function(schema_csv, ext_classes_csv = "ext_classes.csv") {
   schema <- read.csv(schema_csv) %>%
-    select(label = Attribute, id = .ID, Root = .Root, SubOf = .SubOf)
+    dplyr::select(label = Attribute, id = .ID, Root = .Root, SubOf = .SubOf)

   # Extended class definitions
   ext_classes <- read.csv(ext_classes_csv) %>%
@@ -32,7 +31,7 @@ getNodesEdges <- function(schema, cluster_root,
                           font.color = list(A = "white", C = "white"))
                           ) {
   cluster <- schema %>%
-    filter(Root == cluster_root)
+    dplyr::filter(Root == cluster_root)

   # Namespaces for cluster ancestor vs Children
   A <- paste(prefix, "A", sep = "_")
@@ -66,7 +65,7 @@ c2Cluster <- function(cluster_1, cluster_2, connect_by,
   # Configure between-cluster relations
   relations <- read.csv(ext_relations_csv, header = T)
   edges <- relations %>%
-    filter(property == connect_by)
+    dplyr::filter(property == connect_by)
   relations$color <- viz$color
   relations$width <- viz$width

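A usage sketch for the graph helpers above; the cluster root value and the assumed list(nodes, edges) return shape are inferences from the surrounding code, not confirmed by this diff:

library(visNetwork)

sg <- readExtSchema("NF.csv")  # reads ext_classes.csv alongside, per the default arg
g <- getNodesEdges(sg, cluster_root = "Assay", prefix = "assay")  # "Assay" is a hypothetical root
visNetwork(g$nodes, g$edges)  # assumes a list(nodes = ..., edges = ...) return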