Various improvements:

include dependencies as SoftwareApplication types with name, version, provider publisher-> provider for CRAN/BIOC tidier json output (fewer empty nodes) tidier, more sensible subroutines. cleaner logic for handling pkg="." vs pkg="packagename"
hsonne · May 11, 2017 · 65b4607 · 65b4607
1 parent 6beb4f4
commit 65b4607
Show file tree

Hide file tree

Showing 10 changed files with 317 additions and 168 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -13,10 +13,7 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 6.0.1
 Imports: jsonlite (>= 1.3)
-Suggests: testthat,
-    jsonvalidate,
-    jsonld,
+Suggests: testthat, jsonvalidate, jsonld,
     covr,
-    knitr,
-    rmarkdown
+    knitr, rmarkdown
 VignetteBuilder: knitr
diff --git a/R/codemeta_description.R b/R/codemeta_description.R
@@ -75,29 +75,31 @@ codemeta_description <-  function(descr, id = NULL, codemeta = new_codemeta()){
 read_dcf <- function(pkg) {
 
   ## Takes path to DESCRIPTION, to package root, or the package name as an argument
-  path <- paste(pkg, "DESCRIPTION", sep="/")
-  if(basename(pkg) == "DESCRIPTION")
+  if(basename(pkg) == "DESCRIPTION"){ #so read_dcf can take a dcf file as argument, instead of just a path.
     dcf <- pkg
-  else if(file.exists(path)){
-    dcf <- path
   } else {
-    dcf <- system.file("DESCRIPTION", package = pkg)
+    dcf <- get_file("DESCRIPTION", pkg)
   }
 
   fields <- colnames(read.dcf(dcf))
   as.list(read.dcf(dcf, keep.white = fields)[1, ])
+}
 
-  ## Alternate approach:
-  ## utils::packageDescription assumes package is installed, takes pkg name not path.
-  ## Advantages: Handles encoding, a little handling of Authors@R (actually done by install.packages step)
-  ##descr <- utils::packageDescription(pkg)
-
+## Like system.file, but pkg can instead be path to package root directory
+get_file <- function(FILE, pkg = "."){
+  f <- file.path(pkg, FILE)
+  if(file.exists(f))
+    f
+  else {
+    f <- system.file(FILE, package = pkg)
+  }
 }
 
 
 
 
 
+
 is_IRI <- function(string){
   ## FIXME IRI can be many other things too, see https://github.com/dgerber/rfc3987 for more formal implementation
   grepl("^http[s]?://", string)

diff --git a/R/create_codemeta.R b/R/create_codemeta.R
@@ -0,0 +1,55 @@
+
+#' create_codemeta
+#'
+#' create a codemeta list object in R for further manipulation
+#' @inheritParams write_codemeta
+#' @return a codemeta list object
+#' @export
+#' @examples
+#' cm <- create_codemeta("jsonlite")
+#' cm$keywords <- list("metadata", "ropensci")
+#' write_codemeta(cm = cm)
+create_codemeta <- function(pkg = ".",
+                            path = "codemeta.json",
+                            version = "2",
+                            ...){
+
+  cm <- new_codemeta()
+  descr <- read_dcf(pkg)
+  cm <- import_pkg_description(descr = descr, cm = cm, version = version)
+
+  ## legacy support only
+  if(version == "1") return(cm)
+
+  readme <- get_file("README.md", pkg)
+
+  cm$contIntegration <- guess_ci(readme)
+  cm$developmentStatus <- guess_devStatus(readme)
+  cm$provider <- guess_provider(cm$name)
+
+
+  ## Add blank slots as placeholders? and declare as an S3 class?
+
+  cm
+}
+
+
+
+
+## generate codemeta.json from a DESCRIPTION file
+## FIXME parse and use crosswalk to reference DESCRIPTION terms?
+import_pkg_description <-
+  function(descr,
+           id = NULL,
+           cm = new_codemeta(),
+           version = "2") {
+    version <- as.character(version)
+
+
+
+    switch(version,
+           "2" = codemeta_description(descr, id, cm),
+           "1" = create_codemeta_v1(descr, id))
+
+  }
+
diff --git a/R/guess_metadata.R b/R/guess_metadata.R
@@ -1,58 +1,59 @@
 ## Methods that guess additional metadata fields based on README badges and realted information
 
 
-guess_published <- function(codemeta){
 
-  ## Currently only considers if published to CRAN
-  cran_published(codemeta)
-}
+## cache avail packages
+CRAN <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))
+BIOC <- utils::available.packages(utils::contrib.url("https://www.bioconductor.org/packages/release/bioc", "source"))
+
+
+guess_provider <- function(pkg){
+
+  ## Assumes a single provider
+
+  if(pkg %in% CRAN[,"Package"]){
+    list("@id" = "https://cran.r-project.org",
+         "@type" = "Organization",
+         "name" = "Central R Archive Network (CRAN)",
+         "url" = "https://cran.r-project.org")
 
-cran_published <- function(codemeta){
-  if(codemeta$name %in% avail[,"Package"]){
-    codemeta$publisher <-
-      list("@type" = "Organization",
-           "name" = "CRAN",
-           "url" = "https://cran.r-project.org")
-  }
-  codemeta
 
-}
 
-## Do not run if we're not in the working directory of the package!
-at_pkg_root <- function(cm, path){
-  if(file.exists(paste0(path, "/DESCRIPTION"))){
-    descr <- read_dcf(path)
-    cm$name == descr$Package
+  } else if(pkg %in% BIOC[,"Package"]){
+    list("@id" = "https://www.bioconductor.org/",
+         "@type" = "Organization",
+         "name" = "BioConductor",
+         "url" = "https://www.bioconductor.org/packages/")
+
   } else {
-    FALSE
+    NULL
   }
 
 }
 
 
+
 ## look for .travis.yml ? GREP .travis badge so we can guess repo name.
-guess_ci <- function(codemeta, pkg = "."){
+guess_ci <- function(readme){
   link <- NULL
-  if(at_pkg_root(codemeta, ".") && file.exists("README.md")){
-    txt <- readLines("README.md")
+  if(file.exists(readme)){
+    txt <- readLines(readme)
     badge <- txt[grepl("travis-ci", txt)]
     link <- gsub(".*(https://travis-ci.org/\\w+/\\w+).*", "\\1", badge)
+    if(length(link)>1) link <- link[[1]]
   }
-  codemeta$contIntegration <- link[[1]]
-
-  codemeta
+  link
 }
 
-guess_devStatus <- function(codemeta, pkg = "."){
-
-  link <- NULL
-  if(at_pkg_root(codemeta, ".") && file.exists("README.md")){
-    txt <- readLines("README.md")
+guess_devStatus <- function(readme){
+  status <- NULL
+  if(file.exists(readme)){
+    txt <- readLines(readme)
     badge <- txt[grepl("Project Status", txt)]
     status <- gsub(".*\\[!\\[(Project Status: .*)\\.\\].*", "\\1", badge)
   }
-  codemeta$developmentStatus <- status
-  codemeta
+  status
+
 }
 
 

diff --git a/R/parse_depends.R b/R/parse_depends.R
@@ -1,41 +1,35 @@
 ## internal method for parsing a list of package dependencies into pkg URLs
 
 
-## cache avail packages
-avail <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))
 
-
-## FIXME: makes a list of package URLs.  Technically we could declare a different type for these, e.g. SoftwareApplication or SoftwareSourceCode
-## Revisit with SoftwareSourceCode and just implement fields suggested in schema v1
-#' @importFrom utils available.packages contrib.url
 parse_depends <- function(deps){
   if(!is.null(deps))
     str <- strsplit(deps, ",\n*")[[1]]
   else
     str <- NULL
 
-  out <- lapply(str, function(str){
+  lapply(str, function(str){
+
+    if(length(str) > 1){
+      warning(paste0("package depends", str, "may be multiple packages?"))
+      }
 
-    ## this is vectorized, though we apply it pkg by pkg anyhow
+    pkg <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
+    pkg <- gsub("\\s+", "", pkg)
 
-    pkgs <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
+    out <- list("@type" = "SoftwareApplication",
+                name = pkg)
+
+    ## Add Version if available
     pattern <- "\\s*\\w+\\s+\\([><=]+\\s([1-9.\\-]*)\\)*"
-    versions <- gsub(pattern, "\\1", str)
-    ## We want a NULL, not the full string, if no match is found
-    nomatch  <- !grepl(pattern, str)
-    versions[nomatch] <- NA
-
-    pkgs <- gsub("\\s+", "", pkgs)
-
-    ## Check if pkg is on CRAN
-    if(all(pkgs %in% avail[,"Package"])){
-      pkgs <- paste0("https://cran.r-project.org/package=", pkgs)
-    } else {
-      ## Consider suppressing message (and fixing url) for R & base packages?
-   #   message(paste("could not find URL for package", pkgs, "since it is not available on CRAN."))
-    }
-    pkgs
-  })
+    version <- gsub(pattern, "\\1", str)
+    has_version  <- grepl(pattern, str)
+    if(has_version)
+      out$version <- version
 
-  out
+    out$provider <- guess_provider(pkg)
+
+    out
+  })
 }
+
diff --git a/R/parse_people.R b/R/parse_people.R
@@ -46,15 +46,22 @@ person_to_schema <- function(p){
   else
     type <- "Person"
 
-  switch(type,
-         "Person" =   list("@id" = id,
-                           "@type" = type,
+  out <- switch(type,
+         "Person" =   list("@type" = type,
                            givenName = p$given,
-                           familyName = p$family,
-                           email = p$email),
-         "Organization" = list("@id" = id,
-                               "@type" = type,
-                               name = c(p$given,p$family),
-                               email = p$email)
+                           familyName = p$family),
+         "Organization" = list("@type" = type,
+                               name = c(p$given,p$family))
   )
+
+  ## we don't want `{}` if none is found
+  if(!is.null(p$email)){
+    out$email <- p$email
+  }
+  if(!is.null(id)){
+    out$`@id` <- id
+  }
+
+  out
+
 }
diff --git a/R/write_codemeta.R b/R/write_codemeta.R
@@ -28,51 +28,3 @@ write_codemeta <- function(pkg = ".",
 
 }
 
-
-#' create_codemeta
-#'
-#' create a codemeta list object in R for further manipulation
-#' @inheritParams write_codemeta
-#' @return a codemeta list object
-#' @export
-#' @examples
-#' cm <- create_codemeta("jsonlite")
-#' cm$keywords <- list("metadata", "ropensci")
-#' write_codemeta(cm = cm)
-create_codemeta <- function(pkg = ".",
-                            path = "codemeta.json",
-                            version = "2",
-                            ...){
-
-  cm <- new_codemeta()
-  descr <- read_dcf(pkg)
-  cm <- import_pkg_description(descr = descr, cm = cm, version = version)
-
-  if(version != "1"){
-    #cm <- guess_ci(cm, pkg)
-    cm <- guess_published(cm)
-  }
-
-  ## Add blank slots as placeholders? and declare as an S3 class?
-
-  cm
-}
-
-
-## generate codemeta.json from a DESCRIPTION file
-## FIXME parse and use crosswalk to reference DESCRIPTION terms?
-import_pkg_description <-
-  function(descr,
-           id = NULL,
-           cm = new_codemeta(),
-           version = "2") {
-    version <- as.character(version)
-
-
-
-    switch(version,
-           "2" = codemeta_description(descr, id, cm),
-           "1" = create_codemeta_v1(descr, id))
-
-  }
-