refactor code

hsonne · May 10, 2017 · 634fbe0 · 634fbe0
1 parent 8750281
commit 634fbe0
Show file tree

Hide file tree

Showing 16 changed files with 390 additions and 105 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(create_codemeta)
 export(write_codemeta)
 importFrom(jsonlite,write_json)
 importFrom(utils,available.packages)

diff --git a/R/add_metadata.R b/R/add_metadata.R
@@ -3,29 +3,9 @@
 
 
 ## simple, top-level items
-add_metadata <- function(codemeta, keywords, developmentStatus, dateModified, datePublished, embargoDate, publisher, funding){}
+add_metadata <- function(codemeta, keywords, developmentStatus, dateModified, datePublished, embargoDate, funding){}
 
 
-
-
-## look for .travis.yml ? GREP .travis badge so we can guess repo name.
-guess_ci <- function(codemeta){
-  link <- NULL
-  if(file.exists("README.md")){
-    txt <- readLines("README.md")
-    badge <- txt[grepl("travis-ci", txt)]
-    link <- gsub(".*(https://travis-ci.org/\\w+/\\w+).*", "\\1", badge)
-  }
-  codemeta$contIntegration <- link
-
-  codemeta
-}
-
-## use rorcid / ORCID API to infer ORCID ID from name?
-guess_orcids <- function(codemeta){
-  NULL
-}
-
 codemeta_template <- function(){
   ## Load a template from inst/
 

diff --git a/R/codemeta_description.R b/R/codemeta_description.R
@@ -36,89 +36,30 @@ codemeta_description <-  function(descr, id = NULL, codemeta = new_codemeta()){
   ## license is a URL in schema.org, assume SPDX ID (though not all recognized CRAN abbreviations are valid SPDX strings)
   codemeta$license <- paste0("https://spdx.org/licenses/", gsub("^(\\w+).*", "\\1", as.character(descr$License)))
   codemeta$version <- descr$Version
-  codemeta$programmingLanguage <- list(name = R.version$language,
-                                       version = paste(R.version$major, R.version$minor, sep = "."), # According to Crosswalk, we just want numvers and not R.version.string
-                                       URL = "https://r-project.org")
+  codemeta$programmingLanguage <-
+    list("@type" = "ComputerLanguage",
+         name = R.version$language,
+         version = paste(R.version$major, R.version$minor, sep = "."), # According to Crosswalk, we just want numvers and not R.version.string
+         url = "https://r-project.org")
   ## According to schema.org, programmingLanguage doesn't have a version; but runtimePlatform, a plain string, does.  Of course this is less computable/structured:
   codemeta$runtimePlatform <- R.version.string
 
   ## FIXME Need to deal with: descr$Author, descr$Maintainer, and descr$Authors@R
-  if("Authors@R" %in% names(descr))
-    codemeta <- parse_authors_at_R(codemeta, descr)
+  if("Authors@R" %in% names(descr)){
+    codemeta <- parse_people(eval(parse(text=descr$`Authors@R`)), codemeta)
+  } else {
+    codemeta <- parse_people(as.person(descr$Maintainer), codemeta)
+    codemeta <- parse_people(as.person(descr$Author), codemeta)
+  }
 
-  #codemeta$author <- descr$Author
-#  codemeta$maintainer <- descr$Maintainer
   codemeta$suggests <- parse_depends(descr$Suggests)
   codemeta$depends <- c(parse_depends(descr$Imports), parse_depends(descr$Depends))
 
   codemeta
 
 }
 
-parse_authors_at_R <- function(codemeta, descr){
-  person_string <- descr$`Authors@R`
-  people <- eval(parse(text = person_string))
-
-  ## listing same person under multiple fields is inelegant?
-  codemeta$author <- lapply(people[ locate_role(people, "aut") ], person_to_schema)
-  codemeta$contributor <- lapply(people[ locate_role(people, "ctb") ], person_to_schema)
-  codemeta$copyrightHolder <- lapply(people[ locate_role(people, "cph") ], person_to_schema)
-  codemeta$maintainer <- person_to_schema(people[ locate_role(people, "cre") ])
-  codemeta
-}
-
-locate_role <- function(people, role = "aut"){
-  vapply(people, function(p) any(grepl(role, p$role)), logical(1))
-}
-
-person_to_schema <- function(p){
 
-  ## Store ORCID id in comment?
-  id <- NULL
-  if(!is.null(p$comment)){
-    if(grepl("orcid", p$comment)){
-      id <- p$comment
-    }
-  }
-  list("@id" = id,
-       givenName = p$given,
-       familyName = p$family,
-       email = p$email)
-}
-
-
-#' @importFrom utils available.packages contrib.url
-parse_depends <- function(deps){
-  if(!is.null(deps))
-    str <- strsplit(deps, ",\n*")[[1]]
-  else
-    str <- NULL
-
-  out <- lapply(str, function(str){
-
-    ## this is vectorized, though we apply it pkg by pkg anyhow
-
-    pkgs <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
-    pattern <- "\\s*\\w+\\s+\\([><=]+\\s([1-9.\\-]*)\\)*"
-    versions <- gsub(pattern, "\\1", str)
-    ## We want a NULL, not the full string, if no match is found
-    nomatch  <- !grepl(pattern, str)
-    versions[nomatch] <- NA
-
-    pkgs <- gsub("\\s+", "", pkgs)
-
-    ## Check if pkg is on CRAN
-    avail <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))
-    if(all(pkgs %in% avail[,"Package"]))
-      pkgs <- paste0("https://cran.r-project.org/package=", pkgs)
-    else{
-      message(paste("could not find URL for package", pkgs, "since it is not available on CRAN."))
-    }
-      pkgs
-  })
-
-  out
-}
 
 
 

diff --git a/R/guess_metadata.R b/R/guess_metadata.R
@@ -0,0 +1,54 @@
+## Methods that guess additional metadata fields based on README badges and realted information
+
+
+guess_published <- function(codemeta){
+
+  ## Currently only considers if published to CRAN
+  cran_published(codemeta)
+}
+
+cran_published <- function(codemeta){
+  if(codemeta$name %in% avail[,"Package"]){
+    codemeta$publisher <-
+      list("@type" = "Organization",
+           "name" = "CRAN",
+           "url" = "https://cran.r-project.org")
+  }
+  codemeta
+
+}
+
+## look for .travis.yml ? GREP .travis badge so we can guess repo name.
+guess_ci <- function(codemeta, pkg = "."){
+  link <- NULL
+  if(file.exists("README.md")){
+    txt <- readLines("README.md")
+    badge <- txt[grepl("travis-ci", txt)]
+    link <- gsub(".*(https://travis-ci.org/\\w+/\\w+).*", "\\1", badge)
+  }
+  codemeta$contIntegration <- link
+
+  codemeta
+}
+
+guess_devStatus <- function(codemeta, pkg = "."){
+
+  link <- NULL
+  if(file.exists("README.md")){
+    txt <- readLines("README.md")
+    badge <- txt[grepl("Project Status", txt)]
+    status <- gsub(".*\\[!\\[(Project Status: .*)\\.\\].*", "\\1", badge)
+  }
+  codemeta$developmentStatus <- status
+  codemeta
+}
+
+
+## use rorcid / ORCID API to infer ORCID ID from name?
+## (Can't use email since only 2% of ORCID users expose email)
+## Also can get Affiliation from ORCID search
+guess_orcids <- function(codemeta){
+  NULL
+}
+
+### Consider: guess_releastNotes()
diff --git a/R/parse_depends.R b/R/parse_depends.R
@@ -0,0 +1,40 @@
+## internal method for parsing a list of package dependencies into pkg URLs
+
+
+## cache avail packages
+avail <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))
+
+
+## FIXME: makes a list of package URLs.  Technically we could declare a different type for these, e.g. SoftwareApplication or SoftwareSourceCode
+
+#' @importFrom utils available.packages contrib.url
+parse_depends <- function(deps){
+  if(!is.null(deps))
+    str <- strsplit(deps, ",\n*")[[1]]
+  else
+    str <- NULL
+
+  out <- lapply(str, function(str){
+
+    ## this is vectorized, though we apply it pkg by pkg anyhow
+
+    pkgs <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
+    pattern <- "\\s*\\w+\\s+\\([><=]+\\s([1-9.\\-]*)\\)*"
+    versions <- gsub(pattern, "\\1", str)
+    ## We want a NULL, not the full string, if no match is found
+    nomatch  <- !grepl(pattern, str)
+    versions[nomatch] <- NA
+
+    pkgs <- gsub("\\s+", "", pkgs)
+
+    ## Check if pkg is on CRAN
+    if(all(pkgs %in% avail[,"Package"]))
+      pkgs <- paste0("https://cran.r-project.org/package=", pkgs)
+    else{
+      message(paste("could not find URL for package", pkgs, "since it is not available on CRAN."))
+    }
+    pkgs
+  })
+
+  out
+}
diff --git a/R/parse_people.R b/R/parse_people.R
@@ -0,0 +1,39 @@
+## internal method, takes R person object and turns to codemeta / json-ld
+
+## FIXME if @id is available, avoid replicate listing of node?
+parse_people <- function(people, codemeta){
+
+  ## listing same person under multiple fields is inelegant?
+  codemeta$author <- lapply(people[ locate_role(people, "aut") ], person_to_schema)
+  codemeta$contributor <- lapply(people[ locate_role(people, "ctb") ], person_to_schema)
+  codemeta$copyrightHolder <- lapply(people[ locate_role(people, "cph") ], person_to_schema)
+  codemeta$maintainer <- person_to_schema(people[ locate_role(people, "cre") ])
+  codemeta
+}
+
+
+locate_role <- function(people, role = "aut"){
+  vapply(people, function(p) any(grepl(role, p$role)), logical(1))
+}
+
+person_to_schema <- function(p){
+
+  ## Store ORCID id in comment?
+  id <- NULL
+  if(!is.null(p$comment)){
+    if(grepl("orcid", p$comment)){
+      id <- p$comment
+    }
+  }
+
+  ## assume type is Organization if family name is null
+  if(is.null(p$family))
+    type <- "Organization"
+  else
+    type <- "Person"
+  list("@id" = id,
+       "@type" = type,
+       givenName = p$given,
+       familyName = p$family,
+       email = p$email)
+}
diff --git a/R/write_codemeta.R b/R/write_codemeta.R
@@ -1,6 +1,6 @@
 #' write_codemeta
 #' @param pkg package description file, can be path or package name.
-#' @param id an id for the package, such as the Zenodo DOI. a UUID will be generated if none is provided
+#' @param cm a codemeta list object, e.g. from \code{\link{create_codemeta}}
 #' @param path file name of the output, leave at default "codemeta.json"
 #' @param pretty formatting option to \code{\link{write_json}}
 #' @param auto_unbox formatting  option to \code{\link{write_json}}
@@ -14,36 +14,64 @@
 #' @examples
 #' write_codemeta("codemetar")
 write_codemeta <- function(pkg = ".",
-                           id = NULL,
+                           cm = NULL,
                            path = "codemeta.json",
                            pretty = TRUE,
                            auto_unbox = TRUE,
                            version = "2",
                            ...) {
 
-  descr <- read_dcf(pkg)
-  cm <- import_pkg_description(descr = descr, id = id, version = version)
-
+  if(is.null(cm)){
+    cm <- create_codemeta(pkg = pkg, path = path, version = version)
+  }
   jsonlite::write_json(cm, path, pretty = pretty, auto_unbox = auto_unbox, ...)
 
 }
 
 
+#' create_codemeta
+#'
+#' create a codemeta list object in R for further manipulation
+#' @inheritParams write_codemeta
+#' @return a codemeta list object
+#' @export
+#' @examples
+#' cm <- create_codemeta()
+#' cm$keywords <- list("metadata", "ropensci")
+#' write_codemeta(cm = cm)
+create_codemeta <- function(pkg = ".",
+                            path = "codemeta.json",
+                            version = "2",
+                            ...){
+
+  cm <- new_codemeta()
+  descr <- read_dcf(pkg)
+  cm <- import_pkg_description(descr = descr, cm = cm, version = version)
+
+  if(version != "1"){
+    cm <- guess_ci(cm, pkg)
+    cm <- guess_published(cm)
+  }
+
+  ## Add blank slots as placeholders? and declare as an S3 class?
+
+  cm
+}
 
 
 ## generate codemeta.json from a DESCRIPTION file
 ## FIXME parse and use crosswalk to reference DESCRIPTION terms?
 import_pkg_description <-
   function(descr,
            id = NULL,
-           codemeta = new_codemeta(),
+           cm = new_codemeta(),
            version = "2") {
     version <- as.character(version)
 
 
 
     switch(version,
-           "2" = codemeta_description(descr, id = id, codemeta = codemeta),
+           "2" = codemeta_description(descr, id, cm),
            "1" = create_codemeta_v1(descr, id))
 
   }