Skip to content

Commit

Permalink
refactor code
Browse files Browse the repository at this point in the history
  • Loading branch information
cboettig committed May 10, 2017
1 parent 8750281 commit 634fbe0
Show file tree
Hide file tree
Showing 16 changed files with 390 additions and 105 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(create_codemeta)
export(write_codemeta)
importFrom(jsonlite,write_json)
importFrom(utils,available.packages)
Expand Down
22 changes: 1 addition & 21 deletions R/add_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,9 @@


## simple, top-level items
add_metadata <- function(codemeta, keywords, developmentStatus, dateModified, datePublished, embargoDate, publisher, funding){}
add_metadata <- function(codemeta, keywords, developmentStatus, dateModified, datePublished, embargoDate, funding){}




## look for .travis.yml ? GREP .travis badge so we can guess repo name.
guess_ci <- function(codemeta){
link <- NULL
if(file.exists("README.md")){
txt <- readLines("README.md")
badge <- txt[grepl("travis-ci", txt)]
link <- gsub(".*(https://travis-ci.org/\\w+/\\w+).*", "\\1", badge)
}
codemeta$contIntegration <- link

codemeta
}

## use rorcid / ORCID API to infer ORCID ID from name?
guess_orcids <- function(codemeta){
NULL
}

codemeta_template <- function(){
## Load a template from inst/

Expand Down
81 changes: 11 additions & 70 deletions R/codemeta_description.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,89 +36,30 @@ codemeta_description <- function(descr, id = NULL, codemeta = new_codemeta()){
## license is a URL in schema.org, assume SPDX ID (though not all recognized CRAN abbreviations are valid SPDX strings)
codemeta$license <- paste0("https://spdx.org/licenses/", gsub("^(\\w+).*", "\\1", as.character(descr$License)))
codemeta$version <- descr$Version
codemeta$programmingLanguage <- list(name = R.version$language,
version = paste(R.version$major, R.version$minor, sep = "."), # According to Crosswalk, we just want numvers and not R.version.string
URL = "https://r-project.org")
codemeta$programmingLanguage <-
list("@type" = "ComputerLanguage",
name = R.version$language,
version = paste(R.version$major, R.version$minor, sep = "."), # According to Crosswalk, we just want numvers and not R.version.string
url = "https://r-project.org")
## According to schema.org, programmingLanguage doesn't have a version; but runtimePlatform, a plain string, does. Of course this is less computable/structured:
codemeta$runtimePlatform <- R.version.string

## FIXME Need to deal with: descr$Author, descr$Maintainer, and descr$Authors@R
if("Authors@R" %in% names(descr))
codemeta <- parse_authors_at_R(codemeta, descr)
if("Authors@R" %in% names(descr)){
codemeta <- parse_people(eval(parse(text=descr$`Authors@R`)), codemeta)
} else {
codemeta <- parse_people(as.person(descr$Maintainer), codemeta)
codemeta <- parse_people(as.person(descr$Author), codemeta)
}

#codemeta$author <- descr$Author
# codemeta$maintainer <- descr$Maintainer
codemeta$suggests <- parse_depends(descr$Suggests)
codemeta$depends <- c(parse_depends(descr$Imports), parse_depends(descr$Depends))

codemeta

}

parse_authors_at_R <- function(codemeta, descr){
person_string <- descr$`Authors@R`
people <- eval(parse(text = person_string))

## listing same person under multiple fields is inelegant?
codemeta$author <- lapply(people[ locate_role(people, "aut") ], person_to_schema)
codemeta$contributor <- lapply(people[ locate_role(people, "ctb") ], person_to_schema)
codemeta$copyrightHolder <- lapply(people[ locate_role(people, "cph") ], person_to_schema)
codemeta$maintainer <- person_to_schema(people[ locate_role(people, "cre") ])
codemeta
}

locate_role <- function(people, role = "aut"){
vapply(people, function(p) any(grepl(role, p$role)), logical(1))
}

person_to_schema <- function(p){

## Store ORCID id in comment?
id <- NULL
if(!is.null(p$comment)){
if(grepl("orcid", p$comment)){
id <- p$comment
}
}
list("@id" = id,
givenName = p$given,
familyName = p$family,
email = p$email)
}


#' @importFrom utils available.packages contrib.url
parse_depends <- function(deps){
if(!is.null(deps))
str <- strsplit(deps, ",\n*")[[1]]
else
str <- NULL

out <- lapply(str, function(str){

## this is vectorized, though we apply it pkg by pkg anyhow

pkgs <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
pattern <- "\\s*\\w+\\s+\\([><=]+\\s([1-9.\\-]*)\\)*"
versions <- gsub(pattern, "\\1", str)
## We want a NULL, not the full string, if no match is found
nomatch <- !grepl(pattern, str)
versions[nomatch] <- NA

pkgs <- gsub("\\s+", "", pkgs)

## Check if pkg is on CRAN
avail <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))
if(all(pkgs %in% avail[,"Package"]))
pkgs <- paste0("https://cran.r-project.org/package=", pkgs)
else{
message(paste("could not find URL for package", pkgs, "since it is not available on CRAN."))
}
pkgs
})

out
}



Expand Down
54 changes: 54 additions & 0 deletions R/guess_metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
## Methods that guess additional metadata fields based on README badges and realted information


guess_published <- function(codemeta){

## Currently only considers if published to CRAN
cran_published(codemeta)
}

cran_published <- function(codemeta){
if(codemeta$name %in% avail[,"Package"]){
codemeta$publisher <-
list("@type" = "Organization",
"name" = "CRAN",
"url" = "https://cran.r-project.org")
}
codemeta

}

## look for .travis.yml ? GREP .travis badge so we can guess repo name.
guess_ci <- function(codemeta, pkg = "."){
link <- NULL
if(file.exists("README.md")){
txt <- readLines("README.md")
badge <- txt[grepl("travis-ci", txt)]
link <- gsub(".*(https://travis-ci.org/\\w+/\\w+).*", "\\1", badge)
}
codemeta$contIntegration <- link

codemeta
}

guess_devStatus <- function(codemeta, pkg = "."){

link <- NULL
if(file.exists("README.md")){
txt <- readLines("README.md")
badge <- txt[grepl("Project Status", txt)]
status <- gsub(".*\\[!\\[(Project Status: .*)\\.\\].*", "\\1", badge)
}
codemeta$developmentStatus <- status
codemeta
}


## use rorcid / ORCID API to infer ORCID ID from name?
## (Can't use email since only 2% of ORCID users expose email)
## Also can get Affiliation from ORCID search
guess_orcids <- function(codemeta){
NULL
}

### Consider: guess_releastNotes()
40 changes: 40 additions & 0 deletions R/parse_depends.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
## internal method for parsing a list of package dependencies into pkg URLs


## cache avail packages
avail <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))


## FIXME: makes a list of package URLs. Technically we could declare a different type for these, e.g. SoftwareApplication or SoftwareSourceCode

#' @importFrom utils available.packages contrib.url
parse_depends <- function(deps){
if(!is.null(deps))
str <- strsplit(deps, ",\n*")[[1]]
else
str <- NULL

out <- lapply(str, function(str){

## this is vectorized, though we apply it pkg by pkg anyhow

pkgs <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
pattern <- "\\s*\\w+\\s+\\([><=]+\\s([1-9.\\-]*)\\)*"
versions <- gsub(pattern, "\\1", str)
## We want a NULL, not the full string, if no match is found
nomatch <- !grepl(pattern, str)
versions[nomatch] <- NA

pkgs <- gsub("\\s+", "", pkgs)

## Check if pkg is on CRAN
if(all(pkgs %in% avail[,"Package"]))
pkgs <- paste0("https://cran.r-project.org/package=", pkgs)
else{
message(paste("could not find URL for package", pkgs, "since it is not available on CRAN."))
}
pkgs
})

out
}
39 changes: 39 additions & 0 deletions R/parse_people.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
## internal method, takes R person object and turns to codemeta / json-ld

## FIXME if @id is available, avoid replicate listing of node?
parse_people <- function(people, codemeta){

## listing same person under multiple fields is inelegant?
codemeta$author <- lapply(people[ locate_role(people, "aut") ], person_to_schema)
codemeta$contributor <- lapply(people[ locate_role(people, "ctb") ], person_to_schema)
codemeta$copyrightHolder <- lapply(people[ locate_role(people, "cph") ], person_to_schema)
codemeta$maintainer <- person_to_schema(people[ locate_role(people, "cre") ])
codemeta
}


locate_role <- function(people, role = "aut"){
vapply(people, function(p) any(grepl(role, p$role)), logical(1))
}

person_to_schema <- function(p){

## Store ORCID id in comment?
id <- NULL
if(!is.null(p$comment)){
if(grepl("orcid", p$comment)){
id <- p$comment
}
}

## assume type is Organization if family name is null
if(is.null(p$family))
type <- "Organization"
else
type <- "Person"
list("@id" = id,
"@type" = type,
givenName = p$given,
familyName = p$family,
email = p$email)
}
42 changes: 35 additions & 7 deletions R/write_codemeta.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' write_codemeta
#' @param pkg package description file, can be path or package name.
#' @param id an id for the package, such as the Zenodo DOI. a UUID will be generated if none is provided
#' @param cm a codemeta list object, e.g. from \code{\link{create_codemeta}}
#' @param path file name of the output, leave at default "codemeta.json"
#' @param pretty formatting option to \code{\link{write_json}}
#' @param auto_unbox formatting option to \code{\link{write_json}}
Expand All @@ -14,36 +14,64 @@
#' @examples
#' write_codemeta("codemetar")
write_codemeta <- function(pkg = ".",
id = NULL,
cm = NULL,
path = "codemeta.json",
pretty = TRUE,
auto_unbox = TRUE,
version = "2",
...) {

descr <- read_dcf(pkg)
cm <- import_pkg_description(descr = descr, id = id, version = version)

if(is.null(cm)){
cm <- create_codemeta(pkg = pkg, path = path, version = version)
}
jsonlite::write_json(cm, path, pretty = pretty, auto_unbox = auto_unbox, ...)

}


#' create_codemeta
#'
#' create a codemeta list object in R for further manipulation
#' @inheritParams write_codemeta
#' @return a codemeta list object
#' @export
#' @examples
#' cm <- create_codemeta()
#' cm$keywords <- list("metadata", "ropensci")
#' write_codemeta(cm = cm)
create_codemeta <- function(pkg = ".",
path = "codemeta.json",
version = "2",
...){

cm <- new_codemeta()
descr <- read_dcf(pkg)
cm <- import_pkg_description(descr = descr, cm = cm, version = version)

if(version != "1"){
cm <- guess_ci(cm, pkg)
cm <- guess_published(cm)
}

## Add blank slots as placeholders? and declare as an S3 class?

cm
}


## generate codemeta.json from a DESCRIPTION file
## FIXME parse and use crosswalk to reference DESCRIPTION terms?
import_pkg_description <-
function(descr,
id = NULL,
codemeta = new_codemeta(),
cm = new_codemeta(),
version = "2") {
version <- as.character(version)



switch(version,
"2" = codemeta_description(descr, id = id, codemeta = codemeta),
"2" = codemeta_description(descr, id, cm),
"1" = create_codemeta_v1(descr, id))

}
Expand Down
Loading

0 comments on commit 634fbe0

Please sign in to comment.