Skip to content

Commit

Permalink
Various improvements:
Browse files Browse the repository at this point in the history
include dependencies as SoftwareApplication types with name, version, provider

publisher-> provider for CRAN/BIOC

tidier json output (fewer empty nodes)
tidier, more sensible subroutines.

cleaner logic for handling pkg="." vs pkg="packagename"
  • Loading branch information
cboettig committed May 11, 2017
1 parent 6beb4f4 commit 65b4607
Show file tree
Hide file tree
Showing 10 changed files with 317 additions and 168 deletions.
7 changes: 2 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
Imports: jsonlite (>= 1.3)
Suggests: testthat,
jsonvalidate,
jsonld,
Suggests: testthat, jsonvalidate, jsonld,
covr,
knitr,
rmarkdown
knitr, rmarkdown
VignetteBuilder: knitr
22 changes: 12 additions & 10 deletions R/codemeta_description.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,29 +75,31 @@ codemeta_description <- function(descr, id = NULL, codemeta = new_codemeta()){
read_dcf <- function(pkg) {

## Takes path to DESCRIPTION, to package root, or the package name as an argument
path <- paste(pkg, "DESCRIPTION", sep="/")
if(basename(pkg) == "DESCRIPTION")
if(basename(pkg) == "DESCRIPTION"){ #so read_dcf can take a dcf file as argument, instead of just a path.
dcf <- pkg
else if(file.exists(path)){
dcf <- path
} else {
dcf <- system.file("DESCRIPTION", package = pkg)
dcf <- get_file("DESCRIPTION", pkg)
}

fields <- colnames(read.dcf(dcf))
as.list(read.dcf(dcf, keep.white = fields)[1, ])
}

## Alternate approach:
## utils::packageDescription assumes package is installed, takes pkg name not path.
## Advantages: Handles encoding, a little handling of Authors@R (actually done by install.packages step)
##descr <- utils::packageDescription(pkg)

## Like system.file, but pkg can instead be path to package root directory
get_file <- function(FILE, pkg = "."){
f <- file.path(pkg, FILE)
if(file.exists(f))
f
else {
f <- system.file(FILE, package = pkg)
}
}






is_IRI <- function(string){
## FIXME IRI can be many other things too, see https://github.com/dgerber/rfc3987 for more formal implementation
grepl("^http[s]?://", string)
Expand Down
55 changes: 55 additions & 0 deletions R/create_codemeta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

#' create_codemeta
#'
#' create a codemeta list object in R for further manipulation
#' @inheritParams write_codemeta
#' @return a codemeta list object
#' @export
#' @examples
#' cm <- create_codemeta("jsonlite")
#' cm$keywords <- list("metadata", "ropensci")
#' write_codemeta(cm = cm)
create_codemeta <- function(pkg = ".",
path = "codemeta.json",
version = "2",
...){

cm <- new_codemeta()
descr <- read_dcf(pkg)
cm <- import_pkg_description(descr = descr, cm = cm, version = version)

## legacy support only
if(version == "1") return(cm)

readme <- get_file("README.md", pkg)

cm$contIntegration <- guess_ci(readme)
cm$developmentStatus <- guess_devStatus(readme)
cm$provider <- guess_provider(cm$name)


## Add blank slots as placeholders? and declare as an S3 class?

cm
}




## generate codemeta.json from a DESCRIPTION file
## FIXME parse and use crosswalk to reference DESCRIPTION terms?
import_pkg_description <-
function(descr,
id = NULL,
cm = new_codemeta(),
version = "2") {
version <- as.character(version)



switch(version,
"2" = codemeta_description(descr, id, cm),
"1" = create_codemeta_v1(descr, id))

}

65 changes: 33 additions & 32 deletions R/guess_metadata.R
Original file line number Diff line number Diff line change
@@ -1,58 +1,59 @@
## Methods that guess additional metadata fields based on README badges and realted information


guess_published <- function(codemeta){

## Currently only considers if published to CRAN
cran_published(codemeta)
}
## cache avail packages
CRAN <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))
BIOC <- utils::available.packages(utils::contrib.url("https://www.bioconductor.org/packages/release/bioc", "source"))


guess_provider <- function(pkg){

## Assumes a single provider

if(pkg %in% CRAN[,"Package"]){
list("@id" = "https://cran.r-project.org",
"@type" = "Organization",
"name" = "Central R Archive Network (CRAN)",
"url" = "https://cran.r-project.org")

cran_published <- function(codemeta){
if(codemeta$name %in% avail[,"Package"]){
codemeta$publisher <-
list("@type" = "Organization",
"name" = "CRAN",
"url" = "https://cran.r-project.org")
}
codemeta

}

## Do not run if we're not in the working directory of the package!
at_pkg_root <- function(cm, path){
if(file.exists(paste0(path, "/DESCRIPTION"))){
descr <- read_dcf(path)
cm$name == descr$Package
} else if(pkg %in% BIOC[,"Package"]){
list("@id" = "https://www.bioconductor.org/",
"@type" = "Organization",
"name" = "BioConductor",
"url" = "https://www.bioconductor.org/packages/")

} else {
FALSE
NULL
}

}



## look for .travis.yml ? GREP .travis badge so we can guess repo name.
guess_ci <- function(codemeta, pkg = "."){
guess_ci <- function(readme){
link <- NULL
if(at_pkg_root(codemeta, ".") && file.exists("README.md")){
txt <- readLines("README.md")
if(file.exists(readme)){
txt <- readLines(readme)
badge <- txt[grepl("travis-ci", txt)]
link <- gsub(".*(https://travis-ci.org/\\w+/\\w+).*", "\\1", badge)
if(length(link)>1) link <- link[[1]]
}
codemeta$contIntegration <- link[[1]]

codemeta
link
}

guess_devStatus <- function(codemeta, pkg = "."){

link <- NULL
if(at_pkg_root(codemeta, ".") && file.exists("README.md")){
txt <- readLines("README.md")
guess_devStatus <- function(readme){
status <- NULL
if(file.exists(readme)){
txt <- readLines(readme)
badge <- txt[grepl("Project Status", txt)]
status <- gsub(".*\\[!\\[(Project Status: .*)\\.\\].*", "\\1", badge)
}
codemeta$developmentStatus <- status
codemeta
status

}


Expand Down
46 changes: 20 additions & 26 deletions R/parse_depends.R
Original file line number Diff line number Diff line change
@@ -1,41 +1,35 @@
## internal method for parsing a list of package dependencies into pkg URLs


## cache avail packages
avail <- utils::available.packages(utils::contrib.url("https://cran.rstudio.com", "source"))


## FIXME: makes a list of package URLs. Technically we could declare a different type for these, e.g. SoftwareApplication or SoftwareSourceCode
## Revisit with SoftwareSourceCode and just implement fields suggested in schema v1
#' @importFrom utils available.packages contrib.url
parse_depends <- function(deps){
if(!is.null(deps))
str <- strsplit(deps, ",\n*")[[1]]
else
str <- NULL

out <- lapply(str, function(str){
lapply(str, function(str){

if(length(str) > 1){
warning(paste0("package depends", str, "may be multiple packages?"))
}

## this is vectorized, though we apply it pkg by pkg anyhow
pkg <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
pkg <- gsub("\\s+", "", pkg)

pkgs <- gsub("\\s*(\\w+)\\s.*", "\\1", str)
out <- list("@type" = "SoftwareApplication",
name = pkg)

## Add Version if available
pattern <- "\\s*\\w+\\s+\\([><=]+\\s([1-9.\\-]*)\\)*"
versions <- gsub(pattern, "\\1", str)
## We want a NULL, not the full string, if no match is found
nomatch <- !grepl(pattern, str)
versions[nomatch] <- NA

pkgs <- gsub("\\s+", "", pkgs)

## Check if pkg is on CRAN
if(all(pkgs %in% avail[,"Package"])){
pkgs <- paste0("https://cran.r-project.org/package=", pkgs)
} else {
## Consider suppressing message (and fixing url) for R & base packages?
# message(paste("could not find URL for package", pkgs, "since it is not available on CRAN."))
}
pkgs
})
version <- gsub(pattern, "\\1", str)
has_version <- grepl(pattern, str)
if(has_version)
out$version <- version

out
out$provider <- guess_provider(pkg)

out
})
}

25 changes: 16 additions & 9 deletions R/parse_people.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,22 @@ person_to_schema <- function(p){
else
type <- "Person"

switch(type,
"Person" = list("@id" = id,
"@type" = type,
out <- switch(type,
"Person" = list("@type" = type,
givenName = p$given,
familyName = p$family,
email = p$email),
"Organization" = list("@id" = id,
"@type" = type,
name = c(p$given,p$family),
email = p$email)
familyName = p$family),
"Organization" = list("@type" = type,
name = c(p$given,p$family))
)

## we don't want `{}` if none is found
if(!is.null(p$email)){
out$email <- p$email
}
if(!is.null(id)){
out$`@id` <- id
}

out

}
48 changes: 0 additions & 48 deletions R/write_codemeta.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,51 +28,3 @@ write_codemeta <- function(pkg = ".",

}


#' create_codemeta
#'
#' create a codemeta list object in R for further manipulation
#' @inheritParams write_codemeta
#' @return a codemeta list object
#' @export
#' @examples
#' cm <- create_codemeta("jsonlite")
#' cm$keywords <- list("metadata", "ropensci")
#' write_codemeta(cm = cm)
create_codemeta <- function(pkg = ".",
path = "codemeta.json",
version = "2",
...){

cm <- new_codemeta()
descr <- read_dcf(pkg)
cm <- import_pkg_description(descr = descr, cm = cm, version = version)

if(version != "1"){
#cm <- guess_ci(cm, pkg)
cm <- guess_published(cm)
}

## Add blank slots as placeholders? and declare as an S3 class?

cm
}


## generate codemeta.json from a DESCRIPTION file
## FIXME parse and use crosswalk to reference DESCRIPTION terms?
import_pkg_description <-
function(descr,
id = NULL,
cm = new_codemeta(),
version = "2") {
version <- as.character(version)



switch(version,
"2" = codemeta_description(descr, id, cm),
"1" = create_codemeta_v1(descr, id))

}

Loading

0 comments on commit 65b4607

Please sign in to comment.