From 12734d8b9bc12b7ff5bd530200ec96e38be6a12e Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 22 Nov 2017 11:36:30 -0900 Subject: [PATCH 001/318] Rewrite resource map updating routines to fix various issues This commit addresses the cause of #51 The previous version of `parse_resource_map` used a SPARQL query to pull the triples out of the resource map being updated but didn't actually parse the result in an RDF-aware manner. This lead to " characters ending up in various places in the RDF/XML which causes tons of issues. The new routine simply uses datapack::parseRDF instead and just filters out cito:documents, cito:isDocumentedBy, dcterms:identifier, and the DataONE R Client statement. --- R/packaging.R | 89 +++++------------------------------ tests/testthat/test_editing.R | 40 +++++++++------- 2 files changed, 34 insertions(+), 95 deletions(-) diff --git a/R/packaging.R b/R/packaging.R index 7002d33..9dec754 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -448,6 +448,10 @@ generate_resource_map <- function(metadata_pid, message("Adding ", nrow(other_statements), " custom statement(s) to the Resource Map.") + # Add an NA dataTypeURI to all the statements so the subsequent rbind works + # This is fine because they're all URIs and they don't need a datType + relationships$dataTypeURI <- NA + relationships <- rbind(relationships, other_statements) } @@ -1037,65 +1041,21 @@ update_package <- function(inventory, parse_resource_map <- function(path) { stopifnot(file.exists(path)) - world <- new("World") - storage <- new("Storage", - world, - "hashes", - name = "", - options = "hash-type='memory'") - model <- new("Model", world, storage, options = "") - parser <- new("Parser", world) - - redland::parseFileIntoModel(parser, world, path, model) - - query <- new("Query", - world, - "select ?s ?p ?o where { ?s ?p ?o }", - base_uri = NULL, - query_language = "sparql", - query_uri = NULL) - - queryResult <- redland::executeQuery(query, model) - - statements <- data.frame() - - while(!is.null(result <- redland::getNextResult(queryResult))) { - statements <- rbind(statements, - data.frame(subject = result$s, - predicate = result$p, - object = result$o, - stringsAsFactors = FALSE)) - } - - # Remove < and > around URIs. We do this because redland needs them to be - # without those characters or it complains about being unable to convert into - # a qname - statements$subject <- stringr::str_replace_all(statements$subject, "^[<]", "") - statements$predicate <- stringr::str_replace_all(statements$predicate, "^[<]", "") - statements$object <- stringr::str_replace_all(statements$object, "^[<]", "") - statements$subject <- stringr::str_replace_all(statements$subject, "[>]$", "") - statements$predicate <- stringr::str_replace_all(statements$predicate, "[>]$", "") - statements$object <- stringr::str_replace_all(statements$object, "[>]$", "") - - statements + rm <- new("ResourceMap") + datapack::parseRDF(rm, path) + datapack:::getTriples(rm) } #' Filter statements related to packaging #' +#' This is intended to be called after `datapack::getTriples` has been called +#' on a ResourceMap. +#' #' This function was written specifically for the case of updating a resource #' map while preserving any extra statements that have been added such as PROV #' statements. Statements are filtered according to these rules: #' -#' 1. If the subject or object is the ore:ResourceMap resource -#' 2. If the subject or object is the ore:Aggregation resource -#' 3. If the predicate is cito:documents or cito:isDocumentedBy -#' 4. Once filters 1-3 have been executed, any remaining triples are considered -#' for removal if they look like dangling dc:identifier statements -#' -#' The consequence of filter 4 is that dc:identifier statements are left in if -#' they are still in use by another statement -#' #' @param statements (data.frame) A set of Statements to be filtered #' #' @return (data.frame) The filtered Statements @@ -1106,36 +1066,11 @@ filter_packaging_statements <- function(statements) { stopifnot(is.data.frame(statements)) if (nrow(statements) == 0) return(statements) - # Collect URIs we're going to use to filter by - resource_map_uri <- statements[grepl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", statements$predicate) & grepl("http://www.openarchives.org/ore/terms/ResourceMap", statements$object),"subject"] - aggregation_uri <- statements[grepl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", statements$predicate) & grepl("http://www.openarchives.org/ore/terms/Aggregation", statements$object),"subject"] - - # Filter statements by subject - statements <- statements[!(statements$subject %in% c(resource_map_uri, aggregation_uri)),] - - # Filter statements by object - statements <- statements[!(statements$object %in% c(resource_map_uri, aggregation_uri)),] - # Filter cito:documents / cito:isDocumentedBy statements statements <- statements[!(statements$predicate == "http://purl.org/spar/cito/documents"),] statements <- statements[!(statements$predicate == "http://purl.org/spar/cito/isDocumentedBy"),] - - # If this is a simple package without extra statements, then we should just be - # left with some dc:identifier statements left over. Here we try to detect - # that case by collecting the unique subjects taking part in dc:identifier - # statements and filtering statements about subjects with only one statement - # about them - - dc_identifiers <- unique(statements[statements$predicate == "http://purl.org/dc/terms/identifier", "subject"]) - - for (identifier in dc_identifiers) { - if (nrow(statements[statements$subject == identifier | statements$object == identifier,]) == 1) { - statements <- statements[!(statements$subject == identifier | statements$object == identifier),] - } - } - - # Remove introduced by the second filter statement - statements <- statements[complete.cases(statements),] + statements <- statements[!(statements$predicate == "http://purl.org/dc/terms/identifier"),] + statements <- statements[!((statements$predicate == "http://xmlns.com/foaf/0.1/name" & statements$object == "DataONE R Client")),] statements } diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index a088262..0d607c3 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -15,7 +15,8 @@ test_that("we can publish an update", { update <- publish_update(mn, package$metadata, package$resource_map, - package$data) + package$data, + check_first = FALSE) expect_named(update, c("metadata", "resource_map", "data")) expect_true(all(object_exists(mn, unlist(update)))) @@ -36,7 +37,8 @@ test_that("an identifier can be manually specified when publishing an update", { package$metadata, package$resource_map, package$data, - identifier = new_identifier) + identifier = new_identifier, + check_first = FALSE) expect_equal(update$metadata, new_identifier) }) @@ -53,7 +55,7 @@ test_that("we can create a resource map", { response <- create_resource_map(mn, metadata_pid, data_pid) expect_true(object_exists(mn, response)) - expect_equal(response, get_package(mn, metadata_pid)$resource_map) + expect_equal(response, get_package(mn, response)$resource_map) }) @@ -121,7 +123,7 @@ test_that("SIDs are maintained when publishing an update to an object with a SID sid = new_sid) resmap_pid <- create_resource_map(mn, metadata_pid = pid) - response <- publish_update(mn, pid, resmap_pid) + response <- publish_update(mn, pid, resmap_pid, check_first = FALSE) sysmeta <- getSystemMetadata(mn, response$metadata) expect_equal(sysmeta@seriesId, new_sid) @@ -143,7 +145,10 @@ test_that("we can publish an update to an object", { csv <- data.frame(x = 1:50) write.csv(csv, tmp) - upd <- update_object(mn, old, tmp) + suppressWarnings({ + upd <- update_object(mn, old, tmp) + }) + file.remove(tmp) sm <- dataone::getSystemMetadata(mn, upd) @@ -181,27 +186,26 @@ test_that("extra statements are maintained between updates", { # Add some PROV triples to the Resource Map rm <- tempfile() writeLines(rawToChar(dataone::getObject(mn, pkg$resource_map)), rm) - # statements <- data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[1], reserved = TRUE)), - # predicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", - # object = "http://www.w3.org/ns/prov#Entity") - # - # statements <- rbind(statements, - # data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[2], reserved = TRUE)), - # predicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", - # object = "http://www.w3.org/ns/prov#Entity")) statements <- data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[1], reserved = TRUE)), predicate = "http://www.w3.org/ns/prov#wasDerivedFrom", - object = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[2], reserved = TRUE))) - - new_rm <- update_resource_map(mn, pkg$resource_map, pkg$metadata, pkg$data, other_statements = statements, public = TRUE) + object = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[2], reserved = TRUE)), + subjectType = "uri", + objectType = "uri", + dataTypeURI = NA) + + new_rm <- update_resource_map(mn, + pkg$resource_map, + pkg$metadata, + pkg$data, + other_statements = statements, + public = TRUE) rm <- tempfile() writeLines(rawToChar(dataone::getObject(mn, new_rm)), rm) statements <- parse_resource_map(rm) expect_true("http://www.w3.org/ns/prov#wasDerivedFrom" %in% statements$predicate) - new_new_rm <- update_resource_map(mn, new_rm, pkg$metadata, pkg$data, public = TRUE) rm <- tempfile() writeLines(rawToChar(dataone::getObject(mn, new_new_rm)), rm) @@ -220,7 +224,7 @@ test_that("rightsholder is properly set back after publishing an update", { set_result <- set_rights_holder(mn, unlist(pkg), "CN=arctic-data-admins,DC=dataone,DC=org") expect_true(all(set_result)) - new_pkg <- publish_update(mn, pkg$metadata, pkg$resource_map, pkg$data) + new_pkg <- publish_update(mn, pkg$metadata, pkg$resource_map, pkg$data, check_first = FALSE) rhs <- lapply(unlist(pkg), function(pid) { dataone::getSystemMetadata(mn, pid)@rightsHolder }) From 2678bec0e05e1ca7814cbc79dc771435bcdc3290 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 1 Dec 2017 13:52:39 -0900 Subject: [PATCH 002/318] Remove routine for migrating prov(/other) statements across resmap versions --- R/editing.R | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/R/editing.R b/R/editing.R index 9199019..5a3394f 100644 --- a/R/editing.R +++ b/R/editing.R @@ -621,17 +621,6 @@ update_resource_map <- function(mn, me <- get_token_subject() set_rights_holder(mn, resource_map_pid, me) - # Get the old resource map so we can extract any statements we need out of it - # such as PROV statements - old_resource_map_path <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, resource_map_pid)), old_resource_map_path) - statements <- parse_resource_map(old_resource_map_path) - statements <- filter_packaging_statements(statements) - if (is.data.frame(other_statements)) { - statements <- rbind(statements, - other_statements) - } - # Create the replacement resource map if (is.null(identifier)) { identifier <- paste0("resource_map_", new_uuid()) @@ -640,7 +629,6 @@ update_resource_map <- function(mn, new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, data_pids = data_pids, child_pids = child_pids, - other_statements = statements, resource_map_pid = identifier) stopifnot(file.exists(new_rm_path)) From fb5d654715ff58651bed8b574b4b1826d9da12f3 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 1 Dec 2017 15:48:05 -0900 Subject: [PATCH 003/318] Remove now-failing tests for maintianing prov statements --- tests/testthat/test_editing.R | 38 --------------------------------- tests/testthat/test_packaging.R | 10 --------- 2 files changed, 48 deletions(-) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 0d607c3..c0b9e7f 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -176,44 +176,6 @@ test_that("we can publish an update to an object and specify our own format id", expect_equal(sm@formatId, "text/plain") }) -test_that("extra statements are maintained between updates", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } - - pkg <- create_dummy_package(mn, 3) - - # Add some PROV triples to the Resource Map - rm <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, pkg$resource_map)), rm) - - statements <- data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[1], reserved = TRUE)), - predicate = "http://www.w3.org/ns/prov#wasDerivedFrom", - object = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[2], reserved = TRUE)), - subjectType = "uri", - objectType = "uri", - dataTypeURI = NA) - - new_rm <- update_resource_map(mn, - pkg$resource_map, - pkg$metadata, - pkg$data, - other_statements = statements, - public = TRUE) - - rm <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, new_rm)), rm) - statements <- parse_resource_map(rm) - expect_true("http://www.w3.org/ns/prov#wasDerivedFrom" %in% statements$predicate) - - new_new_rm <- update_resource_map(mn, new_rm, pkg$metadata, pkg$data, public = TRUE) - rm <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, new_new_rm)), rm) - statements <- parse_resource_map(rm) - expect_true("http://www.w3.org/ns/prov#wasDerivedFrom" %in% statements$predicate) -}) - - test_that("rightsholder is properly set back after publishing an update", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") diff --git a/tests/testthat/test_packaging.R b/tests/testthat/test_packaging.R index 94a9ef3..6bb9891 100644 --- a/tests/testthat/test_packaging.R +++ b/tests/testthat/test_packaging.R @@ -13,13 +13,3 @@ test_that("child pids are correctly determined", { expect_equal(determine_child_pids(inventory, "B"), "resource_map_C") expect_equal(determine_child_pids(inventory, "C"), NULL) }) - - -test_that("extra triple can be added to a resource map", { - path <- generate_resource_map("metadata", "data", - other_statements = data.frame(subject="http://example.com/me", - predicate="http://example.com/is_related_to", - object="http://example.com/myself")) - statements <- parse_resource_map(path) - expect_true("http://example.com/me" %in% statements$subject) -}) From 87f91179f4820ecdb283672e2179984d4f6cd334 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 4 Dec 2017 14:02:28 -0900 Subject: [PATCH 004/318] v0.6.3 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 055f868..b51e749 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: arcticdatautils Title: Arctic Data Utilities -Version: 0.6.2 +Version: 0.6.3 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), From 09d965e0336887b11afa15bbff9d2bf4c03b0f0b Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 21 Dec 2017 15:37:06 -0800 Subject: [PATCH 005/318] Added check for "https" in sysmeta update functions --- R/access.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/access.R b/R/access.R index e9fee0d..e7e1395 100644 --- a/R/access.R +++ b/R/access.R @@ -22,6 +22,8 @@ set_rights_holder <- function(mn, pids, subject) { all(nchar(pids) > 0)) stopifnot(is.character(subject), nchar(subject) > 0) + stopifnot(!grepl("https", subject)) + result <- vector(mode = "logical", length = length(pids)) @@ -83,8 +85,10 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang nchar(pids) > 0) stopifnot(is.character(subjects), nchar(subjects) > 0) + stopifnot(!grepl("https", subjects)) stopifnot(all(permissions %in% c("read", "write", "changePermission"))) + result <- c() for (pid in pids) { @@ -219,6 +223,7 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ all(is.character(pids)), all(nchar(pids) > 0), is.character(subject), + !grepl("https", subject), is.character(permissions)) # Store the results of each attempted update From 17f2e821c37e0431f249e2db3cf0cb31617b7141 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 21 Dec 2017 16:05:39 -0800 Subject: [PATCH 006/318] Changed logical expression in grepl --- R/access.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/access.R b/R/access.R index e7e1395..896bae6 100644 --- a/R/access.R +++ b/R/access.R @@ -22,7 +22,7 @@ set_rights_holder <- function(mn, pids, subject) { all(nchar(pids) > 0)) stopifnot(is.character(subject), nchar(subject) > 0) - stopifnot(!grepl("https", subject)) + stopifnot(grepl("http:", subject)) result <- vector(mode = "logical", length = length(pids)) @@ -85,7 +85,7 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang nchar(pids) > 0) stopifnot(is.character(subjects), nchar(subjects) > 0) - stopifnot(!grepl("https", subjects)) + stopifnot(grepl("http:", subjects)) stopifnot(all(permissions %in% c("read", "write", "changePermission"))) @@ -223,7 +223,7 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ all(is.character(pids)), all(nchar(pids) > 0), is.character(subject), - !grepl("https", subject), + grepl("http:", subject), is.character(permissions)) # Store the results of each attempted update From 6995f94523c3c40477e757ffdbe674b9feef8222 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 22 Dec 2017 17:19:23 -0800 Subject: [PATCH 007/318] Updated error catches to include messages --- R/access.R | 90 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/R/access.R b/R/access.R index 896bae6..db5eb45 100644 --- a/R/access.R +++ b/R/access.R @@ -17,12 +17,23 @@ #' @import datapack #' @export set_rights_holder <- function(mn, pids, subject) { - stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - all(nchar(pids) > 0)) - stopifnot(is.character(subject), - nchar(subject) > 0) - stopifnot(grepl("http:", subject)) + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subject), + nchar(subject) > 0)){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (grepl("^https:\\/\\/orcid\\.org", subject)) { + stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + } result <- vector(mode = "logical", length = length(pids)) @@ -80,13 +91,27 @@ set_rights_holder <- function(mn, pids, subject) { #' #' @examples set_access <- function(mn, pids, subjects, permissions=c("read", "write", "changePermission")) { - stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - nchar(pids) > 0) - stopifnot(is.character(subjects), - nchar(subjects) > 0) - stopifnot(grepl("http:", subjects)) - stopifnot(all(permissions %in% c("read", "write", "changePermission"))) + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subjects), + all(nchar(subjects)) > 0)){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (any(grepl("^https:\\/\\/orcid\\.org", subjects))) { + stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } result <- c() @@ -145,9 +170,15 @@ set_public_read <- function(mn, pids) { #' #' @examples remove_public_read <- function(mn, pids) { - stopifnot(is(mn, "MNode"), - all(is.character(pids)), - all(nchar(pids) > 0)) + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + # Store the results of each attempted update results <- c() @@ -219,12 +250,27 @@ remove_public_read <- function(mn, pids) { #' #' @examples set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "write", "changePermission")) { - stopifnot(is(mn, "MNode"), - all(is.character(pids)), - all(nchar(pids) > 0), - is.character(subject), - grepl("http:", subject), - is.character(permissions)) + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subject), + nchar(subject) > 0)){ + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (grepl("^https:\\/\\/orcid\\.org", subject)) { + stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } # Store the results of each attempted update results <- c() From 1cf82b255a8c4c4cb136fcb65e5fa97752450fb4 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 3 Jan 2018 09:55:43 -0800 Subject: [PATCH 008/318] Update eml@packageId element in update_object --- R/editing.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/editing.R b/R/editing.R index 5a3394f..288188d 100644 --- a/R/editing.R +++ b/R/editing.R @@ -170,6 +170,14 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) # Set the replication policy back to default sysmeta <- clear_replication_policy(sysmeta) + # Add packageId to metadata if the object is an xml file + if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml", format_id)) { + eml <- EML::read_eml(path) + eml@packageId <- new("xml_attribute", new_pid) + path <- tempfile() + EML::write_eml(eml, path) + } + # Make the update dataone::updateObject(mn, pid = pid, From a819db818e0d40825e23e6bc3b766c91286a562d Mon Sep 17 00:00:00 2001 From: eodean Date: Thu, 4 Jan 2018 11:23:50 -0800 Subject: [PATCH 009/318] In set_abstract, replaced length() with nchar() since we are evaluating if a String is >0 characters. --- R/eml.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/eml.R b/R/eml.R index 457e9e6..d1defc7 100644 --- a/R/eml.R +++ b/R/eml.R @@ -660,11 +660,11 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) set_abstract <- function(doc, text) { stopifnot(is(doc, "eml")) stopifnot(is.character(text), - length(text) > 0) + nchar(text) > 0) - if (length(text) == 1) { + if (nchar(text) == 1) { doc@dataset@abstract <- new("abstract", .Data = new("TextType", .Data = "hi")) - } else if (length(text) > 1) { + } else if (nchar(text) > 1) { doc@dataset@abstract <- new("abstract", para = new("ListOfpara", lapply(text, function(x) new("para", x)))) } From 64c44901df41c941e8f727339f5a4bd344cfd1d0 Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 5 Jan 2018 08:53:40 -0800 Subject: [PATCH 010/318] Updated to address CR comments. Evaluate all elements in vector text to confirm non-zero length. --- DESCRIPTION | 3 ++- R/eml.R | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b51e749..9da2595 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,8 @@ Version: 0.6.3 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), - person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb") + person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb"), + person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") ) Description: This package provides a set of utility methods for uploading and editing data on the Arctic Data Catalog. diff --git a/R/eml.R b/R/eml.R index d1defc7..bed03df 100644 --- a/R/eml.R +++ b/R/eml.R @@ -660,11 +660,12 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) set_abstract <- function(doc, text) { stopifnot(is(doc, "eml")) stopifnot(is.character(text), - nchar(text) > 0) + length(text) > 0, + all(nchar(text)) > 0) - if (nchar(text) == 1) { + if (length(text) == 1) { doc@dataset@abstract <- new("abstract", .Data = new("TextType", .Data = "hi")) - } else if (nchar(text) > 1) { + } else if (length(text) > 1) { doc@dataset@abstract <- new("abstract", para = new("ListOfpara", lapply(text, function(x) new("para", x)))) } From 2ffba5ea22532fb5d84af1156821b7e0c6d0a231 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 5 Jan 2018 09:55:28 -0800 Subject: [PATCH 011/318] Added checksum update --- DESCRIPTION | 1 + R/editing.R | 2 ++ 2 files changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index b51e749..ac85a41 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,6 +5,7 @@ Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb") + person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb") ) Description: This package provides a set of utility methods for uploading and editing data on the Arctic Data Catalog. diff --git a/R/editing.R b/R/editing.R index 288188d..74f4f08 100644 --- a/R/editing.R +++ b/R/editing.R @@ -176,6 +176,8 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) eml@packageId <- new("xml_attribute", new_pid) path <- tempfile() EML::write_eml(eml, path) + # File changed - update checksum + sysmeta@checksum <- digest::digest(path, algo = "sha1", serialize = FALSE, file = TRUE) } # Make the update From 865d6652c160172b334af70c6a5bab2b862c9ed4 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 5 Jan 2018 16:04:29 -0900 Subject: [PATCH 012/318] Add note about PRs to readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ab2090b..e727b7b 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,8 @@ remotes::install_github("nceas/arcticdatautils") ## Contributing -Please submit suggestions or bugs as [Issues](https://github.com/NCEAS/arcticdatautils/issues). +- Please submit suggestions or bugs as [Issues](https://github.com/NCEAS/arcticdatautils/issues). +- Pull Requestss should target the `master` branch ## Testing From 36af628f337d1b1532eb117fecdbd0a9fc6386b7 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 8 Jan 2018 09:21:38 -0900 Subject: [PATCH 013/318] Add test for update_object's EML packageId update routine --- tests/testthat/test_editing.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index c0b9e7f..eaaf4a7 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -198,3 +198,20 @@ test_that("publish update returns an error if its arguments are malformed", { expect_error(publish_update(mn, metadata_pid = 1)) expect_error(publish_update(mn, metadata_pid = "a", resource_map_pid = "b", data_pids = list(1, 2, 3))) }) + +test_that("update_object updates the packageId for EML object updates", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + eml_pid <- create_dummy_metadata(mn) + eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, eml_pid), eml_path) + + new_pid <- update_object(mn, eml_pid, eml_path, format_id = format_eml()) + updated_eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, new_pid), updated_eml_path) + + doc <- xml2::read_xml(updated_eml_path) + expect_equal(new_pid, xml2::xml_attr(xml2::xml_root(doc), "packageId")) +}) From 4ddf7f3b1f1899064549abde60b6185954dfdc63 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 8 Jan 2018 11:56:27 -0900 Subject: [PATCH 014/318] Fix missing docs for update_object --- R/editing.R | 2 ++ man/update_object.Rd | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/R/editing.R b/R/editing.R index 74f4f08..7d883b0 100644 --- a/R/editing.R +++ b/R/editing.R @@ -116,6 +116,8 @@ publish_object <- function(mn, #' @param pid (character) The PID of the object to update. #' @param path (character) The full path to the file to update with. #' @param format_id (character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param new_pid (character) Optional. Specify the PID for the new Object. Defaults to automatically generating a new, random UUID-style PID. +#' @param sid (character) Optiona. Specify a Series ID (SID) to use for the new Object. #' #' @return (character) The PID of the updated object. #' @export diff --git a/man/update_object.Rd b/man/update_object.Rd index 0dd8cb3..4a72e53 100644 --- a/man/update_object.Rd +++ b/man/update_object.Rd @@ -14,6 +14,10 @@ update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) \item{path}{(character) The full path to the file to update with.} \item{format_id}{(character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} + +\item{new_pid}{(character) Optional. Specify the PID for the new Object. Defaults to automatically generating a new, random UUID-style PID.} + +\item{sid}{(character) Optiona. Specify a Series ID (SID) to use for the new Object.} } \value{ (character) The PID of the updated object. From 74a497ef1aff86a4cec33e21dd5421ef4fecbcfa Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 8 Jan 2018 11:56:43 -0900 Subject: [PATCH 015/318] Re-roxygenize docs for filter_packaging_statements --- man/filter_packaging_statements.Rd | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/man/filter_packaging_statements.Rd b/man/filter_packaging_statements.Rd index 86229b9..77e94e0 100644 --- a/man/filter_packaging_statements.Rd +++ b/man/filter_packaging_statements.Rd @@ -13,17 +13,11 @@ filter_packaging_statements(statements) (data.frame) The filtered Statements } \description{ +This is intended to be called after `datapack::getTriples` has been called +on a ResourceMap. +} +\details{ This function was written specifically for the case of updating a resource map while preserving any extra statements that have been added such as PROV statements. Statements are filtered according to these rules: } -\details{ -1. If the subject or object is the ore:ResourceMap resource -2. If the subject or object is the ore:Aggregation resource -3. If the predicate is cito:documents or cito:isDocumentedBy -4. Once filters 1-3 have been executed, any remaining triples are considered - for removal if they look like dangling dc:identifier statements - -The consequence of filter 4 is that dc:identifier statements are left in if -they are still in use by another statement -} From 673713b9729a86bf252f4398866e802196f0d803 Mon Sep 17 00:00:00 2001 From: eodean Date: Tue, 9 Jan 2018 16:52:35 -0800 Subject: [PATCH 016/318] Adding helper for setting project data. --- R/eml.R | 79 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/R/eml.R b/R/eml.R index bed03df..d01c03a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -487,6 +487,22 @@ eml_metadata_provider <- function(...) { eml_associated_party <- function(...) { eml_party("associatedParty", ...) } + +#' Create an EML personnel +#' +#' See \code{\link{eml_party}} for details. +#' +#' @param ... Arguments passed on to eml_party +#' +#' @return (personnel) The new personnel +#' @export +#' +#' @examples +#' eml_personnel("test", "user", email = "test@user.com", role = "Principal Investigator") +eml_personnel <- function(...) { + eml_party("personnel", ...) +} + #' Create an EML individualName section #' #' @param given_names (character) One or more given names. @@ -527,55 +543,46 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' Note: This is super-limited right now. #' #' @param title (character) Title of the project. +#' @param abstract (character) Project abstract. +#' @param personnelList (list of eml_personnel objects) Personnel involved with the project. #' @param awards (character) One or more awards for the project. -#' @param first (character) First name of the person with role `role`. -#' @param last (character) Last name of the person with role `role`. -#' @param organizations (character) Optional. One or more organization strings. -#' @param role (character) Optional. Specify an alternate role. #' #' @return (project) The new project section. #' @export #' #' @examples -#' eml_project("Some title", "51231", "Some", "User") -eml_project <- function(title, awards, first, last, organizations = NULL, role = "originator") { - stopifnot(all(sapply(c(title, awards, first, last), is.character)), - all(lengths(c(title, awards, first, last)) > 0)) +#' eml_project("Some title", "Abstract", "list(personnel1, personnel2)", "#1 Best Human Award") +eml_project <- function(title, abstract, personnelList, awards=NULL) { + + # stopifnot(all(sapply(c(title, awards, first, last), is.character)), + # all(lengths(c(title, awards, first, last)) > 0)) - # project + # Project project <- new("project") - # title - title_ele <- new("title") - title_ele@.Data <- title - project@title <- new("ListOftitle", list(title_ele)) + # Title + project@title <- c(new("title", .Data = title)) - # personnel + # Abstract + # Apparently we can accept many abstracts though, so I guess this needs to be changed? + project@abstract <- new("abstract", .Data = abstract) + + # Personnel personnel <- new("personnel") - - # individualName - personnel@individualName <- new("ListOfindividualName", list(eml_individual_name(first, last))) - - # organizationName - if (!is.null(organizations)) { - organizations <- lapply(organizations, function(org) { o <- new("organizationName"); o@.Data <- org; o } ) - personnel@organizationName <- new("ListOforganizationName", organizations) + project@personnel <- new("ListOfpersonnel", personnelList) + + # Funding + if(!is.null(awards)) + { + funding_paras <- lapply(awards, function(awd) { + a <- new("para"); + a@.Data <- list(awd); + a@.Data <- list(xml2::xml_new_root("para", as.character(awd))) + a + }) + project@funding@para <- new("ListOfpara", funding_paras) } - # role - personnel@role <- new("ListOfrole", list(new("role", role))) - - project@personnel <- new("ListOfpersonnel", list(personnel)) - - # funding - funding_paras <- lapply(awards, function(awd) { - a <- new("para"); - a@.Data <- list(awd); - a@.Data <- list(xml2::xml_new_root("para", as.character(awd))) - a - }) - project@funding@para <- new("ListOfpara", funding_paras) - project } From cf2086aa007fe874482e20bae5b9aebbd6d8b7fd Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 10 Jan 2018 13:25:32 -0800 Subject: [PATCH 017/318] Modified publish_update to remove eml@access elements and accompanying unit test --- R/editing.R | 5 +++++ tests/testthat/test_editing.R | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/R/editing.R b/R/editing.R index 7d883b0..f8a8662 100644 --- a/R/editing.R +++ b/R/editing.R @@ -378,6 +378,11 @@ publish_update <- function(mn, eml@system <- new("xml_attribute", "https://arcticdata.io") } + # Replace access if needed + if (length(eml@access@allow)) { + eml@access <- new("access") + } + # Write out the document to disk. We do this in part because # set_other_entities takes a path to the doc. eml_path <- tempfile() diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index eaaf4a7..3074b27 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -215,3 +215,25 @@ test_that("update_object updates the packageId for EML object updates", { doc <- xml2::read_xml(updated_eml_path) expect_equal(new_pid, xml2::xml_attr(xml2::xml_root(doc), "packageId")) }) + +test_that("publish_update removes the deprecated eml@access element", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pids <- create_dummy_package(mn) + eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, pids$metadata), eml_path) + + eml <- EML::read_eml(eml_path) + # Populate dummy access element + eml@access@allow <- c(new("allow", .Data = "hello")) + write_eml(eml, eml_path) + + new_pids <- publish_update(mn, pids$metadata, pids$resource_map, metadata_path = eml_path) + updated_eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, new_pids$metadata), updated_eml_path) + + new_eml <- EML::read_eml(updated_eml_path) + expect_equal(0, length(new_eml@access@allow)) +}) From a61172be6e4b8a9caa0a27eae497c14cb28e8cd3 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 10 Jan 2018 13:33:45 -0800 Subject: [PATCH 018/318] Modified publish_update to only remove eml@access if metadata_path isn't null --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index f8a8662..c385254 100644 --- a/R/editing.R +++ b/R/editing.R @@ -379,7 +379,7 @@ publish_update <- function(mn, } # Replace access if needed - if (length(eml@access@allow)) { + if (length(eml@access@allow) & (!is.null(metadata_path))) { eml@access <- new("access") } From 7ad26cdc590a3413371a452b34c88d3f3490c872 Mon Sep 17 00:00:00 2001 From: eodean Date: Wed, 10 Jan 2018 15:10:31 -0800 Subject: [PATCH 019/318] Updated eml_party to include case for personnel. Added error checking logic for personnel passed to eml_project. --- R/eml.R | 64 +++++++++++++++++++++++++++------------ tests/testthat/test_eml.R | 30 ++++++++++++++---- 2 files changed, 69 insertions(+), 25 deletions(-) diff --git a/R/eml.R b/R/eml.R index d01c03a..17cba4e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -416,12 +416,16 @@ eml_party <- function(type="associatedParty", # Role if (!is.null(role)) { - if (type != "associatedParty") { + if (type != "associatedParty" && type != "personnel") { stop(call. = FALSE, - paste0("Setting a role is only valid on an associatedParty, not a ", type, ".")) + paste0("Setting a role is only valid on an associatedParty or personnel, not a ", type, ".")) + } + + if (type == "personnel") { + party@role <- new("ListOfrole", list(new("role", role))) + } else { + party@role <- new("role", .Data = role) } - - party@role <- new("role", .Data = role) } party @@ -542,20 +546,21 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' #' Note: This is super-limited right now. #' -#' @param title (character) Title of the project. +#' @param title (character) Title of the project (Required). +#' @param personnelList (list of eml_personnel objects) Personnel involved with the project (Originator required). #' @param abstract (character) Project abstract. -#' @param personnelList (list of eml_personnel objects) Personnel involved with the project. -#' @param awards (character) One or more awards for the project. +#' @param funding (character) Funding sources for the project such as grant and contract numbers. #' #' @return (project) The new project section. #' @export #' #' @examples -#' eml_project("Some title", "Abstract", "list(personnel1, personnel2)", "#1 Best Human Award") -eml_project <- function(title, abstract, personnelList, awards=NULL) { +#' eml_project("Some title", list(personnel1, personnel2), "Abstract", "#1 Best Scientist Award") +eml_project <- function(title, personnelList, abstract=NULL, funding=NULL) { - # stopifnot(all(sapply(c(title, awards, first, last), is.character)), - # all(lengths(c(title, awards, first, last)) > 0)) + stopifnot(is.character(title), + nchar(title) > 0) + stopifnot(length(personnelList) > 0) # Project project <- new("project") @@ -563,18 +568,39 @@ eml_project <- function(title, abstract, personnelList, awards=NULL) { # Title project@title <- c(new("title", .Data = title)) - # Abstract - # Apparently we can accept many abstracts though, so I guess this needs to be changed? - project@abstract <- new("abstract", .Data = abstract) - # Personnel + if(!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { + stop(call. = FALSE, + "All personnel in the list must be of type 'personnel'") + } + + if(!all(sapply(personnelList, function(x) { length(x@role) > 0 }))) { + stop(call. = FALSE, + "You must specify a role for each personnel on the project.") + } + + if (!any(sapply(personnelList, function(x) { x@role == "originator" }))) { + stop(call. = FALSE, + "You must specify at least one personnel with role type originator on the project.") + } + personnel <- new("personnel") project@personnel <- new("ListOfpersonnel", personnelList) - + + # Abstract + if(!is.null(abstract)) { + project@abstract <- new("abstract", .Data = abstract) + + if (length(abstract) == 1) { + project@abstract <- new("abstract", .Data = new("TextType", .Data = abstract)) + } else if (length(abstract) > 1) { + project@abstract <- new("abstract", para = new("ListOfpara", lapply(abstract, function(x) new("para", x)))) + } + } + # Funding - if(!is.null(awards)) - { - funding_paras <- lapply(awards, function(awd) { + if(!is.null(funding)) { + funding_paras <- lapply(funding, function(awd) { a <- new("para"); a@.Data <- list(awd); a@.Data <- list(xml2::xml_new_root("para", as.character(awd))) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 5882cd7..6cb9e85 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -80,18 +80,36 @@ test_that("a contact can be created", { expect_equal(contact@individualName[[1]]@surName@.Data, "user") }) +test_that("a personnel can be created", { + personnel <- eml_personnel("test", "user") + + expect_is(personnel, "personnel") + expect_equal(contact@individualName[[1]]@givenName[[1]]@.Data, "test") + expect_equal(contact@individualName[[1]]@surName@.Data, "user") +}) -test_that("a project can be created", { - project <- eml_project("some title", "12345", "a", "user") +test_that("a project can be created", { + test_personnel_1 <- eml_personnel(given_names="A", + sur_name="User", + organization="NCEAS", + role="originator") + test_personnel_2 <- eml_personnel(given_names="Testy", + sur_name="Mactesterson", + organization="A Test Org", + role="user") + + project <- eml_project("some title", list(test_personnel_1, test_personnel_2), "This is a test abstract", "I won an award, yay") + expect_is(project, "project") expect_equal(project@title[[1]]@.Data, "some title") - expect_equal(project@personnel[[1]]@individualName[[1]]@givenName[[1]]@.Data, "a") - expect_equal(project@personnel[[1]]@individualName[[1]]@surName@.Data, "user") - expect_equal(xml2::xml_text(project@funding@para[[1]]@.Data[[1]]), "12345") + expect_equal(project@personnel[[1]]@individualName[[1]]@givenName[[1]]@.Data, "A") + expect_equal(project@personnel[[1]]@individualName[[1]]@surName@.Data, "User") + expect_equal(project@personnel[[1]]@organizationName[[1]]@.Data, "NCEAS") + expect_equal(project@personnel[[1]]@role, "originator") + expect_equal(xml2::xml_text(project@funding@para[[1]]@.Data[[1]]), "I won an award, yay") }) - test_that("a project can be created with multiple awards", { project <- eml_project("some title", c("12345", "54321"), "a", "user") From cdd9dac7920572a9cd4bbf4ad8579f719eda24af Mon Sep 17 00:00:00 2001 From: eodean Date: Wed, 10 Jan 2018 16:02:02 -0800 Subject: [PATCH 020/318] Added logic for multiple roles. Committing before entering the realm of studyAreaDescription... --- R/eml.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 17cba4e..57c7d67 100644 --- a/R/eml.R +++ b/R/eml.R @@ -422,7 +422,8 @@ eml_party <- function(type="associatedParty", } if (type == "personnel") { - party@role <- new("ListOfrole", list(new("role", role))) + roles <- lapply(role, function(x) { r <- new("role"); r@.Data <- x; r } ) + party@role <- new("ListOfrole", roles) } else { party@role <- new("role", .Data = role) } @@ -541,6 +542,7 @@ eml_individual_name <- function(given_names=NULL, sur_name) { indiv_name } +eml_study_area_description #' Create an eml-project section. #' From a47f154c6651796645de303593706cb9c561c2b4 Mon Sep 17 00:00:00 2001 From: eodean Date: Thu, 11 Jan 2018 13:25:54 -0800 Subject: [PATCH 021/318] Added arguments for studyAreaDescription and designDescription. Removed validation logic for incorrect test (role is not required to be "originator"). --- R/eml.R | 64 +++++++++++++++++++++++---------------- tests/testthat/test_eml.R | 49 +++++++++++++++--------------- 2 files changed, 62 insertions(+), 51 deletions(-) diff --git a/R/eml.R b/R/eml.R index 57c7d67..206b751 100644 --- a/R/eml.R +++ b/R/eml.R @@ -416,11 +416,13 @@ eml_party <- function(type="associatedParty", # Role if (!is.null(role)) { + # Only allow roles to be set if type is associatedParty or personnel if (type != "associatedParty" && type != "personnel") { stop(call. = FALSE, paste0("Setting a role is only valid on an associatedParty or personnel, not a ", type, ".")) } + # If type is personnel, role needs to be ListOfrole, otherwise just role if (type == "personnel") { roles <- lapply(role, function(x) { r <- new("role"); r@.Data <- x; r } ) party@role <- new("ListOfrole", roles) @@ -542,23 +544,24 @@ eml_individual_name <- function(given_names=NULL, sur_name) { indiv_name } -eml_study_area_description - #' Create an eml-project section. #' -#' Note: This is super-limited right now. +#' Note - studyAreaDescription, designDescription, and relatedProject are not fully fleshed out. Need to pass these objects in directly if you want to use them. #' #' @param title (character) Title of the project (Required). -#' @param personnelList (list of eml_personnel objects) Personnel involved with the project (Originator required). +#' @param personnelList (list of eml_personnel) Personnel involved with the project (role:originator required). #' @param abstract (character) Project abstract. #' @param funding (character) Funding sources for the project such as grant and contract numbers. +#' @param studyAreaDescription (studyAreaDescription) +#' @param designDescription (designDescription) +#' @param relatedProject (project) #' #' @return (project) The new project section. #' @export #' #' @examples #' eml_project("Some title", list(personnel1, personnel2), "Abstract", "#1 Best Scientist Award") -eml_project <- function(title, personnelList, abstract=NULL, funding=NULL) { +eml_project <- function(title, personnelList, abstract=NULL, funding=NULL, studyAreaDescription=NULL, designDescription=NULL, relatedProject=NULL) { stopifnot(is.character(title), nchar(title) > 0) @@ -580,35 +583,44 @@ eml_project <- function(title, personnelList, abstract=NULL, funding=NULL) { stop(call. = FALSE, "You must specify a role for each personnel on the project.") } - - if (!any(sapply(personnelList, function(x) { x@role == "originator" }))) { - stop(call. = FALSE, - "You must specify at least one personnel with role type originator on the project.") - } - personnel <- new("personnel") - project@personnel <- new("ListOfpersonnel", personnelList) + project@personnel <- new("ListOfpersonnel", .Data = personnelList) - # Abstract + # Abstract if(!is.null(abstract)) { - project@abstract <- new("abstract", .Data = abstract) - - if (length(abstract) == 1) { - project@abstract <- new("abstract", .Data = new("TextType", .Data = abstract)) - } else if (length(abstract) > 1) { - project@abstract <- new("abstract", para = new("ListOfpara", lapply(abstract, function(x) new("para", x)))) - } + abstract_paras <- lapply(abstract, function(x) { + para <- new("para"); + para@.Data <- list(x); + para@.Data <- list(xml2::xml_new_root("para", as.character(x))) + para + }) + project@abstract@para <- new("ListOfpara", .Data = abstract_paras) } # Funding if(!is.null(funding)) { - funding_paras <- lapply(funding, function(awd) { - a <- new("para"); - a@.Data <- list(awd); - a@.Data <- list(xml2::xml_new_root("para", as.character(awd))) - a + funding_paras <- lapply(funding, function(x) { + para <- new("para"); + para@.Data <- list(x); + para@.Data <- list(xml2::xml_new_root("para", as.character(x))) + para }) - project@funding@para <- new("ListOfpara", funding_paras) + project@funding@para <- new("ListOfpara", .Data = funding_paras) + } + + # Study area description + if(!is.null(studyAreaDescription)) { + project@studyAreaDescription <- studyAreaDescription + } + + # Design description + if(!is.null(designDescription)) { + project@designDescription <- designDescription + } + + # Related Project + if(!is.null(relatedProject)) { + project@relatedProject <- relatedProject } project diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 6cb9e85..d812a38 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -81,25 +81,21 @@ test_that("a contact can be created", { }) test_that("a personnel can be created", { - personnel <- eml_personnel("test", "user") + personnel <- eml_personnel("test", "user", role=c("author", "originator")) expect_is(personnel, "personnel") expect_equal(contact@individualName[[1]]@givenName[[1]]@.Data, "test") expect_equal(contact@individualName[[1]]@surName@.Data, "user") + expect_equal(contact@individualName[[1]]@role[[1]]@.Data, "author") }) - test_that("a project can be created", { - test_personnel_1 <- eml_personnel(given_names="A", - sur_name="User", - organization="NCEAS", - role="originator") - test_personnel_2 <- eml_personnel(given_names="Testy", - sur_name="Mactesterson", - organization="A Test Org", - role="user") + test_personnel_1 <- eml_personnel(given_names="A", sur_name="User", organization="NCEAS", role="originator") - project <- eml_project("some title", list(test_personnel_1, test_personnel_2), "This is a test abstract", "I won an award, yay") + project <- eml_project("some title", + list(test_personnel_1), + "This is a test abstract", + "I won an award, yay") expect_is(project, "project") expect_equal(project@title[[1]]@.Data, "some title") @@ -110,20 +106,23 @@ test_that("a project can be created", { expect_equal(xml2::xml_text(project@funding@para[[1]]@.Data[[1]]), "I won an award, yay") }) -test_that("a project can be created with multiple awards", { - project <- eml_project("some title", c("12345", "54321"), "a", "user") - - expect_length(project@funding@para, 2) - expect_equal(xml2::xml_text(project@funding@para[[1]]@.Data[[1]]), "12345") - expect_equal(xml2::xml_text(project@funding@para[[2]]@.Data[[1]]), "54321") -}) - -test_that("a project can be created with multiple organizations", { - project <- eml_project("some title", "12345", "a", "user", organizations = c("org1", "org2")) - - expect_length(project@personnel[[1]]@organizationName, 2) - expect_equal(project@personnel[[1]]@organizationName[[1]]@.Data, "org1") - expect_equal(project@personnel[[1]]@organizationName[[2]]@.Data, "org2") +test_that("a project can be created with multiple personnel, an abstract can be created with multiple paragraphs, awards with multiple awards", { + test_personnel_1 <- eml_personnel(given_names="A", sur_name="User", organization="NCEAS", role="originator") + test_personnel_2 <- eml_personnel(given_names="Testy", sur_name="Mactesterson", organization="A Test Org", role=c("user", "author")) + + project <- eml_project("some title", + list(test_personnel_1, test_personnel_2), + c("This is a test abstract", "This is the second paragraph"), + c("I won an award, yay", "I won a second award, wow")) + + expect_is(project, "project") + expect_equal(project@title[[1]]@.Data, "some title") + expect_equal(project@personnel[[2]]@individualName[[1]]@givenName[[1]]@.Data, "Testy") + expect_equal(project@personnel[[2]]@individualName[[1]]@surName@.Data, "Macterterson") + expect_equal(project@personnel[[2]]@organizationName[[1]]@.Data, "A Test Org") + expect_equal(project@personnel[[2]]@role[[2]], "author") + expect_equal(xml2::xml_text(project@abstract@para[[2]]@.Data[[1]]), "This is the second paragraph") + expect_equal(xml2::xml_text(project@funding@para[[2]]@.Data[[1]]), "I won a second award, wow") }) test_that("an other entity can be added from a pid", { From dea8eed40db69727e0a70bced12ee7018416c35b Mon Sep 17 00:00:00 2001 From: eodean Date: Thu, 11 Jan 2018 17:09:21 -0800 Subject: [PATCH 022/318] Nicer spacing. --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 206b751..42f75ae 100644 --- a/R/eml.R +++ b/R/eml.R @@ -561,7 +561,7 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' #' @examples #' eml_project("Some title", list(personnel1, personnel2), "Abstract", "#1 Best Scientist Award") -eml_project <- function(title, personnelList, abstract=NULL, funding=NULL, studyAreaDescription=NULL, designDescription=NULL, relatedProject=NULL) { +eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, studyAreaDescription = NULL, designDescription = NULL, relatedProject = NULL) { stopifnot(is.character(title), nchar(title) > 0) From b9ab1ffa953f8bc5248028711eaad37c984d794c Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 12 Jan 2018 11:17:43 -0800 Subject: [PATCH 023/318] Addressed CR comments. --- R/eml.R | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/R/eml.R b/R/eml.R index 42f75ae..7848bf7 100644 --- a/R/eml.R +++ b/R/eml.R @@ -424,10 +424,9 @@ eml_party <- function(type="associatedParty", # If type is personnel, role needs to be ListOfrole, otherwise just role if (type == "personnel") { - roles <- lapply(role, function(x) { r <- new("role"); r@.Data <- x; r } ) - party@role <- new("ListOfrole", roles) + party@role <- as(lapply(role, as, Class = "role"), "ListOfrole") } else { - party@role <- new("role", .Data = role) + party@role <- as(role, as, Class = "role") } } @@ -506,8 +505,13 @@ eml_associated_party <- function(...) { #' #' @examples #' eml_personnel("test", "user", email = "test@user.com", role = "Principal Investigator") -eml_personnel <- function(...) { - eml_party("personnel", ...) +eml_personnel <- function(role = NULL, ...) { + if(is.null(role)) { + stop(call. = FALSE, + "You must specify a role for a personnel.") + } + + eml_party("personnel", role = role, ...) } #' Create an EML individualName section @@ -549,9 +553,9 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' Note - studyAreaDescription, designDescription, and relatedProject are not fully fleshed out. Need to pass these objects in directly if you want to use them. #' #' @param title (character) Title of the project (Required). -#' @param personnelList (list of eml_personnel) Personnel involved with the project (role:originator required). -#' @param abstract (character) Project abstract. -#' @param funding (character) Funding sources for the project such as grant and contract numbers. +#' @param personnelList (list of personnel) Personnel involved with the project. +#' @param abstract (character) Project abstract. Can pass as a character vector for separate paragraphs. +#' @param funding (character) Funding sources for the project such as grant and contract numbers. Can pass as a character vector for separate paragraphs. #' @param studyAreaDescription (studyAreaDescription) #' @param designDescription (designDescription) #' @param relatedProject (project) @@ -560,7 +564,7 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' @export #' #' @examples -#' eml_project("Some title", list(personnel1, personnel2), "Abstract", "#1 Best Scientist Award") +#' eml_project("Some title", list(personnel1, personnel2), c("Abstract paragraph 1", "Abstract paragraph 2"), "#1 Best Scientist Award") eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, studyAreaDescription = NULL, designDescription = NULL, relatedProject = NULL) { stopifnot(is.character(title), @@ -571,41 +575,30 @@ eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, s project <- new("project") # Title - project@title <- c(new("title", .Data = title)) + project@title <- c(as(title, "title")) # Personnel if(!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { stop(call. = FALSE, "All personnel in the list must be of type 'personnel'") } - - if(!all(sapply(personnelList, function(x) { length(x@role) > 0 }))) { - stop(call. = FALSE, - "You must specify a role for each personnel on the project.") - } - project@personnel <- new("ListOfpersonnel", .Data = personnelList) + project@personnel <- as(personnelList, "ListOfpersonnel") # Abstract if(!is.null(abstract)) { abstract_paras <- lapply(abstract, function(x) { - para <- new("para"); - para@.Data <- list(x); - para@.Data <- list(xml2::xml_new_root("para", as.character(x))) - para + as(list(xml2::xml_new_root("para", as.character(x))), "para") }) - project@abstract@para <- new("ListOfpara", .Data = abstract_paras) + project@abstract@para <- as(abstract_paras, "ListOfpara") } # Funding if(!is.null(funding)) { funding_paras <- lapply(funding, function(x) { - para <- new("para"); - para@.Data <- list(x); - para@.Data <- list(xml2::xml_new_root("para", as.character(x))) - para + as(list(xml2::xml_new_root("para", as.character(x))), "para") }) - project@funding@para <- new("ListOfpara", .Data = funding_paras) + project@funding@para <- as(funding_paras, "ListOfpara") } # Study area description From cdd3dcac7f684f8fee817cd24c0dc484dd3f53d9 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 12 Jan 2018 14:23:48 -0800 Subject: [PATCH 024/318] Check to see if sysmeta fileName is already equal to user input. --- R/editing.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/editing.R b/R/editing.R index c385254..82b931d 100644 --- a/R/editing.R +++ b/R/editing.R @@ -715,6 +715,11 @@ set_file_name <- function(mn, pid, name) { nchar(name) > 0) sysmeta <- dataone::getSystemMetadata(mn, pid) + if (!is.na(sysmeta@fileName)) { + if (sysmeta@fileName == name) + stop(paste0("fileName for object ", pid, "is already set to: ", name)) + } + sysmeta@fileName <- name dataone::updateSystemMetadata(mn, pid, sysmeta) } From b528adde412994e48b6acf98f75b9ce2f463b76a Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 12 Jan 2018 15:27:44 -0800 Subject: [PATCH 025/318] Added whitespace to previous commit --- R/editing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/editing.R b/R/editing.R index 82b931d..d5b844e 100644 --- a/R/editing.R +++ b/R/editing.R @@ -715,6 +715,7 @@ set_file_name <- function(mn, pid, name) { nchar(name) > 0) sysmeta <- dataone::getSystemMetadata(mn, pid) + if (!is.na(sysmeta@fileName)) { if (sysmeta@fileName == name) stop(paste0("fileName for object ", pid, "is already set to: ", name)) From badbfb77a0a320c7c47f86634faac29abc81f8df Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 15 Jan 2018 17:29:37 -0900 Subject: [PATCH 026/318] Add Travis CI support --- .Rbuildignore | 1 + .travis.yml | 5 +++++ README.md | 2 ++ 3 files changed, 8 insertions(+) create mode 100644 .travis.yml diff --git a/.Rbuildignore b/.Rbuildignore index 5b5cb9d..dbdfa42 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,4 @@ ^\.Rproj\.user$ ^etc$ ^docs$ +^\.travis\.yml$ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8d139ac --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r + +language: R +sudo: false +cache: packages diff --git a/README.md b/README.md index e727b7b..0df6e15 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # arcticadatautils +[![Travis build status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) + The `articadatautils` R package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: - Inserting large numbers of files into Metacat From 42dd941eb1c76b56e3c86ab39d22b459ccb2d77b Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 15 Jan 2018 18:25:56 -0900 Subject: [PATCH 027/318] Add librdf0-dev to travis deps --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 8d139ac..0c56951 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,3 +3,6 @@ language: R sudo: false cache: packages +before_install: + - sudo apt-get -qq update + - sudo apt-get install -y librdf0-dev From 67c0f639bad27e60eb8f55a0da03f4c1d1db2125 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 15 Jan 2018 22:02:12 -0900 Subject: [PATCH 028/318] Add libnetcdf to travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0c56951..9732c91 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,4 +5,4 @@ sudo: false cache: packages before_install: - sudo apt-get -qq update - - sudo apt-get install -y librdf0-dev + - sudo apt-get install -y librdf0-dev libnetcdf-dev From afcb5d3e99621f1edaf7b85fd2ece4787f0683f2 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 15 Jan 2018 23:15:59 -0900 Subject: [PATCH 029/318] Add matrix build to travis --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9732c91..7df42c8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,10 @@ # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r -language: R +language: r +r: + - oldrel + - release + - devel sudo: false cache: packages before_install: From c725bf1535ece9793ed9a26e77d8eb68f7b24e1e Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 15 Jan 2018 23:32:23 -0900 Subject: [PATCH 030/318] Switch redland/ncdf4 to binary install in travis --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 7df42c8..4c090fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ r: - devel sudo: false cache: packages +r_binary_packages: + - redland + - ncdf4 before_install: - sudo apt-get -qq update - sudo apt-get install -y librdf0-dev libnetcdf-dev From 9b8da0be27b6c8e8caea4f57afbdf774ebdbbea5 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 16 Jan 2018 00:05:40 -0900 Subject: [PATCH 031/318] More travis gymnastics --- .travis.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4c090fa..91ebdfc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,6 @@ r: - devel sudo: false cache: packages -r_binary_packages: - - redland - - ncdf4 before_install: - sudo apt-get -qq update - - sudo apt-get install -y librdf0-dev libnetcdf-dev + - sudo apt-get install -y librdf0-dev libnetcdf-dev r-cran-ncdf4 From c38f3916e8b11ca7fcb5cad2cfaf017be95e6bdd Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 17 Jan 2018 13:34:54 -0800 Subject: [PATCH 032/318] remove resource_map_pid from parent_child_pids --- R/editing.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/editing.R b/R/editing.R index d5b844e..5467816 100644 --- a/R/editing.R +++ b/R/editing.R @@ -287,6 +287,14 @@ publish_update <- function(mn, stopifnot(all(is.character(parent_child_pids))) } + # Check to see if the obsoleted package is in the list of parent_child_pids + # If it is notify the user and remove it from the list + if (resource_map_pid %in% parent_child_pids) { + message("Removing the old resource map from the list of child PIDs in the parent package.") + resource_map_pid_index <- which(resource_map_pid == parent_child_pids) + parent_child_pids <- parent_child_pids[-resource_map_pid_index] + } + all_pids <- c(metadata_pid, resource_map_pid, data_pids, child_pids, identifier, parent_resmap_pid, parent_metadata_pid, parent_data_pids, parent_child_pids) From df0495b502231195a2b37ecfeddbfe6a77bcabb8 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 11:36:49 -0900 Subject: [PATCH 033/318] Add note to PR about contributing and PRs --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0df6e15..2ca265f 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,14 @@ remotes::install_github("nceas/arcticdatautils") ## Contributing - Please submit suggestions or bugs as [Issues](https://github.com/NCEAS/arcticdatautils/issues). -- Pull Requestss should target the `master` branch +- Pull Requestss (PR) should target the `master` branch +- Before submitting a PR, please: + - Re-document and commit any `*.Rd` file changes + > `devtools::document()` + - R CMD CHECK and fix any issues related to your changes + > `devtools::check()` + - Run the tests and make sure they all pass + > `devtools::test()` ## Testing From bb20ad3d1a01a773df33eef18b0fd76d15d9479d Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 19 Jan 2018 13:48:41 -0800 Subject: [PATCH 034/318] Fixed tests and built .Rds for projects. --- NAMESPACE | 1 + man/eml_personnel.Rd | 20 ++++++++++++++++++++ man/eml_project.Rd | 23 +++++++++++++---------- tests/testthat/test_eml.R | 14 +++++++------- 4 files changed, 41 insertions(+), 17 deletions(-) create mode 100644 man/eml_personnel.Rd diff --git a/NAMESPACE b/NAMESPACE index 2703443..cfff53e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(eml_creator) export(eml_individual_name) export(eml_metadata_provider) export(eml_party) +export(eml_personnel) export(eml_project) export(eml_validate_attributes) export(env_get) diff --git a/man/eml_personnel.Rd b/man/eml_personnel.Rd new file mode 100644 index 0000000..a749e9d --- /dev/null +++ b/man/eml_personnel.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_personnel} +\alias{eml_personnel} +\title{Create an EML personnel} +\usage{ +eml_personnel(role = NULL, ...) +} +\arguments{ +\item{...}{Arguments passed on to eml_party} +} +\value{ +(personnel) The new personnel +} +\description{ +See \code{\link{eml_party}} for details. +} +\examples{ +eml_personnel("test", "user", email = "test@user.com", role = "Principal Investigator") +} diff --git a/man/eml_project.Rd b/man/eml_project.Rd index 3a7c847..08ebd55 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -4,28 +4,31 @@ \alias{eml_project} \title{Create an eml-project section.} \usage{ -eml_project(title, awards, first, last, organizations = NULL, - role = "originator") +eml_project(title, personnelList, abstract = NULL, funding = NULL, + studyAreaDescription = NULL, designDescription = NULL, + relatedProject = NULL) } \arguments{ -\item{title}{(character) Title of the project.} +\item{title}{(character) Title of the project (Required).} -\item{awards}{(character) One or more awards for the project.} +\item{personnelList}{(list of personnel) Personnel involved with the project.} -\item{first}{(character) First name of the person with role `role`.} +\item{abstract}{(character) Project abstract. Can pass as a character vector for separate paragraphs.} -\item{last}{(character) Last name of the person with role `role`.} +\item{funding}{(character) Funding sources for the project such as grant and contract numbers. Can pass as a character vector for separate paragraphs.} -\item{organizations}{(character) Optional. One or more organization strings.} +\item{studyAreaDescription}{(studyAreaDescription)} -\item{role}{(character) Optional. Specify an alternate role.} +\item{designDescription}{(designDescription)} + +\item{relatedProject}{(project)} } \value{ (project) The new project section. } \description{ -Note: This is super-limited right now. +Note - studyAreaDescription, designDescription, and relatedProject are not fully fleshed out. Need to pass these objects in directly if you want to use them. } \examples{ -eml_project("Some title", "51231", "Some", "User") +eml_project("Some title", list(personnel1, personnel2), c("Abstract paragraph 1", "Abstract paragraph 2"), "#1 Best Scientist Award") } diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index d812a38..4d821e2 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -81,12 +81,12 @@ test_that("a contact can be created", { }) test_that("a personnel can be created", { - personnel <- eml_personnel("test", "user", role=c("author", "originator")) + personnel <- eml_personnel(given_names="test", sur_name="user", role="principalInvestigator") expect_is(personnel, "personnel") - expect_equal(contact@individualName[[1]]@givenName[[1]]@.Data, "test") - expect_equal(contact@individualName[[1]]@surName@.Data, "user") - expect_equal(contact@individualName[[1]]@role[[1]]@.Data, "author") + expect_equal(personnel@individualName[[1]]@givenName[[1]]@.Data, "test") + expect_equal(personnel@individualName[[1]]@surName@.Data, "user") + expect_equal(personnel@role[[1]]@.Data, "principalInvestigator") }) test_that("a project can be created", { @@ -102,7 +102,7 @@ test_that("a project can be created", { expect_equal(project@personnel[[1]]@individualName[[1]]@givenName[[1]]@.Data, "A") expect_equal(project@personnel[[1]]@individualName[[1]]@surName@.Data, "User") expect_equal(project@personnel[[1]]@organizationName[[1]]@.Data, "NCEAS") - expect_equal(project@personnel[[1]]@role, "originator") + expect_equal(project@personnel[[1]]@role[[1]]@.Data, "originator") expect_equal(xml2::xml_text(project@funding@para[[1]]@.Data[[1]]), "I won an award, yay") }) @@ -118,9 +118,9 @@ test_that("a project can be created with multiple personnel, an abstract can be expect_is(project, "project") expect_equal(project@title[[1]]@.Data, "some title") expect_equal(project@personnel[[2]]@individualName[[1]]@givenName[[1]]@.Data, "Testy") - expect_equal(project@personnel[[2]]@individualName[[1]]@surName@.Data, "Macterterson") + expect_equal(project@personnel[[2]]@individualName[[1]]@surName@.Data, "Mactesterson") expect_equal(project@personnel[[2]]@organizationName[[1]]@.Data, "A Test Org") - expect_equal(project@personnel[[2]]@role[[2]], "author") + expect_equal(project@personnel[[2]]@role[[2]]@.Data, "author") expect_equal(xml2::xml_text(project@abstract@para[[2]]@.Data[[1]]), "This is the second paragraph") expect_equal(xml2::xml_text(project@funding@para[[2]]@.Data[[1]]), "I won a second award, wow") }) From c006dcbdef55fa5d3709d99a2b2fc8cf0e9b5d63 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:33:41 -0900 Subject: [PATCH 035/318] Disable checking github for the latest version GitHub's API rate-limiting will make this too confusing for users if they run into the rate limit. --- R/zzz.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index b34d3f5..480cce0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,6 +1,5 @@ .onLoad <- function(libname, pkgname) { load_d1_formats_list("https://cn.dataone.org/cn/v2/formats") - warn_if_outdated() invisible() } From 250cf58be2aa5d544fc75bb13ca6daad63a9c7cb Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:34:02 -0900 Subject: [PATCH 036/318] Make call to `utils::packageVersion` explicit in code --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 480cce0..e5aebe0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -27,7 +27,7 @@ load_d1_formats_list <- function(url) { } warn_if_outdated <- function() { - installed_version <- packageVersion("arcticdatautils") + installed_version <- utils::packageVersion("arcticdatautils") req <- httr::GET("https://api.github.com/repos/nceas/arcticdatautils/releases/latest", httr::add_headers("Accept", "application/vnd.github.v3+json")) From 997849967cc9a345a4a274e229b5eb14001de02a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:34:52 -0900 Subject: [PATCH 037/318] Fix various whitespace and line length issues --- R/eml.R | 62 ++++++++++++++++++++++------------ man/eml_add_entities.Rd | 7 ++-- man/sysmeta_to_eml_physical.Rd | 6 ++-- 3 files changed, 50 insertions(+), 25 deletions(-) diff --git a/R/eml.R b/R/eml.R index 7848bf7..5076e73 100644 --- a/R/eml.R +++ b/R/eml.R @@ -134,10 +134,12 @@ sysmeta_to_other_entity <- function(sysmeta) { #' @export #' #' @examples -#' #' \dontrun { +#' \dontrun { #' # Generate EML physical objects for all the data in a package #' pkg <- get_package(mn, pid) -#' sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) +#' sm <- lapply(pkg$data, function(pid) { +#' getSystemMetadata(mn, pid) +#' }) #' sysmeta_to_eml_physical(sm) #' } sysmeta_to_eml_physical <- function(sysmeta) { @@ -421,7 +423,7 @@ eml_party <- function(type="associatedParty", stop(call. = FALSE, paste0("Setting a role is only valid on an associatedParty or personnel, not a ", type, ".")) } - + # If type is personnel, role needs to be ListOfrole, otherwise just role if (type == "personnel") { party@role <- as(lapply(role, as, Class = "role"), "ListOfrole") @@ -510,7 +512,7 @@ eml_personnel <- function(role = NULL, ...) { stop(call. = FALSE, "You must specify a role for a personnel.") } - + eml_party("personnel", role = role, ...) } @@ -550,23 +552,36 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' Create an eml-project section. #' -#' Note - studyAreaDescription, designDescription, and relatedProject are not fully fleshed out. Need to pass these objects in directly if you want to use them. +#' Note - studyAreaDescription, designDescription, and relatedProject are not +#' fully fleshed out. Need to pass these objects in directly if you want to use +#' them. #' #' @param title (character) Title of the project (Required). #' @param personnelList (list of personnel) Personnel involved with the project. -#' @param abstract (character) Project abstract. Can pass as a character vector for separate paragraphs. -#' @param funding (character) Funding sources for the project such as grant and contract numbers. Can pass as a character vector for separate paragraphs. -#' @param studyAreaDescription (studyAreaDescription) -#' @param designDescription (designDescription) -#' @param relatedProject (project) +#' @param abstract (character) Project abstract. Can pass as a character vector +#' for separate paragraphs. +#' @param funding (character) Funding sources for the project such as grant and +#' contract numbers. Can pass as a character vector for separate paragraphs. +#' @param studyAreaDescription (studyAreaDescription) +#' @param designDescription (designDescription) +#' @param relatedProject (project) #' #' @return (project) The new project section. #' @export #' #' @examples -#' eml_project("Some title", list(personnel1, personnel2), c("Abstract paragraph 1", "Abstract paragraph 2"), "#1 Best Scientist Award") -eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, studyAreaDescription = NULL, designDescription = NULL, relatedProject = NULL) { - +#' eml_project("Some title", +#' list(personnel1, personnel2), +#' c("Abstract paragraph 1", "Abstract paragraph 2"), +#' "#1 Best Scientist Award") +eml_project <- function(title, + personnelList, + abstract = NULL, + funding = NULL, + studyAreaDescription = NULL, + designDescription = NULL, + relatedProject = NULL) { + stopifnot(is.character(title), nchar(title) > 0) stopifnot(length(personnelList) > 0) @@ -584,7 +599,7 @@ eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, s } project@personnel <- as(personnelList, "ListOfpersonnel") - + # Abstract if(!is.null(abstract)) { abstract_paras <- lapply(abstract, function(x) { @@ -592,7 +607,7 @@ eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, s }) project@abstract@para <- as(abstract_paras, "ListOfpara") } - + # Funding if(!is.null(funding)) { funding_paras <- lapply(funding, function(x) { @@ -600,17 +615,17 @@ eml_project <- function(title, personnelList, abstract = NULL, funding = NULL, s }) project@funding@para <- as(funding_paras, "ListOfpara") } - + # Study area description if(!is.null(studyAreaDescription)) { project@studyAreaDescription <- studyAreaDescription } - + # Design description if(!is.null(designDescription)) { project@designDescription <- designDescription } - + # Related Project if(!is.null(relatedProject)) { project@relatedProject <- relatedProject @@ -807,8 +822,11 @@ eml_validate_attributes <- function(attributes) { #' \dontrun{ #' types <- c("dataTable") #' paths <- list.files(., full.names = TRUE) # Get full paths to some files -#' pids <- vapply(paths, function(x) { paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs -#' format_ids <- guess_format_id(paths) # Try to guess format IDs, you should check this afterwards +#' pids <- vapply(paths, function(x) { +#' paste0("urn:uuid:", uuid::UUIDgenerate()) +#' }, "") # Generate some UUID PIDs +#' Try to guess format IDs, you should check this afterwards +#' format_ids <- guess_format_id(paths) #' #' entity_df <- data.frame(type = types, #' path = paths, @@ -826,7 +844,9 @@ eml_validate_attributes <- function(attributes) { #' doc <- new("eml") #' doc <- eml_add_entities(doc, entity_df) #' } -eml_add_entities <- function(doc, entities, resolve_base="https://cn.dataone.org/cn/v2/resolve/") { +eml_add_entities <- function(doc, + entities, + resolve_base="https://cn.dataone.org/cn/v2/resolve/") { stopifnot(is(doc, "eml")) if (!is(entities, "data.frame")) { diff --git a/man/eml_add_entities.Rd b/man/eml_add_entities.Rd index 06091e5..4ad0278 100644 --- a/man/eml_add_entities.Rd +++ b/man/eml_add_entities.Rd @@ -28,8 +28,11 @@ Add new entity (otherEntity, dataTable, etc) elements to an EML document from a \dontrun{ types <- c("dataTable") paths <- list.files(., full.names = TRUE) # Get full paths to some files - pids <- vapply(paths, function(x) { paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs - format_ids <- guess_format_id(paths) # Try to guess format IDs, you should check this afterwards + pids <- vapply(paths, function(x) { + paste0("urn:uuid:", uuid::UUIDgenerate()) + }, "") # Generate some UUID PIDs +Try to guess format IDs, you should check this afterwards + format_ids <- guess_format_id(paths) entity_df <- data.frame(type = types, path = paths, diff --git a/man/sysmeta_to_eml_physical.Rd b/man/sysmeta_to_eml_physical.Rd index ee0772f..36ab823 100644 --- a/man/sysmeta_to_eml_physical.Rd +++ b/man/sysmeta_to_eml_physical.Rd @@ -18,10 +18,12 @@ System Metadata of an Object. Note that it sets an Online Distrubtion URL of the DataONE v2 resolve service for the PID. } \examples{ -#' \dontrun { +\dontrun { # Generate EML physical objects for all the data in a package pkg <- get_package(mn, pid) -sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) +sm <- lapply(pkg$data, function(pid) { + getSystemMetadata(mn, pid) +}) sysmeta_to_eml_physical(sm) } } From 2f2dc35a44cfacadd0705de4cf8516f4b80eb56e Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:42:36 -0900 Subject: [PATCH 038/318] Add humaniformat to suggests, where it should be --- DESCRIPTION | 1 + R/util.R | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 3477b8d..0c1cd0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ License: MIT + file LICENSE LazyData: true Suggests: testthat, + humaniformat, knitr, rmarkdown, xslt diff --git a/R/util.R b/R/util.R index a086b3f..c328b04 100644 --- a/R/util.R +++ b/R/util.R @@ -375,6 +375,12 @@ change_eml_name <- function(party) { # Replace commas with spaces user_name <- stringr::str_replace_all(user_name, ",", "") + + if (!requireNamespace("humaniformat")) { + stop("The package 'humaniformat' is required to run this function. ", + "Please install it.") + } + parsed_name <- humaniformat::parse_names(user_name) # Create the new node to hold the parts of the name From 9a14815ae322ebb2ce96cff57d185785a1227ed8 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:42:44 -0900 Subject: [PATCH 039/318] Fix roxygen doc issues with util.R --- R/util.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/util.R b/R/util.R index c328b04..5b1c232 100644 --- a/R/util.R +++ b/R/util.R @@ -1,16 +1,10 @@ -#' util.R -#' Author: Bryce Mecum -#' -#' General utility functions that may be later merged into other files. - - -#' Extracts the local identifier for an ACADIS ISO metadata XML file. +#' Extract the local identifier for an ACADIS ISO metadata XML file. #' #' @param type (character) A string, one of "gateway" or "field-projects". -#' @param file (character) A string, a connection, or raw vector (same as xml2::read_xml). +#' @param file (character) A string, a connection, or raw vector +#' (same as \code{\link[xml2]{read_xml}}). #' -#' @returns The identifier string. (character) - +#' @return The identifier string. (character) extract_local_identifier <- function(type, file) { stopifnot(is.character(type), length(type) == 1) stopifnot(type %in% c("gateway", "field-projects")) From c0e2f72eb53c9517a81d327d4331f10922c84791 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:43:22 -0900 Subject: [PATCH 040/318] Change ::: to :: in call to getTriples (was a mistake) --- R/packaging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/packaging.R b/R/packaging.R index 9dec754..4d403a4 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -1043,7 +1043,7 @@ parse_resource_map <- function(path) { rm <- new("ResourceMap") datapack::parseRDF(rm, path) - datapack:::getTriples(rm) + datapack::getTriples(rm) } From 6e8d262ad29ba50d2ecb6cdaa17dd08f111353f2 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:43:38 -0900 Subject: [PATCH 041/318] Re-doc package --- man/eml_add_entities.Rd | 2 +- man/eml_project.Rd | 15 +++++++++++---- man/sysmeta_to_eml_physical.Rd | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/man/eml_add_entities.Rd b/man/eml_add_entities.Rd index 4ad0278..1ef5a0c 100644 --- a/man/eml_add_entities.Rd +++ b/man/eml_add_entities.Rd @@ -32,7 +32,7 @@ Add new entity (otherEntity, dataTable, etc) elements to an EML document from a paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs Try to guess format IDs, you should check this afterwards - format_ids <- guess_format_id(paths) + format_ids <- guess_format_id(paths) entity_df <- data.frame(type = types, path = paths, diff --git a/man/eml_project.Rd b/man/eml_project.Rd index 08ebd55..2053ffe 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -13,9 +13,11 @@ eml_project(title, personnelList, abstract = NULL, funding = NULL, \item{personnelList}{(list of personnel) Personnel involved with the project.} -\item{abstract}{(character) Project abstract. Can pass as a character vector for separate paragraphs.} +\item{abstract}{(character) Project abstract. Can pass as a character vector +for separate paragraphs.} -\item{funding}{(character) Funding sources for the project such as grant and contract numbers. Can pass as a character vector for separate paragraphs.} +\item{funding}{(character) Funding sources for the project such as grant and +contract numbers. Can pass as a character vector for separate paragraphs.} \item{studyAreaDescription}{(studyAreaDescription)} @@ -27,8 +29,13 @@ eml_project(title, personnelList, abstract = NULL, funding = NULL, (project) The new project section. } \description{ -Note - studyAreaDescription, designDescription, and relatedProject are not fully fleshed out. Need to pass these objects in directly if you want to use them. +Note - studyAreaDescription, designDescription, and relatedProject are not +fully fleshed out. Need to pass these objects in directly if you want to use +them. } \examples{ -eml_project("Some title", list(personnel1, personnel2), c("Abstract paragraph 1", "Abstract paragraph 2"), "#1 Best Scientist Award") +eml_project("Some title", + list(personnel1, personnel2), + c("Abstract paragraph 1", "Abstract paragraph 2"), + "#1 Best Scientist Award") } diff --git a/man/sysmeta_to_eml_physical.Rd b/man/sysmeta_to_eml_physical.Rd index 36ab823..f86001b 100644 --- a/man/sysmeta_to_eml_physical.Rd +++ b/man/sysmeta_to_eml_physical.Rd @@ -18,7 +18,7 @@ System Metadata of an Object. Note that it sets an Online Distrubtion URL of the DataONE v2 resolve service for the PID. } \examples{ -\dontrun { +\dontrun{ # Generate EML physical objects for all the data in a package pkg <- get_package(mn, pid) sm <- lapply(pkg$data, function(pid) { From b64757624f0c57179076149efee8e37d54bdfc7e Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:43:55 -0900 Subject: [PATCH 042/318] Fix roxygen tag issue Was causing a check failure though --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 5076e73..d137290 100644 --- a/R/eml.R +++ b/R/eml.R @@ -134,7 +134,7 @@ sysmeta_to_other_entity <- function(sysmeta) { #' @export #' #' @examples -#' \dontrun { +#' \dontrun{ #' # Generate EML physical objects for all the data in a package #' pkg <- get_package(mn, pid) #' sm <- lapply(pkg$data, function(pid) { From 0e47a568489cc74df9bf32846af5645ac9c889a0 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:44:11 -0900 Subject: [PATCH 043/318] Fix bug in call to as This was introduced in an earlier commit --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index d137290..f8e3e95 100644 --- a/R/eml.R +++ b/R/eml.R @@ -428,7 +428,7 @@ eml_party <- function(type="associatedParty", if (type == "personnel") { party@role <- as(lapply(role, as, Class = "role"), "ListOfrole") } else { - party@role <- as(role, as, Class = "role") + party@role <- as(role, "role") } } From b6b9a257a6a264d16fb061d157ed3e6915e9b6a1 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 15:45:48 -0900 Subject: [PATCH 044/318] Update yet another doc --- man/extract_local_identifier.Rd | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/man/extract_local_identifier.Rd b/man/extract_local_identifier.Rd index e4e3748..f74ff62 100644 --- a/man/extract_local_identifier.Rd +++ b/man/extract_local_identifier.Rd @@ -2,17 +2,19 @@ % Please edit documentation in R/util.R \name{extract_local_identifier} \alias{extract_local_identifier} -\title{util.R -Author: Bryce Mecum } +\title{Extract the local identifier for an ACADIS ISO metadata XML file.} \usage{ extract_local_identifier(type, file) } \arguments{ \item{type}{(character) A string, one of "gateway" or "field-projects".} -\item{file}{(character) A string, a connection, or raw vector (same as xml2::read_xml).} +\item{file}{(character) A string, a connection, or raw vector +(same as \code{\link[xml2]{read_xml}}).} +} +\value{ +The identifier string. (character) } \description{ -General utility functions that may be later merged into other files. -Extracts the local identifier for an ACADIS ISO metadata XML file. +Extract the local identifier for an ACADIS ISO metadata XML file. } From 88e748fad4cec7aa35c8d569cb9a38dcd803feee Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 16:13:46 -0900 Subject: [PATCH 045/318] Fix broken eml_project example --- R/eml.R | 2 +- man/eml_project.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index f8e3e95..14eeb04 100644 --- a/R/eml.R +++ b/R/eml.R @@ -571,7 +571,7 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' #' @examples #' eml_project("Some title", -#' list(personnel1, personnel2), +#' c(eml_personnel("Bryce", "Mecum", role = "Test")), #' c("Abstract paragraph 1", "Abstract paragraph 2"), #' "#1 Best Scientist Award") eml_project <- function(title, diff --git a/man/eml_project.Rd b/man/eml_project.Rd index 2053ffe..5795212 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -35,7 +35,7 @@ them. } \examples{ eml_project("Some title", - list(personnel1, personnel2), + c(eml_personnel("Bryce", "Mecum", role = "Test")), c("Abstract paragraph 1", "Abstract paragraph 2"), "#1 Best Scientist Award") } From 6b0a6f25aeb35b50dc043f8cb3ebee98006f633c Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 19 Jan 2018 16:24:30 -0900 Subject: [PATCH 046/318] Try out using the apt addon for apt packages --- .travis.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 91ebdfc..a4c91ba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ r: - devel sudo: false cache: packages -before_install: - - sudo apt-get -qq update - - sudo apt-get install -y librdf0-dev libnetcdf-dev r-cran-ncdf4 +addons: + apt: + packages: + - librdf0-dev + - libnetcdf-dev + - r-cran-ncdf4 From fbbb32bc1ffb570e0b09c7dba7a23faee64322c9 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 22 Jan 2018 01:26:47 -0900 Subject: [PATCH 047/318] Fix example code for set_abstract --- R/eml.R | 9 ++++++++- man/set_abstract.Rd | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index 14eeb04..84e5896 100644 --- a/R/eml.R +++ b/R/eml.R @@ -710,8 +710,15 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) #' @export #' #' @examples +#' # Create a new EML document +#' library(EML) +#' doc <- new("eml") +#' +#' # Set an abstract with a single paragraph #' set_abstract(doc, c("Test abstract...")) -#' set_abstract(doc, c("First para", "second para")) +#' +#' # Or one with multiple paragraphs +#' set_abstract(doc, c("First para...", "second para...")) set_abstract <- function(doc, text) { stopifnot(is(doc, "eml")) stopifnot(is.character(text), diff --git a/man/set_abstract.Rd b/man/set_abstract.Rd index 52c91ca..85a7349 100644 --- a/man/set_abstract.Rd +++ b/man/set_abstract.Rd @@ -21,6 +21,13 @@ used for each element.} Set the abstract on an EML document } \examples{ +# Create a new EML document +library(EML) +doc <- new("eml") + +# Set an abstract with a single paragraph set_abstract(doc, c("Test abstract...")) -set_abstract(doc, c("First para", "second para")) + +# Or one with multiple paragraphs +set_abstract(doc, c("First para...", "second para...")) } From 5791dde723b3fc5262a94ba6724691176a422062 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 22 Jan 2018 01:27:00 -0900 Subject: [PATCH 048/318] Remove test that relies on files in tests folder --- tests/testthat/test_util.R | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 2560c77..9d4c0bd 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -2,16 +2,6 @@ context("util") -test_that("identifiers can be extracted", { - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-field-projects-file.xml") - identifier <- extract_local_identifier("field-projects", x) - expect_equal(identifier, "215.001") - - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-gateway-file.xml") - identifier <- extract_local_identifier("gateway", x) - expect_equal(identifier, "urn:x-wmo:md:org.aoncadis.www::d9330d2b-4174-11e3-8af4-00c0f03d5b7c") -}) - test_that("paths can be joined", { expect_equal(path_join(""), "") expect_equal(path_join(1), "1") From c9043281597b4114896bb26918b18b8c264e274e Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 22 Jan 2018 01:42:31 -0900 Subject: [PATCH 049/318] Add eml_abstract helper --- NAMESPACE | 1 + R/eml.R | 32 +++++++++++++++++++++++++++++--- man/eml_abstract.Rd | 25 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 man/eml_abstract.Rd diff --git a/NAMESPACE b/NAMESPACE index cfff53e..8c416fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(create_object) export(create_resource_map) export(create_sysmeta) export(determine_child_pids) +export(eml_abstract) export(eml_add_entities) export(eml_address) export(eml_associated_party) diff --git a/R/eml.R b/R/eml.R index 84e5896..5ce11a8 100644 --- a/R/eml.R +++ b/R/eml.R @@ -721,17 +721,43 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) #' set_abstract(doc, c("First para...", "second para...")) set_abstract <- function(doc, text) { stopifnot(is(doc, "eml")) + + if (length(text) == 1) { + doc@dataset@abstract <- eml_abstract(text) + } else if (length(text) > 1) { + doc@dataset@abstract <- eml_abstract(text) + } + + doc +} + + +#' Minimalistic helper function to generate EML abstracts +#' +#' @param text (character) Paragraphs of text, one paragraph per element in the +#' character vector +#' +#' @return (abstract) An EML abstract +#' @export +#' +#' @examples +#' # Set an abstract with a single paragraph +#' eml_abstract("Test abstract...") +#' +#' # Or one with multiple paragraphs +#' eml_abstract(c("First para...", "second para...")) +eml_abstract <- function(text) { stopifnot(is.character(text), length(text) > 0, all(nchar(text)) > 0) if (length(text) == 1) { - doc@dataset@abstract <- new("abstract", .Data = new("TextType", .Data = "hi")) + abstract <- new("abstract", .Data = new("TextType", .Data = "hi")) } else if (length(text) > 1) { - doc@dataset@abstract <- new("abstract", para = new("ListOfpara", lapply(text, function(x) new("para", x)))) + abstract <- new("abstract", para = new("ListOfpara", lapply(text, function(x) new("para", x)))) } - doc + abstract } diff --git a/man/eml_abstract.Rd b/man/eml_abstract.Rd new file mode 100644 index 0000000..c2a7190 --- /dev/null +++ b/man/eml_abstract.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_abstract} +\alias{eml_abstract} +\title{Minimalistic helper function to generate EML abstracts} +\usage{ +eml_abstract(text) +} +\arguments{ +\item{text}{(character) Paragraphs of text, one paragraph per element in the +character vector} +} +\value{ +(abstract) An EML abstract +} +\description{ +Minimalistic helper function to generate EML abstracts +} +\examples{ +# Set an abstract with a single paragraph +eml_abstract("Test abstract...") + +# Or one with multiple paragraphs +eml_abstract(c("First para...", "second para...")) +} From ea3fab00490b9f18e776fd8774f2e0cbf4e63daa Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 22 Jan 2018 02:08:37 -0900 Subject: [PATCH 050/318] Fix/remove tests so they run on Travis --- tests/testthat/test_eml.R | 48 +++++--------------- tests/testthat/test_environment.R | 2 +- tests/testthat/test_inventory.R | 74 ------------------------------- tests/testthat/test_util.R | 39 ---------------- 4 files changed, 11 insertions(+), 152 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 4d821e2..3c68982 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -2,34 +2,6 @@ context("EML") mn <- env_load()$mn -test_that("an EML otherEntity subtree can be created when the sysmeta has a filename", { - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-sysmeta.xml") - doc <- XML::xmlParse(x) - sysmeta <- new("SystemMetadata") - sysmeta <- datapack::parseSystemMetadata(sysmeta, XML::xmlRoot(doc)) - - other_entity <- sysmeta_to_eml_other_entity(sysmeta)[[1]] - - # Check some rough properties of the subtree - expect_is(other_entity, "otherEntity") - expect_equal(other_entity@entityName@.Data, "some_file.bin") - expect_equal(other_entity@physical[[1]]@dataFormat@externallyDefinedFormat@formatName, "application/octet-stream") -}) - -test_that("an EML otherEntity subtree can be created when the sysmeta doesn't have a filename ", { - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-sysmeta-nofilename.xml") - doc <- XML::xmlParse(x) - sysmeta <- new("SystemMetadata") - sysmeta <- datapack::parseSystemMetadata(sysmeta, XML::xmlRoot(doc)) - - other_entity <- sysmeta_to_eml_other_entity(sysmeta)[[1]] - - # Check some rough properties of the subtree - expect_is(other_entity, "otherEntity") - expect_equal(other_entity@entityName@.Data, "NA") - expect_equal(other_entity@physical[[1]]@dataFormat@externallyDefinedFormat@formatName, "application/octet-stream") -}) - test_that("a methods step can be added to an EML document", { library(XML) library(EML) @@ -82,7 +54,7 @@ test_that("a contact can be created", { test_that("a personnel can be created", { personnel <- eml_personnel(given_names="test", sur_name="user", role="principalInvestigator") - + expect_is(personnel, "personnel") expect_equal(personnel@individualName[[1]]@givenName[[1]]@.Data, "test") expect_equal(personnel@individualName[[1]]@surName@.Data, "user") @@ -91,12 +63,12 @@ test_that("a personnel can be created", { test_that("a project can be created", { test_personnel_1 <- eml_personnel(given_names="A", sur_name="User", organization="NCEAS", role="originator") - - project <- eml_project("some title", - list(test_personnel_1), + + project <- eml_project("some title", + list(test_personnel_1), "This is a test abstract", "I won an award, yay") - + expect_is(project, "project") expect_equal(project@title[[1]]@.Data, "some title") expect_equal(project@personnel[[1]]@individualName[[1]]@givenName[[1]]@.Data, "A") @@ -109,12 +81,12 @@ test_that("a project can be created", { test_that("a project can be created with multiple personnel, an abstract can be created with multiple paragraphs, awards with multiple awards", { test_personnel_1 <- eml_personnel(given_names="A", sur_name="User", organization="NCEAS", role="originator") test_personnel_2 <- eml_personnel(given_names="Testy", sur_name="Mactesterson", organization="A Test Org", role=c("user", "author")) - - project <- eml_project("some title", - list(test_personnel_1, test_personnel_2), + + project <- eml_project("some title", + list(test_personnel_1, test_personnel_2), c("This is a test abstract", "This is the second paragraph"), c("I won an award, yay", "I won a second award, wow")) - + expect_is(project, "project") expect_equal(project@title[[1]]@.Data, "some title") expect_equal(project@personnel[[2]]@individualName[[1]]@givenName[[1]]@.Data, "Testy") @@ -134,7 +106,7 @@ test_that("an other entity can be added from a pid", { writeLines(LETTERS, data_path) pid <- publish_object(mn, data_path, "text/plain") - eml_path <- file.path(system.file("inst", package = "arcticdatautils"), "example-eml.xml") + eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") doc <- EML::read_eml(eml_path) doc@dataset@otherEntity <- new("ListOfotherEntity", list()) diff --git a/tests/testthat/test_environment.R b/tests/testthat/test_environment.R index 91a1529..20b45b9 100644 --- a/tests/testthat/test_environment.R +++ b/tests/testthat/test_environment.R @@ -5,7 +5,7 @@ context("environment") test_that("can load a simple environment file", { - x <- yaml::yaml.load_file(file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "test_environment.yml")) + x <- yaml::yaml.load_file(system.file("./environment.yml", package = "arcticdatautils")) expect_true(length(x) == 3) expect_true(length(setdiff(c("development", "test", "production"), names(x))) == 0) diff --git a/tests/testthat/test_inventory.R b/tests/testthat/test_inventory.R index 8022be8..f3269b4 100644 --- a/tests/testthat/test_inventory.R +++ b/tests/testthat/test_inventory.R @@ -8,80 +8,6 @@ test_that("an inventory can be created correctly", { expect_true(nrow(x) == 0) }) -test_that("an inventory can be populated with files", { - # Case 1: Empty inv, non-empty file - inv <- inv_init() - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - expect_true(nrow(result) == 3) - - # Case 2: Non-empty inv, non-empty file, where some are the same - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - expect_true(nrow(result) == 5) - - # Case 3: Non-empty inv, non-empty file, where none are the same - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv <- subset(inv, file != "C") - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - expect_true(nrow(result) == 5) - - # Case 4: Non-empty inv, non-empty file, inventory already has columns - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv$bytes <- rep(1000, nrow(inv)) - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - - expect_true(nrow(result) == 5) - expect_true(ncol(result) == 2) - expect_true(length(table(result$bytes, exclude=NULL)) == 2) -}) - -test_that("an inventory can be populated with byte sizes", { - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - result <- inv_load_sizes(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "sizes_simple.txt")) - - expect_true(nrow(result) == 3) - expect_true(ncol(result) == 2) -}) - -test_that("an inventory can be populated with checksums", { - # Test if we can populate with checksums before sizes - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv_with_checksums <- inv_load_checksums(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - inv_with_both <- inv_load_sizes(inv_with_checksums, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "sizes_simple.txt")) - - expect_true(nrow(inv_with_both) == 3) - expect_true(ncol(inv_with_both) == 3) - - # Test if we can populate with sizes before checksums - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv_with_sizes <- inv_load_sizes(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "sizes_simple.txt")) - inv_with_both <- inv_load_checksums(inv_with_sizes, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - - expect_true(nrow(inv_with_both) == 3) - expect_true(ncol(inv_with_both) == 3) -}) - -# We should be able to call the same function multiple times and not break things -test_that("calling things repeatedly does not break things", { - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv <- inv_load_checksums(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - inv$test <- NA - inv <- inv_load_checksums(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - - expect_true(ncol(inv) == 3) - expect_true(nrow(inv) == 3) -}) test_that("an inventory can be updated with new information", { test_inv <- data.frame(file = "A", pid="", created = FALSE, stringsAsFactors = FALSE) diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 9d4c0bd..5d70859 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -17,42 +17,3 @@ test_that("paths can be joined", { # Other tests expect_equal(path_join("~/src/arcticdata./inst/asdf"), "~/src/arcticdata/inst/asdf") }) - -test_that("a string can be added to a file", { - # Prepare a temp file with an example EML doc - eml_file <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-eml.xml") - tmp <- tempfile() - file.copy(eml_file, tmp) - - # Get original title - doc_pre <- XML::xmlParseDoc(tmp) - title_pre <- XML::xmlValue(XML::getNodeSet(doc_pre, "//dataset/title")[[1]]) - - # Add the string - add_string_to_title(tmp, " a test") - - # Get the updated title - doc_post <- XML::xmlParseDoc(tmp) - title_post <- XML::xmlValue(XML::getNodeSet(doc_post, "//dataset/title")[[1]]) - - expect_equal(paste0(title_pre, " a test"), title_post) - - # Clean up - file.remove(tmp) -}) - -test_that("a package id can be changed", { - library(EML) - - eml_file <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-eml.xml") - tmp <- tempfile() - file.copy(eml_file, tmp) - - replace_package_id(tmp, "new_package_id") - - doc <- read_eml(tmp) - expect_equal(as.character(doc@packageId), "new_package_id") - - # Clean up - file.remove(tmp) -}) From c5d915385127ebe8f6002589c87e69d5107504ec Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 31 Jan 2018 13:47:41 -0900 Subject: [PATCH 051/318] Bump dev version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0c1cd0d..32fed3e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: arcticdatautils Title: Arctic Data Utilities -Version: 0.6.3 +Version: 0.6.4-pre Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), From 9be0e50969c04209aaad2bd305596ca1d96ccaa8 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Thu, 1 Feb 2018 17:00:27 -0800 Subject: [PATCH 052/318] added documentation --- R/access.R | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/R/access.R b/R/access.R index db5eb45..20e1de4 100644 --- a/R/access.R +++ b/R/access.R @@ -16,6 +16,13 @@ #' @import dataone #' @import datapack #' @export +#' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#'set_rights_holder(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX') +#'} set_rights_holder <- function(mn, pids, subject) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) @@ -90,6 +97,12 @@ set_rights_holder <- function(mn, pids, subject) { #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#'set_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +#'} set_access <- function(mn, pids, subjects, permissions=c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) @@ -152,10 +165,15 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang #' @param mn (MNode) #' @param pids (character) A vector of PIDs to set public access on #' -#' @return #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#'set_public_read(mn, pids) +#'} set_public_read <- function(mn, pids) { set_access(mn, pids, "public", "read") } @@ -165,10 +183,15 @@ set_public_read <- function(mn, pids) { #' @param mn (MNode) #' @param pids (character) A vector of PIDs to set public access on #' -#' @return #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#'remove_public_read(mn, pids) +#'} remove_public_read <- function(mn, pids) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) @@ -249,6 +272,12 @@ remove_public_read <- function(mn, pids) { #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#'set_rights_and_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +#'} set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) From db33cd46b69104e1ac21143f33d974cb12b54d8a Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Thu, 1 Feb 2018 17:00:58 -0800 Subject: [PATCH 053/318] added documentation examples, and fixed one typeo bug in "get_mn_base_url" --- R/dataone.R | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/R/dataone.R b/R/dataone.R index d705b53..75862d5 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -10,6 +10,11 @@ #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'is_token_set(mn) +#'} is_token_set <- function(node) { token <- tryCatch(get_token(node), error = function(e) FALSE) @@ -30,6 +35,11 @@ is_token_set <- function(node) { #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'get_token(mn) +#'} get_token <- function(node) { if (!(class(node) %in% c("MNode", "CNode"))) { stop(paste0("Node must be an MNode or CNode. You passed in a '", class(node), "'.")) @@ -51,10 +61,15 @@ get_token <- function(node) { #' Determine whether the set token is expired. #' -#' @return +#' @return (boolean) #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'is_token_expired(mn) +#'} is_token_expired <- function(node) { token_name <- ifelse(node@env == "prod", "dataone_token", "dataone_test_token") @@ -91,14 +106,19 @@ is_token_expired <- function(node) { #' #' @param mn #' -#' @return +#' @return (character) The URL #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'url <- get_mn_base_url(mn) +#'} get_mn_base_url <- function(mn) { # Determine MN URL. Accept either an MNode or a character string if (is(mn, "MNode")) { - mn_base_url <- mn$base_url + mn_base_url <- mn@base_url } mn_base_url <- mn @@ -110,8 +130,16 @@ get_mn_base_url <- function(mn) { #' @param node (MNode|CNode) The Node to query. #' @param ids (character) The PID or SID to check. #' @param action (character) One of read, write, or changePermission. -#' +#' @return (boolean) #' @export +#' +#' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#'is_authorized(mn, pids, 'write') +#'} is_authorized <- function(node, ids, action) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(ids)) From 6993e93cdb5fb2a5016ec09ecfe4138725acf164 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Thu, 1 Feb 2018 17:01:43 -0800 Subject: [PATCH 054/318] added documentation --- man/get_mn_base_url.Rd | 10 ++++++++++ man/get_token.Rd | 7 +++++++ man/is_authorized.Rd | 11 +++++++++++ man/is_token_expired.Rd | 10 ++++++++++ man/is_token_set.Rd | 7 +++++++ man/remove_public_read.Rd | 8 ++++++++ man/set_access.Rd | 8 ++++++++ man/set_public_read.Rd | 8 ++++++++ man/set_rights_and_access.Rd | 8 ++++++++ man/set_rights_holder.Rd | 8 ++++++++ 10 files changed, 85 insertions(+) diff --git a/man/get_mn_base_url.Rd b/man/get_mn_base_url.Rd index a635f3d..2d50c57 100644 --- a/man/get_mn_base_url.Rd +++ b/man/get_mn_base_url.Rd @@ -9,6 +9,16 @@ get_mn_base_url(mn) \arguments{ \item{mn}{} } +\value{ +(character) The URL +} \description{ Get the base URL of the Member Node. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +url <- get_mn_base_url(mn) +} +} diff --git a/man/get_token.Rd b/man/get_token.Rd index 3be7382..ac66603 100644 --- a/man/get_token.Rd +++ b/man/get_token.Rd @@ -15,3 +15,10 @@ get_token(node) \description{ Gets the currently set authentication token. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +get_token(mn) +} +} diff --git a/man/is_authorized.Rd b/man/is_authorized.Rd index d290d4c..2718809 100644 --- a/man/is_authorized.Rd +++ b/man/is_authorized.Rd @@ -13,6 +13,17 @@ is_authorized(node, ids, action) \item{action}{(character) One of read, write, or changePermission.} } +\value{ +(boolean) +} \description{ Check if the user has authorization to perform an action on an object. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +is_authorized(mn, pids, 'write') +} +} diff --git a/man/is_token_expired.Rd b/man/is_token_expired.Rd index dfa9d4b..93e1cad 100644 --- a/man/is_token_expired.Rd +++ b/man/is_token_expired.Rd @@ -6,6 +6,16 @@ \usage{ is_token_expired(node) } +\value{ +(boolean) +} \description{ Determine whether the set token is expired. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +is_token_expired(mn) +} +} diff --git a/man/is_token_set.Rd b/man/is_token_set.Rd index 8f1c739..4b202d2 100644 --- a/man/is_token_set.Rd +++ b/man/is_token_set.Rd @@ -16,3 +16,10 @@ is_token_set(node) Helpers for the DataONE R package. Test whether a token is set. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +is_token_set(mn) +} +} diff --git a/man/remove_public_read.Rd b/man/remove_public_read.Rd index a86c026..718dccb 100644 --- a/man/remove_public_read.Rd +++ b/man/remove_public_read.Rd @@ -14,3 +14,11 @@ remove_public_read(mn, pids) \description{ Remove public access on a set of objects. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +remove_public_read(mn, pids) +} +} diff --git a/man/set_access.Rd b/man/set_access.Rd index ac4a37d..1001a5a 100644 --- a/man/set_access.Rd +++ b/man/set_access.Rd @@ -23,3 +23,11 @@ set_access(mn, pids, subjects, permissions = c("read", "write", For each permission, this function checks if the permission is already set and moves on. System Metadata are only updated when a change was needed. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +set_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +} +} diff --git a/man/set_public_read.Rd b/man/set_public_read.Rd index 381024e..1e988bc 100644 --- a/man/set_public_read.Rd +++ b/man/set_public_read.Rd @@ -14,3 +14,11 @@ set_public_read(mn, pids) \description{ Set public access on a set of objects. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +set_public_read(mn, pids) +} +} diff --git a/man/set_rights_and_access.Rd b/man/set_rights_and_access.Rd index c9efa3d..0e7ef09 100644 --- a/man/set_rights_and_access.Rd +++ b/man/set_rights_and_access.Rd @@ -25,3 +25,11 @@ Whether an update was needed. This function only updates the existing System Metadata if a change is needed. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +set_rights_and_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +} +} diff --git a/man/set_rights_holder.Rd b/man/set_rights_holder.Rd index 09b59e0..67a04cc 100644 --- a/man/set_rights_holder.Rd +++ b/man/set_rights_holder.Rd @@ -22,3 +22,11 @@ Set the rightsHolder field for a given PID. Update the rights holder to the provided subject for the object identified in the provided system metadata document on the given Member Node. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +set_rights_holder(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX') +} +} From d1f72fc564dc2f380fba213f3cfa027d3adec105 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Thu, 1 Feb 2018 17:53:51 -0800 Subject: [PATCH 055/318] added more examples --- R/access.R | 40 +++++++++---------- R/dataone.R | 14 +++---- R/editing.R | 74 ++++++++++++++++++++++++++++++++---- man/create_resource_map.Rd | 12 ++++++ man/get_mn_base_url.Rd | 1 - man/is_authorized.Rd | 4 +- man/publish_object.Rd | 11 ++++++ man/publish_update.Rd | 14 +++++++ man/remove_public_read.Rd | 4 +- man/set_access.Rd | 6 +-- man/set_file_name.Rd | 9 +++++ man/set_public_read.Rd | 4 +- man/set_rights_and_access.Rd | 6 +-- man/set_rights_holder.Rd | 6 +-- man/update_object.Rd | 9 +++++ man/update_resource_map.Rd | 31 ++++++++++++++- 16 files changed, 194 insertions(+), 51 deletions(-) diff --git a/R/access.R b/R/access.R index 20e1de4..3585686 100644 --- a/R/access.R +++ b/R/access.R @@ -18,10 +18,10 @@ #' @export #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -#'set_rights_holder(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX') +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_rights_holder(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX") #'} set_rights_holder <- function(mn, pids, subject) { if (!is(mn, "MNode")) { @@ -98,10 +98,10 @@ set_rights_holder <- function(mn, pids, subject) { #' #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -#'set_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) #'} set_access <- function(mn, pids, subjects, permissions=c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { @@ -169,10 +169,10 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang #' #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -#'set_public_read(mn, pids) +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_public_read(mn, pids) #'} set_public_read <- function(mn, pids) { set_access(mn, pids, "public", "read") @@ -187,10 +187,10 @@ set_public_read <- function(mn, pids) { #' #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -#'remove_public_read(mn, pids) +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' remove_public_read(mn, pids) #'} remove_public_read <- function(mn, pids) { if (!is(mn, "MNode")) { @@ -273,10 +273,10 @@ remove_public_read <- function(mn, pids) { #' #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -#'set_rights_and_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) #'} set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { diff --git a/R/dataone.R b/R/dataone.R index 75862d5..ad3fbbb 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -111,9 +111,9 @@ is_token_expired <- function(node) { #' #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'url <- get_mn_base_url(mn) +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +# 'url <- get_mn_base_url(mn) #'} get_mn_base_url <- function(mn) { # Determine MN URL. Accept either an MNode or a character string @@ -135,10 +135,10 @@ get_mn_base_url <- function(mn) { #' #' @examples #'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -#'is_authorized(mn, pids, 'write') +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' is_authorized(mn, pids, "write") #'} is_authorized <- function(node, ids, action) { stopifnot(class(node) %in% c("MNode", "CNode")) diff --git a/R/editing.R b/R/editing.R index 5467816..97b94f4 100644 --- a/R/editing.R +++ b/R/editing.R @@ -20,8 +20,16 @@ #' #' @import dataone #' @import datapack +#' @return pid (character). The PID of the published object. #' #' @export +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' my_path <- "/home/Documents/myfile.csv" +#' pid <- publish_object(mn, path = my_path, format_id = "text/csv", public = FALSE) +#'} publish_object <- function(mn, path, format_id=NULL, @@ -123,6 +131,13 @@ publish_object <- function(mn, #' @export #' #' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' my_path <- "/home/Documents/myfile.csv" +#' new_pid <- update_object(mn, pid, my_path, format_id = "text/csv") +#'} update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) { stopifnot(is(mn, "MNode")) stopifnot(object_exists(mn, pid)) @@ -235,6 +250,19 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' @import EML #' #' @export +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' +#' meta_path <- "/home/Documents/myMetadata.xml" +#' +#' publish_update(mn, meta_pid, rm_pid, data_pids, meta_path, public = TRUE) +#'} publish_update <- function(mn, metadata_pid, resource_map_pid, @@ -528,7 +556,16 @@ publish_update <- function(mn, #' @export #' #' @examples - +#'\dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' +#' dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#' +#' +#' create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) +#'} create_resource_map <- function(mn, metadata_pid, data_pids=NULL, @@ -591,20 +628,36 @@ create_resource_map <- function(mn, #' temporarily to allow updating but sets it back to the rightsHolder that was #' in place before the update. #' -#' @param mn -#' @param metadata_pid -#' @param data_pids -#' @param child_pids +#' @param mn (MNode) The Member Node +#' @param metadata_pid (character) The PID of the metadata object to go in the +#' package. +#' @param data_pids (character) The PID(s) of the data objects to go in the +#' package. +#' @param child_pids child_pids (character) The resource map PIDs of the packages to be +#' nested under the package. #' @param public Whether or not to make the new resource map public read #' (logical) #' @param check_first (logical) Optional. Whether to check the PIDs passed in as #' aruments exist on the MN before continuing. This speeds up the function, #' especially when `data_pids` has many elements. -#' @param resource_map_pid +#' @param resource_map_pid (character) The PID of the resource map to be updated. #' @param other_statements (data.frame) Extra statements to add to the Resource Map. -#' @param identifier +#' @param identifier (character) Manually specify the identifier for the new metadata object. +#' @return pid (character) Updated resource map PID. #' #' @export +#' @examples +#'\dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' +#' +#' rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) +#'} update_resource_map <- function(mn, resource_map_pid, metadata_pid, @@ -715,6 +768,13 @@ update_resource_map <- function(mn, #' @export #' #' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn, "urn:node:mnTestKNB") +#' +#' pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' set_file_name(mn, pid, "myfile.csv") +#' } set_file_name <- function(mn, pid, name) { stopifnot(is(mn, "MNode")) stopifnot(is.character(pid), diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index 2240a32..0a54b53 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -35,3 +35,15 @@ specified MN. If you only want to generate resource map RDF/XML, see \code{\link{generate_resource_map}} } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") + +meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' +dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') + + +create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) +} +} diff --git a/man/get_mn_base_url.Rd b/man/get_mn_base_url.Rd index 2d50c57..f015c69 100644 --- a/man/get_mn_base_url.Rd +++ b/man/get_mn_base_url.Rd @@ -19,6 +19,5 @@ Get the base URL of the Member Node. \dontrun{ cn <- CNode('STAGING2') mn <- getMNode(cn,"urn:node:mnTestKNB") -url <- get_mn_base_url(mn) } } diff --git a/man/is_authorized.Rd b/man/is_authorized.Rd index 2718809..6831971 100644 --- a/man/is_authorized.Rd +++ b/man/is_authorized.Rd @@ -23,7 +23,7 @@ Check if the user has authorization to perform an action on an object. \dontrun{ cn <- CNode('STAGING2') mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -is_authorized(mn, pids, 'write') +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +is_authorized(mn, pids, "write") } } diff --git a/man/publish_object.Rd b/man/publish_object.Rd index 0a603d8..3291908 100644 --- a/man/publish_object.Rd +++ b/man/publish_object.Rd @@ -20,6 +20,9 @@ publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL, \item{clone_pid}{(character) PID of objet to clone System Metadata from} } +\value{ +pid (character). The PID of the published object. +} \description{ High-level functions for managing content. Publish an object on a member node @@ -31,3 +34,11 @@ system metadata for that identifier and use it to provide rightsHolder, accessPo and replicationPolicy metadata. Note that this function only uploads the object to the Member Node, and does not add it to a data package, which can be done separately. } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +my_path <- "/home/Documents/myfile.csv" +pid <- publish_object(mn, path = my_path, format_id = "text/csv", public = FALSE) +} +} diff --git a/man/publish_update.Rd b/man/publish_update.Rd index efbcab7..56518ec 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -64,3 +64,17 @@ should be updated as well, using the parent_medata_pid, parent_data_pids, and parent_child_pids as members of the updated package. In all cases, the objects are made publicly readable. } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") + +meta_path <- "/home/Documents/myMetadata.xml" + +publish_update(mn, meta_pid, rm_pid, data_pids, meta_path, public = TRUE) +} +} diff --git a/man/remove_public_read.Rd b/man/remove_public_read.Rd index 718dccb..1b27566 100644 --- a/man/remove_public_read.Rd +++ b/man/remove_public_read.Rd @@ -16,9 +16,9 @@ Remove public access on a set of objects. } \examples{ \dontrun{ -cn <- CNode('STAGING2') +cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") remove_public_read(mn, pids) } } diff --git a/man/set_access.Rd b/man/set_access.Rd index 1001a5a..f6e46b1 100644 --- a/man/set_access.Rd +++ b/man/set_access.Rd @@ -25,9 +25,9 @@ and moves on. System Metadata are only updated when a change was needed. } \examples{ \dontrun{ -cn <- CNode('STAGING2') +cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -set_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) } } diff --git a/man/set_file_name.Rd b/man/set_file_name.Rd index f03b1d3..3ae909b 100644 --- a/man/set_file_name.Rd +++ b/man/set_file_name.Rd @@ -19,3 +19,12 @@ set_file_name(mn, pid, name) \description{ Set the file name on an object } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn, "urn:node:mnTestKNB") + +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +set_file_name(mn, pid, "myfile.csv") +} +} diff --git a/man/set_public_read.Rd b/man/set_public_read.Rd index 1e988bc..efa39e6 100644 --- a/man/set_public_read.Rd +++ b/man/set_public_read.Rd @@ -16,9 +16,9 @@ Set public access on a set of objects. } \examples{ \dontrun{ -cn <- CNode('STAGING2') +cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") set_public_read(mn, pids) } } diff --git a/man/set_rights_and_access.Rd b/man/set_rights_and_access.Rd index 0e7ef09..b821034 100644 --- a/man/set_rights_and_access.Rd +++ b/man/set_rights_and_access.Rd @@ -27,9 +27,9 @@ needed. } \examples{ \dontrun{ -cn <- CNode('STAGING2') +cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -set_rights_and_access(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX', permissions = c('read', 'write', 'changePermission')) +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) } } diff --git a/man/set_rights_holder.Rd b/man/set_rights_holder.Rd index 67a04cc..c19d5db 100644 --- a/man/set_rights_holder.Rd +++ b/man/set_rights_holder.Rd @@ -24,9 +24,9 @@ the provided system metadata document on the given Member Node. } \examples{ \dontrun{ -cn <- CNode('STAGING2') +cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') -set_rights_holder(mn, pids, subjects = 'http://orcid.org/0000-000X-XXXX-XXXX') +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_rights_holder(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX") } } diff --git a/man/update_object.Rd b/man/update_object.Rd index 4a72e53..bc4d88b 100644 --- a/man/update_object.Rd +++ b/man/update_object.Rd @@ -27,3 +27,12 @@ This is a convenience wrapper around `dataone::updateObject` which copies in fields from the old object's System Metadata such as the rightsHolder and accessPolicy and updates only what needs to be changed. } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +my_path <- "/home/Documents/myfile.csv" +new_pid <- update_object(mn, pid, my_path, format_id = "text/csv") +} +} diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index 59e4351..fb7f1bf 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -9,9 +9,22 @@ update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL, public = FALSE, check_first = TRUE) } \arguments{ +\item{mn}{(MNode) The Member Node} + +\item{resource_map_pid}{(character) The PID of the resource map to be updated.} + +\item{metadata_pid}{(character) The PID of the metadata object to go in the +package.} + +\item{data_pids}{(character) The PID(s) of the data objects to go in the +package.} + +\item{child_pids}{child_pids (character) The resource map PIDs of the packages to be +nested under the package.} + \item{other_statements}{(data.frame) Extra statements to add to the Resource Map.} -\item{identifier}{} +\item{identifier}{(character) Manually specify the identifier for the new metadata object.} \item{public}{Whether or not to make the new resource map public read (logical)} @@ -20,6 +33,9 @@ update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL, aruments exist on the MN before continuing. This speeds up the function, especially when `data_pids` has many elements.} } +\value{ +pid (character) Updated resource map PID. +} \description{ This function first generates a new resource map RDF/XML document locally and then uses the dataone::updateObject function to update an Object on the @@ -44,3 +60,16 @@ Note: This function currently replaces the rightsHolder on the Resource Map temporarily to allow updating but sets it back to the rightsHolder that was in place before the update. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") + +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") + + +rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) +} +} From b501e68ce65bd9b5600a83921704e9eaa6a8059a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 1 Feb 2018 17:08:32 -0900 Subject: [PATCH 056/318] Use .9000 convention instead of -pre since r cmd check hates that --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32fed3e..05d0f87 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: arcticdatautils Title: Arctic Data Utilities -Version: 0.6.4-pre +Version: 0.6.4.9000 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), From 526f9afa215b86f39e54c96872c2c6dabc5fe1d4 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 1 Feb 2018 17:22:45 -0900 Subject: [PATCH 057/318] Start draft of maintenance guide --- MAINTENANCE.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 MAINTENANCE.md diff --git a/MAINTENANCE.md b/MAINTENANCE.md new file mode 100644 index 0000000..2988764 --- /dev/null +++ b/MAINTENANCE.md @@ -0,0 +1,44 @@ +# Maintenance + +This document should serve as a guide for new maintainers of this package. +It's a work in progress so expect it to change and, hopefully, get better and more complete over time. + +## Releases + +### When to release? + +Whenever, really. +Since this package isn't on CRAN, we can release it as much as we like and we're only bothering people we know and those people can tell us how displeased they are with how often we're releasing this package in person. + +### How to release? + +- Increment the `Version` tag in the `DESCRIPTION` file to the appropriate next version. + + This package tries to use [Semantic Versioning](https://semver.org/) (semver) which can be summarized in three bullets: + + > Given a version number MAJOR.MINOR.PATCH, increment the: + > + > - MAJOR version when you make incompatible API changes, + > - MINOR version when you add functionality in a backwards-compatible manner, and + > - PATCH version when you make backwards-compatible bug fixes. + + Note: A common mistake people make is thinking that the next version after 0.9 is 1.0, but it could be 0.10, then 0.11, and so on. + +- Make and push a commit with just that diff. + + Example here: https://github.com/NCEAS/arcticdatautils/commit/87f91179f4820ecdb283672e2179984d4f6cd334 + +- Go to [the releases tab](https://github.com/NCEAS/arcticdatautils/releases) and click "Draft a new release" + + - Tag version and Release title should match v{MAJOR}.{MINOR}.{PATCH}, e.g., v6.4.5 + - The release description should include: + - A brief, 1-2 sentence description of what's changed since the last release + - Sections for ADDED/FIXED/REMOVED (omit section if not applicable), each with a bulleted list of changes in human-readable prose + + Example: https://github.com/NCEAS/arcticdatautils/releases/tag/v0.6.2 + +- You're done, now go tell people to upgrade! + +## Pull Requests + +TODO From 300822d8b1dc687ecbcac852f9407d8c112b7328 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 1 Feb 2018 17:28:41 -0900 Subject: [PATCH 058/318] Increment the version to the correct next version I mistakenly changed the version to 0.6.4.9000 instead of 0.6.3.9000. 0.6.4 will be the next release though! --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 05d0f87..6d5d52e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: arcticdatautils Title: Arctic Data Utilities -Version: 0.6.4.9000 +Version: 0.6.3.9000 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), From 5e4942f378b86f7bb7e62441542254969f53ee7c Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 09:25:57 -0800 Subject: [PATCH 059/318] replace boolean with logical --- R/dataone.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dataone.R b/R/dataone.R index ad3fbbb..663a2e5 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -6,7 +6,7 @@ #' #' @param node (MNode|CNode) The CN or MN you want to find a token for. #' -#' @return (boolean) +#' @return (logical) #' @export #' #' @examples @@ -61,7 +61,7 @@ get_token <- function(node) { #' Determine whether the set token is expired. #' -#' @return (boolean) +#' @return (logical) #' @export #' #' @examples From 4d6ac0a1b7f6b61f5167f4560f90ce5d120fb79d Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 09:26:04 -0800 Subject: [PATCH 060/318] added return --- R/editing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/editing.R b/R/editing.R index 97b94f4..03b936e 100644 --- a/R/editing.R +++ b/R/editing.R @@ -244,6 +244,7 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' access policies are not affected. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. #' @param parent_data_pids +#' @return pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. #' #' @import dataone #' @import datapack From fecd4827b62dacfd946bc5b167b34c232c03a9dd Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 09:39:00 -0800 Subject: [PATCH 061/318] added name and definition for param --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index 03b936e..0692088 100644 --- a/R/editing.R +++ b/R/editing.R @@ -243,7 +243,7 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' This applies to the new metadata PID and its resource map and data object. #' access policies are not affected. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. -#' @param parent_data_pids +#' @param parent_data_pids (character) Optional. Data pids of a parent package to be updated. #' @return pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. #' #' @import dataone From bb0243cdceaf045629b1f8ee4f735412925e03bd Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 09:50:30 -0800 Subject: [PATCH 062/318] removed export from a few functions that are obsolete, added examples, fixed param and return vlues --- R/eml.R | 22 ++++++++++++++++------ man/add_methods_step.Rd | 6 ++++++ man/clear_methods.Rd | 6 ++++++ man/eml_address.Rd | 3 +++ man/eml_individual_name.Rd | 3 +++ man/eml_party.Rd | 4 ++++ man/is_token_expired.Rd | 2 +- man/is_token_set.Rd | 2 +- man/publish_update.Rd | 5 ++++- man/sysmeta_to_other_entity.Rd | 5 +++++ 10 files changed, 49 insertions(+), 9 deletions(-) diff --git a/R/eml.R b/R/eml.R index 5ce11a8..f39397e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -35,7 +35,7 @@ pid_to_eml_other_entity <- function(mn, pids) { #' @param pids (character) #' #' @return -#' @export +#' sysmeta_to_other_entity <- function(mn, pids) { .Deprecated("pid_to_other_eml_entity", package = "arcticdtautils", @@ -115,7 +115,7 @@ sysmeta_to_eml_other_entity <- function(sysmeta) { #' @param sysmeta (SystemMetadata) #' #' @return -#' @export +#' sysmeta_to_other_entity <- function(sysmeta) { .Deprecated("sysmeta_to_eml_other_entity", package = "arcticdtautils", @@ -235,9 +235,9 @@ set_other_entities <- function(mn, path, pids) { #' @param sysmeta (SystemMetadata) The sysmeta of the object you want to find. #' #' @return (character) The docid -#' @export #' -#' @examples +#' +#' get_doc_id <- function(sysmeta) { stopifnot(is(sysmeta, "SystemMetadata")) @@ -284,6 +284,10 @@ get_doc_id <- function(sysmeta) { #' @export #' #' @examples +#' \dontrun{ +#' eml <- read_eml("~/Documents/metadata.xml") +#' eml <- add_methods_step(eml, "Field Sampling", "Samples were collected using a niskin water sampler.") +#' } add_methods_step <- function(doc, title, description) { stopifnot(is(doc, "eml")) stopifnot(is(doc@dataset, "dataset")) @@ -310,6 +314,10 @@ add_methods_step <- function(doc, title, description) { #' @export #' #' @examples +#' \dontrun{ +#' eml <- read_eml("~/Documents/metadata.xml") +#' eml <- clear_methods(eml) +#' } clear_methods <- function(doc) { stopifnot(is(doc, "eml")) @@ -342,7 +350,7 @@ clear_methods <- function(doc) { #' @export #' #' @examples -#' eml_party("creator", "Test", "User) +#' eml_party("creator", "Test", "User") #' eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") eml_party <- function(type="associatedParty", given_names=NULL, @@ -525,7 +533,7 @@ eml_personnel <- function(role = NULL, ...) { #' @export #' #' @examples -#' eml_individual_name("some", "user) +#' eml_individual_name("some", "user") eml_individual_name <- function(given_names=NULL, sur_name) { stopifnot(is.character(sur_name) && nchar(sur_name) > 0) @@ -660,6 +668,8 @@ eml_geographic_coverage <- function(description, north, east, south, west) { #' @export #' #' @examples +#' NCEASadd <- eml_address("735 State St #300", "Santa Barbara," "CA", "93101") + eml_address <- function(delivery_points, city, administrative_area, postal_code) { stopifnot(is.character(delivery_points), is.character(city), diff --git a/man/add_methods_step.Rd b/man/add_methods_step.Rd index 8b3c022..033d64b 100644 --- a/man/add_methods_step.Rd +++ b/man/add_methods_step.Rd @@ -19,3 +19,9 @@ add_methods_step(doc, title, description) \description{ Adds a step to the methods document } +\examples{ +\dontrun{ +eml <- read_eml("~/Documents/metadata.xml") +eml <- add_methods_step(eml, "Field Sampling", "Samples were collected using a niskin water sampler.") +} +} diff --git a/man/clear_methods.Rd b/man/clear_methods.Rd index c1a261c..370b3a7 100644 --- a/man/clear_methods.Rd +++ b/man/clear_methods.Rd @@ -15,3 +15,9 @@ clear_methods(doc) \description{ Clear all methods from the document. } +\examples{ +\dontrun{ +eml <- read_eml("~/Documents/metadata.xml") +eml <- clear_methods(eml) +} +} diff --git a/man/eml_address.Rd b/man/eml_address.Rd index 7221f2c..b8d2246 100644 --- a/man/eml_address.Rd +++ b/man/eml_address.Rd @@ -21,3 +21,6 @@ eml_address(delivery_points, city, administrative_area, postal_code) \description{ Create an EML address element. } +\examples{ +NCEASadd <- eml_address("735 State St #300", "Santa Barbara," "CA", "93101") +} diff --git a/man/eml_individual_name.Rd b/man/eml_individual_name.Rd index 3d5ec2b..c59f165 100644 --- a/man/eml_individual_name.Rd +++ b/man/eml_individual_name.Rd @@ -17,3 +17,6 @@ eml_individual_name(given_names = NULL, sur_name) \description{ Create an EML individualName section } +\examples{ +eml_individual_name("some", "user") +} diff --git a/man/eml_party.Rd b/man/eml_party.Rd index c008c35..6abee51 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -40,3 +40,7 @@ fine. \details{ The \code{userId} argument assumes an ORCID so be sure to adjust for that. } +\examples{ +eml_party("creator", "Test", "User") +eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +} diff --git a/man/is_token_expired.Rd b/man/is_token_expired.Rd index 93e1cad..03e2584 100644 --- a/man/is_token_expired.Rd +++ b/man/is_token_expired.Rd @@ -7,7 +7,7 @@ is_token_expired(node) } \value{ -(boolean) +(logical) } \description{ Determine whether the set token is expired. diff --git a/man/is_token_set.Rd b/man/is_token_set.Rd index 4b202d2..46b036c 100644 --- a/man/is_token_set.Rd +++ b/man/is_token_set.Rd @@ -10,7 +10,7 @@ is_token_set(node) \item{node}{(MNode|CNode) The CN or MN you want to find a token for.} } \value{ -(boolean) +(logical) } \description{ Helpers for the DataONE R package. diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 56518ec..747b10e 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -41,7 +41,10 @@ access policies are not affected.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements.} -\item{parent_data_pids}{} +\item{parent_data_pids}{(character) Optional. Data pids of a parent package to be updated.} +} +\value{ +pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. } \description{ This function can be used for a variety of tasks: diff --git a/man/sysmeta_to_other_entity.Rd b/man/sysmeta_to_other_entity.Rd index 4ef8a59..a798f4a 100644 --- a/man/sysmeta_to_other_entity.Rd +++ b/man/sysmeta_to_other_entity.Rd @@ -15,6 +15,11 @@ sysmeta_to_other_entity(sysmeta) \item{mn}{(MNode)} \item{pids}{(character)} +} +\value{ + + + } \description{ This function is deprecated. See \link{pid_to_other_eml_entity}. From 33696f32586ae9fd714b69db3b70f8f76ba95ddb Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 09:50:58 -0800 Subject: [PATCH 063/318] these functions don't need to get exported (I think) --- NAMESPACE | 2 -- 1 file changed, 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8c416fd..d6ae249 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,7 +42,6 @@ export(generate_resource_map) export(generate_resource_map_pid) export(get_all_versions) export(get_current_version) -export(get_doc_id) export(get_identifier) export(get_latest_release) export(get_mn_base_url) @@ -95,7 +94,6 @@ export(show_random_dataset) export(substitute_eml_party) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) -export(sysmeta_to_other_entity) export(theme_packages) export(update_object) export(update_package) From d91e2991c439b3130950ca6be5a4d1dbc557cb65 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 09:52:29 -0800 Subject: [PATCH 064/318] remove export for this function that I don't think people need to see --- NAMESPACE | 1 - R/environment.R | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d6ae249..8087915 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,7 +31,6 @@ export(eml_personnel) export(eml_project) export(eml_validate_attributes) export(env_get) -export(env_load) export(filter_obsolete_pids) export(filter_packaging_statements) export(find_newest_object) diff --git a/R/environment.R b/R/environment.R index 3ee6ac1..8ff1c12 100644 --- a/R/environment.R +++ b/R/environment.R @@ -34,9 +34,9 @@ env_get <- function() { #' @param skip_mn (logical) Optional. Skip contacting the MNode and filling in the $mn element of the environment. #' #' @return (list) A list of name-value pairs. -#' @export #' -#' @examples +#' +#' env_load <- function(name=NULL, path=NULL, skip_mn=FALSE) { # Determine the environment to load if (is.null(name)) { From 797a7a870325f8b18257465a34fb8f39cab0813d Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 Feb 2018 09:27:18 -0900 Subject: [PATCH 065/318] Remove sysmeta_to_other_entity entirely --- R/eml.R | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/R/eml.R b/R/eml.R index f39397e..54353bb 100644 --- a/R/eml.R +++ b/R/eml.R @@ -29,19 +29,6 @@ pid_to_eml_other_entity <- function(mn, pids) { sysmeta_to_eml_other_entity(sysmeta) } -#' This function is deprecated. See \link{pid_to_other_eml_entity}. -#' -#' @param mn (MNode) -#' @param pids (character) -#' -#' @return -#' -sysmeta_to_other_entity <- function(mn, pids) { - .Deprecated("pid_to_other_eml_entity", - package = "arcticdtautils", - old = "pid_to_other_entity") -} - #' Create EML physical objects for the given set of PIDs #' #' Note this is a wrapper around sysmeta_to_eml_physical which handles the task of From 6c9239be8e5a3d1a5b94f5d199edb69103987625 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 10:31:52 -0800 Subject: [PATCH 066/318] added examples --- R/helpers.R | 41 ++++++++++++++++++++++++++---- man/create_dummy_metadata.Rd | 11 ++++++++ man/create_dummy_object.Rd | 12 +++++++++ man/create_dummy_package.Rd | 14 +++++++++- man/create_dummy_parent_package.Rd | 8 ++++++ 5 files changed, 80 insertions(+), 6 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index c853398..779b1ec 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -7,8 +7,16 @@ #' #' @param mn (MNode) The Member Node. #' @param data_pids (character) Optional. PIDs for data objects the metadata documents. -#' +#' @return pid (character) PID of published metadata document. #' @export +#' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") + +#' pid <- create_dummy_metadata(mn) +#' } create_dummy_metadata <- function(mn, data_pids=NULL) { pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() @@ -49,10 +57,17 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { #' #' @param mn (MNode) The Member Node. #' -#' @return +#' @return pid (character) The pid of the dummy object. #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' pid <- create_dummy_object(mn) +#'} create_dummy_object <- function(mn) { pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() @@ -88,12 +103,19 @@ create_dummy_object <- function(mn) { #' Create a test package. #' #' @param mn (MNode) The Member Node. -#' @param size (numeric) The number of files in the package. +#' @param size (numeric) The number of files in the package, including the metadata file. #' -#' @return +#' @return pids (character) A named character vector of the data pids in the package. #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' #Create dummy package with 5 data objects and 1 metadata object +#' pids <- create_dummy_package(mn, 6) +#' } create_dummy_package <- function(mn, size = 2) { me <- get_token_subject() @@ -147,10 +169,19 @@ create_dummy_package <- function(mn, size = 2) { #' @param mn (MNode) The Member Node. #' @param children (character) Child package (resource maps) PIDs. #' -#' @return +#' @return pid (character) Named character vector of PIDs including parent package and child package pids. #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +# cn <- CNode("STAGING2") +# mn <- getMNode(cn,"urn:node:mnTestKNB") +# +# child_pid <- "urn:uuid:39a59f99-118b-4c81-9747-4b6c43308e00" +# +# create_dummy_parent_package(mn, child_pid) +#'} create_dummy_parent_package <- function(mn, children) { me <- get_token_subject() meta_pid <- create_dummy_metadata(mn) diff --git a/man/create_dummy_metadata.Rd b/man/create_dummy_metadata.Rd index bd55313..2712ffd 100644 --- a/man/create_dummy_metadata.Rd +++ b/man/create_dummy_metadata.Rd @@ -11,7 +11,18 @@ create_dummy_metadata(mn, data_pids = NULL) \item{data_pids}{(character) Optional. PIDs for data objects the metadata documents.} } +\value{ +pid (character) PID of published metadata document. +} \description{ Various helper functions for things like testing the package. Create a test metadata object. } +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- create_dummy_metadata(mn) +} +} diff --git a/man/create_dummy_object.Rd b/man/create_dummy_object.Rd index 4e4ff7d..8e6fea3 100644 --- a/man/create_dummy_object.Rd +++ b/man/create_dummy_object.Rd @@ -9,6 +9,18 @@ create_dummy_object(mn) \arguments{ \item{mn}{(MNode) The Member Node.} } +\value{ +pid (character) The pid of the dummy object. +} \description{ Create a test object. } +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +pid <- create_dummy_object(mn) +} +} diff --git a/man/create_dummy_package.Rd b/man/create_dummy_package.Rd index 34cbf74..97047e0 100644 --- a/man/create_dummy_package.Rd +++ b/man/create_dummy_package.Rd @@ -9,8 +9,20 @@ create_dummy_package(mn, size = 2) \arguments{ \item{mn}{(MNode) The Member Node.} -\item{size}{(numeric) The number of files in the package.} +\item{size}{(numeric) The number of files in the package, including the metadata file.} +} +\value{ +pids (character) A named character vector of the data pids in the package. } \description{ Create a test package. } +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +#Create dummy package with 5 data objects and 1 metadata object +pids <- create_dummy_package(mn, 6) +} +} diff --git a/man/create_dummy_parent_package.Rd b/man/create_dummy_parent_package.Rd index e6202ef..a2f1058 100644 --- a/man/create_dummy_parent_package.Rd +++ b/man/create_dummy_parent_package.Rd @@ -11,6 +11,14 @@ create_dummy_parent_package(mn, children) \item{children}{(character) Child package (resource maps) PIDs.} } +\value{ +pid (character) Named character vector of PIDs including parent package and child package pids. +} \description{ Create a test parent package. } +\examples{ +\dontrun{ +# Set environment +} +} From 1a69ec4ce4b8079d82e70e9214095876fda7eb2e Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 12:30:17 -0800 Subject: [PATCH 067/318] this fn doesn't need to be exported --- NAMESPACE | 1 - R/inserting.R | 3 --- 2 files changed, 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8087915..d46153c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,6 @@ export(create_dummy_metadata) export(create_dummy_object) export(create_dummy_package) export(create_dummy_parent_package) -export(create_from_folder) export(create_object) export(create_resource_map) export(create_sysmeta) diff --git a/R/inserting.R b/R/inserting.R index 34202a8..8411ced 100644 --- a/R/inserting.R +++ b/R/inserting.R @@ -15,9 +15,6 @@ #' @param data_pids (character) Optional. Manually specify the PIDs of data. This is useful if data were inserted outside this function and you want to re-use those objects. #' #' @return (list) All of the PIDs created. -#' @export -#' -#' @examples create_from_folder <- function(mn, path, data_pids=NULL) { # Validate args stopifnot(file.exists(path)) From 79c899663b97cc61d292faf5a1bcb37b7d9f74c2 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 12:46:13 -0800 Subject: [PATCH 068/318] remove exports from all fns in inventory --- NAMESPACE | 9 --------- R/inventory.R | 36 +++++++++--------------------------- 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d46153c..f2ab7d3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,15 +52,6 @@ export(get_token_subject) export(guess_format_id) export(insert_file) export(insert_package) -export(inv_add_extra_columns) -export(inv_add_parent_package_column) -export(inv_init) -export(inv_load_checksums) -export(inv_load_dois) -export(inv_load_files) -export(inv_load_identifiers) -export(inv_load_sizes) -export(inv_update) export(is_authorized) export(is_format_id) export(is_obsolete) diff --git a/R/inventory.R b/R/inventory.R index 5a2ecfd..5163a86 100644 --- a/R/inventory.R +++ b/R/inventory.R @@ -11,9 +11,7 @@ #' complicated. #' #' @return An empty data frame -#' @export -#' -#' @examples + inv_init <- function() { inventory <- data.frame(stringsAsFactors = FALSE) @@ -31,10 +29,7 @@ inv_init <- function() { #' @param filter (logical) Filter out versioned datasets. Default is TRUE. #' #' @return An inventory (data.frame) -#' -#' @export -#' -#' @examples + inv_load_files <- function(inventory, path, filter=TRUE) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -108,9 +103,7 @@ inv_load_files <- function(inventory, path, filter=TRUE) { #' #' @return (data.frame) An inventory #' -#' @export -#' -#' @examples + inv_load_sizes <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -150,9 +143,7 @@ inv_load_sizes <- function(inventory, path) { #' #' @return An inventory (data.frame) #' -#' @export -#' -#' @examples + inv_load_checksums <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -198,9 +189,7 @@ inv_load_checksums <- function(inventory, path) { #' #' @return (data.frame) The modified Inventory. #' -#' @export -#' -#' @examples + inv_load_dois <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot(is.data.frame(inventory), @@ -233,9 +222,7 @@ inv_load_dois <- function(inventory, path) { #' #' @return (data.frame) An inventory. #' -#' @export -#' -#' @examples + inv_load_identifiers <- function(inventory, paths) { stopifnot(file.exists(path)) stopifnot(is.data.frame(inventory), @@ -272,7 +259,7 @@ inv_load_identifiers <- function(inventory, paths) { #' #' @return An inventory (data.frame) #' -#' @export + inv_add_extra_columns <- function(inventory) { stopifnot(is(inventory, "data.frame"), "file" %in% names(inventory)) @@ -366,9 +353,7 @@ inv_add_extra_columns <- function(inventory) { #' @param inventory (data.frame) An Inventory. #' #' @return inventory (data.frame) An Inventory. -#' @export -#' -#' @examples + inv_add_parent_package_column <- function(inventory) { stopifnot(all(c("file", "package", "is_metadata", "depth") %in% names(inventory))) @@ -438,10 +423,7 @@ inv_add_parent_package_column <- function(inventory) { #' @param inventory (data.frame) The old Inventory. #' @param new_state (data.frame) The new Inventory. #' -#' @return -#' @export -#' -#' @examples + inv_update <- function(inventory, new_state) { stopifnot(is.data.frame(inventory), is.data.frame(new_state), From 7d801f3ecae33c36d72976ef023e848884ea4e5d Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 12:50:15 -0800 Subject: [PATCH 069/318] removed export --- NAMESPACE | 1 - R/marking.R | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f2ab7d3..379c8c1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -83,7 +83,6 @@ export(show_random_dataset) export(substitute_eml_party) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) -export(theme_packages) export(update_object) export(update_package) export(update_resource_map) diff --git a/R/marking.R b/R/marking.R index e1df919..d8dc40f 100644 --- a/R/marking.R +++ b/R/marking.R @@ -34,9 +34,7 @@ #' @param inventory (data.frame) An Inventory. #' #' @return (data.frame) An Inventory. -#' @export -#' -#' @examples + theme_packages <- function(inventory, nfiles_cutoff=100) { stopifnot(is.data.frame(inventory), "package_nfiles" %in% names(inventory)) From 10febb4db3d06d5afe3d7320212d38368ff48540 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 12:55:50 -0800 Subject: [PATCH 070/318] removed blank examples --- R/modify_metadata.R | 9 ++++----- man/fix_bad_enum.Rd | 3 --- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/R/modify_metadata.R b/R/modify_metadata.R index 870bcd9..cec7b50 100644 --- a/R/modify_metadata.R +++ b/R/modify_metadata.R @@ -21,6 +21,7 @@ # sapply(bad_enums, fix_bad_enums) + test_has_abstract <- function(path) { stopifnot(file.exists(path)) @@ -144,9 +145,7 @@ test_has_bad_enum <- function(path) { #' #' @param path #' -#' @return -#' -#' @examples + fix_bad_enum <- function(path) { stopifnot(file.exists(path)) @@ -189,7 +188,7 @@ fix_bad_enum <- function(path) { #' #' @return #' -#' @examples + fix_bad_topic <- function(path) { stopifnot(file.exists(path)) @@ -299,7 +298,7 @@ fix_bad_topic <- function(path) { #' #' @return Returns the result of the `system` command (0 = success) #' -#' @examples + pretty_print <- function(path) { stopifnot(file.exists(path), file.info(path)$size > 0) diff --git a/man/fix_bad_enum.Rd b/man/fix_bad_enum.Rd index bebd9ee..711e7ea 100644 --- a/man/fix_bad_enum.Rd +++ b/man/fix_bad_enum.Rd @@ -8,9 +8,6 @@ fix_bad_enum(path) } \arguments{ \item{path}{} -} -\value{ - } \description{ This is the case where the ISO schema says what's inside a From 438bc0a10174e3e1b74e950d741c6b4adba6327f Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 13:12:39 -0800 Subject: [PATCH 071/318] fixed various documentation errors, added an example for resource map parsing --- NAMESPACE | 12 ------ R/packaging.R | 76 ++++++++++++++---------------------- man/determine_child_pids.Rd | 4 +- man/generate_resource_map.Rd | 2 + man/parse_resource_map.Rd | 13 ++++++ man/update_package.Rd | 4 +- man/validate_environment.Rd | 2 +- man/validate_inventory.Rd | 2 +- 8 files changed, 51 insertions(+), 64 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 379c8c1..7a70ea3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,10 +13,7 @@ export(create_dummy_metadata) export(create_dummy_object) export(create_dummy_package) export(create_dummy_parent_package) -export(create_object) export(create_resource_map) -export(create_sysmeta) -export(determine_child_pids) export(eml_abstract) export(eml_add_entities) export(eml_address) @@ -31,13 +28,10 @@ export(eml_project) export(eml_validate_attributes) export(env_get) export(filter_obsolete_pids) -export(filter_packaging_statements) export(find_newest_object) export(find_newest_resource_map) export(format_eml) export(format_iso) -export(generate_resource_map) -export(generate_resource_map_pid) export(get_all_versions) export(get_current_version) export(get_identifier) @@ -45,13 +39,10 @@ export(get_latest_release) export(get_mn_base_url) export(get_ncdf4_attributes) export(get_netcdf_format_id) -export(get_or_create_pid) export(get_package) export(get_token) export(get_token_subject) export(guess_format_id) -export(insert_file) -export(insert_package) export(is_authorized) export(is_format_id) export(is_obsolete) @@ -84,10 +75,7 @@ export(substitute_eml_party) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) export(update_object) -export(update_package) export(update_resource_map) -export(validate_environment) -export(validate_inventory) export(view_profile) export(warn_current_version) import(EML) diff --git a/R/packaging.R b/R/packaging.R index 4d403a4..cf962eb 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -11,9 +11,7 @@ #' @param file (character) The fully-qualified relative path to the file. See examples. #' @param env (list) Optional. Specify an environment. #' -#' @export -#' -#' @examples + insert_file <- function(inventory, file, env=NULL) { validate_inventory(inventory) stopifnot(is.character(file), nchar(file) > 0, file %in% inventory$file) @@ -90,9 +88,7 @@ insert_file <- function(inventory, file, env=NULL) { #' #' @return A list containing PIDs and whether objects were inserted. (list) #' -#' @export -#' -#' @examples + insert_package <- function(inventory, package, env=NULL) { validate_inventory(inventory) stopifnot(is.character(package), nchar(package) > 0, package %in% inventory$package) @@ -308,12 +304,11 @@ insert_package <- function(inventory, package, env=NULL) { #' @param data_pids (character) PID(s) of the data Objects. #' @param child_pids (character) Optional. PID(s) of child Resource Maps. #' @param other_statements (data.frame) Extra statements to add to the Resource Map. -#' @param resource_map_pid +#' @param resource_map_pid (character) PID of resource map. #' @param resolve_base (character) Optional. The resolve service base URL. #' #' @return Absolute path to the Resource Map on disk (character) #' -#' @export #' #' @examples #' \dontrun{ @@ -491,10 +486,7 @@ generate_resource_map <- function(metadata_pid, #' #' @param metadata_pid #' -#' @return -#' @export -#' -#' @examples + generate_resource_map_pid <- function(metadata_pid) { stopifnot(is.character(metadata_pid), nchar(metadata_pid) > 0) @@ -513,9 +505,7 @@ generate_resource_map_pid <- function(metadata_pid) { #' @param scheme (character) The identifier scheme to use. #' #' @return The identifier (character) -#' @export -#' -#' @examples + get_or_create_pid <- function(file, mn, scheme="UUID") { stopifnot(is.data.frame(file), nrow(file) == 1, @@ -569,9 +559,7 @@ get_or_create_pid <- function(file, mn, scheme="UUID") { #' @param rights_holder (character) The rights holder DN string for the object. #' #' @return The sysmeta object (dataone::SystemMetadata) -#' @export -#' -#' @examples + create_sysmeta <- function(file, base_path, submitter, rights_holder) { stopifnot(is.data.frame(file), nrow(file) == 1) @@ -639,10 +627,7 @@ create_sysmeta <- function(file, base_path, submitter, rights_holder) { #' @param base_path (character) #' @param mn (MNode) #' -#' @return -#' @export -#' -#' @examples + create_object <- function(file, sysmeta, base_path, mn) { stopifnot(is.data.frame(file), nrow(file) == 1, @@ -709,12 +694,9 @@ create_object <- function(file, sysmeta, base_path, mn) { #' Validate an Inventory. #' -#' @param inventory -#' -#' @return -#' @export +#' @param inventory (data.frame) An Inventory. #' -#' @examples + validate_inventory <- function(inventory) { stopifnot(is.data.frame(inventory), nrow(inventory) > 0, @@ -731,12 +713,9 @@ validate_inventory <- function(inventory) { #' Validate an environment. #' -#' @param env +#' @param env (character) An environment #' -#' @return -#' @export -#' -#' @examples + validate_environment <- function(env) { env_default_components <- c("base_path", "alternate_path", @@ -757,13 +736,9 @@ validate_environment <- function(env) { #' Calculate a set of child PIDs for a given package. #' -#' @param inventory -#' @param package -#' -#' @return -#' @export -#' -#' @examples +#' @param inventory (data.frame) An Inventory. +#' @param package (character) The package identifier. + determine_child_pids <- function(inventory, package) { stopifnot(all(c("package", "parent_package", "is_metadata") %in% names(inventory))) @@ -805,13 +780,11 @@ determine_child_pids <- function(inventory, package) { #' exist on the Member Node before doing their work and will call createObject() #' instead of updateObject() if the object didn't already exist. #' -#' @param inventory (data.frame) -#' @param package (character) +#' @param inventory (data.frame) An inventory. +#' @param package (character) The package identifier. #' #' @return TRUE or FALSE depending on sucess (logical) -#' @export -#' -#' @examples + update_package <- function(inventory, package, env = NULL) { @@ -1038,6 +1011,17 @@ update_package <- function(inventory, #' @export #' #' @examples +#'\dontrun{ +#'# Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' rm_pid <- "resource_map_urn:uuid:6b2e5753-4a94-4e6f-971c-36420a446ecb" +#' +#' # Write resource map to file +#' writeBin(getObject(mn, rm_pid), "~/Documents/resource_map.rdf") +#' df <- parse_resource_map("~/Documents/resource_map.rdf") +#' } parse_resource_map <- function(path) { stopifnot(file.exists(path)) @@ -1059,9 +1043,7 @@ parse_resource_map <- function(path) { #' @param statements (data.frame) A set of Statements to be filtered #' #' @return (data.frame) The filtered Statements -#' @export -#' -#' @examples + filter_packaging_statements <- function(statements) { stopifnot(is.data.frame(statements)) if (nrow(statements) == 0) return(statements) diff --git a/man/determine_child_pids.Rd b/man/determine_child_pids.Rd index 75f1978..9cb8fee 100644 --- a/man/determine_child_pids.Rd +++ b/man/determine_child_pids.Rd @@ -7,7 +7,9 @@ determine_child_pids(inventory, package) } \arguments{ -\item{package}{} +\item{inventory}{(data.frame) An Inventory.} + +\item{package}{(character) The package identifier.} } \description{ Calculate a set of child PIDs for a given package. diff --git a/man/generate_resource_map.Rd b/man/generate_resource_map.Rd index fe738d9..2b56e2b 100644 --- a/man/generate_resource_map.Rd +++ b/man/generate_resource_map.Rd @@ -21,6 +21,8 @@ generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL, \item{other_statements}{(data.frame) Extra statements to add to the Resource Map.} \item{resolve_base}{(character) Optional. The resolve service base URL.} + +\item{resource_map_pid}{(character) PID of resource map.} } \value{ Absolute path to the Resource Map on disk (character) diff --git a/man/parse_resource_map.Rd b/man/parse_resource_map.Rd index 2abf8d0..b222499 100644 --- a/man/parse_resource_map.Rd +++ b/man/parse_resource_map.Rd @@ -15,3 +15,16 @@ parse_resource_map(path) \description{ Parse a Resource Map into a data.frame } +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +rm_pid <- "resource_map_urn:uuid:6b2e5753-4a94-4e6f-971c-36420a446ecb" + +# Write resource map to file +writeBin(getObject(mn, rm_pid), "~/Documents/resource_map.rdf") +df <- parse_resource_map("~/Documents/resource_map.rdf") +} +} diff --git a/man/update_package.Rd b/man/update_package.Rd index 83c3b04..daaee9c 100644 --- a/man/update_package.Rd +++ b/man/update_package.Rd @@ -7,9 +7,9 @@ update_package(inventory, package, env = NULL) } \arguments{ -\item{inventory}{(data.frame)} +\item{inventory}{(data.frame) An inventory.} -\item{package}{(character)} +\item{package}{(character) The package identifier.} } \value{ TRUE or FALSE depending on sucess (logical) diff --git a/man/validate_environment.Rd b/man/validate_environment.Rd index c8127bd..b0508c5 100644 --- a/man/validate_environment.Rd +++ b/man/validate_environment.Rd @@ -7,7 +7,7 @@ validate_environment(env) } \arguments{ -\item{env}{} +\item{env}{(character) An environment} } \description{ Validate an environment. diff --git a/man/validate_inventory.Rd b/man/validate_inventory.Rd index 83b84d0..6b9b953 100644 --- a/man/validate_inventory.Rd +++ b/man/validate_inventory.Rd @@ -7,7 +7,7 @@ validate_inventory(inventory) } \arguments{ -\item{inventory}{} +\item{inventory}{(data.frame) An Inventory.} } \description{ Validate an Inventory. From 45b27dff6f69aafd0d623c133b42bfa5f5779ed8 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 13:16:33 -0800 Subject: [PATCH 072/318] fixed brace --- R/quality.R | 2 +- man/mdq_run.Rd | 8 ++++++++ man/sysmeta_to_other_entity.Rd | 13 +------------ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/R/quality.R b/R/quality.R index db21541..82e7ac2 100644 --- a/R/quality.R +++ b/R/quality.R @@ -9,7 +9,7 @@ #' @examples #' # Check an EML document you are authoring #' library(EML) -#' mdq_run(new("eml)) +#' mdq_run(new("eml")) #' #' # Check an EML document that is saved to disk #' mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML")) diff --git a/man/mdq_run.Rd b/man/mdq_run.Rd index 8b0f253..6f97bc3 100644 --- a/man/mdq_run.Rd +++ b/man/mdq_run.Rd @@ -17,3 +17,11 @@ mdq_run(document, suite_id = "arctic.data.center.suite.1") \description{ Score a metadata document against a MetaDIG Suite } +\examples{ +# Check an EML document you are authoring +library(EML) +mdq_run(new("eml")) + +# Check an EML document that is saved to disk +mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML")) +} diff --git a/man/sysmeta_to_other_entity.Rd b/man/sysmeta_to_other_entity.Rd index a798f4a..f3d62d6 100644 --- a/man/sysmeta_to_other_entity.Rd +++ b/man/sysmeta_to_other_entity.Rd @@ -2,27 +2,16 @@ % Please edit documentation in R/eml.R \name{sysmeta_to_other_entity} \alias{sysmeta_to_other_entity} -\alias{sysmeta_to_other_entity} -\title{This function is deprecated. See \link{pid_to_other_eml_entity}.} +\title{This function is deprecated. See \link{sysmeta_to_eml_other_entity}.} \usage{ -sysmeta_to_other_entity(sysmeta) - sysmeta_to_other_entity(sysmeta) } \arguments{ \item{sysmeta}{(SystemMetadata)} - -\item{mn}{(MNode)} - -\item{pids}{(character)} } \value{ - - } \description{ -This function is deprecated. See \link{pid_to_other_eml_entity}. - This function is deprecated. See \link{sysmeta_to_eml_other_entity}. } From cb22f54f3d7e0282260ce060debd216895f35746 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 13:21:54 -0800 Subject: [PATCH 073/318] removed export for most sysmeta fns --- NAMESPACE | 4 ---- R/sysmeta.R | 16 ++-------------- man/remove_public_access.Rd | 8 ++++++++ 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7a70ea3..6e25774 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,10 @@ # Generated by roxygen2: do not edit by hand -export(add_access_rules) export(add_additional_identifiers) -export(add_admin_group_access) export(add_methods_step) export(add_string_to_title) export(change_eml_name) export(clear_methods) -export(clear_replication_policy) export(convert_iso_to_eml) export(create_dummy_metadata) export(create_dummy_object) @@ -61,7 +58,6 @@ export(publish_object) export(publish_update) export(remove_public_read) export(replace_package_id) -export(replace_subject) export(set_abstract) export(set_access) export(set_file_name) diff --git a/R/sysmeta.R b/R/sysmeta.R index fafa1bd..bec371a 100644 --- a/R/sysmeta.R +++ b/R/sysmeta.R @@ -11,9 +11,7 @@ #' @param sysmeta (SystemMetadata) The SystemMetadata to add rules to. #' #' @return The modified SystemMetadata object -#' @export -#' -#' @examples + add_access_rules <- function(sysmeta) { if (!inherits(sysmeta, "SystemMetadata")) { stop(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) @@ -41,7 +39,7 @@ add_access_rules <- function(sysmeta) { #' #' @examples #' library(datapack) -#' sm <- new("SystemMetadata) +#' sm <- new("SystemMetadata") #' sm <- addAccessRule(sm, "public", "read") #' sm@accessPolicy #' sm <- remove_public_access(sm) @@ -61,10 +59,6 @@ remove_public_access <- function(sysmeta) { #' #' @param sysmeta #' -#' @return -#' @export -#' -#' @examples add_admin_group_access <- function(sysmeta) { if (!inherits(sysmeta, "SystemMetadata")) { message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) @@ -90,9 +84,6 @@ add_admin_group_access <- function(sysmeta) { #' @param to (character) The DN string to put in place of `from`. #' #' @return The modified System Metadata (SystemMetadata) -#' @export -#' -#' @examples replace_subject <- function(sysmeta, from="cn=arctic-data-admins,dc=dataone,dc=org", to="CN=arctic-data-admins,DC=dataone,DC=org") { @@ -122,9 +113,6 @@ replace_subject <- function(sysmeta, #' @param sysmeta (SystemMetadata) The System Metadata object to clear the replication policy of. #' #' @return (SystemMetadata) The modified System Metadata object. -#' @export -#' -#' @examples clear_replication_policy <- function(sysmeta) { if (!(is(sysmeta, "SystemMetadata"))) { stop("First argument was not of class SystemMetadata.") diff --git a/man/remove_public_access.Rd b/man/remove_public_access.Rd index 86a63bc..38c9621 100644 --- a/man/remove_public_access.Rd +++ b/man/remove_public_access.Rd @@ -15,3 +15,11 @@ remove_public_access(sysmeta) \description{ Remove all public read access rules from a System Metadata document } +\examples{ +library(datapack) +sm <- new("SystemMetadata") +sm <- addAccessRule(sm, "public", "read") +sm@accessPolicy +sm <- remove_public_access(sm) +sm@accessPolicy +} From 89404a71778288e79fa7d398d4ffa5cdb0bc31b5 Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 2 Feb 2018 13:39:47 -0800 Subject: [PATCH 074/318] #59 pid_to_eml_datatable helper. Modified helpers.R as well to create -dummy attributes and enumeratedDomain data frames. --- R/eml.R | 53 +++++++++++++++++++++++++++++++++++++++ R/helpers.R | 50 ++++++++++++++++++++++++++++++++++++ tests/testthat/test_eml.R | 27 ++++++++++++++++++++ 3 files changed, 130 insertions(+) diff --git a/R/eml.R b/R/eml.R index 5ce11a8..fb9cb72 100644 --- a/R/eml.R +++ b/R/eml.R @@ -29,6 +29,59 @@ pid_to_eml_other_entity <- function(mn, pids) { sysmeta_to_eml_other_entity(sysmeta) } +#' Create an EML dataTable object for a given PID. +#' This function generates an attributeList object and physical and constructs a dataTable. +#' +#' @param mn (MNode) Member Node where the PID is associated with an object. +#' @param pids (character) The PID of the object to create the sub-tree for. +#' @param attributes (data.frame) Data frame of attributes. +#' @param enumeratedValues (data.frame) Data frame of enumerated attribute values. +#' @param name (character) Optional field to specify entityName, otherwise will be extracted from system metadata. +#' @param description (character) Optional field to specify entityDescription. +#' +#' @return (dataTable) The dataTable object +#' @export +#' +#' @examples +#' \dontrun{ +#' # Generate a dataTable for a given pid +#' pid <- "urn:uuid:xxxx" +#' attributes <- data.frame(attributeName = ..., attributeDefinition = ..., ...) +#' factors <- data.frame(attributeName = ..., code = ..., definition = ...) +#' name <- "1234.csv" +#' description <- "A description of this entity." +#' dataTable <- pid_to_eml_datatable(mn, pid, attributes, factors, name, description) +#' } +pid_to_eml_datatable <- function(mn, pid, attributes, factors=NULL, name=NULL, description=NULL) { + stopifnot(is(mn, "MNode")) + stopifnot(is.character(pids), + all(nchar(pids)) > 0) + stopifnot(is.data.frame(attributes)) + + if (is.null(factors)) { + attributes <- set_attributes(attributes) + } else { + stopifnot(is.data.frame(factors)) + attributes <- set_attributes(attributes, factors) + } + + if (is.null(name)) { + name <- getSystemMetadata(pid)@fileName + } + + stopifnot(eml_validate_attributes(attributes)) + + physicalObj <- pid_to_eml_physical(mn, pid) + + dataTable <- new("dataTable", + entityName = name, + entityDescription = description, + physical = physicalObj, + attributeList = attributes) + + dataTable +} + #' This function is deprecated. See \link{pid_to_other_eml_entity}. #' #' @param mn (MNode) diff --git a/R/helpers.R b/R/helpers.R index c853398..facfea8 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -184,3 +184,53 @@ create_dummy_parent_package <- function(mn, children) { list(parent = create_response, children = children) } + + +#' Create dummy attributes data frame +#' +#' @param numberAttributes (integer) Number of attributes to be created in the table +#' @param factors (character) Optional vector of factor names to include. +#' +#' @return dataframe +#' @export +#' +#' @examples +create_dummy_attributes_dataframe <- function(numberAttributes, factors=NULL) { + names <- sapply(1:numberAttributes, function(x){ paste0("Attribute ", x) }) + + if(!is.null(factors)) { + domains <- c(rep("textDomain", numberAttributes - length(factors)), + rep("enumeratedDomain", length(factors))) + names[(numberAttributes - length(factors) + 1):numberAttributes] <- factors + } + + df <- data.frame(attributeName = names, + attributeDefinition = names, + measurementScale = rep("nominal", numberAttributes), + domain = rep("textDomain", numberAttributes), + formatString = rep("NA", numberAttributes), + definition = names, + unit = rep("NA", numberAttributes), + numberType = rep("NA", numberAttributes), + missingValueCode = rep("NA", numberAttributes), + missingValueCodeExplanation = rep("NA", numberAttributes), + stringsAsFactors = F) + + df +} + +#' Create dummy enumeratedDomain data frame +#' +#' @param factors (character) Optional vector of factor names to include. +#' +#' @return +#' @export +#' +#' @examples +create_dummy_enumeratedDomain_dataframe <- function(factors) { + df <- data.frame(attributeName = rep(factors, 4), + code = rep(seq(1:4), 4), + definition = rep(factors, 4)) + + df +} diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 3c68982..6bf1caa 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -118,3 +118,30 @@ test_that("an other entity can be added from a pid", { unlink(data_path) }) + + +test_that("a data table can be added from a pid", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + data_path <- tempfile() + writeLines(LETTERS, data_path) + pid <- publish_object(mn, data_path, "text/csv") + + eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") + + doc <- EML::read_eml(eml_path) + + factors <- c("factor 1", "factor 2") + doc@dataset@dataTable <- pid_to_eml_datatable(mn, pid, + create_dummy_attributes_dataframe(10, factors), + create_dummy_enumeratedDomain_dataframe(factors)) + + testthat::expect_length(doc@dataset@dataTable, 1) + + unlink(data_path) +}) + + + From 2d3b1e13ca0eae206dab418ef2040ae06f5ef81c Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Fri, 2 Feb 2018 13:42:08 -0800 Subject: [PATCH 075/318] removed a bunch of exports, added and clarified a couple of examples --- NAMESPACE | 18 ------ R/util.R | 118 ++++++++++++++++++++------------------ man/convert_iso_to_eml.Rd | 6 ++ man/get_all_versions.Rd | 10 ++++ man/get_package.Rd | 10 ++++ man/get_package_direct.Rd | 3 - man/guess_format_id.Rd | 4 ++ man/is_obsolete.Rd | 10 ++++ man/new_uuid.Rd | 3 + man/object_exists.Rd | 10 ++++ man/view_profile.Rd | 7 +++ 11 files changed, 123 insertions(+), 76 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6e25774..b19e389 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(add_additional_identifiers) export(add_methods_step) -export(add_string_to_title) -export(change_eml_name) export(clear_methods) export(convert_iso_to_eml) export(create_dummy_metadata) @@ -24,40 +21,28 @@ export(eml_personnel) export(eml_project) export(eml_validate_attributes) export(env_get) -export(filter_obsolete_pids) export(find_newest_object) -export(find_newest_resource_map) export(format_eml) export(format_iso) export(get_all_versions) -export(get_current_version) -export(get_identifier) -export(get_latest_release) export(get_mn_base_url) export(get_ncdf4_attributes) -export(get_netcdf_format_id) export(get_package) export(get_token) -export(get_token_subject) export(guess_format_id) export(is_authorized) -export(is_format_id) export(is_obsolete) -export(is_resource_map) export(is_token_expired) export(is_token_set) -export(log_message) export(mdq_run) export(new_uuid) export(object_exists) export(parse_resource_map) -export(path_join) export(pid_to_eml_other_entity) export(pid_to_eml_physical) export(publish_object) export(publish_update) export(remove_public_read) -export(replace_package_id) export(set_abstract) export(set_access) export(set_file_name) @@ -66,14 +51,11 @@ export(set_public_read) export(set_rights_and_access) export(set_rights_holder) export(show_indexing_status) -export(show_random_dataset) -export(substitute_eml_party) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) export(update_object) export(update_resource_map) export(view_profile) -export(warn_current_version) import(EML) import(XML) import(dataone) diff --git a/R/util.R b/R/util.R index 5b1c232..6611e19 100644 --- a/R/util.R +++ b/R/util.R @@ -78,6 +78,8 @@ dataone_format_mappings <- list("avi" = "ideo/avi", #' @export #' #' @examples +#'formatid <- guess_format_id("temperature_data.csv") +#' guess_format_id <- function(filenames) { extensions <- tolower(tools::file_ext(filenames)) filetypes <- vector(mode = "character", length = length(extensions)) @@ -101,9 +103,7 @@ guess_format_id <- function(filenames) { #' @param path (character) Full or relative path to the file in question. #' #' @return (character) The DataONE format ID. -#' @export -#' -#' @examples + get_netcdf_format_id <- function(path) { stopifnot(is.character(path), nchar(path) > 0, @@ -146,9 +146,7 @@ get_netcdf_format_id <- function(path) { #' @param n (numeric) Optional. The number of files to show. #' #' @return Nothing. -#' @export -#' -#' @examples + show_random_dataset <- function(inventory, theme=NULL, n=10) { stopifnot(is.data.frame(inventory), all(c("file", "folder", "filename", "theme") %in% names(inventory))) @@ -203,9 +201,7 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { #' @param message (character) Your log message. #' #' @return Nothing. -#' @export -#' -#' @examples + log_message <- function(message=NULL) { if (is.null(message) || !is.character(message) || nchar(message) < 1) { invisible(return(FALSE)) @@ -244,6 +240,14 @@ log_message <- function(message=NULL) { #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' +#' object_exists(mn, pids) +#' } object_exists <- function(node, pids) { stopifnot(class(node) %in% c("MNode", "CNode"), is.character(pids)) @@ -279,6 +283,10 @@ object_exists <- function(node, pids) { #' @export #' #' @examples +#'\dontrun{ +#'iso_path <- "~/Docuements/ISO_metadata.xml" +#'eml_path <- convert_iso_to_eml(iso_path) +#'} convert_iso_to_eml <- function(path, style=NA) { # Load the XSLT if needed if (is.na(style)) { @@ -304,7 +312,7 @@ convert_iso_to_eml <- function(path, style=NA) { #' #' @return path (character) Path to the converted EML file. #' @import XML -#' @export + substitute_eml_party <- function(path) { # Read in the EML document doc = XML::xmlParse(path) @@ -341,7 +349,7 @@ substitute_eml_party <- function(path) { #' @return the modified XML node #' #' @import XML -#' @export + change_eml_name <- function(party) { # Check if there is an individualName element exists if (length(XML::getNodeSet(party, "./individualName")) == 0) { @@ -415,10 +423,7 @@ change_eml_name <- function(party) { #' @param path (character) Path to the XML file to edit. #' @param replacement (character) The new value. #' -#' @return -#' @export -#' -#' @examples + replace_package_id <- function(path, replacement) { stopifnot(file.exists(path)) stopifnot(is.character(replacement), @@ -440,10 +445,7 @@ replace_package_id <- function(path, replacement) { #' @param path (character) Path to the XML file to edit. #' @param string (character) The new value. #' -#' @return -#' @export -#' -#' @examples + add_string_to_title <- function(path, string) { stopifnot(file.exists(path)) stopifnot(is.character(string), @@ -476,9 +478,7 @@ add_string_to_title <- function(path, string) { #' @param identifiers (character) Set of identifiers to add. #' #' @return (character) Path to the modified document. -#' @export -#' -#' @examples + add_additional_identifiers <- function(path, identifiers) { stopifnot(is.character(path), nchar(path) > 0, @@ -510,9 +510,7 @@ add_additional_identifiers <- function(path, identifiers) { #' @param path_parts (character) #' #' @return (character)The joined path string. -#' @export -#' -#' @examples + path_join <- function(path_parts=c("")) { result <- paste0(path_parts, collapse = "") @@ -539,9 +537,7 @@ path_join <- function(path_parts=c("")) { #' @param format_id (character) #' #' @return (logical) -#' @export -#' -#' @examples + is_format_id <- function(node, pids, format_id) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(all(is.character(pids)), @@ -564,9 +560,7 @@ is_format_id <- function(node, pids, format_id) { #' @param pids (character) Vector of PIDs #' #' @return (logical) Whether or not the object(s) are resource maps -#' @export -#' -#' @examples + is_resource_map <- function(node, pids) { is_format_id(node, pids, "http://www.openarchives.org/ore/terms") } @@ -581,6 +575,14 @@ is_resource_map <- function(node, pids) { #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' is_obsolete(mn, pid) +#'} is_obsolete <- function(node, pids) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pids)) @@ -600,9 +602,8 @@ is_obsolete <- function(node, pids) { #' Returns the subject of the set dataone_test_token #' #' @return (character) The token subject. -#' @export #' -#' @examples +#' get_token_subject <- function() { info <- dataone::getTokenInfo(dataone::AuthenticationManager()) @@ -631,9 +632,7 @@ get_token_subject <- function() { #' @param dataone_response ("XMLInternalDocument" "XMLAbstractDocument") #' #' @return (character) The PID. -#' @export -#' -#' @examples + get_identifier <- function(dataone_response) { stopifnot("XMLInternalDocument" %in% class(dataone_response)) XML::xmlValue(XML::getNodeSet(dataone_response, "//d1:identifier/text()", namespaces = c("d1"="http://ns.dataone.org/service/types/v1"))[[1]]) @@ -646,6 +645,7 @@ get_identifier <- function(dataone_response) { #' @export #' #' @examples +#' id <- new_uuid() new_uuid <- function() { paste0("urn:uuid:", uuid::UUIDgenerate()) } @@ -657,9 +657,6 @@ new_uuid <- function() { #' version. #' #' @return (character) The current package version. -#' @export -#' -#' @examples get_current_version <- function() { desc_file <- file.path(system.file("DESCRIPTION", package = "arcticdatautils")) desc_lines <- readLines(desc_file) @@ -670,9 +667,6 @@ get_current_version <- function() { #' Use the GitHub API to find the latest release for the package. #' #' @return (character) The latest release. -#' @export -#' -#' @examples get_latest_release <- function() { req <- httr::GET("https://api.github.com/repos/NCEAS/arcticdatautils/releases") content <- httr::content(req) @@ -687,10 +681,7 @@ get_latest_release <- function() { #' Warns if the currently-installed version of the package is not the same #' version as the latest release on GitHub. #' -#' @return -#' @export -#' -#' @examples + warn_current_version <- function() { current <- get_current_version() latest <- get_latest_release() @@ -710,6 +701,14 @@ warn_current_version <- function() { #' @export #' #' @examples +#'\dontrun{ +#' #Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' ids <- get_all_versions(mn, pid) +#' } get_all_versions <- function(node, pid) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(pid), @@ -768,6 +767,14 @@ get_all_versions <- function(node, pid) { #' @export #' #' @examples +#'\dontrun{ +#' #Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' ids <- get_package(mn, pid) +#' } get_package <- function(node, pid, file_names=FALSE, rows=1000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), @@ -811,9 +818,7 @@ get_package <- function(node, pid, file_names=FALSE, rows=1000) { #' @param rows (numeric) The number of rows to return in the query. This is only #' useful to set if you are warned about the result set being truncated. #' -#' @return -#' -#' @examples + get_package_direct <- function(node, pid, file_names=FALSE, rows = 1000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), @@ -889,9 +894,7 @@ get_package_direct <- function(node, pid, file_names=FALSE, rows = 1000) { #' more than enough. #' #' @return (character) The resource map(s) that contain `pid`. -#' @export -#' -#' @examples + find_newest_resource_map <- function(node, pid, rows = 1000) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(pid), @@ -986,9 +989,7 @@ find_newest_object <- function(node, identifiers, rows=1000) { #' @param pids (character) PIDs to check the obsoletion state of. #' #' @return (character) PIDs that are not obsoleted by another PID. -#' @export -#' -#' @examples + filter_obsolete_pids <- function(node, pids) { pids[is.na(sapply(pids, function(pid) { dataone::getSystemMetadata(node, pid)@obsoletedBy }, USE.NAMES = FALSE))] } @@ -1026,6 +1027,13 @@ filter_obsolete_pids <- function(node, pids) { #' #' // Get a custom set of fields #' view_profile(mn, me, "origin") +#' +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' package_df <- view_profile(mn, "http://orcid.org/0000-0003-4703-1974", fields = c("title")) +#' #' } view_profile <- function(mn, subject, fields=c("identifier", "title")) { stopifnot(is(mn, "MNode")) diff --git a/man/convert_iso_to_eml.Rd b/man/convert_iso_to_eml.Rd index 4939de6..0efd0d7 100644 --- a/man/convert_iso_to_eml.Rd +++ b/man/convert_iso_to_eml.Rd @@ -17,3 +17,9 @@ convert_iso_to_eml(path, style = NA) \description{ Leave style=NA if you want to use the default ISO-to-EML stylesheet. } +\examples{ +\dontrun{ +iso_path <- "~/Docuements/ISO_metadata.xml" +eml_path <- convert_iso_to_eml(iso_path) +} +} diff --git a/man/get_all_versions.Rd b/man/get_all_versions.Rd index e3e5df5..c16a2a6 100644 --- a/man/get_all_versions.Rd +++ b/man/get_all_versions.Rd @@ -17,3 +17,13 @@ get_all_versions(node, pid) \description{ Get the PIDs of all versions of an object. } +\examples{ +\dontrun{ +#Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +ids <- get_all_versions(mn, pid) +} +} diff --git a/man/get_package.Rd b/man/get_package.Rd index 7f7066d..9d7ccd9 100644 --- a/man/get_package.Rd +++ b/man/get_package.Rd @@ -23,3 +23,13 @@ useful to set if you are warned about the result set being truncated.} This is a wrapper function around `get_package_direct` which takes either a resource map PID or a metadata PID as its `pid` argument. } +\examples{ +\dontrun{ +#Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +ids <- get_package(mn, pid) +} +} diff --git a/man/get_package_direct.Rd b/man/get_package_direct.Rd index f4790f6..e3beebf 100644 --- a/man/get_package_direct.Rd +++ b/man/get_package_direct.Rd @@ -15,9 +15,6 @@ get_package_direct(node, pid, file_names = FALSE, rows = 1000) \item{rows}{(numeric) The number of rows to return in the query. This is only useful to set if you are warned about the result set being truncated.} -} -\value{ - } \description{ Get a structured list of PIDs for the objects in a package. diff --git a/man/guess_format_id.Rd b/man/guess_format_id.Rd index 6493807..f6e02ff 100644 --- a/man/guess_format_id.Rd +++ b/man/guess_format_id.Rd @@ -15,3 +15,7 @@ guess_format_id(filenames) \description{ Guess format from filename for a vector of filenames. } +\examples{ +formatid <- guess_format_id("temperature_data.csv") + +} diff --git a/man/is_obsolete.Rd b/man/is_obsolete.Rd index 9e5bcbb..18161be 100644 --- a/man/is_obsolete.Rd +++ b/man/is_obsolete.Rd @@ -17,3 +17,13 @@ is_obsolete(node, pids) \description{ Test whether the object is obsoleted by another object. } +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +is_obsolete(mn, pid) +} +} diff --git a/man/new_uuid.Rd b/man/new_uuid.Rd index 58573e6..1c497ea 100644 --- a/man/new_uuid.Rd +++ b/man/new_uuid.Rd @@ -12,3 +12,6 @@ new_uuid() \description{ Helper function to generate a new UUID PID. } +\examples{ +id <- new_uuid() +} diff --git a/man/object_exists.Rd b/man/object_exists.Rd index 901cc81..0e756ea 100644 --- a/man/object_exists.Rd +++ b/man/object_exists.Rd @@ -18,3 +18,13 @@ object_exists(node, pids) This is a simple check for the HTTP status of a /meta/{PID} call on the provided member node. } +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") + +object_exists(mn, pids) +} +} diff --git a/man/view_profile.Rd b/man/view_profile.Rd index 38ab472..f044599 100644 --- a/man/view_profile.Rd +++ b/man/view_profile.Rd @@ -41,5 +41,12 @@ profile(mn, me) // Get a custom set of fields view_profile(mn, me, "origin") + +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +package_df <- view_profile(mn, "http://orcid.org/0000-0003-4703-1974", fields = c("title")) + } } From 39678f76b78f88d9b9f76886cb89036683b14dc0 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 Feb 2018 15:54:19 -0900 Subject: [PATCH 076/318] Fix broken example (missing semicolon) --- R/eml.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index 54353bb..c8bbd18 100644 --- a/R/eml.R +++ b/R/eml.R @@ -655,8 +655,7 @@ eml_geographic_coverage <- function(description, north, east, south, west) { #' @export #' #' @examples -#' NCEASadd <- eml_address("735 State St #300", "Santa Barbara," "CA", "93101") - +#' NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") eml_address <- function(delivery_points, city, administrative_area, postal_code) { stopifnot(is.character(delivery_points), is.character(city), From c17afebe4611aa3e211967e72995af7f3e1b9a5c Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 Feb 2018 16:00:31 -0900 Subject: [PATCH 077/318] Removed remove_public_access remove_public_read already does this and works better --- R/sysmeta.R | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/R/sysmeta.R b/R/sysmeta.R index bec371a..9e36535 100644 --- a/R/sysmeta.R +++ b/R/sysmeta.R @@ -31,30 +31,6 @@ add_access_rules <- function(sysmeta) { } -#' Remove all public read access rules from a System Metadata document -#' -#' @param sysmeta (SystemMetadata) The System Metadata document. to change. -#' -#' @return (SystemMetadata) The potentially modified System Metadata document. -#' -#' @examples -#' library(datapack) -#' sm <- new("SystemMetadata") -#' sm <- addAccessRule(sm, "public", "read") -#' sm@accessPolicy -#' sm <- remove_public_access(sm) -#' sm@accessPolicy -remove_public_access <- function(sysmeta) { - if (!inherits(sysmeta, "SystemMetadata")) { - stop(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) - } - - sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)),] - - sysmeta -} - - #' Adds access to the given System Metadata for the arctic-data-admins group #' #' @param sysmeta From 6b8ff8c0429ee9b45c87e9a3b7b61d182788b664 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 Feb 2018 16:00:54 -0900 Subject: [PATCH 078/318] Guard mdq_run's example in a dontrun since it's slow --- R/quality.R | 2 ++ man/mdq_run.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/quality.R b/R/quality.R index 82e7ac2..21f590f 100644 --- a/R/quality.R +++ b/R/quality.R @@ -7,12 +7,14 @@ #' @export #' #' @examples +#' \dontrun{ #' # Check an EML document you are authoring #' library(EML) #' mdq_run(new("eml")) #' #' # Check an EML document that is saved to disk #' mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML")) +#' } mdq_run <- function(document, suite_id = "arctic.data.center.suite.1") { if (is(document, "eml")) { metadata_path <- tempfile() diff --git a/man/mdq_run.Rd b/man/mdq_run.Rd index 6f97bc3..25afd34 100644 --- a/man/mdq_run.Rd +++ b/man/mdq_run.Rd @@ -18,6 +18,7 @@ mdq_run(document, suite_id = "arctic.data.center.suite.1") Score a metadata document against a MetaDIG Suite } \examples{ +\dontrun{ # Check an EML document you are authoring library(EML) mdq_run(new("eml")) @@ -25,3 +26,4 @@ mdq_run(new("eml")) # Check an EML document that is saved to disk mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML")) } +} From 4e96e345b866f9632ca36fc822e5a7e11a95a03a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 Feb 2018 16:01:01 -0900 Subject: [PATCH 079/318] Redocument package --- man/eml_address.Rd | 2 +- man/remove_public_access.Rd | 25 ------------------------- 2 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 man/remove_public_access.Rd diff --git a/man/eml_address.Rd b/man/eml_address.Rd index b8d2246..9cceeed 100644 --- a/man/eml_address.Rd +++ b/man/eml_address.Rd @@ -22,5 +22,5 @@ eml_address(delivery_points, city, administrative_area, postal_code) Create an EML address element. } \examples{ -NCEASadd <- eml_address("735 State St #300", "Santa Barbara," "CA", "93101") +NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") } diff --git a/man/remove_public_access.Rd b/man/remove_public_access.Rd deleted file mode 100644 index 38c9621..0000000 --- a/man/remove_public_access.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{remove_public_access} -\alias{remove_public_access} -\title{Remove all public read access rules from a System Metadata document} -\usage{ -remove_public_access(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The System Metadata document. to change.} -} -\value{ -(SystemMetadata) The potentially modified System Metadata document. -} -\description{ -Remove all public read access rules from a System Metadata document -} -\examples{ -library(datapack) -sm <- new("SystemMetadata") -sm <- addAccessRule(sm, "public", "read") -sm@accessPolicy -sm <- remove_public_access(sm) -sm@accessPolicy -} From d6cd3a4b1cfe40c27a50014458849cdf94ae9803 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 Feb 2018 16:21:09 -0900 Subject: [PATCH 080/318] Add todos to maintenance.md --- MAINTENANCE.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTENANCE.md b/MAINTENANCE.md index 2988764..82bdc9a 100644 --- a/MAINTENANCE.md +++ b/MAINTENANCE.md @@ -1,5 +1,7 @@ # Maintenance +TODO: Add reasons for these things! + This document should serve as a guide for new maintainers of this package. It's a work in progress so expect it to change and, hopefully, get better and more complete over time. @@ -41,4 +43,7 @@ Since this package isn't on CRAN, we can release it as much as we like and we're ## Pull Requests +- Code style +- Commit style + TODO From eda10dac88249d7060d59537bab1a1cb308fc55a Mon Sep 17 00:00:00 2001 From: eodean Date: Mon, 5 Feb 2018 09:23:42 -0800 Subject: [PATCH 081/318] #59 --- NAMESPACE | 4 ++ man/create_dummy_attributes_dataframe.Rd | 19 +++++++++ ...create_dummy_enumeratedDomain_dataframe.Rd | 14 +++++++ man/pid_to_eml_datatable.Rd | 41 +++++++++++++++++++ man/sysmeta_to_other_entity.Rd | 2 - 5 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 man/create_dummy_attributes_dataframe.Rd create mode 100644 man/create_dummy_enumeratedDomain_dataframe.Rd create mode 100644 man/pid_to_eml_datatable.Rd diff --git a/NAMESPACE b/NAMESPACE index 8087915..7f91353 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,8 @@ export(change_eml_name) export(clear_methods) export(clear_replication_policy) export(convert_iso_to_eml) +export(create_dummy_attributes_dataframe) +export(create_dummy_enumeratedDomain_dataframe) export(create_dummy_metadata) export(create_dummy_object) export(create_dummy_package) @@ -74,6 +76,7 @@ export(new_uuid) export(object_exists) export(parse_resource_map) export(path_join) +export(pid_to_eml_datatable) export(pid_to_eml_other_entity) export(pid_to_eml_physical) export(publish_object) @@ -93,6 +96,7 @@ export(show_random_dataset) export(substitute_eml_party) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) +export(sysmeta_to_other_entity) export(theme_packages) export(update_object) export(update_package) diff --git a/man/create_dummy_attributes_dataframe.Rd b/man/create_dummy_attributes_dataframe.Rd new file mode 100644 index 0000000..3f7ba16 --- /dev/null +++ b/man/create_dummy_attributes_dataframe.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{create_dummy_attributes_dataframe} +\alias{create_dummy_attributes_dataframe} +\title{Create dummy attributes data frame} +\usage{ +create_dummy_attributes_dataframe(numberAttributes, factors = NULL) +} +\arguments{ +\item{numberAttributes}{(integer) Number of attributes to be created in the table} + +\item{factors}{(character) Optional vector of factor names to include.} +} +\value{ +dataframe +} +\description{ +Create dummy attributes data frame +} diff --git a/man/create_dummy_enumeratedDomain_dataframe.Rd b/man/create_dummy_enumeratedDomain_dataframe.Rd new file mode 100644 index 0000000..ca7fd5b --- /dev/null +++ b/man/create_dummy_enumeratedDomain_dataframe.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{create_dummy_enumeratedDomain_dataframe} +\alias{create_dummy_enumeratedDomain_dataframe} +\title{Create dummy enumeratedDomain data frame} +\usage{ +create_dummy_enumeratedDomain_dataframe(factors) +} +\arguments{ +\item{factors}{(character) Optional vector of factor names to include.} +} +\description{ +Create dummy enumeratedDomain data frame +} diff --git a/man/pid_to_eml_datatable.Rd b/man/pid_to_eml_datatable.Rd new file mode 100644 index 0000000..9602471 --- /dev/null +++ b/man/pid_to_eml_datatable.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{pid_to_eml_datatable} +\alias{pid_to_eml_datatable} +\title{Create an EML dataTable object for a given PID. +This function generates an attributeList object and physical and constructs a dataTable.} +\usage{ +pid_to_eml_datatable(mn, pid, attributes, factors = NULL, name = NULL, + description = NULL) +} +\arguments{ +\item{mn}{(MNode) Member Node where the PID is associated with an object.} + +\item{attributes}{(data.frame) Data frame of attributes.} + +\item{name}{(character) Optional field to specify entityName, otherwise will be extracted from system metadata.} + +\item{description}{(character) Optional field to specify entityDescription.} + +\item{pids}{(character) The PID of the object to create the sub-tree for.} + +\item{enumeratedValues}{(data.frame) Data frame of enumerated attribute values.} +} +\value{ +(dataTable) The dataTable object +} +\description{ +Create an EML dataTable object for a given PID. +This function generates an attributeList object and physical and constructs a dataTable. +} +\examples{ +\dontrun{ +# Generate a dataTable for a given pid +pid <- "urn:uuid:xxxx" +attributes <- data.frame(attributeName = ..., attributeDefinition = ..., ...) +factors <- data.frame(attributeName = ..., code = ..., definition = ...) +name <- "1234.csv" +description <- "A description of this entity." +dataTable <- pid_to_eml_datatable(mn, pid, attributes, factors, name, description) +} +} diff --git a/man/sysmeta_to_other_entity.Rd b/man/sysmeta_to_other_entity.Rd index a798f4a..d3130ee 100644 --- a/man/sysmeta_to_other_entity.Rd +++ b/man/sysmeta_to_other_entity.Rd @@ -18,8 +18,6 @@ sysmeta_to_other_entity(sysmeta) } \value{ - - } \description{ This function is deprecated. See \link{pid_to_other_eml_entity}. From 64bbcb6e4c41bf38b17f26ac2585dce9a5d1a7bf Mon Sep 17 00:00:00 2001 From: eodean Date: Mon, 5 Feb 2018 10:57:25 -0800 Subject: [PATCH 082/318] Fixes for #59 after running tests. --- NAMESPACE | 45 ------------------------------------- R/eml.R | 24 +++++++++++--------- R/helpers.R | 7 +++--- inst/example-eml.xml | 19 ++++++++-------- man/pid_to_eml_datatable.Rd | 20 ++++++++--------- tests/testthat/test_eml.R | 6 ++--- 6 files changed, 39 insertions(+), 82 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7f91353..764fab8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(add_access_rules) -export(add_additional_identifiers) -export(add_admin_group_access) export(add_methods_step) -export(add_string_to_title) -export(change_eml_name) export(clear_methods) -export(clear_replication_policy) export(convert_iso_to_eml) export(create_dummy_attributes_dataframe) export(create_dummy_enumeratedDomain_dataframe) @@ -15,11 +9,7 @@ export(create_dummy_metadata) export(create_dummy_object) export(create_dummy_package) export(create_dummy_parent_package) -export(create_from_folder) -export(create_object) export(create_resource_map) -export(create_sysmeta) -export(determine_child_pids) export(eml_abstract) export(eml_add_entities) export(eml_address) @@ -33,57 +23,29 @@ export(eml_personnel) export(eml_project) export(eml_validate_attributes) export(env_get) -export(filter_obsolete_pids) -export(filter_packaging_statements) export(find_newest_object) -export(find_newest_resource_map) export(format_eml) export(format_iso) -export(generate_resource_map) -export(generate_resource_map_pid) export(get_all_versions) -export(get_current_version) -export(get_identifier) -export(get_latest_release) export(get_mn_base_url) export(get_ncdf4_attributes) -export(get_netcdf_format_id) -export(get_or_create_pid) export(get_package) export(get_token) -export(get_token_subject) export(guess_format_id) -export(insert_file) -export(insert_package) -export(inv_add_extra_columns) -export(inv_add_parent_package_column) -export(inv_init) -export(inv_load_checksums) -export(inv_load_dois) -export(inv_load_files) -export(inv_load_identifiers) -export(inv_load_sizes) -export(inv_update) export(is_authorized) -export(is_format_id) export(is_obsolete) -export(is_resource_map) export(is_token_expired) export(is_token_set) -export(log_message) export(mdq_run) export(new_uuid) export(object_exists) export(parse_resource_map) -export(path_join) export(pid_to_eml_datatable) export(pid_to_eml_other_entity) export(pid_to_eml_physical) export(publish_object) export(publish_update) export(remove_public_read) -export(replace_package_id) -export(replace_subject) export(set_abstract) export(set_access) export(set_file_name) @@ -92,19 +54,12 @@ export(set_public_read) export(set_rights_and_access) export(set_rights_holder) export(show_indexing_status) -export(show_random_dataset) -export(substitute_eml_party) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) export(sysmeta_to_other_entity) -export(theme_packages) export(update_object) -export(update_package) export(update_resource_map) -export(validate_environment) -export(validate_inventory) export(view_profile) -export(warn_current_version) import(EML) import(XML) import(dataone) diff --git a/R/eml.R b/R/eml.R index 04da982..2e7321a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -30,14 +30,14 @@ pid_to_eml_other_entity <- function(mn, pids) { } #' Create an EML dataTable object for a given PID. -#' This function generates an attributeList object and physical and constructs a dataTable. +#' This function generates an attributeList and physical and constructs a dataTable. #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PID of the object to create the sub-tree for. +#' @param pid (character) The PID of the object to create the dataTable for. #' @param attributes (data.frame) Data frame of attributes. -#' @param enumeratedValues (data.frame) Data frame of enumerated attribute values. +#' @param factors (data.frame) Data frame of enumerated attribute values (factors). #' @param name (character) Optional field to specify entityName, otherwise will be extracted from system metadata. -#' @param description (character) Optional field to specify entityDescription. +#' @param description (character) Optional field to specify entityDescription, otherwise will match name. #' #' @return (dataTable) The dataTable object #' @export @@ -45,17 +45,15 @@ pid_to_eml_other_entity <- function(mn, pids) { #' @examples #' \dontrun{ #' # Generate a dataTable for a given pid -#' pid <- "urn:uuid:xxxx" -#' attributes <- data.frame(attributeName = ..., attributeDefinition = ..., ...) -#' factors <- data.frame(attributeName = ..., code = ..., definition = ...) +#' attributes <- create_dummy_attributes_dataframe(10) #' name <- "1234.csv" #' description <- "A description of this entity." -#' dataTable <- pid_to_eml_datatable(mn, pid, attributes, factors, name, description) +#' dataTable <- pid_to_eml_datatable(mn, pid, attributes, name=name, description=description) #' } pid_to_eml_datatable <- function(mn, pid, attributes, factors=NULL, name=NULL, description=NULL) { stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - all(nchar(pids)) > 0) + stopifnot(is.character(pid), + nchar(pid) > 0) stopifnot(is.data.frame(attributes)) if (is.null(factors)) { @@ -66,7 +64,11 @@ pid_to_eml_datatable <- function(mn, pid, attributes, factors=NULL, name=NULL, d } if (is.null(name)) { - name <- getSystemMetadata(pid)@fileName + name <- getSystemMetadata(mn, pid)@fileName + } + + if (is.null(description)) { + description <- name } stopifnot(eml_validate_attributes(attributes)) diff --git a/R/helpers.R b/R/helpers.R index 76948fe..5d27539 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -259,9 +259,10 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors=NULL) { #' #' @examples create_dummy_enumeratedDomain_dataframe <- function(factors) { - df <- data.frame(attributeName = rep(factors, 4), - code = rep(seq(1:4), 4), - definition = rep(factors, 4)) + names <- rep(factors, 4) + df <- data.frame(attributeName = names, + code = paste0(names, 1:length(names)), + definition = names) df } diff --git a/inst/example-eml.xml b/inst/example-eml.xml index c936c5f..677aa49 100644 --- a/inst/example-eml.xml +++ b/inst/example-eml.xml @@ -1,4 +1,5 @@ - + + some-alternate-identifier-string arcticdata R package test @@ -68,24 +69,24 @@ NSF Award XXXXXX - - NA + + file2f9a550fb9b5b - NA - 27 - 5179254207aed0f39ded1add3d9bab3ea0e10b084c2c194ccf0a033b8f5e7789 + file2f9a550fb9b5b + 52 + da257f8aeced0fb2b9755b704e53ea5aaf9983ae - application/octet-stream + text/plain - ecogrid://knb/urn:uuid:89bec5d0-26db-48ac-ae54-e1b4c999c456 + https://cn.dataone.org/cn/v2/resolve/urn:uuid:200afcb5-8e26-4315-a875-48bbf0703cb5 Other - \ No newline at end of file + diff --git a/man/pid_to_eml_datatable.Rd b/man/pid_to_eml_datatable.Rd index 9602471..e4c9fa6 100644 --- a/man/pid_to_eml_datatable.Rd +++ b/man/pid_to_eml_datatable.Rd @@ -3,7 +3,7 @@ \name{pid_to_eml_datatable} \alias{pid_to_eml_datatable} \title{Create an EML dataTable object for a given PID. -This function generates an attributeList object and physical and constructs a dataTable.} +This function generates an attributeList and physical and constructs a dataTable.} \usage{ pid_to_eml_datatable(mn, pid, attributes, factors = NULL, name = NULL, description = NULL) @@ -11,31 +11,29 @@ pid_to_eml_datatable(mn, pid, attributes, factors = NULL, name = NULL, \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} -\item{attributes}{(data.frame) Data frame of attributes.} +\item{pid}{(character) The PID of the object to create the dataTable for.} -\item{name}{(character) Optional field to specify entityName, otherwise will be extracted from system metadata.} +\item{attributes}{(data.frame) Data frame of attributes.} -\item{description}{(character) Optional field to specify entityDescription.} +\item{factors}{(data.frame) Data frame of enumerated attribute values (factors).} -\item{pids}{(character) The PID of the object to create the sub-tree for.} +\item{name}{(character) Optional field to specify entityName, otherwise will be extracted from system metadata.} -\item{enumeratedValues}{(data.frame) Data frame of enumerated attribute values.} +\item{description}{(character) Optional field to specify entityDescription, otherwise will match name.} } \value{ (dataTable) The dataTable object } \description{ Create an EML dataTable object for a given PID. -This function generates an attributeList object and physical and constructs a dataTable. +This function generates an attributeList and physical and constructs a dataTable. } \examples{ \dontrun{ # Generate a dataTable for a given pid -pid <- "urn:uuid:xxxx" -attributes <- data.frame(attributeName = ..., attributeDefinition = ..., ...) -factors <- data.frame(attributeName = ..., code = ..., definition = ...) +attributes <- create_dummy_attributes_dataframe(10) name <- "1234.csv" description <- "A description of this entity." -dataTable <- pid_to_eml_datatable(mn, pid, attributes, factors, name, description) +dataTable <- pid_to_eml_datatable(mn, pid, attributes, name=name, description=description) } } diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 6bf1caa..bb65135 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -134,9 +134,9 @@ test_that("a data table can be added from a pid", { doc <- EML::read_eml(eml_path) factors <- c("factor 1", "factor 2") - doc@dataset@dataTable <- pid_to_eml_datatable(mn, pid, - create_dummy_attributes_dataframe(10, factors), - create_dummy_enumeratedDomain_dataframe(factors)) + dummy_attributes <- create_dummy_attributes_dataframe(10, factors) + dummy_enumeratedDomain <- create_dummy_enumeratedDomain_dataframe(factors) + doc@dataset@dataTable <- as(list(pid_to_eml_datatable(mn, pid, dummy_attributes, dummy_enumeratedDomain)), "ListOfdataTable") testthat::expect_length(doc@dataset@dataTable, 1) From 6739131437837362e2710d1919a6be8f2682844b Mon Sep 17 00:00:00 2001 From: eodean Date: Mon, 5 Feb 2018 11:26:58 -0800 Subject: [PATCH 083/318] Fixing some merge conflicts that recreated sysmeta_to_other_entity. --- NAMESPACE | 1 - R/eml.R | 26 -------------------------- 2 files changed, 27 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 764fab8..16e4129 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,7 +56,6 @@ export(set_rights_holder) export(show_indexing_status) export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) -export(sysmeta_to_other_entity) export(update_object) export(update_resource_map) export(view_profile) diff --git a/R/eml.R b/R/eml.R index 2e7321a..ac81835 100644 --- a/R/eml.R +++ b/R/eml.R @@ -84,19 +84,6 @@ pid_to_eml_datatable <- function(mn, pid, attributes, factors=NULL, name=NULL, d dataTable } -#' This function is deprecated. See \link{pid_to_other_eml_entity}. -#' -#' @param mn (MNode) -#' @param pids (character) -#' -#' @return -#' @export -sysmeta_to_other_entity <- function(mn, pids) { - .Deprecated("pid_to_other_eml_entity", - package = "arcticdtautils", - old = "pid_to_other_entity") -} - #' Create EML physical objects for the given set of PIDs #' #' Note this is a wrapper around sysmeta_to_eml_physical which handles the task of @@ -164,19 +151,6 @@ sysmeta_to_eml_other_entity <- function(sysmeta) { lapply(sysmeta, work) } - -#' This function is deprecated. See \link{sysmeta_to_eml_other_entity}. -#' -#' @param sysmeta (SystemMetadata) -#' -#' @return -#' -sysmeta_to_other_entity <- function(sysmeta) { - .Deprecated("sysmeta_to_eml_other_entity", - package = "arcticdtautils", - old = "sysmeta_to_other_entity") -} - #' Create an EML physical object from System Metadata #' #' This function creates a pre-canned EML physical object from what's in the From 242041001c7f09eb7208c44b4f852dd0a035fa6f Mon Sep 17 00:00:00 2001 From: eodean Date: Tue, 6 Feb 2018 11:29:14 -0800 Subject: [PATCH 084/318] Fixing a merge thing... --- inst/example-eml.xml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/inst/example-eml.xml b/inst/example-eml.xml index 677aa49..c936c5f 100644 --- a/inst/example-eml.xml +++ b/inst/example-eml.xml @@ -1,5 +1,4 @@ - - + some-alternate-identifier-string arcticdata R package test @@ -69,24 +68,24 @@ NSF Award XXXXXX - - file2f9a550fb9b5b + + NA - file2f9a550fb9b5b - 52 - da257f8aeced0fb2b9755b704e53ea5aaf9983ae + NA + 27 + 5179254207aed0f39ded1add3d9bab3ea0e10b084c2c194ccf0a033b8f5e7789 - text/plain + application/octet-stream - https://cn.dataone.org/cn/v2/resolve/urn:uuid:200afcb5-8e26-4315-a875-48bbf0703cb5 + ecogrid://knb/urn:uuid:89bec5d0-26db-48ac-ae54-e1b4c999c456 Other - + \ No newline at end of file From d212ad868851396c2550c5e3fda1e6f1d9641e46 Mon Sep 17 00:00:00 2001 From: eodean Date: Tue, 6 Feb 2018 11:36:48 -0800 Subject: [PATCH 085/318] Somehow deleted sysmeta_to_other_entity in a merge. --- R/eml.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/eml.R b/R/eml.R index ac81835..dc08b67 100644 --- a/R/eml.R +++ b/R/eml.R @@ -151,6 +151,19 @@ sysmeta_to_eml_other_entity <- function(sysmeta) { lapply(sysmeta, work) } + +#' This function is deprecated. See \link{sysmeta_to_eml_other_entity}. +#' +#' @param sysmeta (SystemMetadata) +#' +#' @return +#' +sysmeta_to_other_entity <- function(sysmeta) { + .Deprecated("sysmeta_to_eml_other_entity", + package = "arcticdtautils", + old = "sysmeta_to_other_entity") +} + #' Create an EML physical object from System Metadata #' #' This function creates a pre-canned EML physical object from what's in the From eb726aa1abc9348f91efc60d903816a2219e855c Mon Sep 17 00:00:00 2001 From: eodean Date: Tue, 6 Feb 2018 11:41:34 -0800 Subject: [PATCH 086/318] Not optional. --- R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/helpers.R b/R/helpers.R index 5d27539..9ff5789 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -252,7 +252,7 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors=NULL) { #' Create dummy enumeratedDomain data frame #' -#' @param factors (character) Optional vector of factor names to include. +#' @param factors (character) Vector of factor names to include. #' #' @return #' @export From e8f59be7dc7ef6246016b8d8aff9ec2f3b7016d2 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Tue, 6 Feb 2018 13:57:40 -0800 Subject: [PATCH 087/318] fixing rd errors --- R/access.R | 3 ++- R/dataone.R | 3 ++- R/eml.R | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/access.R b/R/access.R index 3585686..65538a2 100644 --- a/R/access.R +++ b/R/access.R @@ -20,7 +20,8 @@ #'\dontrun{ #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' set_rights_holder(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX") #'} set_rights_holder <- function(mn, pids, subject) { diff --git a/R/dataone.R b/R/dataone.R index 663a2e5..094535c 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -137,7 +137,8 @@ get_mn_base_url <- function(mn) { #'\dontrun{ #' cn <- CNode('STAGING2') #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' is_authorized(mn, pids, "write") #'} is_authorized <- function(node, ids, action) { diff --git a/R/eml.R b/R/eml.R index c8bbd18..7707ef2 100644 --- a/R/eml.R +++ b/R/eml.R @@ -273,7 +273,8 @@ get_doc_id <- function(sysmeta) { #' @examples #' \dontrun{ #' eml <- read_eml("~/Documents/metadata.xml") -#' eml <- add_methods_step(eml, "Field Sampling", "Samples were collected using a niskin water sampler.") +#' eml <- add_methods_step(eml, "Field Sampling", "Samples were +#' collected using a niskin water sampler.") #' } add_methods_step <- function(doc, title, description) { stopifnot(is(doc, "eml")) @@ -496,12 +497,12 @@ eml_associated_party <- function(...) { #' See \code{\link{eml_party}} for details. #' #' @param ... Arguments passed on to eml_party -#' +#' @param role (character) Personnel role, eg "principalInvestigator" #' @return (personnel) The new personnel #' @export #' #' @examples -#' eml_personnel("test", "user", email = "test@user.com", role = "Principal Investigator") +#' eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator") eml_personnel <- function(role = NULL, ...) { if(is.null(role)) { stop(call. = FALSE, From be8d4096eb77480fa5aa6f6a77f4b3cfc02d7439 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Tue, 6 Feb 2018 14:01:41 -0800 Subject: [PATCH 088/318] fixing too long lines --- R/editing.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/editing.R b/R/editing.R index 0692088..d2cf9d3 100644 --- a/R/editing.R +++ b/R/editing.R @@ -562,7 +562,8 @@ publish_update <- function(mn, #' mn <- getMNode(cn,"urn:node:mnTestKNB") #' #' meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' -#' dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#' dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', +#' 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') #' #' #' create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) @@ -654,7 +655,8 @@ create_resource_map <- function(mn, #' #' rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" #' meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" -#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' #' #' rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) From e9ea2b26cd85ba80b800e3c352ea87acb4c6c6ee Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Tue, 6 Feb 2018 14:26:21 -0800 Subject: [PATCH 089/318] more fixing of RD warnings --- R/access.R | 18 ++++++++++++------ R/editing.R | 3 ++- R/eml.R | 12 +++++------- R/interactive.R | 3 +++ R/inventory.R | 2 +- R/modify_metadata.R | 4 +--- R/packaging.R | 2 +- R/util.R | 5 +++-- man/add_methods_step.Rd | 3 ++- man/create_resource_map.Rd | 3 ++- man/eml_personnel.Rd | 4 +++- man/eml_project.Rd | 8 ++++---- man/find_newest_object.Rd | 2 +- man/fix_bad_topic.Rd | 5 +---- man/insert_package.Rd | 2 -- man/inv_load_identifiers.Rd | 2 +- man/is_authorized.Rd | 3 ++- man/object_exists.Rd | 3 ++- man/publish_update.Rd | 3 ++- man/remove_public_read.Rd | 3 ++- man/set_access.Rd | 6 ++++-- man/set_public_read.Rd | 3 ++- man/set_rights_and_access.Rd | 6 ++++-- man/set_rights_holder.Rd | 3 ++- man/sysmeta_to_other_entity.Rd | 5 +---- man/update_package.Rd | 2 ++ man/update_resource_map.Rd | 3 ++- man/view_packages.Rd | 3 +++ 28 files changed, 70 insertions(+), 51 deletions(-) diff --git a/R/access.R b/R/access.R index 65538a2..41d1a2c 100644 --- a/R/access.R +++ b/R/access.R @@ -101,8 +101,10 @@ set_rights_holder <- function(mn, pids, subject) { #'\dontrun{ #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -#' set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) #'} set_access <- function(mn, pids, subjects, permissions=c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { @@ -172,7 +174,8 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang #'\dontrun{ #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' set_public_read(mn, pids) #'} set_public_read <- function(mn, pids) { @@ -190,7 +193,8 @@ set_public_read <- function(mn, pids) { #'\dontrun{ #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' remove_public_read(mn, pids) #'} remove_public_read <- function(mn, pids) { @@ -276,8 +280,10 @@ remove_public_read <- function(mn, pids) { #'\dontrun{ #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -#' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) #'} set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { diff --git a/R/editing.R b/R/editing.R index d2cf9d3..4af200f 100644 --- a/R/editing.R +++ b/R/editing.R @@ -258,7 +258,8 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' #' rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" #' meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" -#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' #' meta_path <- "/home/Documents/myMetadata.xml" #' diff --git a/R/eml.R b/R/eml.R index 7707ef2..92a4595 100644 --- a/R/eml.R +++ b/R/eml.R @@ -99,9 +99,7 @@ sysmeta_to_eml_other_entity <- function(sysmeta) { #' This function is deprecated. See \link{sysmeta_to_eml_other_entity}. #' -#' @param sysmeta (SystemMetadata) -#' -#' @return +#' @param sysmeta (SystemMetadata) A SystemMetadata object #' sysmeta_to_other_entity <- function(sysmeta) { .Deprecated("sysmeta_to_eml_other_entity", @@ -566,10 +564,10 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' @export #' #' @examples -#' eml_project("Some title", -#' c(eml_personnel("Bryce", "Mecum", role = "Test")), -#' c("Abstract paragraph 1", "Abstract paragraph 2"), -#' "#1 Best Scientist Award") +#' proj <- eml_project("Some title", +#' c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), +#' c("Abstract paragraph 1", "Abstract paragraph 2"), +#' "Funding Agency: Award Number 12345") eml_project <- function(title, personnelList, abstract = NULL, diff --git a/R/interactive.R b/R/interactive.R index fac5d55..828d651 100644 --- a/R/interactive.R +++ b/R/interactive.R @@ -1,7 +1,10 @@ #' interactive.R #' Author: Bryce Mecum #' +#' #' Functions for interactive viewing of the Inventory and other objects. +#' +#' @param inventory (character) An inventory view_packages <- function(inventory) { stopifnot(is.data.frame(inventory), diff --git a/R/inventory.R b/R/inventory.R index 5163a86..7a6074c 100644 --- a/R/inventory.R +++ b/R/inventory.R @@ -217,7 +217,7 @@ inv_load_dois <- function(inventory, path) { #' removes the column 'identifier' from inventory before doing a #' left join. #' -#' @param path (character) Path(s) to files containing identifiers. +#' @param paths (character) Path(s) to files containing identifiers. #' @param inventory (data.frame) An inventory. #' #' @return (data.frame) An inventory. diff --git a/R/modify_metadata.R b/R/modify_metadata.R index cec7b50..7c43eca 100644 --- a/R/modify_metadata.R +++ b/R/modify_metadata.R @@ -184,9 +184,7 @@ fix_bad_enum <- function(path) { #' oceans #' #' -#' @param path -#' -#' @return +#' @param path (character) Path #' fix_bad_topic <- function(path) { diff --git a/R/packaging.R b/R/packaging.R index cf962eb..ba35f52 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -83,7 +83,6 @@ insert_file <- function(inventory, file, env=NULL) { #' #' @param inventory (data.frame) An Inventory. #' @param package (character) The package identifier. -#' @param child_pids (character) Resource Map PIDs for child Data Packages. #' @param env (list) Environment variables. #' #' @return A list containing PIDs and whether objects were inserted. (list) @@ -782,6 +781,7 @@ determine_child_pids <- function(inventory, package) { #' #' @param inventory (data.frame) An inventory. #' @param package (character) The package identifier. +#' @param env (character) Environment #' #' @return TRUE or FALSE depending on sucess (logical) diff --git a/R/util.R b/R/util.R index 6611e19..4a0a375 100644 --- a/R/util.R +++ b/R/util.R @@ -244,7 +244,8 @@ log_message <- function(message=NULL) { #' # Set environment #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' #' object_exists(mn, pids) #' } @@ -941,7 +942,7 @@ find_newest_resource_map <- function(node, pid, rows = 1000) { #' Find the newest (by dateUploaded) object within a given set of objects. #' #' @param node (MNode | CNode) The node to query -#' @param identiifers (character) One or more identifiers +#' @param identifers (character) One or more identifiers #' @param rows (numeric) Optional. Specify the size of the query result set. #' #' @return (character) The PID of the newest object. In the case of a tie (very diff --git a/man/add_methods_step.Rd b/man/add_methods_step.Rd index 033d64b..4ac43fb 100644 --- a/man/add_methods_step.Rd +++ b/man/add_methods_step.Rd @@ -22,6 +22,7 @@ Adds a step to the methods document \examples{ \dontrun{ eml <- read_eml("~/Documents/metadata.xml") -eml <- add_methods_step(eml, "Field Sampling", "Samples were collected using a niskin water sampler.") +eml <- add_methods_step(eml, "Field Sampling", "Samples were +collected using a niskin water sampler.") } } diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index 0a54b53..d012e11 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -41,7 +41,8 @@ cn <- CNode('STAGING2') mn <- getMNode(cn,"urn:node:mnTestKNB") meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' -dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', +'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) diff --git a/man/eml_personnel.Rd b/man/eml_personnel.Rd index a749e9d..2fb6acd 100644 --- a/man/eml_personnel.Rd +++ b/man/eml_personnel.Rd @@ -7,6 +7,8 @@ eml_personnel(role = NULL, ...) } \arguments{ +\item{role}{(character) Personnel role, eg "principalInvestigator"} + \item{...}{Arguments passed on to eml_party} } \value{ @@ -16,5 +18,5 @@ eml_personnel(role = NULL, ...) See \code{\link{eml_party}} for details. } \examples{ -eml_personnel("test", "user", email = "test@user.com", role = "Principal Investigator") +eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator") } diff --git a/man/eml_project.Rd b/man/eml_project.Rd index 5795212..fa862ff 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -34,8 +34,8 @@ fully fleshed out. Need to pass these objects in directly if you want to use them. } \examples{ -eml_project("Some title", - c(eml_personnel("Bryce", "Mecum", role = "Test")), - c("Abstract paragraph 1", "Abstract paragraph 2"), - "#1 Best Scientist Award") +proj <- eml_project("Some title", + c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), + c("Abstract paragraph 1", "Abstract paragraph 2"), + "Funding Agency: Award Number 12345") } diff --git a/man/find_newest_object.Rd b/man/find_newest_object.Rd index 0812cdb..5655e69 100644 --- a/man/find_newest_object.Rd +++ b/man/find_newest_object.Rd @@ -11,7 +11,7 @@ find_newest_object(node, identifiers, rows = 1000) \item{rows}{(numeric) Optional. Specify the size of the query result set.} -\item{identiifers}{(character) One or more identifiers} +\item{identifers}{(character) One or more identifiers} } \value{ (character) The PID of the newest object. In the case of a tie (very diff --git a/man/fix_bad_topic.Rd b/man/fix_bad_topic.Rd index 82fded5..2d692d0 100644 --- a/man/fix_bad_topic.Rd +++ b/man/fix_bad_topic.Rd @@ -8,10 +8,7 @@ inside a single topicCategory element.} fix_bad_topic(path) } \arguments{ -\item{path}{} -} -\value{ - +\item{path}{(character) Path} } \description{ Example: diff --git a/man/insert_package.Rd b/man/insert_package.Rd index 5c9ec1d..5a907ad 100644 --- a/man/insert_package.Rd +++ b/man/insert_package.Rd @@ -12,8 +12,6 @@ insert_package(inventory, package, env = NULL) \item{package}{(character) The package identifier.} \item{env}{(list) Environment variables.} - -\item{child_pids}{(character) Resource Map PIDs for child Data Packages.} } \value{ A list containing PIDs and whether objects were inserted. (list) diff --git a/man/inv_load_identifiers.Rd b/man/inv_load_identifiers.Rd index e1bab1f..655bf78 100644 --- a/man/inv_load_identifiers.Rd +++ b/man/inv_load_identifiers.Rd @@ -11,7 +11,7 @@ inv_load_identifiers(inventory, paths) \arguments{ \item{inventory}{(data.frame) An inventory.} -\item{path}{(character) Path(s) to files containing identifiers.} +\item{paths}{(character) Path(s) to files containing identifiers.} } \value{ (data.frame) An inventory. diff --git a/man/is_authorized.Rd b/man/is_authorized.Rd index 6831971..ff82f15 100644 --- a/man/is_authorized.Rd +++ b/man/is_authorized.Rd @@ -23,7 +23,8 @@ Check if the user has authorization to perform an action on an object. \dontrun{ cn <- CNode('STAGING2') mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") is_authorized(mn, pids, "write") } } diff --git a/man/object_exists.Rd b/man/object_exists.Rd index 0e756ea..5b75c2b 100644 --- a/man/object_exists.Rd +++ b/man/object_exists.Rd @@ -23,7 +23,8 @@ provided member node. # Set environment cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") object_exists(mn, pids) } diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 747b10e..a3dcde2 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -74,7 +74,8 @@ mn <- getMNode(cn,"urn:node:mnTestKNB") rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" -data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") meta_path <- "/home/Documents/myMetadata.xml" diff --git a/man/remove_public_read.Rd b/man/remove_public_read.Rd index 1b27566..52045a8 100644 --- a/man/remove_public_read.Rd +++ b/man/remove_public_read.Rd @@ -18,7 +18,8 @@ Remove public access on a set of objects. \dontrun{ cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") remove_public_read(mn, pids) } } diff --git a/man/set_access.Rd b/man/set_access.Rd index f6e46b1..421cd2b 100644 --- a/man/set_access.Rd +++ b/man/set_access.Rd @@ -27,7 +27,9 @@ and moves on. System Metadata are only updated when a change was needed. \dontrun{ cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", + permissions = c("read", "write", "changePermission")) } } diff --git a/man/set_public_read.Rd b/man/set_public_read.Rd index efa39e6..5adbd8c 100644 --- a/man/set_public_read.Rd +++ b/man/set_public_read.Rd @@ -18,7 +18,8 @@ Set public access on a set of objects. \dontrun{ cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") set_public_read(mn, pids) } } diff --git a/man/set_rights_and_access.Rd b/man/set_rights_and_access.Rd index b821034..31028d0 100644 --- a/man/set_rights_and_access.Rd +++ b/man/set_rights_and_access.Rd @@ -29,7 +29,9 @@ needed. \dontrun{ cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", permissions = c("read", "write", "changePermission")) +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", + permissions = c("read", "write", "changePermission")) } } diff --git a/man/set_rights_holder.Rd b/man/set_rights_holder.Rd index c19d5db..fb0e480 100644 --- a/man/set_rights_holder.Rd +++ b/man/set_rights_holder.Rd @@ -26,7 +26,8 @@ the provided system metadata document on the given Member Node. \dontrun{ cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") -pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") set_rights_holder(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX") } } diff --git a/man/sysmeta_to_other_entity.Rd b/man/sysmeta_to_other_entity.Rd index f3d62d6..c0b1578 100644 --- a/man/sysmeta_to_other_entity.Rd +++ b/man/sysmeta_to_other_entity.Rd @@ -7,10 +7,7 @@ sysmeta_to_other_entity(sysmeta) } \arguments{ -\item{sysmeta}{(SystemMetadata)} -} -\value{ - +\item{sysmeta}{(SystemMetadata) A SystemMetadata object} } \description{ This function is deprecated. See \link{sysmeta_to_eml_other_entity}. diff --git a/man/update_package.Rd b/man/update_package.Rd index daaee9c..e30259e 100644 --- a/man/update_package.Rd +++ b/man/update_package.Rd @@ -10,6 +10,8 @@ update_package(inventory, package, env = NULL) \item{inventory}{(data.frame) An inventory.} \item{package}{(character) The package identifier.} + +\item{env}{(character) Environment} } \value{ TRUE or FALSE depending on sucess (logical) diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index fb7f1bf..96e4ac4 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -67,7 +67,8 @@ mn <- getMNode(cn,"urn:node:mnTestKNB") rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" -data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) diff --git a/man/view_packages.Rd b/man/view_packages.Rd index 632ac5e..ccde86b 100644 --- a/man/view_packages.Rd +++ b/man/view_packages.Rd @@ -7,6 +7,9 @@ Author: Bryce Mecum } \usage{ view_packages(inventory) } +\arguments{ +\item{inventory}{(character) An inventory} +} \description{ Functions for interactive viewing of the Inventory and other objects. } From 70ca802ee09cc1334f421461c2cbea67654d87f3 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Tue, 6 Feb 2018 14:40:37 -0800 Subject: [PATCH 090/318] more RD fixing --- R/dataone.R | 4 ++-- R/editing.R | 2 +- R/inventory.R | 2 +- R/marking.R | 1 + R/modify_metadata.R | 4 ++-- R/packaging.R | 2 +- R/sysmeta.R | 2 +- R/util.R | 4 ++-- man/add_admin_group_access.Rd | 2 +- man/find_newest_object.Rd | 4 ++-- man/fix_bad_enum.Rd | 2 +- man/generate_resource_map_pid.Rd | 2 +- man/get_mn_base_url.Rd | 2 +- man/inv_load_sizes.Rd | 4 ++-- man/is_token_expired.Rd | 3 +++ man/object_exists.Rd | 2 +- man/publish_object.Rd | 2 ++ man/publish_update.Rd | 2 -- man/test_has_abstract.Rd | 3 +++ man/theme_packages.Rd | 2 ++ 20 files changed, 30 insertions(+), 21 deletions(-) diff --git a/R/dataone.R b/R/dataone.R index 094535c..40ab5c9 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -60,7 +60,7 @@ get_token <- function(node) { #' Determine whether the set token is expired. -#' +#' @param node (character) A member node instance #' @return (logical) #' @export #' @@ -104,7 +104,7 @@ is_token_expired <- function(node) { #' Get the base URL of the Member Node. #' -#' @param mn +#' @param mn (character) A mn instance #' #' @return (character) The URL #' @export diff --git a/R/editing.R b/R/editing.R index 4af200f..6a91ee3 100644 --- a/R/editing.R +++ b/R/editing.R @@ -17,6 +17,7 @@ #' @param pid (character) Optional. The PID to use with the object. #' @param sid (character) Optional. The SID to use with the new object. #' @param clone_pid (character) PID of objet to clone System Metadata from +#' @param public (logical) TRUE/FALSE Whether object should be given public read access. #' #' @import dataone #' @import datapack @@ -243,7 +244,6 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' This applies to the new metadata PID and its resource map and data object. #' access policies are not affected. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. -#' @param parent_data_pids (character) Optional. Data pids of a parent package to be updated. #' @return pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. #' #' @import dataone diff --git a/R/inventory.R b/R/inventory.R index 7a6074c..da090d4 100644 --- a/R/inventory.R +++ b/R/inventory.R @@ -99,7 +99,7 @@ inv_load_files <- function(inventory, path, filter=TRUE) { #' 'size_bytes' from inventory before doing a left join. #' #' @param path (character) Path to a file containing sizes. -#' @param (data.frame) inventory A \code{data.frame}. +#' @param inventory (data.frame) inventory A \code{data.frame}. #' #' @return (data.frame) An inventory #' diff --git a/R/marking.R b/R/marking.R index d8dc40f..c67670b 100644 --- a/R/marking.R +++ b/R/marking.R @@ -32,6 +32,7 @@ #' #' #' @param inventory (data.frame) An Inventory. +#' @param nfiles_cutoff (integer) Number of cutoff files #' #' @return (data.frame) An Inventory. diff --git a/R/modify_metadata.R b/R/modify_metadata.R index 7c43eca..51c011e 100644 --- a/R/modify_metadata.R +++ b/R/modify_metadata.R @@ -21,7 +21,7 @@ # sapply(bad_enums, fix_bad_enums) - +#' @param path (character) a path test_has_abstract <- function(path) { stopifnot(file.exists(path)) @@ -143,7 +143,7 @@ test_has_bad_enum <- function(path) { #' #' 'oceans' != ' oceans ' #' -#' @param path +#' @param path (character) a path #' fix_bad_enum <- function(path) { diff --git a/R/packaging.R b/R/packaging.R index ba35f52..2646795 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -483,7 +483,7 @@ generate_resource_map <- function(metadata_pid, #' Generate a PID for a new resource map by appending "resource_map_" to it. #' -#' @param metadata_pid +#' @param metadata_pid (character) A metadata pid #' generate_resource_map_pid <- function(metadata_pid) { diff --git a/R/sysmeta.R b/R/sysmeta.R index 9e36535..698700a 100644 --- a/R/sysmeta.R +++ b/R/sysmeta.R @@ -33,7 +33,7 @@ add_access_rules <- function(sysmeta) { #' Adds access to the given System Metadata for the arctic-data-admins group #' -#' @param sysmeta +#' @param sysmeta (sysmeta) System Metadata object #' add_admin_group_access <- function(sysmeta) { if (!inherits(sysmeta, "SystemMetadata")) { diff --git a/R/util.R b/R/util.R index 4a0a375..b39f90b 100644 --- a/R/util.R +++ b/R/util.R @@ -234,7 +234,7 @@ log_message <- function(message=NULL) { #' provided member node. #' #' @param node (MNode|CNode) The Node to query. -#' @param pid (character) PID to check the existence of. +#' @param pids (character) PID to check the existence of. #' #' @return (logical) Whether the object exists. #' @export @@ -942,7 +942,7 @@ find_newest_resource_map <- function(node, pid, rows = 1000) { #' Find the newest (by dateUploaded) object within a given set of objects. #' #' @param node (MNode | CNode) The node to query -#' @param identifers (character) One or more identifiers +#' @param identifiers (character) One or more identifiers #' @param rows (numeric) Optional. Specify the size of the query result set. #' #' @return (character) The PID of the newest object. In the case of a tie (very diff --git a/man/add_admin_group_access.Rd b/man/add_admin_group_access.Rd index 767af4c..f2f3faa 100644 --- a/man/add_admin_group_access.Rd +++ b/man/add_admin_group_access.Rd @@ -7,7 +7,7 @@ add_admin_group_access(sysmeta) } \arguments{ -\item{sysmeta}{} +\item{sysmeta}{(sysmeta) System Metadata object} } \description{ Adds access to the given System Metadata for the arctic-data-admins group diff --git a/man/find_newest_object.Rd b/man/find_newest_object.Rd index 5655e69..ade4b1f 100644 --- a/man/find_newest_object.Rd +++ b/man/find_newest_object.Rd @@ -9,9 +9,9 @@ find_newest_object(node, identifiers, rows = 1000) \arguments{ \item{node}{(MNode | CNode) The node to query} -\item{rows}{(numeric) Optional. Specify the size of the query result set.} +\item{identifiers}{(character) One or more identifiers} -\item{identifers}{(character) One or more identifiers} +\item{rows}{(numeric) Optional. Specify the size of the query result set.} } \value{ (character) The PID of the newest object. In the case of a tie (very diff --git a/man/fix_bad_enum.Rd b/man/fix_bad_enum.Rd index 711e7ea..e170632 100644 --- a/man/fix_bad_enum.Rd +++ b/man/fix_bad_enum.Rd @@ -7,7 +7,7 @@ fix_bad_enum(path) } \arguments{ -\item{path}{} +\item{path}{(character) a path} } \description{ This is the case where the ISO schema says what's inside a diff --git a/man/generate_resource_map_pid.Rd b/man/generate_resource_map_pid.Rd index 1ba3679..b22d24c 100644 --- a/man/generate_resource_map_pid.Rd +++ b/man/generate_resource_map_pid.Rd @@ -7,7 +7,7 @@ generate_resource_map_pid(metadata_pid) } \arguments{ -\item{metadata_pid}{} +\item{metadata_pid}{(character) A metadata pid} } \description{ Generate a PID for a new resource map by appending "resource_map_" to it. diff --git a/man/get_mn_base_url.Rd b/man/get_mn_base_url.Rd index f015c69..6611501 100644 --- a/man/get_mn_base_url.Rd +++ b/man/get_mn_base_url.Rd @@ -7,7 +7,7 @@ get_mn_base_url(mn) } \arguments{ -\item{mn}{} +\item{mn}{(character) A mn instance} } \value{ (character) The URL diff --git a/man/inv_load_sizes.Rd b/man/inv_load_sizes.Rd index 703f906..e9a6dce 100644 --- a/man/inv_load_sizes.Rd +++ b/man/inv_load_sizes.Rd @@ -8,9 +8,9 @@ inv_load_sizes(inventory, path) } \arguments{ -\item{path}{(character) Path to a file containing sizes.} +\item{inventory}{(data.frame) inventory A \code{data.frame}.} -\item{(data.frame)}{inventory A \code{data.frame}.} +\item{path}{(character) Path to a file containing sizes.} } \value{ (data.frame) An inventory diff --git a/man/is_token_expired.Rd b/man/is_token_expired.Rd index 03e2584..7a4c5a0 100644 --- a/man/is_token_expired.Rd +++ b/man/is_token_expired.Rd @@ -6,6 +6,9 @@ \usage{ is_token_expired(node) } +\arguments{ +\item{node}{(character) A member node instance} +} \value{ (logical) } diff --git a/man/object_exists.Rd b/man/object_exists.Rd index 5b75c2b..201b1ad 100644 --- a/man/object_exists.Rd +++ b/man/object_exists.Rd @@ -9,7 +9,7 @@ object_exists(node, pids) \arguments{ \item{node}{(MNode|CNode) The Node to query.} -\item{pid}{(character) PID to check the existence of.} +\item{pids}{(character) PID to check the existence of.} } \value{ (logical) Whether the object exists. diff --git a/man/publish_object.Rd b/man/publish_object.Rd index 3291908..0c42f28 100644 --- a/man/publish_object.Rd +++ b/man/publish_object.Rd @@ -19,6 +19,8 @@ publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL, \item{sid}{(character) Optional. The SID to use with the new object.} \item{clone_pid}{(character) PID of objet to clone System Metadata from} + +\item{public}{(logical) TRUE/FALSE Whether object should be given public read access.} } \value{ pid (character). The PID of the published object. diff --git a/man/publish_update.Rd b/man/publish_update.Rd index a3dcde2..28e71b0 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -40,8 +40,6 @@ This applies to the new metadata PID and its resource map and data object. access policies are not affected.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements.} - -\item{parent_data_pids}{(character) Optional. Data pids of a parent package to be updated.} } \value{ pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. diff --git a/man/test_has_abstract.Rd b/man/test_has_abstract.Rd index 8c63190..dd86895 100644 --- a/man/test_has_abstract.Rd +++ b/man/test_has_abstract.Rd @@ -7,6 +7,9 @@ Author: Bryce Mecum } \usage{ test_has_abstract(path) } +\arguments{ +\item{path}{(character) a path} +} \description{ Functions related to fixing invalid ISO metadata. } diff --git a/man/theme_packages.Rd b/man/theme_packages.Rd index 1d391ad..e95e947 100644 --- a/man/theme_packages.Rd +++ b/man/theme_packages.Rd @@ -9,6 +9,8 @@ theme_packages(inventory, nfiles_cutoff = 100) } \arguments{ \item{inventory}{(data.frame) An Inventory.} + +\item{nfiles_cutoff}{(integer) Number of cutoff files} } \value{ (data.frame) An Inventory. From 8be1379a9aeeac1d1294770b5998e5316c7c0476 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 14:44:20 -0900 Subject: [PATCH 091/318] Remove warn_if_outdated as its no longer used --- R/zzz.R | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index e5aebe0..1285192 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -25,26 +25,3 @@ load_d1_formats_list <- function(url) { D1_FORMATS <<- vapply(format_id_nodes, function(x) { xml2::xml_text(x) }, "") } - -warn_if_outdated <- function() { - installed_version <- utils::packageVersion("arcticdatautils") - - req <- httr::GET("https://api.github.com/repos/nceas/arcticdatautils/releases/latest", - httr::add_headers("Accept", "application/vnd.github.v3+json")) - - release <- httr::content(req) - - if (httr::status_code(req) != 200) { - warning(paste0("The request to check whether the version of arcticdatautils you have installed is the latest, the response from GitHub's API failed. The response was: \n\n", req)) - return(invisible()) - } - - if (!("tag_name" %in% names(release))) { - warning(paste0("While checking to see if your installed version of arcticdatautils is the latest available, the response from GitHub's API was not as expected so checking was not done.")) - return(invisible()) - } - - if (!stringr::str_detect(release$tag_name, paste0("^v", stringr::str_replace_all(as.character(installed_version), "\\.", "\\\\."), "$"))) { - warning(paste0("You do not have the most recent version of arcticdatautils installed. This is just a reminder to update next time you get the chance. Visit https://github.com/NCEAS/arcticdatautils/releases to find the latest release.")) - } -} From 32d9c13dd7c40113339d692e987d50392d8eff2a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:01:50 -0900 Subject: [PATCH 092/318] Fix minor issues in DESCRIPTION title/description fields --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6d5d52e..caef707 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: arcticdatautils -Title: Arctic Data Utilities +Title: Utilities for the Arctic Data Center Version: 0.6.3.9000 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), @@ -8,8 +8,8 @@ Authors@R: c( person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") ) -Description: This package provides a set of utility methods for uploading - and editing data on the Arctic Data Catalog. +Description: A set of utilites for working with the Arctic Data Center + (https://arcticdata.io) Depends: R (>= 3.2.3) Imports: From 69f18062e5fccc49af303ebe78f40ea47b1d433a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:04:27 -0900 Subject: [PATCH 093/318] Re-factor how format_id checking is done The old way was super hacky and used global assignment (<<-). The call to the CN is super fast and now I just call it fresh each time the function is called. --- R/editing.R | 12 ++------ R/formats.R | 52 +++++++++++++++++++++++++++++++++++ R/zzz.R | 27 ------------------ man/check_format.Rd | 18 ++++++++++++ man/get_formats.Rd | 20 ++++++++++++++ tests/testthat/test_editing.R | 22 +++++++++++++++ tests/testthat/test_formats.R | 6 ++++ 7 files changed, 120 insertions(+), 37 deletions(-) create mode 100644 R/formats.R delete mode 100644 R/zzz.R create mode 100644 man/check_format.Rd create mode 100644 man/get_formats.Rd create mode 100644 tests/testthat/test_formats.R diff --git a/R/editing.R b/R/editing.R index 6a91ee3..0f9a779 100644 --- a/R/editing.R +++ b/R/editing.R @@ -53,11 +53,7 @@ publish_object <- function(mn, warning(paste0("No format_id was specified so a guess was made based upon the file extension: ", format_id, ".")) } - # Check if format ID is valid - if (!is.null(D1_FORMATS) && !(format_id %in% D1_FORMATS)) { - stop(call. = FALSE, - paste0("The provided format_id of '", format_id, "' is not a valid format ID. Check what you entered against the list of format IDs on https://cn.dataone.org/cn/v2/formats. Note that this list is cached when arcticdatautils is loaded so if you haven't restarted your R session in a while you may have an outdated copy and restarting your session may fix this.")) - } + check_format(format_id) # Set up some variables for use later on ######################################## @@ -155,11 +151,7 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) warning(paste0("No format_id was specified so a guess was made based upon the file extension: ", format_id, ".")) } - # Check if format ID is valid - if (!is.null(D1_FORMATS) && !(format_id %in% D1_FORMATS)) { - stop(call. = FALSE, - paste0("The provided format_id of '", format_id, "' is not a valid format ID. Check what you entered against the list of format IDs on https://cn.dataone.org/cn/v2/formats. Note that this list is cached when arcticdatautils is loaded so if you haven't restarted your R session in a while you may have an outdated copy and restarting your session may fix this.")) - } + check_format(format_id) message(paste0("Updating object ", pid, " with the file at ", path, ".")) diff --git a/R/formats.R b/R/formats.R new file mode 100644 index 0000000..cc98c8d --- /dev/null +++ b/R/formats.R @@ -0,0 +1,52 @@ +#' Get the list of valid formats from DataONE +#' +#' Note that this function is intended to return even if the request to the CN +#' fails. This is so other functions can call continue even if the request +#' fails. +#' +#' @param url (character) The listFormats endpoint. Defaults to the production +#' CN +#' +#' @return (character) +get_formats <- function(url = "https://cn.dataone.org/cn/v2/formats") { + req <- httr::GET(url) + + if (httr::status_code(req) != 200) { + warning(paste0("Failed to load an up-to-date list of format IDs from ", url, " because the request to the CN failed. Checking of format IDs is disabled.")) + return(vector("character")) + } + + formats_content <- httr::content(req, encoding = "UTF-8") + format_id_nodes <- xml2::xml_find_all(formats_content, "//formatId") + + if (length(format_id_nodes) == 0) { + return(vector("character")) + } + + vapply(format_id_nodes, function(x) { + xml2::xml_text(x) + }, + "") +} + +#' Check that the given format is valid +#' +#' Validity is determined by the given format being found in the list on +#' \url{https://cn.dataone.org/cn/v2/formats}. +#' +#' @param format (character) The format ID to check. +#' +#' @return (logical) Whether or not the format was valid. +check_format <- function(format) { + formats <- get_formats() + + if (!(format %in% formats)) + stop(call. = FALSE, + paste0("The provided format_id of '", + format, + "' is not a valid format ID. Check what you entered against ", + "the list of format IDs on ", + "https://cn.dataone.org/cn/v2/formats.")) + + invisible(TRUE) +} diff --git a/R/zzz.R b/R/zzz.R deleted file mode 100644 index 1285192..0000000 --- a/R/zzz.R +++ /dev/null @@ -1,27 +0,0 @@ -.onLoad <- function(libname, pkgname) { - load_d1_formats_list("https://cn.dataone.org/cn/v2/formats") - - invisible() -} - -# Warning: This function produces a side-effect of assigning into the package -# env with <<- to update the value of D1_FORMATS -D1_FORMATS <- NULL -load_d1_formats_list <- function(url) { - req <- httr::GET(url) - - if (httr::status_code(req) != 200) { - warning(paste0("Failed to load an up-to-date list of format IDs from ", url, " because the request to the CN failed. Checking of format IDs is disabled.")) - return(invisible()) - } - - formats_content <- httr::content(req, encoding = "UTF-8") - format_id_nodes <- xml2::xml_find_all(formats_content, "//formatId") - - if (length(format_id_nodes) == 0) { - warning(paste0("Failed to load an up-to-date list of format IDs from ", url, " because the response returned from the CN was not formatted as expected. Checking of format IDs is disabled.")) - return(invisible()) - } - - D1_FORMATS <<- vapply(format_id_nodes, function(x) { xml2::xml_text(x) }, "") -} diff --git a/man/check_format.Rd b/man/check_format.Rd new file mode 100644 index 0000000..172197c --- /dev/null +++ b/man/check_format.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/formats.R +\name{check_format} +\alias{check_format} +\title{Check that the given format is valid} +\usage{ +check_format(format) +} +\arguments{ +\item{format}{(character) The format ID to check.} +} +\value{ +(logical) Whether or not the format was valid. +} +\description{ +Validity is determined by the given format being found in the list on +\url{https://cn.dataone.org/cn/v2/formats}. +} diff --git a/man/get_formats.Rd b/man/get_formats.Rd new file mode 100644 index 0000000..179641d --- /dev/null +++ b/man/get_formats.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/formats.R +\name{get_formats} +\alias{get_formats} +\title{Get the list of valid formats from DataONE} +\usage{ +get_formats(url = "https://cn.dataone.org/cn/v2/formats") +} +\arguments{ +\item{url}{(character) The listFormats endpoint. Defaults to the production +CN} +} +\value{ +(character) +} +\description{ +Note that this function is intended to return even if the request to the CN +fails. This is so other functions can call continue even if the request +fails. +} diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 3074b27..4b58f6b 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -237,3 +237,25 @@ test_that("publish_update removes the deprecated eml@access element", { new_eml <- EML::read_eml(updated_eml_path) expect_equal(0, length(new_eml@access@allow)) }) + +test_that("publishing an object with a valid format ID succeeds", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + tmp_path <- tempfile() + writeLines(LETTERS, tmp_path) + + expect_is(publish_object(mn, tmp_path, "text/plain"), "character") +}) + +test_that("publishing an object with an invalid format ID fails", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + tmp_path <- tempfile() + writeLines(LETTERS, tmp_path) + + expect_error(publish_object(mn, tmp_path, "asdf/asdf")) +}) diff --git a/tests/testthat/test_formats.R b/tests/testthat/test_formats.R new file mode 100644 index 0000000..98d3aee --- /dev/null +++ b/tests/testthat/test_formats.R @@ -0,0 +1,6 @@ +context("formats") + +test_that("valid formats are valid and invalid ones are not", { + expect_true(check_format("text/csv")) + expect_error(check_format("badformat")) +}) From b3c0e37e8bcb1d65aaa7fa8bb9830829b68a28d9 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:05:10 -0900 Subject: [PATCH 094/318] Remove redundant load of the package in an example --- R/util.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/util.R b/R/util.R index b39f90b..22a596e 100644 --- a/R/util.R +++ b/R/util.R @@ -1085,7 +1085,6 @@ view_profile <- function(mn, subject, fields=c("identifier", "title")) { #' \dontrun{ #' # Create a package then check its indexing status #' library(dataone) -#' library(arcticdatautils) #' mn <- MNode(...) #' pkg <- create_dummy_package(mn) #' show_indexing_status(mn, pkg) From 45a861122ca291bcdd917280beba4c234baf3378 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:05:21 -0900 Subject: [PATCH 095/318] Remove explicit file.remove call as its not needed --- tests/testthat/test_editing.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 4b58f6b..c959cd3 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -149,7 +149,6 @@ test_that("we can publish an update to an object", { upd <- update_object(mn, old, tmp) }) - file.remove(tmp) sm <- dataone::getSystemMetadata(mn, upd) expect_equal(sm@fileName, basename(tmp)) From 198045475b125c97c701985ffae3e2ef6eb4be2a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:05:46 -0900 Subject: [PATCH 096/318] Fix regression from removing remove_public_access --- R/editing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/editing.R b/R/editing.R index 0f9a779..d4e972b 100644 --- a/R/editing.R +++ b/R/editing.R @@ -454,7 +454,7 @@ publish_update <- function(mn, set_public_read(mn, data_pid) } } else { - metadata_updated_sysmeta <- remove_public_access(metadata_updated_sysmeta) + metadata_updated_sysmeta <- datapack::removeAccessRule(metadata_updated_sysmeta, "public", "read") } set_rights_holder(mn, metadata_pid, me) @@ -729,7 +729,7 @@ update_resource_map <- function(mn, if (public) { new_rm_sysmeta <- datapack::addAccessRule(new_rm_sysmeta, "public", "read") } else { - new_rm_sysmeta <- remove_public_access(new_rm_sysmeta) + new_rm_sysmeta <- datapack::removeAccessRule(new_rm_sysmeta, "public", "read") } # Update it From 4a027721401a3a221c7873a842f8099c68632272 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:06:03 -0900 Subject: [PATCH 097/318] Update show_indexing_status docs --- man/show_indexing_status.Rd | 1 - 1 file changed, 1 deletion(-) diff --git a/man/show_indexing_status.Rd b/man/show_indexing_status.Rd index bb922ce..ac652fd 100644 --- a/man/show_indexing_status.Rd +++ b/man/show_indexing_status.Rd @@ -21,7 +21,6 @@ Show the indexing status of a set of PIDs \dontrun{ # Create a package then check its indexing status library(dataone) -library(arcticdatautils) mn <- MNode(...) pkg <- create_dummy_package(mn) show_indexing_status(mn, pkg) From 08cca39d7d5f22ff15d293eae7fdbc4fe87b82d7 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:17:02 -0900 Subject: [PATCH 098/318] Fix a variety of R CMD CHECK issues --- .Rbuildignore | 1 + DESCRIPTION | 2 +- NAMESPACE | 12 ++++++++++++ R/arcticdatautils.R | 14 ++++++++++++++ R/inserting.R | 8 ++++---- R/packaging.R | 7 ++++--- man/arcticdatautils.Rd | 16 ++++++++++++++++ man/create_object.Rd | 7 ++++--- 8 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 R/arcticdatautils.R create mode 100644 man/arcticdatautils.Rd diff --git a/.Rbuildignore b/.Rbuildignore index dbdfa42..795beb5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,4 @@ ^etc$ ^docs$ ^\.travis\.yml$ +MAINTENANCE.md diff --git a/DESCRIPTION b/DESCRIPTION index caef707..d499aad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Authors@R: c( person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") ) Description: A set of utilites for working with the Arctic Data Center - (https://arcticdata.io) + (https://arcticdata.io). Depends: R (>= 3.2.3) Imports: diff --git a/NAMESPACE b/NAMESPACE index b19e389..e519dee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,3 +60,15 @@ import(EML) import(XML) import(dataone) import(datapack) +importFrom(methods,"slot<-") +importFrom(methods,as) +importFrom(methods,is) +importFrom(methods,new) +importFrom(methods,slot) +importFrom(stats,na.omit) +importFrom(utils,URLencode) +importFrom(utils,head) +importFrom(utils,read.csv) +importFrom(utils,read.delim) +importFrom(utils,setTxtProgressBar) +importFrom(utils,txtProgressBar) diff --git a/R/arcticdatautils.R b/R/arcticdatautils.R new file mode 100644 index 0000000..d2320c5 --- /dev/null +++ b/R/arcticdatautils.R @@ -0,0 +1,14 @@ +#' arcticdatautils: Utilities for the Arctic Data Center +#' +#' The foo package provides three categories of important functions: +#' foo, bar and baz. +#' +#' @section Foo functions: +#' The foo functions ... +#' +#' @docType package +#' @name arcticdatautils +#' @importFrom methods as is new slot slot<- +#' @importFrom stats na.omit +#' @importFrom utils URLencode head read.csv read.delim setTxtProgressBar txtProgressBar +NULL diff --git a/R/inserting.R b/R/inserting.R index 8411ced..7d486b8 100644 --- a/R/inserting.R +++ b/R/inserting.R @@ -62,10 +62,10 @@ create_from_folder <- function(mn, path, data_pids=NULL) { stopifnot(EML::eml_validate(eml_path)) eml_package <- publish_update(mn, - metadata_old_pid = iso_pid, - resmap_old_pid = iso_resmap_pid, - data_old_pids = data_pids, - metadata_file_path = eml_path) + metadata_pid = iso_pid, + resource_map_pid = iso_resmap_pid, + data_pids = data_pids, + metadata_path = eml_path) list(iso_pid = iso_pid, iso_resource_map_pid = iso_resmap_pid, diff --git a/R/packaging.R b/R/packaging.R index 2646795..ee44a55 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -623,11 +623,12 @@ create_sysmeta <- function(file, base_path, submitter, rights_holder) { #' #' @param file (data.frame)A row from the inventory. #' @param sysmeta (SystemMetadata) The file's sysmeta. -#' @param base_path (character) -#' @param mn (MNode) +#' @param base_path (character) Base path, to be appended to the \code{file} +#' column to find the file to upload. +#' @param env (list) An environment. #' -create_object <- function(file, sysmeta, base_path, mn) { +create_object <- function(file, sysmeta, base_path, env) { stopifnot(is.data.frame(file), nrow(file) == 1, "pid" %in% names(file), diff --git a/man/arcticdatautils.Rd b/man/arcticdatautils.Rd new file mode 100644 index 0000000..0d3eb16 --- /dev/null +++ b/man/arcticdatautils.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arcticdatautils.R +\docType{package} +\name{arcticdatautils} +\alias{arcticdatautils} +\alias{arcticdatautils-package} +\title{arcticdatautils: Utilities for the Arctic Data Center} +\description{ +The foo package provides three categories of important functions: +foo, bar and baz. +} +\section{Foo functions}{ + +The foo functions ... +} + diff --git a/man/create_object.Rd b/man/create_object.Rd index f36747a..21e629f 100644 --- a/man/create_object.Rd +++ b/man/create_object.Rd @@ -4,16 +4,17 @@ \alias{create_object} \title{Create an object from a row of the inventory.} \usage{ -create_object(file, sysmeta, base_path, mn) +create_object(file, sysmeta, base_path, env) } \arguments{ \item{file}{(data.frame)A row from the inventory.} \item{sysmeta}{(SystemMetadata) The file's sysmeta.} -\item{base_path}{(character)} +\item{base_path}{(character) Base path, to be appended to the \code{file} +column to find the file to upload.} -\item{mn}{(MNode)} +\item{env}{(list) An environment.} } \description{ Create an object from a row of the inventory. From 6ba0d378c040404782de6dae527eee9a5671af4b Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 6 Feb 2018 16:19:16 -0900 Subject: [PATCH 099/318] Stop EML otherEntity test from modifying a local file This uses a temporary file instead. Before this, running the test suite was modifying an example EML file which was under version control which was confusing --- tests/testthat/test_eml.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 3c68982..abb8f31 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -106,8 +106,8 @@ test_that("an other entity can be added from a pid", { writeLines(LETTERS, data_path) pid <- publish_object(mn, data_path, "text/plain") - eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") - + eml_path <- tempfile() + file.copy(file.path(system.file(package = "arcticdatautils"), "example-eml.xml"), eml_path) doc <- EML::read_eml(eml_path) doc@dataset@otherEntity <- new("ListOfotherEntity", list()) @@ -115,6 +115,4 @@ test_that("an other entity can be added from a pid", { doc <- EML::read_eml(eml_path) testthat::expect_length(doc@dataset@otherEntity, 1) - - unlink(data_path) }) From 985ca1053f1944a43138c711386652c1933e4f43 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Wed, 7 Feb 2018 16:19:45 -0800 Subject: [PATCH 100/318] fixed bug in test code that prevents environment from being loaded correctly --- tests/testthat/test_dataone.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_dataone.R b/tests/testthat/test_dataone.R index f005812..03622b3 100644 --- a/tests/testthat/test_dataone.R +++ b/tests/testthat/test_dataone.R @@ -1,6 +1,6 @@ context("dataone") -node <- env_load()$node +node <- env_load()$mn test_that("permissions can be checked", { if (!is_token_set(node)) { From 8d5fcc971971554bddc3c60b774aadf5f9eff9a9 Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 9 Feb 2018 11:21:44 -0800 Subject: [PATCH 101/318] #59, changed pid_to_eml_datatable to accept attributes as an optional argument, and validateAttributes as an optional argument. --- R/eml.R | 73 ++++++++++++------- R/helpers.R | 50 +++++++------ man/create_dummy_attributes_dataframe.Rd | 8 +- ...create_dummy_enumeratedDomain_dataframe.Rd | 11 ++- man/pid_to_eml_datatable.Rd | 26 ++++--- 5 files changed, 105 insertions(+), 63 deletions(-) diff --git a/R/eml.R b/R/eml.R index c8a9883..4a294b1 100644 --- a/R/eml.R +++ b/R/eml.R @@ -29,17 +29,19 @@ pid_to_eml_other_entity <- function(mn, pids) { sysmeta_to_eml_other_entity(sysmeta) } -#' Create an EML dataTable object for a given PID. -#' This function generates an attributeList and physical and constructs a dataTable. + +#' Create an EML code\code{dataTable} object for a given PID. +#' This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}. #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pid (character) The PID of the object to create the dataTable for. -#' @param attributes (data.frame) Data frame of attributes. -#' @param factors (data.frame) Data frame of enumerated attribute values (factors). -#' @param name (character) Optional field to specify entityName, otherwise will be extracted from system metadata. -#' @param description (character) Optional field to specify entityDescription, otherwise will match name. +#' @param pid (character) The PID of the object to create the \code{dataTable} for. +#' @param attributes (data.frame) Optional data frame of attributes. Follows the convention in \link[EML]{set_attributes}. +#' @param factors (data.frame) Optional data frame of enumerated attribute values (factors). Follows the convention in \link[EML]{set_attributes}. +#' @param name (character) Optional field to specify \code{entityName}, otherwise will be extracted from system metadata. +#' @param description (character) Optional field to specify \code{entityDescription}, otherwise will match name. +#' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run. #' -#' @return (dataTable) The dataTable object +#' @return (dataTable) The \code{dataTable} object #' @export #' #' @examples @@ -50,40 +52,55 @@ pid_to_eml_other_entity <- function(mn, pids) { #' description <- "A description of this entity." #' dataTable <- pid_to_eml_datatable(mn, pid, attributes, name=name, description=description) #' } -pid_to_eml_datatable <- function(mn, pid, attributes, factors=NULL, name=NULL, description=NULL) { +pid_to_eml_datatable <- function(mn, + pid, + attributes = NULL, + factors = NULL, + name = NULL, + description = NULL, + validateAttributes = TRUE) { stopifnot(is(mn, "MNode")) stopifnot(is.character(pid), nchar(pid) > 0) - stopifnot(is.data.frame(attributes)) - if (is.null(factors)) { - attributes <- set_attributes(attributes) - } else { - stopifnot(is.data.frame(factors)) - attributes <- set_attributes(attributes, factors) + dataTable <- new("dataTable", physical = pid_to_eml_physical(mn, pid)) + + if(!is.null(attributes)) { + stopifnot(is.data.frame(attributes)) + + if (is.null(factors)) { + attributes <- set_attributes(attributes) + } else { + stopifnot(is.data.frame(factors)) + attributes <- set_attributes(attributes, factors) + } + + if (validateAttributes == TRUE) { + stopifnot(eml_validate_attributes(attributes)) + } + + dataTable@attributeList <- attributes } if (is.null(name)) { name <- getSystemMetadata(mn, pid)@fileName + + if (is.null(name)) { + stop(call. = FALSE, + "'Name' must either be specified in the function call or must exist in the system metadata.") + } } - if (is.null(description)) { - description <- name - } - - stopifnot(eml_validate_attributes(attributes)) - - physicalObj <- pid_to_eml_physical(mn, pid) - - dataTable <- new("dataTable", - entityName = name, - entityDescription = description, - physical = physicalObj, - attributeList = attributes) + dataTable@entityName <- name + if (!is.null(description)) { + dataTable@entityDescription <- description + } + dataTable } + #' Create EML physical objects for the given set of PIDs #' #' Note this is a wrapper around sysmeta_to_eml_physical which handles the task of diff --git a/R/helpers.R b/R/helpers.R index 9ff5789..7108440 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -222,47 +222,55 @@ create_dummy_parent_package <- function(mn, children) { #' @param numberAttributes (integer) Number of attributes to be created in the table #' @param factors (character) Optional vector of factor names to include. #' -#' @return dataframe +#' @return (data.frame) Data frame of attributes #' @export #' #' @examples -create_dummy_attributes_dataframe <- function(numberAttributes, factors=NULL) { - names <- sapply(1:numberAttributes, function(x){ paste0("Attribute ", x) }) +#' \dontrun{ +#' # Create dummy attribute dataframe with 6 attributes and 1 factor +#' attributes <- create_dummy_attributes_dataframe(6, c("Factor1", "Factor2")) +#' } +create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) { + names <- vapply(seq_len(numberAttributes), function(x) { paste0("Attribute ", x)}, "") if(!is.null(factors)) { domains <- c(rep("textDomain", numberAttributes - length(factors)), rep("enumeratedDomain", length(factors))) - names[(numberAttributes - length(factors) + 1):numberAttributes] <- factors + names[seq((numberAttributes - length(factors) + 1), numberAttributes)] <- factors } - df <- data.frame(attributeName = names, - attributeDefinition = names, - measurementScale = rep("nominal", numberAttributes), - domain = rep("textDomain", numberAttributes), - formatString = rep("NA", numberAttributes), - definition = names, - unit = rep("NA", numberAttributes), - numberType = rep("NA", numberAttributes), - missingValueCode = rep("NA", numberAttributes), - missingValueCodeExplanation = rep("NA", numberAttributes), - stringsAsFactors = F) + attributes <- data.frame(attributeName = names, + attributeDefinition = names, + measurementScale = rep("nominal", numberAttributes), + domain = domains, + formatString = rep(NA, numberAttributes), + definition = names, + unit = rep(NA, numberAttributes), + numberType = rep(NA, numberAttributes), + missingValueCode = rep(NA, numberAttributes), + missingValueCodeExplanation = rep(NA, numberAttributes), + stringsAsFactors = FALSE) - df + attributes } #' Create dummy enumeratedDomain data frame #' #' @param factors (character) Vector of factor names to include. #' -#' @return +#' @return (data.frame) Data frame of factors #' @export #' #' @examples +#' \dontrun{ +#' # Create dummy dataframe of 2 factors/enumerated domains +#' attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) +#' } create_dummy_enumeratedDomain_dataframe <- function(factors) { names <- rep(factors, 4) - df <- data.frame(attributeName = names, - code = paste0(names, 1:length(names)), - definition = names) + enumeratedDomains <- data.frame(attributeName = names, + code = paste0(names, 1:length(names)), + definition = names) - df + enumeratedDomains } diff --git a/man/create_dummy_attributes_dataframe.Rd b/man/create_dummy_attributes_dataframe.Rd index 3f7ba16..329836c 100644 --- a/man/create_dummy_attributes_dataframe.Rd +++ b/man/create_dummy_attributes_dataframe.Rd @@ -12,8 +12,14 @@ create_dummy_attributes_dataframe(numberAttributes, factors = NULL) \item{factors}{(character) Optional vector of factor names to include.} } \value{ -dataframe +(data.frame) Data frame of attributes } \description{ Create dummy attributes data frame } +\examples{ +\dontrun{ +# Create dummy attribute dataframe with 6 attributes and 1 factor +attributes <- create_dummy_attributes_dataframe(6, c("Factor1", "Factor2")) +} +} diff --git a/man/create_dummy_enumeratedDomain_dataframe.Rd b/man/create_dummy_enumeratedDomain_dataframe.Rd index ca7fd5b..d65bc43 100644 --- a/man/create_dummy_enumeratedDomain_dataframe.Rd +++ b/man/create_dummy_enumeratedDomain_dataframe.Rd @@ -7,8 +7,17 @@ create_dummy_enumeratedDomain_dataframe(factors) } \arguments{ -\item{factors}{(character) Optional vector of factor names to include.} +\item{factors}{(character) Vector of factor names to include.} +} +\value{ +(data.frame) Data frame of factors } \description{ Create dummy enumeratedDomain data frame } +\examples{ +\dontrun{ +# Create dummy dataframe of 2 factors/enumerated domains +attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) +} +} diff --git a/man/pid_to_eml_datatable.Rd b/man/pid_to_eml_datatable.Rd index e4c9fa6..25504ce 100644 --- a/man/pid_to_eml_datatable.Rd +++ b/man/pid_to_eml_datatable.Rd @@ -2,31 +2,33 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_datatable} \alias{pid_to_eml_datatable} -\title{Create an EML dataTable object for a given PID. -This function generates an attributeList and physical and constructs a dataTable.} +\title{Create an EML code\code{dataTable} object for a given PID. +This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}.} \usage{ -pid_to_eml_datatable(mn, pid, attributes, factors = NULL, name = NULL, - description = NULL) +pid_to_eml_datatable(mn, pid, attributes = NULL, factors = NULL, + name = NULL, description = NULL, validateAttributes = TRUE) } \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} -\item{pid}{(character) The PID of the object to create the dataTable for.} +\item{pid}{(character) The PID of the object to create the \code{dataTable} for.} -\item{attributes}{(data.frame) Data frame of attributes.} +\item{attributes}{(data.frame) Optional data frame of attributes. Follows the convention in \link[EML]{set_attributes}.} -\item{factors}{(data.frame) Data frame of enumerated attribute values (factors).} +\item{factors}{(data.frame) Optional data frame of enumerated attribute values (factors). Follows the convention in \link[EML]{set_attributes}.} -\item{name}{(character) Optional field to specify entityName, otherwise will be extracted from system metadata.} +\item{name}{(character) Optional field to specify \code{entityName}, otherwise will be extracted from system metadata.} -\item{description}{(character) Optional field to specify entityDescription, otherwise will match name.} +\item{description}{(character) Optional field to specify \code{entityDescription}, otherwise will match name.} + +\item{validateAttributes}{(logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run.} } \value{ -(dataTable) The dataTable object +(dataTable) The \code{dataTable} object } \description{ -Create an EML dataTable object for a given PID. -This function generates an attributeList and physical and constructs a dataTable. +Create an EML code\code{dataTable} object for a given PID. +This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}. } \examples{ \dontrun{ From 692f18807537c95f3066734d27e29b21ac733b98 Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 9 Feb 2018 11:25:43 -0800 Subject: [PATCH 102/318] #71 seq_along instead of 1:x. --- R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/helpers.R b/R/helpers.R index 7108440..36a2fcc 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -269,7 +269,7 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) create_dummy_enumeratedDomain_dataframe <- function(factors) { names <- rep(factors, 4) enumeratedDomains <- data.frame(attributeName = names, - code = paste0(names, 1:length(names)), + code = paste0(names, seq_along(names)), definition = names) enumeratedDomains From ad05a6fb97461bf0b5ade798ad3da46346e7aaf5 Mon Sep 17 00:00:00 2001 From: eodean Date: Fri, 9 Feb 2018 11:32:56 -0800 Subject: [PATCH 103/318] #59 whoops, sysmeta fileName would be NA. --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 4a294b1..cd7db8f 100644 --- a/R/eml.R +++ b/R/eml.R @@ -85,7 +85,7 @@ pid_to_eml_datatable <- function(mn, if (is.null(name)) { name <- getSystemMetadata(mn, pid)@fileName - if (is.null(name)) { + if (is.na(name)) { stop(call. = FALSE, "'Name' must either be specified in the function call or must exist in the system metadata.") } From e55008e3015b3a32adc1d5e808c5e00a175a312c Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 9 Feb 2018 13:36:47 -0900 Subject: [PATCH 104/318] Separate out title and description in pid_to_eml_datatable --- R/eml.R | 3 ++- man/pid_to_eml_datatable.Rd | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index cd7db8f..e70b133 100644 --- a/R/eml.R +++ b/R/eml.R @@ -30,7 +30,8 @@ pid_to_eml_other_entity <- function(mn, pids) { } -#' Create an EML code\code{dataTable} object for a given PID. +#' Create an EML code\code{dataTable} object for a given PID. +#' #' This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}. #' #' @param mn (MNode) Member Node where the PID is associated with an object. diff --git a/man/pid_to_eml_datatable.Rd b/man/pid_to_eml_datatable.Rd index 25504ce..7aee3b7 100644 --- a/man/pid_to_eml_datatable.Rd +++ b/man/pid_to_eml_datatable.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_datatable} \alias{pid_to_eml_datatable} -\title{Create an EML code\code{dataTable} object for a given PID. -This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}.} +\title{Create an EML code\code{dataTable} object for a given PID.} \usage{ pid_to_eml_datatable(mn, pid, attributes = NULL, factors = NULL, name = NULL, description = NULL, validateAttributes = TRUE) @@ -27,7 +26,6 @@ pid_to_eml_datatable(mn, pid, attributes = NULL, factors = NULL, (dataTable) The \code{dataTable} object } \description{ -Create an EML code\code{dataTable} object for a given PID. This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}. } \examples{ From 2fb649c476830e8b2a5df6daf7772b3e6d7bbfd2 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 9 Feb 2018 13:36:59 -0900 Subject: [PATCH 105/318] Fix whitesace issues in pid_to_eml_datatable --- R/eml.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/eml.R b/R/eml.R index e70b133..7d3b582 100644 --- a/R/eml.R +++ b/R/eml.R @@ -41,7 +41,7 @@ pid_to_eml_other_entity <- function(mn, pids) { #' @param name (character) Optional field to specify \code{entityName}, otherwise will be extracted from system metadata. #' @param description (character) Optional field to specify \code{entityDescription}, otherwise will match name. #' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run. -#' +#' #' @return (dataTable) The \code{dataTable} object #' @export #' @@ -63,37 +63,37 @@ pid_to_eml_datatable <- function(mn, stopifnot(is(mn, "MNode")) stopifnot(is.character(pid), nchar(pid) > 0) - + dataTable <- new("dataTable", physical = pid_to_eml_physical(mn, pid)) if(!is.null(attributes)) { stopifnot(is.data.frame(attributes)) - + if (is.null(factors)) { attributes <- set_attributes(attributes) } else { stopifnot(is.data.frame(factors)) attributes <- set_attributes(attributes, factors) } - + if (validateAttributes == TRUE) { stopifnot(eml_validate_attributes(attributes)) } - + dataTable@attributeList <- attributes } - + if (is.null(name)) { name <- getSystemMetadata(mn, pid)@fileName - + if (is.na(name)) { - stop(call. = FALSE, + stop(call. = FALSE, "'Name' must either be specified in the function call or must exist in the system metadata.") } } - + dataTable@entityName <- name - + if (!is.null(description)) { dataTable@entityDescription <- description } From 24ff4dc5c96343e90abdcbc81bdc99b703b3936a Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 15 Feb 2018 17:12:15 -0800 Subject: [PATCH 106/318] Added publish_update unit test, issue #36 --- tests/testthat/test_editing.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index c959cd3..d4fbfca 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -258,3 +258,33 @@ test_that("publishing an object with an invalid format ID fails", { expect_error(publish_object(mn, tmp_path, "asdf/asdf")) }) + +test_that("publish_update removes the target package from 'parent_parent_pids' argument", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + parent <- create_dummy_package(mn) + child <- create_dummy_package(mn) + + # Nest packages + parent["resource_map"] <- update_resource_map(mn, + parent$resource_map, + parent$metadata, + parent$data, + child$resource_map, + check_first = F) + + # Updating parent incorrectly should still run (with parent resource_map listed in 'parent_parent_pids') + child <- publish_update(mn, + child$metadata, + child$resource_map, + child$data, + parent_resmap_pid = parent$resource_map, + parent_metadata_pid = parent$metadata, + parent_data_pids = parent$data, + parent_child_pids = child$resource_map, check_first = F) + parent <- get_package(mn, child$parent_resource_map) + + expect_equal(child$resource_map, parent$child_packages) +}) From 84bace7c1574d6d5984d1ffca390d651f4470f82 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 15 Feb 2018 17:21:25 -0800 Subject: [PATCH 107/318] updated publish_update params to be more clear --- R/editing.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/editing.R b/R/editing.R index d4e972b..2b794d7 100644 --- a/R/editing.R +++ b/R/editing.R @@ -226,10 +226,10 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' @param data_pids (character) PID(s) of data objects that will go in the updated package. #' @param identifier (character) Manually specify the identifier for the new metadata object. #' @param use_doi (logical) Generate and use a DOI as the identifier for the updated metadata object. -#' @param parent_resmap_pid (character) Optional. PID of a parent package to be updated. -#' @param parent_metadata_pid (character) Optional. Identifier for the metadata document of the parent package. -#' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. -#' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. +#' @param parent_resmap_pid (character) Optional. PID of a parent package to be updated. Not optional if a parent package exists. +#' @param parent_metadata_pid (character) Optional. Identifier for the metadata document of the parent package. Not optional if a parent package exists. +#' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. Not optional if the parent package contains data objects. +#' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages. #' @param child_pids (character) Optional. Child packages resource map PIDs. #' @param metadata_path (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used. #' @param public (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). From 32e12d8c1c1f0701131e8bf5db5327874934fc60 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 15 Feb 2018 17:24:39 -0800 Subject: [PATCH 108/318] fixed typos --- tests/testthat/test_editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index d4fbfca..320e5f9 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -259,7 +259,7 @@ test_that("publishing an object with an invalid format ID fails", { expect_error(publish_object(mn, tmp_path, "asdf/asdf")) }) -test_that("publish_update removes the target package from 'parent_parent_pids' argument", { +test_that("publish_update removes 'resource_map_pid' from 'parent_child_pids' argument", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") } From 49dbfa8b282884f7a6b6ae985baae0dc73156ea6 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Sat, 10 Mar 2018 18:13:39 -0900 Subject: [PATCH 109/318] Add Jeanette and Jesse as maintainers --- DESCRIPTION | 3 ++- README.md | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d499aad..11e9e3a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,8 +3,9 @@ Title: Utilities for the Arctic Data Center Version: 0.6.3.9000 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), + person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = c("cre", "ctb")), + person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = c("cre", "ctb")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), - person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb"), person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") ) diff --git a/README.md b/README.md index 2ca265f..edf6f1e 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,11 @@ remotes::install_github("nceas/arcticdatautils") - Run the tests and make sure they all pass > `devtools::test()` +## Support + +- Please submit bugs or other comments as [Issues](https://github.com/NCEAS/arcticdatautils/issues) +- Maintainers of the package are @jeanetteclark and @jagoldstein + ## Testing Note: The test suite contains a set of tests that call out to a remote server and whether or not these tests are run depends on whether `is_token_set()` returns true which just checks whether the `dataone_test_token` option is set. From cf92419408735774cb969a6dc38087df2cb250b5 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Sat, 10 Mar 2018 18:14:06 -0900 Subject: [PATCH 110/318] Minor readme tweaks --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index edf6f1e..cb5b3af 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,10 @@ [![Travis build status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) -The `articadatautils` R package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: +The `articadatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: -- Inserting large numbers of files into Metacat -- High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages +- Inserting large numbers of files into a Metacat Member Node +- High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations Note: The package is intended to be used by NCEAS staff and may not make much sense to others. From 89ba74a41ac530c304ecba1edee060a141df8734 Mon Sep 17 00:00:00 2001 From: Jesse Goldstein Date: Mon, 12 Mar 2018 13:16:44 -0700 Subject: [PATCH 111/318] removed extra 'creators' from Description file to enable installation of pkg --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 11e9e3a..be43b8c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,9 +3,9 @@ Title: Utilities for the Arctic Data Center Version: 0.6.3.9000 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), - person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = c("cre", "ctb")), - person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = c("cre", "ctb")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), + person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = "ctb"), + person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb"), person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") ) From fb2aa05a430d057a76c4d56e6a2f2ebd3078040e Mon Sep 17 00:00:00 2001 From: Jesse Goldstein Date: Mon, 12 Mar 2018 13:20:48 -0700 Subject: [PATCH 112/318] added comment args about "Maintainers" --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index be43b8c..67fdc2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,8 +4,8 @@ Version: 0.6.3.9000 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), - person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = "ctb"), - person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb"), + person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), + person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") ) From 51b181484292e6794ab4d0f85b0d3cbcfe9cfd5d Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Mon, 19 Mar 2018 12:16:44 -0700 Subject: [PATCH 113/318] Edited get_package documentation (metadata --> resource map) --- R/util.R | 2 +- man/get_package.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/util.R b/R/util.R index 22a596e..eaa5001 100644 --- a/R/util.R +++ b/R/util.R @@ -759,7 +759,7 @@ get_all_versions <- function(node, pid) { #' a resource map PID or a metadata PID as its `pid` argument. #' #' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. -#' @param pid (character) The the metadata PID of the package. +#' @param pid (character) The the resource map PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only #' useful to set if you are warned about the result set being truncated. diff --git a/man/get_package.Rd b/man/get_package.Rd index 9d7ccd9..f77090f 100644 --- a/man/get_package.Rd +++ b/man/get_package.Rd @@ -9,7 +9,7 @@ get_package(node, pid, file_names = FALSE, rows = 1000) \arguments{ \item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} -\item{pid}{(character) The the metadata PID of the package.} +\item{pid}{(character) The the resource map PID of the package.} \item{file_names}{(logical) Whether to return file names for all objects.} From d9239b90998b50a259aca409e382cc9677e2041e Mon Sep 17 00:00:00 2001 From: eodean Date: Tue, 10 Apr 2018 10:57:53 -0700 Subject: [PATCH 114/318] Updates for projects with multiple titles. --- R/eml.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/eml.R b/R/eml.R index 7d3b582..d6df844 100644 --- a/R/eml.R +++ b/R/eml.R @@ -623,7 +623,7 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' fully fleshed out. Need to pass these objects in directly if you want to use #' them. #' -#' @param title (character) Title of the project (Required). +#' @param title (character) Title of the project (Required). May have multiple titles. #' @param personnelList (list of personnel) Personnel involved with the project. #' @param abstract (character) Project abstract. Can pass as a character vector #' for separate paragraphs. @@ -637,7 +637,7 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' @export #' #' @examples -#' proj <- eml_project("Some title", +#' proj <- eml_project(c("Some title", "A second title if needed"), #' c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), #' c("Abstract paragraph 1", "Abstract paragraph 2"), #' "Funding Agency: Award Number 12345") @@ -650,15 +650,17 @@ eml_project <- function(title, relatedProject = NULL) { stopifnot(is.character(title), - nchar(title) > 0) + length(title) > 0, + all(nchar(title)) > 0) stopifnot(length(personnelList) > 0) # Project project <- new("project") # Title - project@title <- c(as(title, "title")) - + titles <- lapply(title, function(x) { as(x, "title") }) + project@title <- as(titles, "ListOftitle") + # Personnel if(!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { stop(call. = FALSE, From 3649024a1f361521a63b662d8f2ecf8ef1507904 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Tue, 24 Apr 2018 14:56:13 -0700 Subject: [PATCH 115/318] added ... --- R/editing.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/editing.R b/R/editing.R index 2b794d7..60396b7 100644 --- a/R/editing.R +++ b/R/editing.R @@ -545,6 +545,7 @@ publish_update <- function(mn, #' @param check_first (logical) Optional. Whether to check the PIDs passed in as #' aruments exist on the MN before continuing. This speeds up the function, #' especially when `data_pids` has many elements. +#' @param ... Additional arguments that can be passed into \code{\link{publish_object}} #' #' @return (character) The created resource map's PID #' @export @@ -563,9 +564,10 @@ publish_update <- function(mn, #'} create_resource_map <- function(mn, metadata_pid, - data_pids=NULL, - child_pids=NULL, - check_first=TRUE) { + data_pids = NULL, + child_pids = NULL, + check_first = TRUE, + ...) { stopifnot(is(mn, "MNode")) stopifnot(is.character(metadata_pid), nchar(metadata_pid) > 0) @@ -591,7 +593,8 @@ create_resource_map <- function(mn, actual <- publish_object(mn, path, pid, - format_id = "http://www.openarchives.org/ore/terms") + format_id = "http://www.openarchives.org/ore/terms", + ...) stopifnot(pid == actual) From 01232fe2f9bd38f77366e0dabc01cf359f3f96e4 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Tue, 24 Apr 2018 15:25:41 -0700 Subject: [PATCH 116/318] updated documentation --- man/create_resource_map.Rd | 4 +++- man/eml_project.Rd | 4 ++-- man/publish_update.Rd | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index d012e11..a999d81 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -5,7 +5,7 @@ \title{Create a resource map Object on a Member Node.} \usage{ create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL, - check_first = TRUE) + check_first = TRUE, ...) } \arguments{ \item{mn}{(MNode) The Member Node} @@ -22,6 +22,8 @@ nested under the package.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. This speeds up the function, especially when `data_pids` has many elements.} + +\item{...}{Additional arguments that can be passed into \code{\link{publish_object}}} } \value{ (character) The created resource map's PID diff --git a/man/eml_project.Rd b/man/eml_project.Rd index fa862ff..f0a7b9f 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -9,7 +9,7 @@ eml_project(title, personnelList, abstract = NULL, funding = NULL, relatedProject = NULL) } \arguments{ -\item{title}{(character) Title of the project (Required).} +\item{title}{(character) Title of the project (Required). May have multiple titles.} \item{personnelList}{(list of personnel) Personnel involved with the project.} @@ -34,7 +34,7 @@ fully fleshed out. Need to pass these objects in directly if you want to use them. } \examples{ -proj <- eml_project("Some title", +proj <- eml_project(c("Some title", "A second title if needed"), c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), c("Abstract paragraph 1", "Abstract paragraph 2"), "Funding Agency: Award Number 12345") diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 28e71b0..3aaf5ba 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -27,13 +27,13 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, \item{use_doi}{(logical) Generate and use a DOI as the identifier for the updated metadata object.} -\item{parent_resmap_pid}{(character) Optional. PID of a parent package to be updated.} +\item{parent_resmap_pid}{(character) Optional. PID of a parent package to be updated. Not optional if a parent package exists.} -\item{parent_metadata_pid}{(character) Optional. Identifier for the metadata document of the parent package.} +\item{parent_metadata_pid}{(character) Optional. Identifier for the metadata document of the parent package. Not optional if a parent package exists.} -\item{parent_data_pids}{(character) Optional. Identifier for the data objects of the parent package.} +\item{parent_data_pids}{(character) Optional. Identifier for the data objects of the parent package. Not optional if the parent package contains data objects.} -\item{parent_child_pids}{(character) Optional. Resource map identifier(s) of child packages in the parent package.} +\item{parent_child_pids}{(character) Optional. Resource map identifier(s) of child packages in the parent package. \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages.} \item{public}{(logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). This applies to the new metadata PID and its resource map and data object. From 4193b19d1f57935b5aea55af21278590b6d754e1 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 25 Apr 2018 17:41:22 -0700 Subject: [PATCH 117/318] added if statement to setting public read --- R/editing.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index 60396b7..e3a1b77 100644 --- a/R/editing.R +++ b/R/editing.R @@ -33,11 +33,11 @@ #'} publish_object <- function(mn, path, - format_id=NULL, - pid=NULL, - sid=NULL, - clone_pid=NULL, - public=TRUE) { + format_id = NULL, + pid = NULL, + sid = NULL, + clone_pid = NULL, + public = TRUE) { stopifnot(is(mn, "MNode")) stopifnot(file.exists(path)) @@ -102,7 +102,9 @@ publish_object <- function(mn, } sysmeta <- add_admin_group_access(sysmeta) - sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") + if(public == TRUE){ + sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") + } sysmeta@fileName <- basename(path) dataone::createObject(mn, From a938752b0df3e39b25917f2274214d041043e330 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 12:08:48 -0700 Subject: [PATCH 118/318] Add EML object as an alternative option for metadata_path in publish_update --- R/editing.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index e3a1b77..40bf436 100644 --- a/R/editing.R +++ b/R/editing.R @@ -233,7 +233,7 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. Not optional if the parent package contains data objects. #' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages. #' @param child_pids (character) Optional. Child packages resource map PIDs. -#' @param metadata_path (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used. +#' @param metadata_path (character or eml) Optional. An eml class object or a path to a metadata file to update with. If this is not set, the existing metadata document will be used. #' @param public (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). #' This applies to the new metadata PID and its resource map and data object. #' access policies are not affected. @@ -367,6 +367,15 @@ publish_update <- function(mn, # Get the metadata doc message("Getting metadata from the MN.") eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) + + } if(class(metadata_path) == "eml") { + # If an eml object is provided, use it directly after validating + if(!eml_validate(metadata_path)){ + stop("The EML object is not valid.") + } + + eml <- metadata_path + } else { # Alternatively, read an edited metadata file from disk if provided if (!file.exists(metadata_path)) { From c8287fbd8e2a4c2d3e933996a2047a50c9cc6d12 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 13:32:21 -0700 Subject: [PATCH 119/318] fixed ifelse --- R/editing.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index 40bf436..c1edeb1 100644 --- a/R/editing.R +++ b/R/editing.R @@ -368,13 +368,13 @@ publish_update <- function(mn, message("Getting metadata from the MN.") eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) - } if(class(metadata_path) == "eml") { - # If an eml object is provided, use it directly after validating - if(!eml_validate(metadata_path)){ - stop("The EML object is not valid.") - } + } else if(class(metadata_path) == "eml") { + # If an eml object is provided, use it directly after validating + if(!eml_validate(metadata_path)){ + stop("The EML object is not valid.") + } - eml <- metadata_path + eml <- metadata_path } else { # Alternatively, read an edited metadata file from disk if provided From 57812840c29fe6c429432c89f0499626ae0e2ed0 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 13:32:28 -0700 Subject: [PATCH 120/318] updated man --- man/publish_update.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 3aaf5ba..76f4508 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -21,7 +21,7 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, \item{child_pids}{(character) Optional. Child packages resource map PIDs.} -\item{metadata_path}{(character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used.} +\item{metadata_path}{(character or eml) Optional. An eml class object or a path to a metadata file to update with. If this is not set, the existing metadata document will be used.} \item{identifier}{(character) Manually specify the identifier for the new metadata object.} From 2710025b1079ccb95d295de58098ab8a7d16fcfb Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 14:06:18 -0700 Subject: [PATCH 121/318] Try uncaching packages to see if devel/new release will work https://stat.ethz.ch/pipermail/r-sig-debian/2018-April/002829.html https://github.com/r-spatial/sf/issues/729 https://github.com/Rdatatable/data.table/issues/2801 --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a4c91ba..da0bf51 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ r: - release - devel sudo: false -cache: packages addons: apt: packages: From ac32bf34c536aa33eaa221d8b46211165f564330 Mon Sep 17 00:00:00 2001 From: isteves <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 15:27:58 -0700 Subject: [PATCH 122/318] Try `update: true` Added `cache: packages` back in --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index da0bf51..03dd703 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,8 +6,10 @@ r: - release - devel sudo: false +cache: packages addons: apt: + update: true packages: - librdf0-dev - libnetcdf-dev From ccdfa2082d240ed892cb398c2a9e0f112be6a060 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 10 May 2018 15:02:48 -0700 Subject: [PATCH 123/318] Reduce dependencies on ncdf4 and yaml @isteves helpfully noted that the Travis CI builds started failing a while ago in https://github.com/NCEAS/arcticdatautils/issues/86. The failures indicate issues with Ubuntu PPAs and nothing to do with our tools. Though this happened around the time R 3.5.0 was released, which is a backwards incompatible R release indue to the introduction of ALTREP. This is: 1. My attempt to fix that so all the Travis builds pass 2. Reduce the deps arcticdatautils requires to install. ncdf4 and yaml are not packages that _need_ to be installed to get started using it so they should be in Suggests --- .travis.yml | 2 -- DESCRIPTION | 4 ++-- R/attributes.R | 6 ++++++ R/environment.R | 6 ++++++ R/util.R | 6 ++++++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index a4c91ba..835edf6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,5 +11,3 @@ addons: apt: packages: - librdf0-dev - - libnetcdf-dev - - r-cran-ncdf4 diff --git a/DESCRIPTION b/DESCRIPTION index 67fdc2e..ac8d1f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,12 +21,10 @@ Imports: EML, httr, methods, - ncdf4, stringr, stringi, tools, uuid, - yaml, xml2, XML License: MIT + file LICENSE @@ -35,7 +33,9 @@ Suggests: testthat, humaniformat, knitr, + ncdf4, rmarkdown, + yaml, xslt RoxygenNote: 6.0.1 VignetteBuilder: knitr diff --git a/R/attributes.R b/R/attributes.R index 8042a40..fed08e6 100644 --- a/R/attributes.R +++ b/R/attributes.R @@ -12,6 +12,12 @@ get_ncdf4_attributes <- function(nc) { stopifnot(is(nc, "ncdf4") || file.exists(nc)) + if (!requireNamespace("ncdf4")) { + stop(call. = FALSE, + "The package 'ncdf4' must be installed to run this function. ", + "Please install it and try again.") + } + # Read the file in if `nc` is a character vector if (is.character(nc)) { nc <- ncdf4::nc_open(nc) diff --git a/R/environment.R b/R/environment.R index 8ff1c12..66c32aa 100644 --- a/R/environment.R +++ b/R/environment.R @@ -38,6 +38,12 @@ env_get <- function() { #' #' env_load <- function(name=NULL, path=NULL, skip_mn=FALSE) { + if (!requireNamespace("yaml")) { + stop(call. = FALSE, + "The package 'yaml' must be installed to run this function. ", + "Please install it and try again.") + } + # Determine the environment to load if (is.null(name)) { name <- env_get() diff --git a/R/util.R b/R/util.R index eaa5001..2ef7984 100644 --- a/R/util.R +++ b/R/util.R @@ -109,6 +109,12 @@ get_netcdf_format_id <- function(path) { nchar(path) > 0, file.exists(path)) + if (!requireNamespace("ncdf4")) { + stop(call. = FALSE, + "The package 'ncdf4' must be installed to run this function. ", + "Please install it and try again.") + } + # Try to open the file, capturing errors cdf_file <- try({ ncdf4::nc_open(path) From 431593e088e234919e4bf88300d501f62227a62f Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 10 May 2018 15:26:04 -0700 Subject: [PATCH 124/318] Turn off forcing suggests in travis build --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 835edf6..fcd9c84 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,8 @@ r: - release - devel sudo: false +env: + - _R_CHECK_FORCE_SUGGESTS_=0 cache: packages addons: apt: From 9e0ea67708deccee4c3dd635e0b843b72cbedef6 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 10 May 2018 15:42:41 -0700 Subject: [PATCH 125/318] Move dplyr to Suggests --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ac8d1f5..1cddca5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ Depends: R (>= 3.2.3) Imports: digest, - dplyr, dataone, datapack, EML, @@ -30,6 +29,7 @@ Imports: License: MIT + file LICENSE LazyData: true Suggests: + dplyr, testthat, humaniformat, knitr, From 77ef74f042374cc334efe1d60f8a745ac71f1a3c Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 10 May 2018 15:52:10 -0700 Subject: [PATCH 126/318] Incorporate carl's great ideas https://github.com/NCEAS/arcticdatautils/pull/88#issuecomment-388208858 --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fcd9c84..aee4c81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,11 +5,13 @@ r: - oldrel - release - devel +r_packages: ncdf4 sudo: false -env: - - _R_CHECK_FORCE_SUGGESTS_=0 cache: packages addons: apt: + update: true packages: - librdf0-dev + - libnetcdf-dev + - netcdf-bin From 0554abe993ee1e09a0449d5bb26a8ee3aa5d9412 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 12:08:48 -0700 Subject: [PATCH 127/318] Add EML object as an alternative option for metadata_path in publish_update --- R/editing.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index e3a1b77..40bf436 100644 --- a/R/editing.R +++ b/R/editing.R @@ -233,7 +233,7 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. Not optional if the parent package contains data objects. #' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages. #' @param child_pids (character) Optional. Child packages resource map PIDs. -#' @param metadata_path (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used. +#' @param metadata_path (character or eml) Optional. An eml class object or a path to a metadata file to update with. If this is not set, the existing metadata document will be used. #' @param public (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). #' This applies to the new metadata PID and its resource map and data object. #' access policies are not affected. @@ -367,6 +367,15 @@ publish_update <- function(mn, # Get the metadata doc message("Getting metadata from the MN.") eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) + + } if(class(metadata_path) == "eml") { + # If an eml object is provided, use it directly after validating + if(!eml_validate(metadata_path)){ + stop("The EML object is not valid.") + } + + eml <- metadata_path + } else { # Alternatively, read an edited metadata file from disk if provided if (!file.exists(metadata_path)) { From 24b3c201e83d8389bce8b53fff1ea8a3893d04aa Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 13:32:21 -0700 Subject: [PATCH 128/318] fixed ifelse --- R/editing.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index 40bf436..c1edeb1 100644 --- a/R/editing.R +++ b/R/editing.R @@ -368,13 +368,13 @@ publish_update <- function(mn, message("Getting metadata from the MN.") eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) - } if(class(metadata_path) == "eml") { - # If an eml object is provided, use it directly after validating - if(!eml_validate(metadata_path)){ - stop("The EML object is not valid.") - } + } else if(class(metadata_path) == "eml") { + # If an eml object is provided, use it directly after validating + if(!eml_validate(metadata_path)){ + stop("The EML object is not valid.") + } - eml <- metadata_path + eml <- metadata_path } else { # Alternatively, read an edited metadata file from disk if provided From f33cf75787730e660306d481a3adaf45be4e4f25 Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 13:32:28 -0700 Subject: [PATCH 129/318] updated man --- man/publish_update.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 3aaf5ba..76f4508 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -21,7 +21,7 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, \item{child_pids}{(character) Optional. Child packages resource map PIDs.} -\item{metadata_path}{(character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used.} +\item{metadata_path}{(character or eml) Optional. An eml class object or a path to a metadata file to update with. If this is not set, the existing metadata document will be used.} \item{identifier}{(character) Manually specify the identifier for the new metadata object.} From 8346ada8973ba78508dbb28b0ca77a8b06c59b4c Mon Sep 17 00:00:00 2001 From: Irene <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 14:06:18 -0700 Subject: [PATCH 130/318] Try uncaching packages to see if devel/new release will work https://stat.ethz.ch/pipermail/r-sig-debian/2018-April/002829.html https://github.com/r-spatial/sf/issues/729 https://github.com/Rdatatable/data.table/issues/2801 --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index aee4c81..ee18f4c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,6 @@ r: - devel r_packages: ncdf4 sudo: false -cache: packages addons: apt: update: true From 96ae41fce845f5262c418431c78cc49f0f4c6d4a Mon Sep 17 00:00:00 2001 From: isteves <25118334+isteves@users.noreply.github.com> Date: Wed, 9 May 2018 15:27:58 -0700 Subject: [PATCH 131/318] Try `update: true` Added `cache: packages` back in --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index ee18f4c..aee4c81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ r: - devel r_packages: ncdf4 sudo: false +cache: packages addons: apt: update: true From b1a7fc2d04c14f0e80fe0837a98118a86cdbc9e7 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 14 May 2018 11:22:04 -0700 Subject: [PATCH 132/318] Update maintenance.md with more details on releases --- MAINTENANCE.md | 51 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/MAINTENANCE.md b/MAINTENANCE.md index 82bdc9a..34ca32c 100644 --- a/MAINTENANCE.md +++ b/MAINTENANCE.md @@ -7,30 +7,53 @@ It's a work in progress so expect it to change and, hopefully, get better and mo ## Releases -### When to release? +### Why release + +So users can use new features. Ideally, no one is installing from source, i.e., `remotes::install_github("nceas/arcticdatautils")`. + +### When to release Whenever, really. -Since this package isn't on CRAN, we can release it as much as we like and we're only bothering people we know and those people can tell us how displeased they are with how often we're releasing this package in person. +Since this package isn't on CRAN, you can release it as much as you like or need to. +You might want to release either when: + +- You accrue enough changes to write an interest release announcement ("Hey, look at his cool new release that fixes annoying bug X!") +- You accrue at least one change and users of the package need the fix immediately + +### How to release -### How to release? +There are a few steps in releasing a new version of the package: -- Increment the `Version` tag in the `DESCRIPTION` file to the appropriate next version. +1. Increment the `Version` tag in the `DESCRIPTION` file to the appropriate next version. + + What this is set to specifies what the user sees from R when they run `sessionInfo()` or `devtools::session_info()` and tell you what version they have installed. This package tries to use [Semantic Versioning](https://semver.org/) (semver) which can be summarized in three bullets: - + > Given a version number MAJOR.MINOR.PATCH, increment the: > > - MAJOR version when you make incompatible API changes, > - MINOR version when you add functionality in a backwards-compatible manner, and > - PATCH version when you make backwards-compatible bug fixes. - + Note: A common mistake people make is thinking that the next version after 0.9 is 1.0, but it could be 0.10, then 0.11, and so on. -- Make and push a commit with just that diff. + `git` and GitHub helps us a lot with determining _what_ has changed so we can determine what the next release version number should be. We can compare a previous release to `master` to get a list of all commits what were made between that release and now: - Example here: https://github.com/NCEAS/arcticdatautils/commit/87f91179f4820ecdb283672e2179984d4f6cd334 + > https://github.com/NCEAS/arcticdatautils/compare/v0.6.3...master -- Go to [the releases tab](https://github.com/NCEAS/arcticdatautils/releases) and click "Draft a new release" + +- Make and push a commit with just that diff, + + ```sh + git add DESCRIPTION + git commit -m "vx.y.z" + git push + ``` + + [Example here](https://github.com/NCEAS/arcticdatautils/commit/87f91179f4820ecdb283672e2179984d4f6cd334). + +2. Go to [the releases tab](https://github.com/NCEAS/arcticdatautils/releases) and click "Draft a new release" - Tag version and Release title should match v{MAJOR}.{MINOR}.{PATCH}, e.g., v6.4.5 - The release description should include: @@ -39,7 +62,15 @@ Since this package isn't on CRAN, we can release it as much as we like and we're Example: https://github.com/NCEAS/arcticdatautils/releases/tag/v0.6.2 -- You're done, now go tell people to upgrade! + - Make liberal use of GitHub's Compare feature: [Example](https://github.com/NCEAS/arcticdatautils/compare/v0.6.3...master) comparing `v0.6.3` to `master`. + +You're done, now go tell people to upgrade! + +```r +remotes::install_github("nceas/arcticdatautils@*release") +``` + +Note: `@*release` specifies that the latest release should be installed. ## Pull Requests From 3555412f3e8152b7848cf92483257d5d54bf16af Mon Sep 17 00:00:00 2001 From: Adam Reevesman Date: Thu, 17 May 2018 16:59:45 -0700 Subject: [PATCH 133/318] created pid_to_eml_entity function --- NAMESPACE | 1 + R/eml.R | 142 ++++++++++++++++++++++++++++++++- man/pid_to_eml_entity.Rd | 50 ++++++++++++ man/pid_to_eml_other_entity.Rd | 6 +- 4 files changed, 193 insertions(+), 6 deletions(-) create mode 100644 man/pid_to_eml_entity.Rd diff --git a/NAMESPACE b/NAMESPACE index 2c57263..70a142d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,7 @@ export(new_uuid) export(object_exists) export(parse_resource_map) export(pid_to_eml_datatable) +export(pid_to_eml_entity) export(pid_to_eml_other_entity) export(pid_to_eml_physical) export(publish_object) diff --git a/R/eml.R b/R/eml.R index d6df844..ee6f7a9 100644 --- a/R/eml.R +++ b/R/eml.R @@ -3,6 +3,146 @@ #' Helpers for creating EML. +#' Create EML dataTable or otherEntity objects for a set of PIDs +#' +#' Note this is a useful alternative to pid_to_eml_datatable and +#' pid_to_eml_other_entity because it can create multiple of objects at once. +#' +#' @param mn (MNode) Member Node where the PID is associated with an object. +#' @param pids (character) The PIDs of the objects to create the entities for. Either a vector or list of names of the objects. Note that \code{pids[i]} or \code{pids[[i]]} must be a character vector of length one. +#' @param entityType (character) What kind of objects to create from the input. Either "dataTable" or "otherEntity". +#' @param names (character) Either a vector or list of names of the objects. Note that \code{names[i]} or \code{names[[i]]} must be \code{NA} or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}. +#' @param descriptions (character) Either a vector or list of names of the objects. Note that \code{descriptions[i]} or \code{descriptions[[i]]} must be \code{NA} if no entityDescription is needed for a particular pid or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}. +#' @param attributes (list) Required if entityType is set to "dataTable". \code{NULL} if entityType is set to "otherEntity" and no attribute tables are needed. A list of attribute tables. Note that \code{attributes[[i]]} must be a dataframe or possibly \code{NA} in the case where \code{entityType} is set to "otherEntity" and must correspond to \code{pids[i]} or \code{pids[[i]]}. +#' @param factors (list) A list of enumerated domain tables. \code{NULL} if no enumerated domains exist. Note that \code{factors[[i]]} must be a dataframe or \code{NA} in the case where enumerated domains are not present in the data. Must correspond to \code{pids[i]} or \code{pids[[i]]}. +#' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run. +#' +#' @return (list) The otherEntity or dataTable object(s) +#' @export +#' +#' @examples +#' \dontrun{ +#' #Generate EML otherEntities for four pids +#' TEST_pid_to_eml_entity(mn, +#' entityType = 'otherEntity', +#' pids = c('pid1', 'pid2', 'pid3', 'pid4'), +#' names = c('name1', 'name2', 'name3', 'name4'), +#' descriptions = c('description1', 'description2', +#' 'description3', 'description4'), +#' attributes = list(atTbl1, atTbl2, atTbl3, atTbl4), +#' factors = rep(factors1, factors2, NA, NA))) +#' } +pid_to_eml_entity <- function(mn, + pids, + entityType, + names, + descriptions = NULL, + attributes = NULL, + factors = NULL, + validateAttributes = TRUE) { + + stopifnot(is(mn, "MNode")) + stopifnot(is.character(pids), + all(nchar(pids)) > 0) + + if ( (length(pids) != length(attributes)) & (!is.null(attributes)) ){ + stop(call. = FALSE, + "'attributes' must be NULL or have same length as pids") + } + if ( (length(pids) != length(factors)) & (!is.null(factors)) ){ + stop(call. = FALSE, + "'factors' must be NULL or have same length as pids") + } + if ( (length(pids) != length(names)) & (!is.null(names)) ){ + stop(call. = FALSE, + "'names' must be NULL or have same length as pids") + } + if ( (length(pids) != length(descriptions)) & (!is.null(descriptions)) ){ + stop(call. = FALSE, + "'descriptions' must be NULL or have same length as pids") + } + + + work <- function(i, some_list){ + + mn <- some_list$mn + entity_type <- some_list$entity_type + pid <- some_list$pid[[i]] + name <- some_list$name[[i]] + + description = some_list$description[[i]] + + + attribute_table <- l$attribute_table[[i]] + factors_table <- l$factors_table[[i]] + + entity <- new(Class = entity_type) + entity@scope <- new("xml_attribute", "document") + entity@physical@.Data[[1]] <- pid_to_eml_physical(mn, pid)[[1]] + if (entityType == "otherEntity"){ + entity@entityType <- "other" + } + + if (is.na(name)) { + stop(call. = FALSE, + paste("'Name' of entity ", i," must be specified in the function call", sep = '')) + } + + entity@entityName <- name + + if (!is.na(description)) { + entity@entityDescription <- description + } + + if(class(attribute_table) == "data.frame") { + stopifnot(is.data.frame(attribute_table)) + + if (class(factors_table) != "data.frame") { + attribute_list <- set_attributes(attribute_table) + } + else { + stopifnot(is.data.frame(factors_table)) + attribute_list <- set_attributes(attribute_table, factors_table) + } + + if (validateAttributes == TRUE) { + stopifnot(eml_validate_attributes(attribute_list)) + } + + entity@attributeList <- attribute_list + } + + else { + if (entity_type == "dataTable"){ + stop(call. = FALSE, + "An attribute table must be provided when creating a dataTable") + } + } + + entity + } + + l <- list(mn = mn, + entity_type = entityType, + pid = as.list(pids), + name = as.list(names), + description = ifelse(rep(is.null(descriptions),length(pids)), + rep(list(NA), + length(pids)), + as.list(descriptions)), + attribute_table = ifelse(rep(is.null(attributes),length(pids)), + rep(list(NA), + length(pids)), + attributes), + factors_table = ifelse(rep(is.null(factors),length(pids)), + rep(list(NA), + length(pids)), + factors)) + + lapply(seq_along(as.list(pids)), work, some_list = l) +} + + #' Create EML otherEntity objects for a set of PIDs #' #' Note this is a wrapper around sysmeta_to_other_entity which handles the task of @@ -660,7 +800,7 @@ eml_project <- function(title, # Title titles <- lapply(title, function(x) { as(x, "title") }) project@title <- as(titles, "ListOftitle") - + # Personnel if(!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { stop(call. = FALSE, diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd new file mode 100644 index 0000000..8faa7e1 --- /dev/null +++ b/man/pid_to_eml_entity.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{pid_to_eml_entity} +\alias{pid_to_eml_entity} +\title{eml.R} +\usage{ +pid_to_eml_entity(mn, pids, entityType, names, descriptions = NULL, + attributes = NULL, factors = NULL, validateAttributes = TRUE) +} +\arguments{ +\item{mn}{(MNode) Member Node where the PID is associated with an object.} + +\item{pids}{(character) The PIDs of the objects to create the entities for. Either a vector or list of names of the objects. Note that \code{pids[i]} or \code{pids[[i]]} must be a character vector of length one.} + +\item{entityType}{(character) What kind of objects to create from the input. Either "dataTable" or "otherEntity".} + +\item{names}{(character) Either a vector or list of names of the objects. Note that \code{names[i]} or \code{names[[i]]} must be \code{NA} or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}.} + +\item{descriptions}{(character) Either a vector or list of names of the objects. Note that \code{descriptions[i]} or \code{descriptions[[i]]} must be \code{NA} if no entityDescription is needed for a particular pid or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}.} + +\item{attributes}{(list) Required if entityType is set to "dataTable". \code{NULL} if entityType is set to "otherEntity" and no attribute tables are needed. A list of attribute tables. Note that \code{attributes[[i]]} must be a dataframe or possibly \code{NA} in the case where \code{entityType} is set to "otherEntity" and must correspond to \code{pids[i]} or \code{pids[[i]]}.} + +\item{factors}{(list) A list of enumerated domain tables. \code{NULL} if no enumerated domains exist. Note that \code{factors[[i]]} must be a dataframe or \code{NA} in the case where enumerated domains are not present in the data. Must correspond to \code{pids[i]} or \code{pids[[i]]}.} + +\item{validateAttributes}{(logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run.} +} +\value{ +(list) The otherEntity or dataTable object(s) +} +\description{ +Helpers for creating EML. +Create EML dataTable or otherEntity objects for a set of PIDs +} +\details{ +Note this is a useful alternative to pid_to_eml_datatable and +pid_to_eml_other_entity because it can create multiple of objects at once. +} +\examples{ +\dontrun{ +#Generate EML otherEntities for four pids +TEST_pid_to_eml_entity(mn, + entityType = 'otherEntity', + pids = c('pid1', 'pid2', 'pid3', 'pid4'), + names = c('name1', 'name2', 'name3', 'name4'), + descriptions = c('description1', 'description2', + 'description3', 'description4'), + attributes = list(atTbl1, atTbl2, atTbl3, atTbl4), + factors = rep(factors1, factors2, NA, NA))) +} +} diff --git a/man/pid_to_eml_other_entity.Rd b/man/pid_to_eml_other_entity.Rd index d371226..1ec482f 100644 --- a/man/pid_to_eml_other_entity.Rd +++ b/man/pid_to_eml_other_entity.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_other_entity} \alias{pid_to_eml_other_entity} -\title{eml.R} +\title{Create EML otherEntity objects for a set of PIDs} \usage{ pid_to_eml_other_entity(mn, pids) } @@ -15,10 +15,6 @@ pid_to_eml_other_entity(mn, pids) (list of otherEntity) The otherEntity object(s) } \description{ -Helpers for creating EML. -Create EML otherEntity objects for a set of PIDs -} -\details{ Note this is a wrapper around sysmeta_to_other_entity which handles the task of creating the EML otherEntity. } From 879edbcc12f49536855ac888010b9f84daf5ccce Mon Sep 17 00:00:00 2001 From: Adam Reevesman Date: Thu, 17 May 2018 17:15:19 -0700 Subject: [PATCH 134/318] cleaned up whitespace --- R/eml.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index ee6f7a9..0f93e88 100644 --- a/R/eml.R +++ b/R/eml.R @@ -62,17 +62,13 @@ pid_to_eml_entity <- function(mn, "'descriptions' must be NULL or have same length as pids") } - work <- function(i, some_list){ mn <- some_list$mn entity_type <- some_list$entity_type pid <- some_list$pid[[i]] name <- some_list$name[[i]] - description = some_list$description[[i]] - - attribute_table <- l$attribute_table[[i]] factors_table <- l$factors_table[[i]] From 75dddb383ed3fb6c2b1a4479f9b680dd483bc244 Mon Sep 17 00:00:00 2001 From: Jesse Goldstein Date: Fri, 25 May 2018 10:11:12 -0700 Subject: [PATCH 135/318] updated to reflect newest version: v0.6.4 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1cddca5..1691b4d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: arcticdatautils Title: Utilities for the Arctic Data Center -Version: 0.6.3.9000 +Version: 0.6.4 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), From d9b00b20fc6f5df7a06431b9eb7d87f3971fd530 Mon Sep 17 00:00:00 2001 From: maier-m Date: Fri, 25 May 2018 13:04:36 -0700 Subject: [PATCH 136/318] updated example --- NAMESPACE | 4 - R/eml.R | 334 ++++++----------------------- man/pid_to_eml_datatable.Rd | 18 +- man/pid_to_eml_entity.Rd | 42 ++-- man/pid_to_eml_other_entity.Rd | 15 +- man/set_other_entities.Rd | 16 +- man/sysmeta_to_eml_other_entity.Rd | 15 +- tests/testthat/test_eml.R | 65 +++--- 8 files changed, 124 insertions(+), 385 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 70a142d..869a3fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,9 +40,7 @@ export(mdq_run) export(new_uuid) export(object_exists) export(parse_resource_map) -export(pid_to_eml_datatable) export(pid_to_eml_entity) -export(pid_to_eml_other_entity) export(pid_to_eml_physical) export(publish_object) export(publish_update) @@ -50,12 +48,10 @@ export(remove_public_read) export(set_abstract) export(set_access) export(set_file_name) -export(set_other_entities) export(set_public_read) export(set_rights_and_access) export(set_rights_holder) export(show_indexing_status) -export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) export(update_object) export(update_resource_map) diff --git a/R/eml.R b/R/eml.R index 0f93e88..212138a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -3,172 +3,88 @@ #' Helpers for creating EML. -#' Create EML dataTable or otherEntity objects for a set of PIDs -#' -#' Note this is a useful alternative to pid_to_eml_datatable and -#' pid_to_eml_other_entity because it can create multiple of objects at once. +#' Create EML entity from a DataONE pid #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PIDs of the objects to create the entities for. Either a vector or list of names of the objects. Note that \code{pids[i]} or \code{pids[[i]]} must be a character vector of length one. -#' @param entityType (character) What kind of objects to create from the input. Either "dataTable" or "otherEntity". -#' @param names (character) Either a vector or list of names of the objects. Note that \code{names[i]} or \code{names[[i]]} must be \code{NA} or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}. -#' @param descriptions (character) Either a vector or list of names of the objects. Note that \code{descriptions[i]} or \code{descriptions[[i]]} must be \code{NA} if no entityDescription is needed for a particular pid or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}. -#' @param attributes (list) Required if entityType is set to "dataTable". \code{NULL} if entityType is set to "otherEntity" and no attribute tables are needed. A list of attribute tables. Note that \code{attributes[[i]]} must be a dataframe or possibly \code{NA} in the case where \code{entityType} is set to "otherEntity" and must correspond to \code{pids[i]} or \code{pids[[i]]}. -#' @param factors (list) A list of enumerated domain tables. \code{NULL} if no enumerated domains exist. Note that \code{factors[[i]]} must be a dataframe or \code{NA} in the case where enumerated domains are not present in the data. Must correspond to \code{pids[i]} or \code{pids[[i]]}. -#' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run. -#' -#' @return (list) The otherEntity or dataTable object(s) +#' @param pid (character) The PID of the object to create the sub-tree for. +#' @param entityType (character) What kind of objects to create from the input. Either "dataTable", +#' "spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity". +#' @param ... (optional) Additional arguments to be passed to \code{new(entityType, ...)}. See example +#' +#' @return (list) The entity object #' @export #' #' @examples #' \dontrun{ -#' #Generate EML otherEntities for four pids -#' TEST_pid_to_eml_entity(mn, -#' entityType = 'otherEntity', -#' pids = c('pid1', 'pid2', 'pid3', 'pid4'), -#' names = c('name1', 'name2', 'name3', 'name4'), -#' descriptions = c('description1', 'description2', -#' 'description3', 'description4'), -#' attributes = list(atTbl1, atTbl2, atTbl3, atTbl4), -#' factors = rep(factors1, factors2, NA, NA))) +#' #Generate EML otherEntity +#' pid_to_eml_entity(mn, +#' pid, +#' entityType = "otherEntity", +#' entityName = "Entity Name", +#' entityDescription = "Description about entity") +#' #' } pid_to_eml_entity <- function(mn, - pids, - entityType, - names, - descriptions = NULL, - attributes = NULL, - factors = NULL, - validateAttributes = TRUE) { + pid, + entityType = "otherEntity", + ...) { stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - all(nchar(pids)) > 0) - - if ( (length(pids) != length(attributes)) & (!is.null(attributes)) ){ - stop(call. = FALSE, - "'attributes' must be NULL or have same length as pids") - } - if ( (length(pids) != length(factors)) & (!is.null(factors)) ){ - stop(call. = FALSE, - "'factors' must be NULL or have same length as pids") - } - if ( (length(pids) != length(names)) & (!is.null(names)) ){ - stop(call. = FALSE, - "'names' must be NULL or have same length as pids") - } - if ( (length(pids) != length(descriptions)) & (!is.null(descriptions)) ){ - stop(call. = FALSE, - "'descriptions' must be NULL or have same length as pids") - } - - work <- function(i, some_list){ - - mn <- some_list$mn - entity_type <- some_list$entity_type - pid <- some_list$pid[[i]] - name <- some_list$name[[i]] - description = some_list$description[[i]] - attribute_table <- l$attribute_table[[i]] - factors_table <- l$factors_table[[i]] - - entity <- new(Class = entity_type) - entity@scope <- new("xml_attribute", "document") - entity@physical@.Data[[1]] <- pid_to_eml_physical(mn, pid)[[1]] - if (entityType == "otherEntity"){ - entity@entityType <- "other" - } - - if (is.na(name)) { - stop(call. = FALSE, - paste("'Name' of entity ", i," must be specified in the function call", sep = '')) - } + stopifnot(is.character(pid), + nchar(pid) > 0) - entity@entityName <- name + stopifnot(entityType %in% c("dataTable", + "spatialRaster", + "spatialVector", + "storedProcedure", + "view", + "otherEntity")) - if (!is.na(description)) { - entity@entityDescription <- description - } + systmeta <- getSystemMetadata(mn, pid) + physical <- sysmeta_to_eml_physical(systmeta) - if(class(attribute_table) == "data.frame") { - stopifnot(is.data.frame(attribute_table)) + # Create entity + entity <- new(entityType, + physical = pid_to_eml_physical(mn, pid), + ...) - if (class(factors_table) != "data.frame") { - attribute_list <- set_attributes(attribute_table) - } - else { - stopifnot(is.data.frame(factors_table)) - attribute_list <- set_attributes(attribute_table, factors_table) - } + # Set entity slots + if (length(slot(entity, "id")) == 0) { + entity@id <- new("xml_attribute", systmeta@identifier) + } - if (validateAttributes == TRUE) { - stopifnot(eml_validate_attributes(attribute_list)) - } + if (length(slot(entity, "scope")) == 0) { + entity@scope <- new("xml_attribute", "document") + } - entity@attributeList <- attribute_list - } + if (length(slot(entity, "entityName")) == 0) { - else { - if (entity_type == "dataTable"){ - stop(call. = FALSE, - "An attribute table must be provided when creating a dataTable") - } + if (!is.na(systmeta@fileName)) { + entity@entityName <- new("entityName", systmeta@fileName) } + } - entity + if (entityType == "otherEntity" && length(slot(entity, "entityType")) == 0) { + entity@entityType <- "Other" } - l <- list(mn = mn, - entity_type = entityType, - pid = as.list(pids), - name = as.list(names), - description = ifelse(rep(is.null(descriptions),length(pids)), - rep(list(NA), - length(pids)), - as.list(descriptions)), - attribute_table = ifelse(rep(is.null(attributes),length(pids)), - rep(list(NA), - length(pids)), - attributes), - factors_table = ifelse(rep(is.null(factors),length(pids)), - rep(list(NA), - length(pids)), - factors)) - - lapply(seq_along(as.list(pids)), work, some_list = l) + return(entity) } -#' Create EML otherEntity objects for a set of PIDs -#' -#' Note this is a wrapper around sysmeta_to_other_entity which handles the task of -#' creating the EML otherEntity. +#' This function is deprecated. See \link{pid_to_eml_entity}. #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pids (character) The PID of the object to create the sub-tree for. #' -#' @return (list of otherEntity) The otherEntity object(s) -#' @export -#' -#' @examples -#' \dontrun{ -#' # Generate EML otherEntity objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' pid_to_other_entity(mn, pkg$data) -#' } pid_to_eml_other_entity <- function(mn, pids) { - stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - all(nchar(pids)) > 0) - - sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) - sysmeta_to_eml_other_entity(sysmeta) + .Deprecated(new = "pid_to_eml_entity", + package = "arcticdtautils", + old = "pid_to_eml_other_entity") } -#' Create an EML code\code{dataTable} object for a given PID. -#' -#' This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}. +#' This function is deprecated. See \link{pid_to_eml_entity}. #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pid (character) The PID of the object to create the \code{dataTable} for. @@ -176,19 +92,8 @@ pid_to_eml_other_entity <- function(mn, pids) { #' @param factors (data.frame) Optional data frame of enumerated attribute values (factors). Follows the convention in \link[EML]{set_attributes}. #' @param name (character) Optional field to specify \code{entityName}, otherwise will be extracted from system metadata. #' @param description (character) Optional field to specify \code{entityDescription}, otherwise will match name. -#' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run. +#' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validatio #' -#' @return (dataTable) The \code{dataTable} object -#' @export -#' -#' @examples -#' \dontrun{ -#' # Generate a dataTable for a given pid -#' attributes <- create_dummy_attributes_dataframe(10) -#' name <- "1234.csv" -#' description <- "A description of this entity." -#' dataTable <- pid_to_eml_datatable(mn, pid, attributes, name=name, description=description) -#' } pid_to_eml_datatable <- function(mn, pid, attributes = NULL, @@ -196,45 +101,9 @@ pid_to_eml_datatable <- function(mn, name = NULL, description = NULL, validateAttributes = TRUE) { - stopifnot(is(mn, "MNode")) - stopifnot(is.character(pid), - nchar(pid) > 0) - - dataTable <- new("dataTable", physical = pid_to_eml_physical(mn, pid)) - - if(!is.null(attributes)) { - stopifnot(is.data.frame(attributes)) - - if (is.null(factors)) { - attributes <- set_attributes(attributes) - } else { - stopifnot(is.data.frame(factors)) - attributes <- set_attributes(attributes, factors) - } - - if (validateAttributes == TRUE) { - stopifnot(eml_validate_attributes(attributes)) - } - - dataTable@attributeList <- attributes - } - - if (is.null(name)) { - name <- getSystemMetadata(mn, pid)@fileName - - if (is.na(name)) { - stop(call. = FALSE, - "'Name' must either be specified in the function call or must exist in the system metadata.") - } - } - - dataTable@entityName <- name - - if (!is.null(description)) { - dataTable@entityDescription <- description - } - - dataTable + .Deprecated(new = "pid_to_eml_entity", + package = "arcticdtautils", + old = "pid_to_eml_other_entity") } @@ -264,45 +133,14 @@ pid_to_eml_physical <- function(mn, pids) { sysmeta_to_eml_physical(sysmeta) } -#' Create an EML otherEntity for the given object from the System Metadata +#' This function is deprecated. See \link{pid_to_eml_entity}. #' #' @param sysmeta (SystemMetadata) One or more System Metadata objects #' -#' @return (list of otherEntity) The otherEntity object(s) -#' @export -#' -#' @examples -#' \dontrun{ -#' # Generate EML otherEntity objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) -#' sysmeta_to_other_entity(sm) -#'} sysmeta_to_eml_other_entity <- function(sysmeta) { - work <- function(x) { - other_entity <- new("otherEntity") - other_entity@id <- new("xml_attribute", x@identifier) - other_entity@scope <- new("xml_attribute", "document") - - if (is.na(x@fileName)) { - other_entity@entityName <- new("entityName", "NA") - } - else { - other_entity@entityName <- new("entityName", x@fileName) - } - - other_entity@entityType <- "Other" - - phys <- sysmeta_to_eml_physical(x) - other_entity@physical <- new("ListOfphysical", phys) - - other_entity - } - - - if (!is(sysmeta, "list")) sysmeta <- list(sysmeta) - - lapply(sysmeta, work) + .Deprecated(new = "pid_to_eml_entity", + package = "arcticdtautils", + old = "sysmeta_to_other_entity") } @@ -372,56 +210,16 @@ sysmeta_to_eml_physical <- function(sysmeta) { lapply(sysmeta, work) } -#' Creates and sets EML otherEntity elements to an existing EML document, -#' replacing any existing otherEntities -#' -#' This function is slow because it needs get the System Metadata for each -#' element of `pids` in order to get the fileName, checksum, etc. +#' This function is deprecated. See \link{pid_to_eml_entity}. #' #' @param mn (MNode) The Member Node the objects exist on. #' @param path (character) The location on disk of the EML file. #' @param pids (character) One or more PIDs for the objects. #' -#' @return (character) The path to the updated EML file. -#' @export -#' -#' @examples -#' \dontrun{ -#' mn <- MNode(...) # Set up a connection to an MN -#' eml_path <- "/path/to/your/eml.xml" -#' set_other_entities(mn, eml_path, "a_data_pid") -#' } set_other_entities <- function(mn, path, pids) { - stopifnot(is(mn, "MNode")) - stopifnot(file.exists(path)) - stopifnot(all(is.character(pids)), - all(nchar(pids) > 0)) - - if (length(pids) == 0) { - message("Skipped adding EML otherEntity elements because no pids were specified.") - return(path) - } - - # Get the metadata document from the MN and load it as an EML document - doc <- EML::read_eml(path) - stopifnot(is(doc, "eml")) - - message("Setting EML otherEntity elements. This can take a while if there are lots of PIDs...") - - # Generate otherEntity elements - other_entities <- pid_to_eml_other_entity(mn, pids) - - # Concatenate the existing and new otherEntity elements and put back in the - # EML - if (length(other_entities) > 0) { - doc@dataset@otherEntity <- new("ListOfotherEntity", other_entities) - } - - # Write the modified document back to disk and stop - EML::write_eml(doc, path) - stopifnot(EML::eml_validate(path) == TRUE) - - path + .Deprecated(new = "pid_to_eml_entity", + package = "arcticdtautils", + old = "set_other_entities") } #' Get the Metacat docid for the given identifier @@ -739,10 +537,10 @@ eml_individual_name <- function(given_names=NULL, sur_name) { stopifnot(all(sapply(given_names, is.character))) stopifnot(all(lengths(given_names) > 0)) - givens <- lapply(given_names, function(given_name) { - x <- new("givenName") - x@.Data <- given_name - x + givens <- lapply(given_names, function(given_name) { + x <- new("givenName") + x@.Data <- given_name + x }) indiv_name@givenName <- new("ListOfgivenName", givens) diff --git a/man/pid_to_eml_datatable.Rd b/man/pid_to_eml_datatable.Rd index 7aee3b7..55020c1 100644 --- a/man/pid_to_eml_datatable.Rd +++ b/man/pid_to_eml_datatable.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_datatable} \alias{pid_to_eml_datatable} -\title{Create an EML code\code{dataTable} object for a given PID.} +\title{This function is deprecated. See \link{pid_to_eml_entity}.} \usage{ pid_to_eml_datatable(mn, pid, attributes = NULL, factors = NULL, name = NULL, description = NULL, validateAttributes = TRUE) @@ -20,20 +20,8 @@ pid_to_eml_datatable(mn, pid, attributes = NULL, factors = NULL, \item{description}{(character) Optional field to specify \code{entityDescription}, otherwise will match name.} -\item{validateAttributes}{(logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run.} -} -\value{ -(dataTable) The \code{dataTable} object +\item{validateAttributes}{(logical) If set to FALSE or if attributes are not passed into the function, attribute validatio} } \description{ -This function generates an \code{attributeList} and \code{physical} and constructs a \code{dataTable}. -} -\examples{ -\dontrun{ -# Generate a dataTable for a given pid -attributes <- create_dummy_attributes_dataframe(10) -name <- "1234.csv" -description <- "A description of this entity." -dataTable <- pid_to_eml_datatable(mn, pid, attributes, name=name, description=description) -} +This function is deprecated. See \link{pid_to_eml_entity}. } diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd index 8faa7e1..3d9e4e1 100644 --- a/man/pid_to_eml_entity.Rd +++ b/man/pid_to_eml_entity.Rd @@ -4,47 +4,33 @@ \alias{pid_to_eml_entity} \title{eml.R} \usage{ -pid_to_eml_entity(mn, pids, entityType, names, descriptions = NULL, - attributes = NULL, factors = NULL, validateAttributes = TRUE) +pid_to_eml_entity(mn, pid, entityType = "otherEntity", ...) } \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} -\item{pids}{(character) The PIDs of the objects to create the entities for. Either a vector or list of names of the objects. Note that \code{pids[i]} or \code{pids[[i]]} must be a character vector of length one.} +\item{pid}{(character) The PID of the object to create the sub-tree for.} -\item{entityType}{(character) What kind of objects to create from the input. Either "dataTable" or "otherEntity".} +\item{entityType}{(character) What kind of objects to create from the input. Either "dataTable", +"spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity".} -\item{names}{(character) Either a vector or list of names of the objects. Note that \code{names[i]} or \code{names[[i]]} must be \code{NA} or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}.} - -\item{descriptions}{(character) Either a vector or list of names of the objects. Note that \code{descriptions[i]} or \code{descriptions[[i]]} must be \code{NA} if no entityDescription is needed for a particular pid or a character vector of length one and must correspond to \code{pids[i]} or \code{pids[[i]]}.} - -\item{attributes}{(list) Required if entityType is set to "dataTable". \code{NULL} if entityType is set to "otherEntity" and no attribute tables are needed. A list of attribute tables. Note that \code{attributes[[i]]} must be a dataframe or possibly \code{NA} in the case where \code{entityType} is set to "otherEntity" and must correspond to \code{pids[i]} or \code{pids[[i]]}.} - -\item{factors}{(list) A list of enumerated domain tables. \code{NULL} if no enumerated domains exist. Note that \code{factors[[i]]} must be a dataframe or \code{NA} in the case where enumerated domains are not present in the data. Must correspond to \code{pids[i]} or \code{pids[[i]]}.} - -\item{validateAttributes}{(logical) If set to FALSE or if attributes are not passed into the function, attribute validation test will not run.} +\item{...}{(optional) Additional arguments to be passed to \code{new(entityType, ...)}. See example} } \value{ -(list) The otherEntity or dataTable object(s) +(list) The entity object } \description{ Helpers for creating EML. -Create EML dataTable or otherEntity objects for a set of PIDs -} -\details{ -Note this is a useful alternative to pid_to_eml_datatable and -pid_to_eml_other_entity because it can create multiple of objects at once. +Create EML entity from a DataONE pid } \examples{ \dontrun{ -#Generate EML otherEntities for four pids -TEST_pid_to_eml_entity(mn, - entityType = 'otherEntity', - pids = c('pid1', 'pid2', 'pid3', 'pid4'), - names = c('name1', 'name2', 'name3', 'name4'), - descriptions = c('description1', 'description2', - 'description3', 'description4'), - attributes = list(atTbl1, atTbl2, atTbl3, atTbl4), - factors = rep(factors1, factors2, NA, NA))) +#Generate EML otherEntity +pid_to_eml_entity(mn, + pid, + entityType = "otherEntity", + entityName = "Entity Name", + entityDescription = "Description about entity") + } } diff --git a/man/pid_to_eml_other_entity.Rd b/man/pid_to_eml_other_entity.Rd index 1ec482f..ebc80fa 100644 --- a/man/pid_to_eml_other_entity.Rd +++ b/man/pid_to_eml_other_entity.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_other_entity} \alias{pid_to_eml_other_entity} -\title{Create EML otherEntity objects for a set of PIDs} +\title{This function is deprecated. See \link{pid_to_eml_entity}.} \usage{ pid_to_eml_other_entity(mn, pids) } @@ -11,17 +11,6 @@ pid_to_eml_other_entity(mn, pids) \item{pids}{(character) The PID of the object to create the sub-tree for.} } -\value{ -(list of otherEntity) The otherEntity object(s) -} \description{ -Note this is a wrapper around sysmeta_to_other_entity which handles the task of -creating the EML otherEntity. -} -\examples{ -\dontrun{ -# Generate EML otherEntity objects for all the data in a package -pkg <- get_package(mn, pid) -pid_to_other_entity(mn, pkg$data) -} +This function is deprecated. See \link{pid_to_eml_entity}. } diff --git a/man/set_other_entities.Rd b/man/set_other_entities.Rd index a5c9f40..e1c2ec4 100644 --- a/man/set_other_entities.Rd +++ b/man/set_other_entities.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/eml.R \name{set_other_entities} \alias{set_other_entities} -\title{Creates and sets EML otherEntity elements to an existing EML document, -replacing any existing otherEntities} +\title{This function is deprecated. See \link{pid_to_eml_entity}.} \usage{ set_other_entities(mn, path, pids) } @@ -14,17 +13,6 @@ set_other_entities(mn, path, pids) \item{pids}{(character) One or more PIDs for the objects.} } -\value{ -(character) The path to the updated EML file. -} \description{ -This function is slow because it needs get the System Metadata for each -element of `pids` in order to get the fileName, checksum, etc. -} -\examples{ -\dontrun{ -mn <- MNode(...) # Set up a connection to an MN -eml_path <- "/path/to/your/eml.xml" -set_other_entities(mn, eml_path, "a_data_pid") -} +This function is deprecated. See \link{pid_to_eml_entity}. } diff --git a/man/sysmeta_to_eml_other_entity.Rd b/man/sysmeta_to_eml_other_entity.Rd index a3d094d..2cde59e 100644 --- a/man/sysmeta_to_eml_other_entity.Rd +++ b/man/sysmeta_to_eml_other_entity.Rd @@ -2,24 +2,13 @@ % Please edit documentation in R/eml.R \name{sysmeta_to_eml_other_entity} \alias{sysmeta_to_eml_other_entity} -\title{Create an EML otherEntity for the given object from the System Metadata} +\title{This function is deprecated. See \link{pid_to_eml_entity}.} \usage{ sysmeta_to_eml_other_entity(sysmeta) } \arguments{ \item{sysmeta}{(SystemMetadata) One or more System Metadata objects} } -\value{ -(list of otherEntity) The otherEntity object(s) -} \description{ -Create an EML otherEntity for the given object from the System Metadata -} -\examples{ -\dontrun{ -# Generate EML otherEntity objects for all the data in a package -pkg <- get_package(mn, pid) -sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) -sysmeta_to_other_entity(sm) -} +This function is deprecated. See \link{pid_to_eml_entity}. } diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index d93fb19..9967a54 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -97,47 +97,52 @@ test_that("a project can be created with multiple personnel, an abstract can be expect_equal(xml2::xml_text(project@funding@para[[2]]@.Data[[1]]), "I won a second award, wow") }) -test_that("an other entity can be added from a pid", { +test_that("a dataTable and otherEntity can be added from a pid", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") } data_path <- tempfile() writeLines(LETTERS, data_path) - pid <- publish_object(mn, data_path, "text/plain") - - eml_path <- tempfile() - file.copy(file.path(system.file(package = "arcticdatautils"), "example-eml.xml"), eml_path) - doc <- EML::read_eml(eml_path) - doc@dataset@otherEntity <- new("ListOfotherEntity", list()) + pid <- publish_object(mn, data_path, "text/csv") - set_other_entities(mn, eml_path, pid) + eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") doc <- EML::read_eml(eml_path) - testthat::expect_length(doc@dataset@otherEntity, 1) -}) + dummy_factors <- c("factor 1", "factor 2") + dummy_attributes <- create_dummy_attributes_dataframe(10, dummy_factors) + dummy_enumeratedDomain <- create_dummy_enumeratedDomain_dataframe(dummy_factors) + + dummy_attributeList <- EML::set_attributes(dummy_attributes, factors = dummy_enumeratedDomain) + dummy_entityName <- "Test_Name" + dummy_entityDescription <- "Test_Description" + + # Create an otherEntity + OE <- pid_to_eml_entity(mn, pid, + entityName = dummy_entityName, + entityDescription = dummy_entityDescription, + attributeList = dummy_attributeList) + expect_s4_class(OE, "otherEntity") + expect_true(slot(OE, "entityName") == dummy_entityName) + expect_true(slot(OE, "entityDescription") == dummy_entityDescription) + + # Create a dataTable + DT <- pid_to_eml_entity(mn, pid, + entityType = "dataTable", + entityName = dummy_entityName, + entityDescription = dummy_entityDescription, + attributeList = dummy_attributeList) + expect_s4_class(DT, "dataTable") + expect_true(slot(DT, "entityName") == dummy_entityName) + expect_true(slot(DT, "entityDescription") == dummy_entityDescription) + + doc@dataset@otherEntity[[1]] <- OE + expect_true(EML::eml_validate(doc)) + + doc@dataset@dataTable[[1]] <- DT + expect_true(EML::eml_validate(doc)) -test_that("a data table can be added from a pid", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } - - data_path <- tempfile() - writeLines(LETTERS, data_path) - pid <- publish_object(mn, data_path, "text/csv") - - eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") - - doc <- EML::read_eml(eml_path) - - factors <- c("factor 1", "factor 2") - dummy_attributes <- create_dummy_attributes_dataframe(10, factors) - dummy_enumeratedDomain <- create_dummy_enumeratedDomain_dataframe(factors) - doc@dataset@dataTable <- as(list(pid_to_eml_datatable(mn, pid, dummy_attributes, dummy_enumeratedDomain)), "ListOfdataTable") - - testthat::expect_length(doc@dataset@dataTable, 1) - unlink(data_path) }) From 9623fb8dd7825296a547d637fdb8fae69114b263 Mon Sep 17 00:00:00 2001 From: Sharis Ochs Date: Tue, 12 Jun 2018 10:33:22 -0700 Subject: [PATCH 137/318] Added checks within function so it does not create dummy objects on a production node --- R/helpers.R | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index 36a2fcc..3561958 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -18,6 +18,12 @@ #' pid <- create_dummy_metadata(mn) #' } create_dummy_metadata <- function(mn, data_pids=NULL) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy metadata on production node.') + } + pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() @@ -69,6 +75,12 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { #' pid <- create_dummy_object(mn) #'} create_dummy_object <- function(mn) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy object on production node.') + } + pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() tmp <- tempfile() @@ -117,6 +129,12 @@ create_dummy_object <- function(mn) { #' pids <- create_dummy_package(mn, 6) #' } create_dummy_package <- function(mn, size = 2) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy package on production node.') + } + me <- get_token_subject() # Data objects @@ -183,6 +201,12 @@ create_dummy_package <- function(mn, size = 2) { # create_dummy_parent_package(mn, child_pid) #'} create_dummy_parent_package <- function(mn, children) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy parent package on production node.') + } + me <- get_token_subject() meta_pid <- create_dummy_metadata(mn) @@ -232,13 +256,13 @@ create_dummy_parent_package <- function(mn, children) { #' } create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) { names <- vapply(seq_len(numberAttributes), function(x) { paste0("Attribute ", x)}, "") - + if(!is.null(factors)) { domains <- c(rep("textDomain", numberAttributes - length(factors)), rep("enumeratedDomain", length(factors))) names[seq((numberAttributes - length(factors) + 1), numberAttributes)] <- factors } - + attributes <- data.frame(attributeName = names, attributeDefinition = names, measurementScale = rep("nominal", numberAttributes), @@ -250,7 +274,7 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) missingValueCode = rep(NA, numberAttributes), missingValueCodeExplanation = rep(NA, numberAttributes), stringsAsFactors = FALSE) - + attributes } @@ -258,12 +282,12 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) #' #' @param factors (character) Vector of factor names to include. #' -#' @return (data.frame) Data frame of factors +#' @return (data.frame) Data frame of factors #' @export #' #' @examples #' \dontrun{ -#' # Create dummy dataframe of 2 factors/enumerated domains +#' # Create dummy dataframe of 2 factors/enumerated domains #' attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) #' } create_dummy_enumeratedDomain_dataframe <- function(factors) { @@ -271,6 +295,6 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { enumeratedDomains <- data.frame(attributeName = names, code = paste0(names, seq_along(names)), definition = names) - + enumeratedDomains } From a70a23b20b175261217d13eceaa80eb0c134c843 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 5 Jul 2018 16:59:43 -0700 Subject: [PATCH 138/318] initial eml_otherEnt_to_dt commit --- R/eml.R | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/R/eml.R b/R/eml.R index 212138a..c3ca8df 100644 --- a/R/eml.R +++ b/R/eml.R @@ -979,3 +979,31 @@ eml_add_entities <- function(doc, doc } + +#' Convert otherEntities to dataTables +#' +#' Convert an EML 'otherEntity' object to a 'dataTable' object +#' +#' @param eml (S4) An EML S4 object +#' @param otherEntity (S4 / character) Either an EML otherEntity object or the index +#' of an otherEntity within a ListOfotherEntity +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @export +eml_otherEntity_to_dataTable <- function(eml, otherEntity) { + # argument checks + + # convert otherEntity to dataTable + dt <- capture.output(otherEntity) %>% + str_trim() %>% + str_replace_all("otherEntity", "dataTable") %>% + paste(sep = "", collapse = "") %>% + read_eml() + + # Add dt to bottom of dt list + + # delete otherEntity and update the lsit + + # return eml +} From 06fd03b41caf62d0b01c7cb762db342d527a25f8 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 10:40:22 -0700 Subject: [PATCH 139/318] eml_otherEntity_to_dataTable function --- R/eml.R | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/R/eml.R b/R/eml.R index c3ca8df..00c5a2a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -982,28 +982,53 @@ eml_add_entities <- function(doc, #' Convert otherEntities to dataTables #' -#' Convert an EML 'otherEntity' object to a 'dataTable' object +#' Convert an EML 'otherEntity' object to a 'dataTable' object. It converts the +#' otherEntity as currently constructed - it does not add a physical or add attributes. +#' However, if these are already in their respective slots, they will be retained. #' #' @param eml (S4) An EML S4 object #' @param otherEntity (S4 / character) Either an EML otherEntity object or the index #' of an otherEntity within a ListOfotherEntity +#' @param validate_eml (logical) Optional. Specify whether or not to validate the eml after +#' completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to +#' \code{FALSE} reduces execution time by ~50%. #' #' @author Dominic Mullen dmullen17@@gmail.com #' #' @export -eml_otherEntity_to_dataTable <- function(eml, otherEntity) { - # argument checks +eml_otherEntity_to_dataTable <- function(eml, otherEntity, eml_validate = TRUE) { + ## Argument checks + stopifnot(isS4(eml)) + stopifnot(any(is.numeric(otherEntity), methods::is(otherEntity, "otherEntity"))) + + ## Handle different inputs for 'otherEntity' + if (is.numeric(otherEntity)) { + index <- otherEntity + otherEntity <- eml@dataset@otherEntity[[index]] + } else { + index <- datamgmt::which_in_eml(eml@dataset@otherEntity, + "entityName", + otherEntity@entityName) + } - # convert otherEntity to dataTable + ## convert otherEntity to dataTable dt <- capture.output(otherEntity) %>% str_trim() %>% str_replace_all("otherEntity", "dataTable") %>% paste(sep = "", collapse = "") %>% read_eml() - # Add dt to bottom of dt list + ## Add dt to bottom of dt list + type <- "dataTable" + slot(eml@dataset, type) <- new(paste0("ListOf", type), c(slot(eml@dataset, type), + new(paste0("ListOf", type), list(dt)))) - # delete otherEntity and update the lsit + ## delete otherEntity from list + eml@dataset@otherEntity[[index]] <- NULL - # return eml + ## return eml + if (validate_eml == TRUE) { + eml_validate(eml) + } + return(eml) } From 34199e20056625577856bec963977038432db01f Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 11:40:28 -0700 Subject: [PATCH 140/318] added unit tests --- R/eml.R | 6 +++++- tests/testthat/test_eml.R | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 00c5a2a..53d58e4 100644 --- a/R/eml.R +++ b/R/eml.R @@ -996,10 +996,11 @@ eml_add_entities <- function(doc, #' @author Dominic Mullen dmullen17@@gmail.com #' #' @export -eml_otherEntity_to_dataTable <- function(eml, otherEntity, eml_validate = TRUE) { +eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) { ## Argument checks stopifnot(isS4(eml)) stopifnot(any(is.numeric(otherEntity), methods::is(otherEntity, "otherEntity"))) + stopifnot(is.logical(validate_eml)) ## Handle different inputs for 'otherEntity' if (is.numeric(otherEntity)) { @@ -1009,6 +1010,9 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, eml_validate = TRUE) index <- datamgmt::which_in_eml(eml@dataset@otherEntity, "entityName", otherEntity@entityName) + if (length(index) > 1) { + stop("Duplicate 'entityName' found in 'eml@dataset@otherEntity', please use a numeric index (1, 2, etc.) to specify which 'otherEntity' you would like to convert.") + } } ## convert otherEntity to dataTable diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 9967a54..1813833 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -146,5 +146,41 @@ test_that("a dataTable and otherEntity can be added from a pid", { unlink(data_path) }) +test_that("eml_otherEntity_to_dataTable fails gracefully", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + + # incorrect inputs + expect_error(eml_otherEntity_to_dataTable("dummy input")) + expect_error(eml_otherEntity_to_dataTable(eml, "1")) + + # subscripts out of bounds + expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[2]])) + expect_error(eml_otherEntity_to_dataTable(eml, 2)) + + # Duplicate entityNames found + eml@dataset@otherEntity[[2]] <- eml@dataset@otherEntity[[1]] + expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]])) + +}) + +test_that("eml_otherEntity_to_dataTable fails gracefully", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + otherEntity <- eml@dataset@otherEntity[[1]] + eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]) + # test that otherEntity was removed + expect_length(eml@dataset@otherEntity, 0) + + # test that dataTable was added + expect_equal(otherEntity@entityName, eml@dataset@dataTable[[1]]@entityName) + expect_equivalent(otherEntity@physical, eml@dataset@dataTable[[1]]@physical) +}) From b482759b73079c24a09ba9848d0603c314a01e2d Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 11:51:45 -0700 Subject: [PATCH 141/318] added examples --- R/eml.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/R/eml.R b/R/eml.R index 53d58e4..8991b33 100644 --- a/R/eml.R +++ b/R/eml.R @@ -987,8 +987,8 @@ eml_add_entities <- function(doc, #' However, if these are already in their respective slots, they will be retained. #' #' @param eml (S4) An EML S4 object -#' @param otherEntity (S4 / character) Either an EML otherEntity object or the index -#' of an otherEntity within a ListOfotherEntity +#' @param otherEntity (S4 / integer) Either an EML otherEntity object or the index +#' of an otherEntity within a ListOfotherEntity. Integer input is recommended. #' @param validate_eml (logical) Optional. Specify whether or not to validate the eml after #' completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to #' \code{FALSE} reduces execution time by ~50%. @@ -996,10 +996,22 @@ eml_add_entities <- function(doc, #' @author Dominic Mullen dmullen17@@gmail.com #' #' @export +#' +#' @examples +#' \dontrun{ +#' eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) +#' +#' # The following two calls are equivalent: +#' eml <- eml_otherEntity_to_dataTable(eml, eml@@dataset@@otherEntity[[1]]) +#' eml <- eml_otherEntity_to_dataTable(eml, 1) +#' +#' # Integer input is recommended: +#' eml <- eml_otherEntity_to_dataTable(eml, 1) +#' } eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) { ## Argument checks stopifnot(isS4(eml)) - stopifnot(any(is.numeric(otherEntity), methods::is(otherEntity, "otherEntity"))) + stopifnot(any(is.integer(otherEntity), methods::is(otherEntity, "otherEntity"))) stopifnot(is.logical(validate_eml)) ## Handle different inputs for 'otherEntity' From 332d1a733e791eee584ffff8dee2d0d9018b689e Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 11:52:29 -0700 Subject: [PATCH 142/318] updated documentation --- NAMESPACE | 1 + ...create_dummy_enumeratedDomain_dataframe.Rd | 2 +- man/eml_otherEntity_to_dataTable.Rd | 38 +++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 man/eml_otherEntity_to_dataTable.Rd diff --git a/NAMESPACE b/NAMESPACE index 869a3fa..a4f19d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(eml_contact) export(eml_creator) export(eml_individual_name) export(eml_metadata_provider) +export(eml_otherEntity_to_dataTable) export(eml_party) export(eml_personnel) export(eml_project) diff --git a/man/create_dummy_enumeratedDomain_dataframe.Rd b/man/create_dummy_enumeratedDomain_dataframe.Rd index d65bc43..9cc8411 100644 --- a/man/create_dummy_enumeratedDomain_dataframe.Rd +++ b/man/create_dummy_enumeratedDomain_dataframe.Rd @@ -17,7 +17,7 @@ Create dummy enumeratedDomain data frame } \examples{ \dontrun{ -# Create dummy dataframe of 2 factors/enumerated domains +# Create dummy dataframe of 2 factors/enumerated domains attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) } } diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd new file mode 100644 index 0000000..fe49fe4 --- /dev/null +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_otherEntity_to_dataTable} +\alias{eml_otherEntity_to_dataTable} +\title{Convert otherEntities to dataTables} +\usage{ +eml_otherEntity_to_dataTable(eml, otherEntity, validate_eml = TRUE) +} +\arguments{ +\item{eml}{(S4) An EML S4 object} + +\item{otherEntity}{(S4 / integer) Either an EML otherEntity object or the index +of an otherEntity within a ListOfotherEntity. Integer input is recommended.} + +\item{validate_eml}{(logical) Optional. Specify whether or not to validate the eml after +completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to +\code{FALSE} reduces execution time by ~50%.} +} +\description{ +Convert an EML 'otherEntity' object to a 'dataTable' object. It converts the +otherEntity as currently constructed - it does not add a physical or add attributes. +However, if these are already in their respective slots, they will be retained. +} +\examples{ +\dontrun{ +eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + +# The following two calls are equivalent: +eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]) +eml <- eml_otherEntity_to_dataTable(eml, 1) + +# Integer input is recommended: +eml <- eml_otherEntity_to_dataTable(eml, 1) +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} From 0ca6da560339d3c153e2035d925955cc6c99d665 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 12:05:08 -0700 Subject: [PATCH 143/318] travis fixes --- DESCRIPTION | 1 + R/eml.R | 14 +++++++------- man/eml_otherEntity_to_dataTable.Rd | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1691b4d..e1c8308 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,5 +37,6 @@ Suggests: rmarkdown, yaml, xslt +Remotes: NCEAS/datamgmt RoxygenNote: 6.0.1 VignetteBuilder: knitr diff --git a/R/eml.R b/R/eml.R index 8991b33..520cc20 100644 --- a/R/eml.R +++ b/R/eml.R @@ -982,8 +982,8 @@ eml_add_entities <- function(doc, #' Convert otherEntities to dataTables #' -#' Convert an EML 'otherEntity' object to a 'dataTable' object. It converts the -#' otherEntity as currently constructed - it does not add a physical or add attributes. +#' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +#' otherEntity objectas currently constructed - it does not add a physical or add attributes. #' However, if these are already in their respective slots, they will be retained. #' #' @param eml (S4) An EML S4 object @@ -991,7 +991,7 @@ eml_add_entities <- function(doc, #' of an otherEntity within a ListOfotherEntity. Integer input is recommended. #' @param validate_eml (logical) Optional. Specify whether or not to validate the eml after #' completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to -#' \code{FALSE} reduces execution time by ~50%. +#' \code{FALSE} reduces execution time by ~ 50 percent. #' #' @author Dominic Mullen dmullen17@@gmail.com #' @@ -1028,11 +1028,11 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) } ## convert otherEntity to dataTable - dt <- capture.output(otherEntity) %>% - str_trim() %>% - str_replace_all("otherEntity", "dataTable") %>% + dt <- utils::capture.output(otherEntity) %>% + stringr::str_trim() %>% + stringr::str_replace_all("otherEntity", "dataTable") %>% paste(sep = "", collapse = "") %>% - read_eml() + EML::read_eml() ## Add dt to bottom of dt list type <- "dataTable" diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd index fe49fe4..83a1164 100644 --- a/man/eml_otherEntity_to_dataTable.Rd +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -14,11 +14,11 @@ of an otherEntity within a ListOfotherEntity. Integer input is recommended.} \item{validate_eml}{(logical) Optional. Specify whether or not to validate the eml after completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to -\code{FALSE} reduces execution time by ~50%.} +\code{FALSE} reduces execution time by ~ 50 percent.} } \description{ -Convert an EML 'otherEntity' object to a 'dataTable' object. It converts the -otherEntity as currently constructed - it does not add a physical or add attributes. +Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +otherEntity objectas currently constructed - it does not add a physical or add attributes. However, if these are already in their respective slots, they will be retained. } \examples{ From f8e18ecd48c996c5a32206195174efc3e72af48c Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 12:31:36 -0700 Subject: [PATCH 144/318] more travis fixes --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e1c8308..fbb8792 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,11 +14,13 @@ Description: A set of utilites for working with the Arctic Data Center Depends: R (>= 3.2.3) Imports: - digest, + datamgmt, dataone, datapack, + digest, EML, httr, + magrittr, methods, stringr, stringi, From b52283138614c01870ac171f4388dbc1b24a4bb1 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 13:10:23 -0700 Subject: [PATCH 145/318] added mitchell's function --- DESCRIPTION | 2 - NAMESPACE | 1 + R/eml.R | 74 +++++++++++++++++++++++++++++++++++ man/which_in_eml.Rd | 41 ++++++++++++++++++++ tests/testthat/test_eml.R | 81 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 2 deletions(-) create mode 100644 man/which_in_eml.Rd diff --git a/DESCRIPTION b/DESCRIPTION index fbb8792..e332b2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,6 @@ Description: A set of utilites for working with the Arctic Data Center Depends: R (>= 3.2.3) Imports: - datamgmt, dataone, datapack, digest, @@ -39,6 +38,5 @@ Suggests: rmarkdown, yaml, xslt -Remotes: NCEAS/datamgmt RoxygenNote: 6.0.1 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index a4f19d4..16dada6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,7 @@ export(sysmeta_to_eml_physical) export(update_object) export(update_resource_map) export(view_profile) +export(which_in_eml) import(EML) import(XML) import(dataone) diff --git a/R/eml.R b/R/eml.R index 520cc20..b795ba0 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1048,3 +1048,77 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) } return(eml) } + + +#' Search through EMLs +#' +#' This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information. +#' +#' @import EML +#' @param eml_list (S4/List) an EML list object +#' @param element (character) element to evaluate +#' @param test (function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1). +#' +#' @keywords eml +#' +#' @author Mitchell Maier mitchell.maier@@gmail.com +#' +#' @examples +#' \dontrun{ +#' # Question: Which creators have a surName "Smith"? +#' n <- which_in_eml(eml@@dataset@@creator, "surName", "Smith") +#' # Answer: eml@@dataset@@creator[n] +#' +#' # Question: Which dataTables have an entityName that begins with "2016" +#' n <- which_in_eml(eml@@dataset@@dataTable, "entityName", function(x) {grepl("^2016", x)}) +#' # Answer: eml@@dataset@@dataTable[n] +#' +#' # Question: Which attributes in dataTable[[1]] have a numberType "natural"? +#' n <- which_in_eml(eml@@dataset@@dataTable[[1]]@@attributeList@@attribute, "numberType", "natural") +#' # Answer: eml@@dataset@@dataTable[[1]]@@attributeList@@attribute[n] +#' +#' #' # Question: Which dataTables have at least one attribute with a numberType "natural"? +#' n <- which_in_eml(eml@@dataset@@dataTable, "numberType", function(x) {"natural" %in% x}) +#' # Answer: eml@@dataset@@dataTable[n] +#' } +#' @export +#' +which_in_eml <- function(eml_list, element, test) { + + stopifnot(isS4(eml_list)) + stopifnot(methods::is(eml_list,"list")) + stopifnot(is.character(element)) + + if (is.character(test)) { + value = test + test = function(x) {x == value} + + } else { + stopifnot(is.function(test)) + } + + # Find location + location <- unlist(lapply(seq_along(eml_list), function(i) { + elements_test <- unlist(EML::eml_get(eml_list[[i]], element)) + + if (is.null(elements_test)) { + out <- NULL + + } else { + result <- test(elements_test) + + if (length(result) > 1) { + stop("Test must only return one value.") + + } else if (result == TRUE) { + out <- i + + } else { + out <- NULL + } + } + return(out) + })) + + return(location) +} diff --git a/man/which_in_eml.Rd b/man/which_in_eml.Rd new file mode 100644 index 0000000..4e56266 --- /dev/null +++ b/man/which_in_eml.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{which_in_eml} +\alias{which_in_eml} +\title{Search through EMLs} +\usage{ +which_in_eml(eml_list, element, test) +} +\arguments{ +\item{eml_list}{(S4/List) an EML list object} + +\item{element}{(character) element to evaluate} + +\item{test}{(function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1).} +} +\description{ +This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information. +} +\examples{ +\dontrun{ +# Question: Which creators have a surName "Smith"? +n <- which_in_eml(eml@dataset@creator, "surName", "Smith") +# Answer: eml@dataset@creator[n] + +# Question: Which dataTables have an entityName that begins with "2016" +n <- which_in_eml(eml@dataset@dataTable, "entityName", function(x) {grepl("^2016", x)}) +# Answer: eml@dataset@dataTable[n] + +# Question: Which attributes in dataTable[[1]] have a numberType "natural"? +n <- which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "numberType", "natural") +# Answer: eml@dataset@dataTable[[1]]@attributeList@attribute[n] + +#' # Question: Which dataTables have at least one attribute with a numberType "natural"? +n <- which_in_eml(eml@dataset@dataTable, "numberType", function(x) {"natural" \%in\% x}) +# Answer: eml@dataset@dataTable[n] +} +} +\author{ +Mitchell Maier mitchell.maier@gmail.com +} +\keyword{eml} diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 1813833..06e39b8 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -184,3 +184,84 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { expect_equal(otherEntity@entityName, eml@dataset@dataTable[[1]]@entityName) expect_equivalent(otherEntity@physical, eml@dataset@dataTable[[1]]@physical) }) + +test_that("which_in_eml Returns correct locations", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + attributes <- + data.frame( + attributeName = c( + "length_1", + "time_2", + "length_3"), + attributeDefinition = c( + "def 1", + "def 2", + "def 3"), + formatString = c( + NA, + NA, + NA), + measurementScale = c( + "ratio", + "ratio", + "ratio"), + domain = c( + "numericDomain", + "numericDomain", + "numericDomain"), + definition = c( + NA, + NA, + NA), + unit = c( + "meter", + "second", + "meter"), + numberType = c( + "real", + "real", + "real"), + stringsAsFactors = FALSE + ) + + attributeList <- EML::set_attributes(attributes) + + dataTable_1 <- new("dataTable", + entityName = "2016_data.csv", + entityDescription = "2016 data", + attributeList = attributeList) + + dataTable_2 <- dataTable_1 + + dataTable_3 <- new("dataTable", + entityName = "2015_data.csv", + entityDescription = "2016 data", + attributeList = attributeList) + + creator_1 <- new("creator", + individualName = new("individualName", + surName = "LAST", + givenName = "FIRST")) + creator_2 <- new("creator", + individualName = new("individualName", + surName = "LAST", + givenName = "FIRST_2")) + creator_3 <- creator_2 + + title <- "Title" + + dataset <- new("dataset", + title = title, + creator = c(creator_1, creator_2, creator_3), + dataTable = c(dataTable_1, dataTable_2, dataTable_3)) + + eml <- new("eml", + dataset = dataset) + + expect_equal(c(2,3), which_in_eml(eml@dataset@creator, "givenName", "FIRST_2")) + expect_error(which_in_eml(eml@dataset@dataTable, "attributeName", "length_3")) + expect_equal(c(1,3), which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "attributeName", function(x) {grepl("^length", x)})) +}) From 0012d2855f11642fb2394ca955240f3ff8c4c3be Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 13:13:30 -0700 Subject: [PATCH 146/318] syntax updates --- R/eml.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index b795ba0..cf2e850 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1010,7 +1010,7 @@ eml_add_entities <- function(doc, #' } eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) { ## Argument checks - stopifnot(isS4(eml)) + stopifnot(methods::is(eml, "eml")) stopifnot(any(is.integer(otherEntity), methods::is(otherEntity, "otherEntity"))) stopifnot(is.logical(validate_eml)) @@ -1019,9 +1019,9 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) index <- otherEntity otherEntity <- eml@dataset@otherEntity[[index]] } else { - index <- datamgmt::which_in_eml(eml@dataset@otherEntity, - "entityName", - otherEntity@entityName) + index <- which_in_eml(eml@dataset@otherEntity, + "entityName", + otherEntity@entityName) if (length(index) > 1) { stop("Duplicate 'entityName' found in 'eml@dataset@otherEntity', please use a numeric index (1, 2, etc.) to specify which 'otherEntity' you would like to convert.") } From 946bb03b08bd5a7ce1b0ccce9b4dcbeaee5333a3 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 6 Jul 2018 14:20:48 -0700 Subject: [PATCH 147/318] Update eml.R --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index cf2e850..bb13e95 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1011,7 +1011,7 @@ eml_add_entities <- function(doc, eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) { ## Argument checks stopifnot(methods::is(eml, "eml")) - stopifnot(any(is.integer(otherEntity), methods::is(otherEntity, "otherEntity"))) + stopifnot(any(is.numeric(otherEntity), methods::is(otherEntity, "otherEntity"))) stopifnot(is.logical(validate_eml)) ## Handle different inputs for 'otherEntity' From 30e572ebd2848018425534b9a64f10b74c53a70c Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 11 Jul 2018 13:09:34 -0700 Subject: [PATCH 148/318] added checks for current versions --- R/editing.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/editing.R b/R/editing.R index c1edeb1..c6f0856 100644 --- a/R/editing.R +++ b/R/editing.R @@ -311,6 +311,18 @@ publish_update <- function(mn, stopifnot(all(is.character(parent_child_pids))) } + # Check that resource_map_pid, and metadata_pid are the current versions + meta_versions <- get_all_versions(mn, metadata_pid) + current_meta_version <- tail(metadata_versions, 1) + if (metadata_pid != current_meta_version) { + stop("metadata_pid is already obsoleted by: ", current_meta_version) + } + rm_versions <- get_all_versions(mn, resource_map_pid) + current_rm_version <- tail(rm_versions, 1) + if (resource_map_pid != current_rm_version) { + stop("resource_map_pid is already obsoleted by: ", current_rm_version) + } + # Check to see if the obsoleted package is in the list of parent_child_pids # If it is notify the user and remove it from the list if (resource_map_pid %in% parent_child_pids) { From bff972dc90bf18a6b584860670325e2a3263c4b7 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 11 Jul 2018 13:18:26 -0700 Subject: [PATCH 149/318] switched checks to getSystemMetadata calls --- R/editing.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/R/editing.R b/R/editing.R index c6f0856..612da91 100644 --- a/R/editing.R +++ b/R/editing.R @@ -312,15 +312,13 @@ publish_update <- function(mn, } # Check that resource_map_pid, and metadata_pid are the current versions - meta_versions <- get_all_versions(mn, metadata_pid) - current_meta_version <- tail(metadata_versions, 1) - if (metadata_pid != current_meta_version) { - stop("metadata_pid is already obsoleted by: ", current_meta_version) - } - rm_versions <- get_all_versions(mn, resource_map_pid) - current_rm_version <- tail(rm_versions, 1) - if (resource_map_pid != current_rm_version) { - stop("resource_map_pid is already obsoleted by: ", current_rm_version) + meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy + if (!is.na(meta_obsoletedBy)) { + stop("metadata_pid is already obsoleted by: ", meta_obsoletedBy) + } + rm_obsoletedBy <- dataone::getSystemMetadata(mn, resource_map_pid)@obsoletedBy + if (!is.na(rm_obsoletedBy)) { + stop("resource_map_pid is already obsoleted by: ", rm_obsoletedBy) } # Check to see if the obsoleted package is in the list of parent_child_pids From f5079e4b6e32dc5e854baac6418135ea3ec8d6df Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 11 Jul 2018 13:43:52 -0700 Subject: [PATCH 150/318] added unit tests, moved new checks position in function --- tests/testthat/test_editing.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 320e5f9..da2b48c 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -288,3 +288,15 @@ test_that("publish_update removes 'resource_map_pid' from 'parent_child_pids' ar expect_equal(child$resource_map, parent$child_packages) }) + +test_that("publish_update errors if the non-current resource map or metadata pid is provided", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg1 <- create_dummy_package(mn) + pkg2 <- publish_update(mn, pkg1$metadata, pkg1$resource_map, pkg1$data) + + expect_error(publish_update(mn, pkg1$metadata, pkg2$resource_map, pkg2$data)) + expect_error(publish_update(mn, pkg2$metadata, pkg1$resource_map, pkg2$data)) +}) From e868981ca9a8c50abdba4e69faf3a782f88ce454 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 11 Jul 2018 13:44:10 -0700 Subject: [PATCH 151/318] travis fixes --- R/editing.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/editing.R b/R/editing.R index 612da91..0e5d1b3 100644 --- a/R/editing.R +++ b/R/editing.R @@ -311,16 +311,6 @@ publish_update <- function(mn, stopifnot(all(is.character(parent_child_pids))) } - # Check that resource_map_pid, and metadata_pid are the current versions - meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy - if (!is.na(meta_obsoletedBy)) { - stop("metadata_pid is already obsoleted by: ", meta_obsoletedBy) - } - rm_obsoletedBy <- dataone::getSystemMetadata(mn, resource_map_pid)@obsoletedBy - if (!is.na(rm_obsoletedBy)) { - stop("resource_map_pid is already obsoleted by: ", rm_obsoletedBy) - } - # Check to see if the obsoleted package is in the list of parent_child_pids # If it is notify the user and remove it from the list if (resource_map_pid %in% parent_child_pids) { @@ -366,6 +356,16 @@ publish_update <- function(mn, stopifnot(object_exists(mn, parent_child_pids)) } + # Check that resource_map_pid, and metadata_pid are the current versions + meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy + if (!is.na(meta_obsoletedBy)) { + stop("metadata_pid is already obsoleted by: ", meta_obsoletedBy) + } + rm_obsoletedBy <- dataone::getSystemMetadata(mn, resource_map_pid)@obsoletedBy + if (!is.na(rm_obsoletedBy)) { + stop("resource_map_pid is already obsoleted by: ", rm_obsoletedBy) + } + # Prepare the response object response <- list() From 686e7bbcf66aac75d04bd6adb0c288699ffabfce Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 12 Jul 2018 10:14:30 -0700 Subject: [PATCH 152/318] revisions from bryce's comments --- R/editing.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index 0e5d1b3..6c4409f 100644 --- a/R/editing.R +++ b/R/editing.R @@ -354,16 +354,17 @@ publish_update <- function(mn, stopifnot(object_exists(mn, parent_data_pids)) if (!is.null(parent_child_pids)) stopifnot(object_exists(mn, parent_child_pids)) + # Check for obsoleted metadata_pid + meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy + if (!is.na(meta_obsoletedBy)) { + stop("The value passed in for the argument 'metadata_pid' of '", metadata_pid, "' is already obsoleted by a newer version with PID '", meta_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each Object series.") + } } - # Check that resource_map_pid, and metadata_pid are the current versions - meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy - if (!is.na(meta_obsoletedBy)) { - stop("metadata_pid is already obsoleted by: ", meta_obsoletedBy) - } + # Check for obsoleted resource_map_pid. The resource map and metadata can desassociate without this check. rm_obsoletedBy <- dataone::getSystemMetadata(mn, resource_map_pid)@obsoletedBy if (!is.na(rm_obsoletedBy)) { - stop("resource_map_pid is already obsoleted by: ", rm_obsoletedBy) + stop("The value passed in for the argument 'resource_map_pid' of '", resource_map_pid, "' is already obsoleted by a newer version with PID '", rm_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each Object series.") } # Prepare the response object From 7e9b3252be3e0ee23d57f20df6af2fc266339a12 Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Thu, 9 Aug 2018 15:15:18 -0700 Subject: [PATCH 153/318] Add get_all_sysmeta function --- DESCRIPTION | 2 +- R/helpers.R | 52 +++++++++++++++++++++++++++++++++++- man/create_resource_map.Rd | 4 +-- man/eml_party.Rd | 7 ++--- man/generate_resource_map.Rd | 4 +-- man/get_all_sysmeta.Rd | 40 +++++++++++++++++++++++++++ man/publish_update.Rd | 6 ++--- man/replace_subject.Rd | 3 ++- man/update_object.Rd | 3 ++- 9 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 man/get_all_sysmeta.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e332b2e..cc41528 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,5 +38,5 @@ Suggests: rmarkdown, yaml, xslt -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 VignetteBuilder: knitr diff --git a/R/helpers.R b/R/helpers.R index 3561958..18c63d3 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -14,7 +14,6 @@ #' # Set environment #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") - #' pid <- create_dummy_metadata(mn) #' } create_dummy_metadata <- function(mn, data_pids=NULL) { @@ -59,6 +58,7 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { pid } + #' Create a test object. #' #' @param mn (MNode) The Member Node. @@ -112,6 +112,7 @@ create_dummy_object <- function(mn) { create_response } + #' Create a test package. #' #' @param mn (MNode) The Member Node. @@ -182,6 +183,7 @@ create_dummy_package <- function(mn, size = 2) { data = data_pids) } + #' Create a test parent package. #' #' @param mn (MNode) The Member Node. @@ -278,6 +280,7 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) attributes } + #' Create dummy enumeratedDomain data frame #' #' @param factors (character) Vector of factor names to include. @@ -298,3 +301,50 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { enumeratedDomains } + + +#' Get system metadata for all objects in a data package +#' +#' This is a wrapper function around `dataone::getSystemMetadata` that retrieves +#' the system metadata for all objects in a data package and returns them as a list. +#' It is useful for inspecting system metadata and identifying changes where needed. +#' +#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on +#' @param pid (character) The resource map PID of the package +#' +#' @return (list) A structured list of system metadata +#' +#' @examples +#'\dontrun{ +#' # Set environment +#' cn_staging <- CNode("STAGING2") +#' knb_test <- getMNode(cn_staging, "urn:node:mnTestKNB") +#' rm_pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' all <- get_all_sysmeta(knb_test, rm_pid) +#' +#' # View in RStudio data viewer to inspect +#' View(all) +#' +#' # Print specific elements to console +#' all[["Metadata"]]@rightsHolder +#' +#' # Create separate object +#' sysmeta_md <- all[["Metadata"]] +#' } +get_all_sysmeta <- function(node, pid) { + stopifnot(class(node) %in% c("MNode", "CNode")) + stopifnot(is.character(pid), nchar(pid) > 0) + stopifnot(is_resource_map(node, pid)) + + pkg <- get_package(node, pid, file_names = TRUE) + + all <- lapply(pkg$data, function(x) {dataone::getSystemMetadata(node, x)}) + all[[length(all) + 1]] <- dataone::getSystemMetadata(node, pkg$metadata) + all[[length(all) + 1]] <- dataone::getSystemMetadata(node, pkg$resource_map) + + names(all)[length(all) - 1] <- "Metadata" + names(all)[length(all)] <- "Resource_map" + + return(all) +} diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index a999d81..8ac7367 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -4,8 +4,8 @@ \alias{create_resource_map} \title{Create a resource map Object on a Member Node.} \usage{ -create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL, - check_first = TRUE, ...) +create_resource_map(mn, metadata_pid, data_pids = NULL, + child_pids = NULL, check_first = TRUE, ...) } \arguments{ \item{mn}{(MNode) The Member Node} diff --git a/man/eml_party.Rd b/man/eml_party.Rd index 6abee51..f76b522 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -4,9 +4,10 @@ \alias{eml_party} \title{Low-level helper for creating EML parties} \usage{ -eml_party(type = "associatedParty", given_names = NULL, sur_name = NULL, - organization = NULL, position = NULL, email = NULL, phone = NULL, - address = NULL, userId = NULL, role = NULL) +eml_party(type = "associatedParty", given_names = NULL, + sur_name = NULL, organization = NULL, position = NULL, + email = NULL, phone = NULL, address = NULL, userId = NULL, + role = NULL) } \arguments{ \item{type}{(character) The type of party (e.g. 'contact')} diff --git a/man/generate_resource_map.Rd b/man/generate_resource_map.Rd index 2b56e2b..1e5a3de 100644 --- a/man/generate_resource_map.Rd +++ b/man/generate_resource_map.Rd @@ -6,8 +6,8 @@ This is a convenience wrapper around the constructor of the `ResourceMap` class from `DataPackage`.} \usage{ -generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL, - other_statements = NULL, +generate_resource_map(metadata_pid, data_pids = NULL, + child_pids = NULL, other_statements = NULL, resolve_base = "https://cn.dataone.org/cn/v2/resolve", resource_map_pid = NULL) } diff --git a/man/get_all_sysmeta.Rd b/man/get_all_sysmeta.Rd new file mode 100644 index 0000000..d6cbfef --- /dev/null +++ b/man/get_all_sysmeta.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{get_all_sysmeta} +\alias{get_all_sysmeta} +\title{Get system metadata for all objects in a data package} +\usage{ +get_all_sysmeta(node, pid) +} +\arguments{ +\item{node}{(MNode/CNode) The Coordinating/Member Node to run the query on} + +\item{pid}{(character) The resource map PID of the package} +} +\value{ +(list) A structured list of system metadata +} +\description{ +This is a wrapper function around `dataone::getSystemMetadata` that retrieves +the system metadata for all objects in a data package and returns them as a list. +It is useful for inspecting system metadata and identifying changes where needed. +} +\examples{ +\dontrun{ +# Set environment +cn_staging <- CNode("STAGING2") +knb_test <- getMNode(cn_staging, "urn:node:mnTestKNB") +rm_pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +all <- get_all_sysmeta(knb_test, rm_pid) + +# View in RStudio data viewer to inspect +View(all) + +# Print specific elements to console +all[["Metadata"]]@rightsHolder + +# Create separate object +sysmeta_md <- all[["Metadata"]] +} +} diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 76f4508..4597dc5 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -6,9 +6,9 @@ \usage{ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, child_pids = NULL, metadata_path = NULL, identifier = NULL, - use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL, - parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE, - check_first = TRUE) + use_doi = FALSE, parent_resmap_pid = NULL, + parent_metadata_pid = NULL, parent_data_pids = NULL, + parent_child_pids = NULL, public = TRUE, check_first = TRUE) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} diff --git a/man/replace_subject.Rd b/man/replace_subject.Rd index 963b5c2..87c7618 100644 --- a/man/replace_subject.Rd +++ b/man/replace_subject.Rd @@ -4,7 +4,8 @@ \alias{replace_subject} \title{Replace subjects in the accessPolicy section of a System Metadata entries.} \usage{ -replace_subject(sysmeta, from = "cn=arctic-data-admins,dc=dataone,dc=org", +replace_subject(sysmeta, + from = "cn=arctic-data-admins,dc=dataone,dc=org", to = "CN=arctic-data-admins,DC=dataone,DC=org") } \arguments{ diff --git a/man/update_object.Rd b/man/update_object.Rd index bc4d88b..7c525f4 100644 --- a/man/update_object.Rd +++ b/man/update_object.Rd @@ -4,7 +4,8 @@ \alias{update_object} \title{Update an object with a new file.} \usage{ -update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) +update_object(mn, pid, path, format_id = NULL, new_pid = NULL, + sid = NULL) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} From e700f11ec4f158db8dd70f3a15300a5da1ee2502 Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Wed, 22 Aug 2018 17:41:33 -0700 Subject: [PATCH 154/318] Update get_all_sysmeta based on Solr query --- R/helpers.R | 129 +++++++++++++++++++++++++++++----- man/get_all_sysmeta.Rd | 22 +++--- tests/testthat/test_helpers.R | 25 +++++++ 3 files changed, 149 insertions(+), 27 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index 18c63d3..ba53463 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -21,7 +21,7 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { # Make sure the node is not a production node if (mn@env == "prod") { stop('Can not create dummy metadata on production node.') - } + } pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() @@ -305,14 +305,17 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { #' Get system metadata for all objects in a data package #' -#' This is a wrapper function around `dataone::getSystemMetadata` that retrieves -#' the system metadata for all objects in a data package and returns them as a list. -#' It is useful for inspecting system metadata and identifying changes where needed. +#' This function retrieves the system metadata for all objects in a data package and returns them as a list. +#' It is useful for inspecting system metadata for an entire data package and identifying changes where needed. +#' +#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on. +#' @param rm_pid (character) The resource map PID of the package. +#' @param nmax (numeric) Default 1000. The maximum number of object system metadata to return. The default is sufficient to get system metadata for all objects in most packages. +#' @param child_packages (logical) Default FALSE. If parent package, whether or not to include objects of child packages. #' -#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on -#' @param pid (character) The resource map PID of the package +#' @return (list) A structured list of system metadata. #' -#' @return (list) A structured list of system metadata +#' @keywords internal #' #' @examples #'\dontrun{ @@ -327,24 +330,114 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { #' View(all) #' #' # Print specific elements to console -#' all[["Metadata"]]@rightsHolder +#' all[[1]]@rightsHolder #' #' # Create separate object -#' sysmeta_md <- all[["Metadata"]] +#' sysmeta_md <- all[[2]] #' } -get_all_sysmeta <- function(node, pid) { +get_all_sysmeta <- function(node, rm_pid, nmax = 1000, child_packages = FALSE) { stopifnot(class(node) %in% c("MNode", "CNode")) - stopifnot(is.character(pid), nchar(pid) > 0) - stopifnot(is_resource_map(node, pid)) + stopifnot(is.character(rm_pid), nchar(rm_pid) > 0, length(rm_pid) == 1) + stopifnot(is.numeric(nmax), length(nmax) == 1 , nmax >= 0) + stopifnot(is.logical(child_packages), length(child_packages) == 1) + stopifnot(is_resource_map(node, rm_pid)) + + query_params <- paste("identifier:", rm_pid, "+OR+resourceMap:", rm_pid, "", sep = "\"") + response <- dataone::query(node, list(q = query_params, rows = as.character(nmax))) + + if (length(response) == 0) { + stop(paste0("No results were found when searching for a package with resource map '", rm_pid, + "'.\nThis could be caused by not having appropriate access to read the resource map.")) + } - pkg <- get_package(node, pid, file_names = TRUE) + if (length(response) == nmax) { + warning("Query returned the maximum number of objects. It is possible there are more to retrieve. \nSpecify a larger number of objects with the 'nmax' argument.") + } - all <- lapply(pkg$data, function(x) {dataone::getSystemMetadata(node, x)}) - all[[length(all) + 1]] <- dataone::getSystemMetadata(node, pkg$metadata) - all[[length(all) + 1]] <- dataone::getSystemMetadata(node, pkg$resource_map) + # Check if child package + if (response[[1]]$formatType == "RESOURCE" && !is.null(response[[1]]$resourceMap)) { + message("The data package with this resource map is a child package.") + } + # Check if parent package + if (any(unlist(lapply(response[2:length(response)], function(x) ifelse(x$formatType == "RESOURCE", TRUE, FALSE))))) { + message("The data package with this resource map is a parent package.") + if (child_packages == TRUE) { + children <- Filter(function(x) x$formatType == "RESOURCE", response[2:length(response)]) + children2 <- vector("list", length(children)) + for (i in seq_along(children)) { + child_rm_pid <- children[[i]]$identifier + query_params2 <- paste("identifier:", child_rm_pid, "+OR+resourceMap:", child_rm_pid, "", sep = "\"") + children2[[i]] <- dataone::query(node, list(q = query_params2, rows = as.character(nmax))) + } + } + } - names(all)[length(all) - 1] <- "Metadata" - names(all)[length(all)] <- "Resource_map" + # Translate fields from Solr query to formal class SystemMetadata + translate <- function(x) { + sysmeta <- methods::new("SystemMetadata") + + sysmeta@serialVersion <- sysmeta@serialVersion + sysmeta@identifier <- if (is.null(x$identifier)) {sysmeta@identifier} else {x$identifier} + sysmeta@formatId <- if (is.null(x$formatId)) {sysmeta@formatId} else {x$formatId} + sysmeta@size <- if (is.null(x$size)) {sysmeta@size} else {x$size} + sysmeta@checksum <- if (is.null(x$checksum)) {sysmeta@checksum} else {x$checksum} + sysmeta@checksumAlgorithm <- if (is.null(x$checksumAlgorithm)) {sysmeta@checksumAlgorithm} else {x$checksumAlgorithm} + sysmeta@submitter <- if (is.null(x$submitter)) {sysmeta@submitter} else {x$submitter} + sysmeta@rightsHolder <- if (is.null(x$rightsHolder)) {sysmeta@rightsHolder} else {x$rightsHolder} + + read <- if (is.null(x$readPermission)) {} else {data.frame(subject = unlist(x$readPermission), + permission = "read")} + write <- if (is.null(x$writePermission)) {} else {data.frame(subject = unlist(x$writePermission), + permission = "write")} + change <- if (is.null(x$changePermission)) {} else {data.frame(subject = unlist(x$changePermission), + permission = "changePermission")} + sysmeta@accessPolicy <- rbind(read, write, change) + + sysmeta@replicationAllowed <- if (is.null(x$replicationAllowed)) {sysmeta@replicationAllowed} else {x$replicationAllowed} + sysmeta@numberReplicas <- if (is.null(x$numberReplicas)) {sysmeta@numberReplicas} else {x$numberReplicas} + sysmeta@preferredNodes <- if (is.null(x$preferredReplicationMN)) {sysmeta@preferredNodes} else {x$preferredReplicationMN} + sysmeta@blockedNodes <- if (is.null(x$blockedReplicationMN)) {sysmeta@blockedNodes} else {x$blockedReplicationMN} + sysmeta@obsoletes <- if (is.null(x$obsoletes)) {sysmeta@obsoletes} else {x$obsoletes} + sysmeta@obsoletedBy <- if (is.null(x$obsoletedBy)) {sysmeta@obsoletedBy} else {x$obsoletedBy} + sysmeta@archived <- sysmeta@archived + sysmeta@dateUploaded <- if (is.null(x$dateUploaded)) {sysmeta@dateUploaded} else {as.character(x$dateUploaded)} + sysmeta@dateSysMetadataModified <- if (is.null(x$dateModified)) {sysmeta@dateSysMetadataModified} else {as.character(x$dateModified)} + sysmeta@originMemberNode <- if (is.null(x$datasource)) {sysmeta@originMemberNode} else {x$datasource} + sysmeta@authoritativeMemberNode <- if (is.null(x$authoritativeMN)) {sysmeta@authoritativeMemberNode} else {x$authoritativeMN} + sysmeta@seriesId <- if (is.null(x$seriesId)) {sysmeta@seriesId} else {x$seriesId} + sysmeta@mediaType <- if (is.null(x$mediaType)) {sysmeta@mediaType} else {x$mediaType} + sysmeta@fileName <- if (is.null(x$fileName)) {sysmeta@fileName} else {x$fileName} + sysmeta@mediaTypeProperty <- if (is.null(x$mediaTypeProperty)) {sysmeta@mediaTypeProperty} else {x$mediaTypeProperty} + + return(sysmeta) + } + + if (child_packages == FALSE) { + all <- lapply(response, translate) + names(all) <- unlist(lapply(all, function(x) {x@fileName})) + for (i in seq_along(all)) { + if (is.na(names(all)[i])) {names(all)[i] <- paste0("missing_fileName", i)} + } + } else { + other <- Filter(function(x) x$formatType != "RESOURCE", response[2:length(response)]) + response2 <- c(list(response[[1]]), other) + parent <- lapply(response2, translate) + names(parent) <- unlist(lapply(parent, function(x) {x@fileName})) + for (i in seq_along(parent)) { + if (is.na(names(parent)[i])) {names(parent)[i] <- paste0("missing_fileName", i)} + } + + child <- lapply(children2, function(x) {lapply(x, translate)}) + for (i in seq_along(child)) { + names(child[[i]]) <- unlist(lapply(child[[i]], function(x) {x@fileName})) + for (j in seq_along(child[[i]])) { + if (is.na(names(child[[i]])[j])) {names(child[[i]])[j] <- paste0("missing_fileName", j)} + } + } + names(child) <- paste0("child", seq_along(child)) + + all <- c(parent, child) + } return(all) } diff --git a/man/get_all_sysmeta.Rd b/man/get_all_sysmeta.Rd index d6cbfef..0fa5461 100644 --- a/man/get_all_sysmeta.Rd +++ b/man/get_all_sysmeta.Rd @@ -4,20 +4,23 @@ \alias{get_all_sysmeta} \title{Get system metadata for all objects in a data package} \usage{ -get_all_sysmeta(node, pid) +get_all_sysmeta(node, rm_pid, nmax = 1000, child_packages = FALSE) } \arguments{ -\item{node}{(MNode/CNode) The Coordinating/Member Node to run the query on} +\item{node}{(MNode/CNode) The Coordinating/Member Node to run the query on.} -\item{pid}{(character) The resource map PID of the package} +\item{rm_pid}{(character) The resource map PID of the package.} + +\item{nmax}{(numeric) Default 1000. The maximum number of object system metadata to return. The default is sufficient to get system metadata for all objects in most packages.} + +\item{child_packages}{(logical) Default FALSE. If parent package, whether or not to include objects of child packages.} } \value{ -(list) A structured list of system metadata +(list) A structured list of system metadata. } \description{ -This is a wrapper function around `dataone::getSystemMetadata` that retrieves -the system metadata for all objects in a data package and returns them as a list. -It is useful for inspecting system metadata and identifying changes where needed. +This function retrieves the system metadata for all objects in a data package and returns them as a list. +It is useful for inspecting system metadata for an entire data package and identifying changes where needed. } \examples{ \dontrun{ @@ -32,9 +35,10 @@ all <- get_all_sysmeta(knb_test, rm_pid) View(all) # Print specific elements to console -all[["Metadata"]]@rightsHolder +all[[1]]@rightsHolder # Create separate object -sysmeta_md <- all[["Metadata"]] +sysmeta_md <- all[[2]] } } +\keyword{internal} diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index cc2f5d8..cda3b1d 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -17,3 +17,28 @@ test_that("a dummy package can be created", { expect_true(object_exists(mn, result$data)) expect_true(object_exists(mn, result$resource_map)) }) + + +test_that("all system metadata is retrieved", { + expect_error(get_all_sysmeta(node, rm_pid)) + + cn_staging <- CNode("STAGING") + adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") + + expect_error(get_all_sysmeta(adc_test, test)) + expect_error(get_all_sysmeta(adc_test, "")) + expect_error(get_all_sysmeta(adc_test, "urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610")) + expect_error(get_all_sysmeta(adc_test, rm_pid, nmax = -7)) + expect_error(get_all_sysmeta(adc_test, rm_pid, child_packages = 7)) + + + rm_pid <- "resource_map_urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610" + all <- get_all_sysmeta(adc_test, rm_pid) + + expect_message(get_all_sysmeta(adc_test, rm_pid)) + expect_type(all, "list") + expect_length(all, 5) + expect_equal(names(all)[1], "dummy_resource_map.xml") + + expect_message(get_all_sysmeta(adc_test, "resource_map_urn:uuid:924f81f6-2e68-4eb8-925f-53f5b66318ec")) +}) From 0bfb3dda6cd09e0072d2335ab8b4a1e6a7a7301d Mon Sep 17 00:00:00 2001 From: Robyn Thiessen-Bock Date: Thu, 23 Aug 2018 11:35:05 -0700 Subject: [PATCH 155/318] Created is_public_read function (access.R) and added a unit test; corrected error statement in set_access, set_rights_holder, set_rights_and_access --- R/access.R | 65 ++++++++++++++++++++++++++++++++++-- tests/testthat/test_access.R | 15 +++++++++ 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/R/access.R b/R/access.R index 41d1a2c..7e9ffbb 100644 --- a/R/access.R +++ b/R/access.R @@ -36,7 +36,7 @@ set_rights_holder <- function(mn, pids, subject) { if (!all(is.character(subject), nchar(subject) > 0)){ - stop("Argument 'pids' must be character class with non-zero number of characters.") + stop("Argument 'subject' must be character class with non-zero number of characters.") } if (grepl("^https:\\/\\/orcid\\.org", subject)) { @@ -118,7 +118,7 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang if (!all(is.character(subjects), all(nchar(subjects)) > 0)){ - stop("Argument 'pids' must be character class with non-zero number of characters.") + stop("Argument 'subjects' must be character class with non-zero number of characters.") } if (any(grepl("^https:\\/\\/orcid\\.org", subjects))) { @@ -297,7 +297,7 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ if (!all(is.character(subject), nchar(subject) > 0)){ - stop("Argument 'pids' must be character class with non-zero number of characters.") + stop("Argument 'subject' must be character class with non-zero number of characters.") } if (grepl("^https:\\/\\/orcid\\.org", subject)) { @@ -376,3 +376,62 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ results } + + +#' Check whether an object has public read access +#' +#' Check whether objects with given pids have public read access. +#' No token needs to be set to use this function. +#' +#' @param mn (MNode) The Member Node to send the query to. +#' @param pids (character) The PID(s) to check for public read access. +#' @param use.names (logical) Optional. If set to `TRUE` (the deafult), pids will +#' be used as names for the result, unless pids has names already, in which case, +#' those names will be used for the result. +#' +#' @return A vector of class logical. +#' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' is_public_read(mn, pids) +#'} +is_public_read <- function(mn, pids, use.names=TRUE){ + + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))){ + stop("Argument 'pid' must be character class with non-zero number of characters.") + } + + if(!is.logical(use.names)){ + stop(paste0("Argument 'use.names' must be logical class, but was a ", class(use.names), " instead.")) + } + + vapply(pids, USE.NAMES = use.names, FUN.VALUE = logical(1), FUN=function(pid){ + + url <- paste(mn@endpoint, "meta", URLencode(pid, reserved=T), sep="/") + response <- dataone:::auth_get(url, node=mn) + + if(response$status_code != "200") { + error_desc <- dataone:::getErrorDescription(response) + if(grepl("READ not allowed", error_desc)){ + return(FALSE) + } else { + stop(error_desc) + } + } + + sysmeta <- datapack:::SystemMetadata(XML::xmlRoot(suppressMessages(XML::xmlParse((httr::content(response, as="text")))))) + return(datapack::hasAccessRule(sysmeta, "public", "read")) + + }) + +} diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index c9f4573..321143e 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -65,3 +65,18 @@ test_that("get_package works the same when given a metadata pid as it does when expect_equal(a, b) }) + +test_that("is_public_read returns true for public packages and false for private packages", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package(mn) + + public_response <- is_public_read(mn, pkg$resource_map) + remove_public_read(mn, pkg$resource_map) + private_response <- is_public_read(mn, pkg$resource_map) + + expect_true(public_response) + expect_false(private_response) +}) From bfca3959e6a41c6a8d99f4f433d32cd0f6547347 Mon Sep 17 00:00:00 2001 From: Robyn Thiessen-Bock Date: Thu, 23 Aug 2018 11:47:33 -0700 Subject: [PATCH 156/318] Added documentation for is_public_read function. --- DESCRIPTION | 2 +- NAMESPACE | 1 + man/is_public_read.Rd | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 man/is_public_read.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e332b2e..cc41528 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,5 +38,5 @@ Suggests: rmarkdown, yaml, xslt -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 16dada6..a073427 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,6 +35,7 @@ export(get_token) export(guess_format_id) export(is_authorized) export(is_obsolete) +export(is_public_read) export(is_token_expired) export(is_token_set) export(mdq_run) diff --git a/man/is_public_read.Rd b/man/is_public_read.Rd new file mode 100644 index 0000000..69403c0 --- /dev/null +++ b/man/is_public_read.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/access.R +\name{is_public_read} +\alias{is_public_read} +\title{Check whether an object has public read access} +\usage{ +is_public_read(mn, pids, use.names = TRUE) +} +\arguments{ +\item{mn}{(MNode) The Member Node to send the query to.} + +\item{pids}{(character) The PID(s) to check for public read access.} + +\item{use.names}{(logical) Optional. If set to `TRUE` (the deafult), pids will +be used as names for the result, unless pids has names already, in which case, +those names will be used for the result.} +} +\value{ +A vector of class logical. +} +\description{ +Check whether objects with given pids have public read access. +No token needs to be set to use this function. +} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +is_public_read(mn, pids) +} +} From ede7fbfe094b34b8009a9ab64be6c8d080ccb45d Mon Sep 17 00:00:00 2001 From: Robyn Thiessen-Bock Date: Thu, 23 Aug 2018 13:00:26 -0700 Subject: [PATCH 157/318] clarified description for is_public_read() as per Dominic's feedback --- R/access.R | 4 ++-- man/is_public_read.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/access.R b/R/access.R index 7e9ffbb..ce54b61 100644 --- a/R/access.R +++ b/R/access.R @@ -380,7 +380,7 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ #' Check whether an object has public read access #' -#' Check whether objects with given pids have public read access. +#' Check whether DataOne objects have public read access set in their System Metadata. #' No token needs to be set to use this function. #' #' @param mn (MNode) The Member Node to send the query to. @@ -422,7 +422,7 @@ is_public_read <- function(mn, pids, use.names=TRUE){ if(response$status_code != "200") { error_desc <- dataone:::getErrorDescription(response) - if(grepl("READ not allowed", error_desc)){ + if(grepl("READ not allowed", error_desc, ignore.case = TRUE)){ return(FALSE) } else { stop(error_desc) diff --git a/man/is_public_read.Rd b/man/is_public_read.Rd index 69403c0..2d55a5a 100644 --- a/man/is_public_read.Rd +++ b/man/is_public_read.Rd @@ -19,7 +19,7 @@ those names will be used for the result.} A vector of class logical. } \description{ -Check whether objects with given pids have public read access. +Check whether DataOne objects have public read access set in their System Metadata. No token needs to be set to use this function. } \examples{ From d69ffa15fee870d2b0a8de054873cd6686a3c25b Mon Sep 17 00:00:00 2001 From: Robyn Thiessen-Bock Date: Thu, 23 Aug 2018 13:17:59 -0700 Subject: [PATCH 158/318] Added myself as a contributor in the description file --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index cc41528..6b73569 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,8 @@ Authors@R: c( person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), - person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb") + person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb"), + person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb") ) Description: A set of utilites for working with the Arctic Data Center (https://arcticdata.io). From 6af5fcb673d4272cfc1f3aa1f1abaed636a60d32 Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Tue, 4 Sep 2018 19:07:40 -0700 Subject: [PATCH 159/318] Update dataone_format_mappings Partial update for #84 --- R/util.R | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/R/util.R b/R/util.R index 2ef7984..a959613 100644 --- a/R/util.R +++ b/R/util.R @@ -37,17 +37,25 @@ extract_local_identifier <- function(type, file) { } -dataone_format_mappings <- list("avi" = "ideo/avi", +dataone_format_mappings <- list("avi" = "video/avi", "bmp" = "image/bmp", "bz2" = "application/x-bzip2", "csv" = "text/csv", + "doc" = "application/msword", + "docx" = "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "fasta" = "application/x-fasta", "gif" = "image/gif", "gz" = "application/x-gzip", "html" = "text/html", + "ipynb" = "application/json", + "jp2" = "image/jp2", "jpg" = "image/jpeg", "jpeg" = "image/jpeg", "kml" = "application/vnd.google-earth.kml/xml", + "kmz" = "application/vnd.google-earth.kmz", + "md" = "text/markdown", + "mov" = "video/quicktime", + "mp3" = "audio/mpeg", "mp4" = "video/mp4", "mpg" = "video/mpeg", "mpeg" = "video/mpeg", @@ -56,13 +64,23 @@ dataone_format_mappings <- list("avi" = "ideo/avi", "pdf" = "application/pdf", "png" = "image/png", "ppt" = "application/vnd.ms-powerpoint", + "pptx" = "application/vnd.openxmlformats-officedocument.presentationml.presentation", "py" = "application/x-python", + "qt" = "video/quicktime", + "r" = "application/R", + "rar" = "application/x-rar-compressed", "rdf" = "application/rdf/xml", + "rmd" = "text/x-rmarkdown", + "sas" = "application/SAS", + "svg" = "image/svg/xml", "tar" = "application/x-tar", "tif" = "image/tiff", "tiff" = "image/tiff", "ttl" = "text/turtle", + "tsv" = "text/tsv", "txt" = "text/plain", + "wav" = "audio/x-wav", + "wma" = "audio/x-ms-wma", "wmv" = "video/x-ms-wmv", "xls" = "application/vnd.ms-excel", "xlsx" = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", @@ -103,14 +121,13 @@ guess_format_id <- function(filenames) { #' @param path (character) Full or relative path to the file in question. #' #' @return (character) The DataONE format ID. - get_netcdf_format_id <- function(path) { stopifnot(is.character(path), nchar(path) > 0, file.exists(path)) if (!requireNamespace("ncdf4")) { - stop(call. = FALSE, + stop(call. = FALSE, "The package 'ncdf4' must be installed to run this function. ", "Please install it and try again.") } From bff4e477ae8b2eb29ae6f611a790c67a9d693bef Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Tue, 11 Sep 2018 16:48:07 -0700 Subject: [PATCH 160/318] Add update_package_object from datamgmt Close #96. Also move update_physical and create_dummy_package2 (renamed create_dummy_package_full) for dependency reasons. --- DESCRIPTION | 1 + NAMESPACE | 3 + R/editing.R | 151 ++++++++++++++++++++++ R/helpers.R | 110 ++++++++++++++++ R/util.R | 2 +- inst/example-eml-full.xml | 182 +++++++++++++++++++++++++++ man/create_dummy_package_full.Rd | 19 +++ man/create_resource_map.Rd | 4 +- man/eml_party.Rd | 7 +- man/generate_resource_map.Rd | 4 +- man/publish_update.Rd | 6 +- man/replace_subject.Rd | 3 +- man/update_object.Rd | 3 +- man/update_package_object.Rd | 58 +++++++++ man/update_physical.Rd | 22 ++++ tests/testthat/test_editing.R | 208 +++++++++++++++++++++++++++++++ tests/testthat/test_helpers.R | 9 ++ 17 files changed, 779 insertions(+), 13 deletions(-) create mode 100644 inst/example-eml-full.xml create mode 100644 man/create_dummy_package_full.Rd create mode 100644 man/update_package_object.Rd create mode 100644 man/update_physical.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6b73569..697d84c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,6 +29,7 @@ Imports: xml2, XML License: MIT + file LICENSE +Encoding: UTF-8 LazyData: true Suggests: dplyr, diff --git a/NAMESPACE b/NAMESPACE index a073427..3011439 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(create_dummy_enumeratedDomain_dataframe) export(create_dummy_metadata) export(create_dummy_object) export(create_dummy_package) +export(create_dummy_package_full) export(create_dummy_parent_package) export(create_resource_map) export(eml_abstract) @@ -56,6 +57,7 @@ export(set_rights_holder) export(show_indexing_status) export(sysmeta_to_eml_physical) export(update_object) +export(update_package_object) export(update_resource_map) export(view_profile) export(which_in_eml) @@ -69,6 +71,7 @@ importFrom(methods,is) importFrom(methods,new) importFrom(methods,slot) importFrom(stats,na.omit) +importFrom(stringr,str_detect) importFrom(utils,URLencode) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/editing.R b/R/editing.R index 6c4409f..d7e55c5 100644 --- a/R/editing.R +++ b/R/editing.R @@ -813,3 +813,154 @@ set_file_name <- function(mn, pid, name) { sysmeta@fileName <- name dataone::updateSystemMetadata(mn, pid, sysmeta) } + + +#' Update physical of an updated data object +#' +#' This function updates the EML with the new physical +#' of a data object once it has been updated. +#' This is a helper function for \code{\link{update_package_object}}. +#' +#' @param eml (eml) An EML class object. +#' @param mn (MNode) The Member Node of the data package. +#' @param data_pid (character) The identifier of the data object to be updated. +#' @param new_data_pid (character) The new identifier of the updated data object. +#' +#' @importFrom stringr str_detect +update_physical <- function(eml, mn, data_pid, new_data_pid) { + stopifnot(is(eml, "eml")) + stopifnot(is(mn, "MNode")) + stopifnot(is.character(data_pid), nchar(data_pid) > 0) + stopifnot(is.character(new_data_pid), nchar(new_data_pid) > 0) + + all_url <- unlist(EML::eml_get(eml, "url")) + if (sum(stringr::str_detect(all_url, data_pid)) == 0) { + stop("The data PID does not match any physical sections, so the EML will not be updated.") + } + + dataTable_url <- unlist(EML::eml_get(eml@dataset@dataTable, "url")) + + if (any(stringr::str_detect(dataTable_url, data_pid))) { + position <- which(stringr::str_detect(dataTable_url, data_pid)) + new_phys <- pid_to_eml_physical(mn, new_data_pid) + eml@dataset@dataTable[[position]]@physical@.Data <- new_phys + } + + otherEntity_url <- unlist(EML::eml_get(eml@dataset@otherEntity, "url")) + + if (any(stringr::str_detect(otherEntity_url, data_pid))) { + position <- which(stringr::str_detect(otherEntity_url, data_pid)) + new_phys <- pid_to_eml_physical(mn, new_data_pid) + eml@dataset@otherEntity[[position]]@physical@.Data <- new_phys + } + + spatialVector_url <- unlist(EML::eml_get(eml@dataset@spatialVector, "url")) + + if (any(stringr::str_detect(spatialVector_url, data_pid))) { + position <- which(stringr::str_detect(spatialVector_url, data_pid)) + new_phys <- pid_to_eml_physical(mn, new_data_pid) + eml@dataset@spatialVector[[position]]@physical@.Data <- new_phys + } + + invisible(eml) +} + + +#' Update a data object and associated resource map and metadata +#' +#' This function updates a data object and then automatically +#' updates the package resource map with the new data PID. If an object +#' already has a \code{dataTable}, \code{otherEntity}, or \code{spatialVector} +#' with a working physical section, the EML will be updated with the new physical. +#' It is a convenience wrapper around \code{\link{update_object}} +#' and \code{\link{publish_update}}. +#' +#' @param mn (MNode) The Member Node of the data package. +#' @param data_pid (character) PID for data object to update. +#' @param new_data_path (character) Path to new data object. +#' @param resource_map_pid (character) PID for resource map to update. +#' @param format_id (character) Optional. The format ID to set for the object. +#' When not set, \code{\link{guess_format_id}} will be used +#' to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param public (logical) Optional. Make the update public. If FALSE, +#' will set the metadata and resource map to private (but not the data objects). +#' This applies to the new metadata PID and its resource map and data object. +#' Access policies are not affected. +#' @param use_doi (logical) Optional. If TRUE, a new DOI will be minted. +#' @param ... Other arguments to pass into \code{\link{publish_update}}. +#' +#' @return PIDs (character) Named character vector of PIDs in the data package, including PIDs +#' for the metadata, resource map, and data objects. +#' +#' @keywords update_object publish_update +#' +#' @import dataone +#' @import EML +#' +#' @examples +#' \dontrun{ +#' cnTest <- dataone::CNode("STAGING") +#' mnTest <- dataone::getMNode(cnTest,"urn:node:mnTestARCTIC") +#' +#' pkg <- create_dummy_package_full(mnTest, title = "My package") +#' +#' file.create("new_file.csv") +#' update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv") +#' file.remove("new_file.csv") +#' } +#' +#' @export +update_package_object <- function(mn, + data_pid, + new_data_path, + resource_map_pid, + format_id = NULL, + public = TRUE, + use_doi = FALSE, + ...) { + stopifnot(is(mn, "MNode")) + stopifnot(is.character(data_pid), nchar(data_pid) > 0) + stopifnot(is.character(new_data_path), nchar(new_data_path) > 0, file.exists(new_data_path)) + stopifnot(is.character(resource_map_pid), nchar(resource_map_pid) > 0) + stopifnot(is.logical(public)) + + pkg <- get_package(mn, resource_map_pid) + eml <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) + + new_data_pid <- update_object(mn, + pid = data_pid, + path = new_data_path, + format_id = format_id) + + other_data_pids <- pkg$data[which(pkg$data != data_pid)] # wrapped in which for better NA handling + new_data_pids <- c(other_data_pids, new_data_pid) + + eml_new <- tryCatch(update_physical(eml = eml, + mn = mn, + data_pid = data_pid, + new_data_pid = new_data_pid), + error = function(e) { + print("The data PID does not match any physical sections, so the EML will not be updated.") + return(eml) + }) + + eml_path <- "science-metadata.xml" + file.create(eml_path) + EML::write_eml(eml_new, eml_path) + + pkg_new <- publish_update(mn, + metadata_pid = pkg$metadata, + resource_map_pid = pkg$resource_map, + metadata_path = eml_path, + data_pids = new_data_pids, + child_pids = pkg$child_packages, + public = public, + use_doi = use_doi, + ...) + + file.remove(eml_path) + + cat("\nThe new data pid is:", new_data_pid) + + return(pkg_new) +} diff --git a/R/helpers.R b/R/helpers.R index 3561958..40bf788 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -298,3 +298,113 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { enumeratedDomains } + + +#' Create dummy package with fuller metadata +#' +#' Creates a fuller package than \code{\link{create_dummy_package}} +#' but is otherwise based on the same concept. This dummy +#' package includes multiple data objects, responsible parties, +#' geographic locations, method steps, etc. +#' +#' @param mn (MNode) The Member Node. +#' @param title (character) Optional. Title of package. Defaults to "A Dummy Package". +#' +#' @import EML +#' @import dataone +#' +#' @export +create_dummy_package_full <- function(mn, title = "A Dummy Package") { + stopifnot(is(mn, "MNode")) + stopifnot(is.character(title), nchar(title) > 0) + if (mn@env == "prod") { + stop("Cannot create dummy package on production node.") + } + + # Create objects + file.create(c("dummy1.csv", "dummy2.csv", "dummy1.jpg", "dummy1.R")) + # TODO: add actual data to dummy files + + pid_csv1 <- publish_object(mn, + path = "dummy1.csv", + format_id = "text/csv") + + pid_csv2 <- publish_object(mn, + path = "dummy2.csv", + format_id = "text/csv") + + pid_jpg1 <- publish_object(mn, + path = "dummy1.jpg", + format_id = "image/jpeg") + + pid_R1 <- publish_object(mn, + path = "dummy1.R", + format_id = "application/R") + + data_pids <- c(pid_csv1, pid_csv2, pid_jpg1, pid_R1) + + # Import EML + eml_path_original <- file.path(system.file(package = "arcticdatautils"), "example-eml-full.xml") + eml <- EML::read_eml(eml_path_original) + + # Add objects to EML + eml@dataset@title[[1]]@.Data <- title + + attr <- data.frame( + attributeName = c("Date", "Location", "Salinity", "Temperature"), + attributeDefinition = c("Date sample was taken on", "Location code representing location where sample was taken", "Salinity of sample in PSU", "Temperature of sample"), + measurementScale = c("dateTime", "nominal","ratio", "interval"), + domain = c("dateTimeDomain", "enumeratedDomain","numericDomain", "numericDomain"), + formatString = c("MM-DD-YYYY", NA, NA, NA), + definition = c(NA,NA, NA, NA), + unit = c(NA, NA, "dimensionless", "celsius"), + numberType = c(NA, NA, "real", "real"), + missingValueCode = c(NA, NA, NA, NA), + missingValueCodeExplanation = c(NA, NA, NA, NA), + stringsAsFactors = FALSE) + + location <- c(CASC = "Cascade Lake", CHIK = "Chikumunik Lake", HEAR = "Heart Lake", NISH = "Nishlik Lake") + fact <- data.frame(attributeName = "Location", code = names(location), definition = unname(location)) + + attributeList <- EML::set_attributes(attributes = attr, factors = fact) + + dT1 <- pid_to_eml_entity(mn, + pid = pid_csv1, + entityType = "dataTable") + dT1@attributeList <- attributeList + + dT2 <- pid_to_eml_entity(mn, + pid = pid_csv2, + entityType = "dataTable") + dT2@attributeList <- attributeList + + eml@dataset@dataTable <- c(dT1, dT2) + + oE1 <- pid_to_eml_entity(mn, + pid = pid_jpg1, + entityType = "otherEntity") + + oE2 <- pid_to_eml_entity(mn, + pid = pid_R1, + entityType = "otherEntity") + + eml@dataset@otherEntity <- c(oE1, oE2) + + eml_path <- tempfile(fileext = ".xml") + EML::write_eml(eml, eml_path) + + pid_eml <- publish_object(mn, + path = eml_path, + format_id = "eml://ecoinformatics.org/eml-2.1.1") + + # Create resource map + resource_map_pid <- create_resource_map(mn, + metadata_pid = pid_eml, + data_pids = data_pids) + + file.remove(c("dummy1.csv", "dummy2.csv", "dummy1.jpg", "dummy1.R"), eml_path) + + return(list(resource_map = resource_map_pid, + metadata = pid_eml, + data = data_pids)) +} diff --git a/R/util.R b/R/util.R index a959613..34b0c48 100644 --- a/R/util.R +++ b/R/util.R @@ -810,7 +810,7 @@ get_package <- function(node, pid, file_names=FALSE, rows=1000) { resource_map_pids <- pid } else { warning(call. = FALSE, - paste0("The PID '", pid, "' is not for a Resource Map Object so the most likely candidate was found. This is usally fine! Specify a Resource Map PID instead to stop getting this warning.")) + paste0("The PID '", pid, "' is not for a Resource Map Object so the most likely candidate was found. This is usually fine! Specify a Resource Map PID instead to stop getting this warning.")) resource_map_pids <- find_newest_resource_map(node, pid) } diff --git a/inst/example-eml-full.xml b/inst/example-eml-full.xml new file mode 100644 index 0000000..b1e1859 --- /dev/null +++ b/inst/example-eml-full.xml @@ -0,0 +1,182 @@ + + + + A Dummy Package + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
+ Wild West + Deadwood + CA +
+ (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX +
+ + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
+ Wild West + Deadwood + CA +
+ (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX +
+ + + Harry + High-Stakes + + Deadwood Saloon + Co-Owner + + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
+ Wild West + Deadwood + CA +
+ (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX + principalInvestigator +
+ 2018 + + This is an abstract. + + + This work is dedicated to the public domain under the Creative Commons Universal 1.0 Public Domain Dedication. To view a copy of this dedication, visit https://creativecommons.org/publicdomain/zero/1.0/. + + + + Somewhere in the world + + 70 + 70 + 65 + 65 + + + + Another place in the world + + 80 + 80 + 75 + 75 + + + + + 2018 + + + + Everything was identified to species + + Species + Homo sapiens + + + Species + Canis lupus + + + + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
+ Wild West + Deadwood + CA +
+ (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX +
+ + + Banker + Bob + + Deadwood Bank + Owner + + + + Jesse + Wales + + Deadwood Streets + Outlaw + + + + Elizabeth + Money + + Deadwood Land Investing + Land Investor + + + + + Some methods + + + + + Some more methods + + + + + + Lots of sampling + + + + More sampling + + + + + A Dummy Package + + + Harry + High-Stakes + + principalInvestigator + + + 1234 + 4567 + + +
+
diff --git a/man/create_dummy_package_full.Rd b/man/create_dummy_package_full.Rd new file mode 100644 index 0000000..9e87666 --- /dev/null +++ b/man/create_dummy_package_full.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{create_dummy_package_full} +\alias{create_dummy_package_full} +\title{Create dummy package with fuller metadata} +\usage{ +create_dummy_package_full(mn, title = "A Dummy Package") +} +\arguments{ +\item{mn}{(MNode) The Member Node.} + +\item{title}{(character) Optional. Title of package. Defaults to "A Dummy Package".} +} +\description{ +Creates a fuller package than \code{\link{create_dummy_package}} +but is otherwise based on the same concept. This dummy +package includes multiple data objects, responsible parties, +geographic locations, method steps, etc. +} diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index a999d81..8ac7367 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -4,8 +4,8 @@ \alias{create_resource_map} \title{Create a resource map Object on a Member Node.} \usage{ -create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL, - check_first = TRUE, ...) +create_resource_map(mn, metadata_pid, data_pids = NULL, + child_pids = NULL, check_first = TRUE, ...) } \arguments{ \item{mn}{(MNode) The Member Node} diff --git a/man/eml_party.Rd b/man/eml_party.Rd index 6abee51..f76b522 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -4,9 +4,10 @@ \alias{eml_party} \title{Low-level helper for creating EML parties} \usage{ -eml_party(type = "associatedParty", given_names = NULL, sur_name = NULL, - organization = NULL, position = NULL, email = NULL, phone = NULL, - address = NULL, userId = NULL, role = NULL) +eml_party(type = "associatedParty", given_names = NULL, + sur_name = NULL, organization = NULL, position = NULL, + email = NULL, phone = NULL, address = NULL, userId = NULL, + role = NULL) } \arguments{ \item{type}{(character) The type of party (e.g. 'contact')} diff --git a/man/generate_resource_map.Rd b/man/generate_resource_map.Rd index 2b56e2b..1e5a3de 100644 --- a/man/generate_resource_map.Rd +++ b/man/generate_resource_map.Rd @@ -6,8 +6,8 @@ This is a convenience wrapper around the constructor of the `ResourceMap` class from `DataPackage`.} \usage{ -generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL, - other_statements = NULL, +generate_resource_map(metadata_pid, data_pids = NULL, + child_pids = NULL, other_statements = NULL, resolve_base = "https://cn.dataone.org/cn/v2/resolve", resource_map_pid = NULL) } diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 76f4508..4597dc5 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -6,9 +6,9 @@ \usage{ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, child_pids = NULL, metadata_path = NULL, identifier = NULL, - use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL, - parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE, - check_first = TRUE) + use_doi = FALSE, parent_resmap_pid = NULL, + parent_metadata_pid = NULL, parent_data_pids = NULL, + parent_child_pids = NULL, public = TRUE, check_first = TRUE) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} diff --git a/man/replace_subject.Rd b/man/replace_subject.Rd index 963b5c2..87c7618 100644 --- a/man/replace_subject.Rd +++ b/man/replace_subject.Rd @@ -4,7 +4,8 @@ \alias{replace_subject} \title{Replace subjects in the accessPolicy section of a System Metadata entries.} \usage{ -replace_subject(sysmeta, from = "cn=arctic-data-admins,dc=dataone,dc=org", +replace_subject(sysmeta, + from = "cn=arctic-data-admins,dc=dataone,dc=org", to = "CN=arctic-data-admins,DC=dataone,DC=org") } \arguments{ diff --git a/man/update_object.Rd b/man/update_object.Rd index bc4d88b..7c525f4 100644 --- a/man/update_object.Rd +++ b/man/update_object.Rd @@ -4,7 +4,8 @@ \alias{update_object} \title{Update an object with a new file.} \usage{ -update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) +update_object(mn, pid, path, format_id = NULL, new_pid = NULL, + sid = NULL) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} diff --git a/man/update_package_object.Rd b/man/update_package_object.Rd new file mode 100644 index 0000000..16d7883 --- /dev/null +++ b/man/update_package_object.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{update_package_object} +\alias{update_package_object} +\title{Update a data object and associated resource map and metadata} +\usage{ +update_package_object(mn, data_pid, new_data_path, resource_map_pid, + format_id = NULL, public = TRUE, use_doi = FALSE, ...) +} +\arguments{ +\item{mn}{(MNode) The Member Node of the data package.} + +\item{data_pid}{(character) PID for data object to update.} + +\item{new_data_path}{(character) Path to new data object.} + +\item{resource_map_pid}{(character) PID for resource map to update.} + +\item{format_id}{(character) Optional. The format ID to set for the object. +When not set, \code{\link{guess_format_id}} will be used +to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} + +\item{public}{(logical) Optional. Make the update public. If FALSE, +will set the metadata and resource map to private (but not the data objects). +This applies to the new metadata PID and its resource map and data object. +Access policies are not affected.} + +\item{use_doi}{(logical) Optional. If TRUE, a new DOI will be minted.} + +\item{...}{Other arguments to pass into \code{\link{publish_update}}.} +} +\value{ +PIDs (character) Named character vector of PIDs in the data package, including PIDs +for the metadata, resource map, and data objects. +} +\description{ +This function updates a data object and then automatically +updates the package resource map with the new data PID. If an object +already has a \code{dataTable}, \code{otherEntity}, or \code{spatialVector} +with a working physical section, the EML will be updated with the new physical. +It is a convenience wrapper around \code{\link{update_object}} +and \code{\link{publish_update}}. +} +\examples{ +\dontrun{ +cnTest <- dataone::CNode("STAGING") +mnTest <- dataone::getMNode(cnTest,"urn:node:mnTestARCTIC") + +pkg <- create_dummy_package_full(mnTest, title = "My package") + +file.create("new_file.csv") +update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv") +file.remove("new_file.csv") +} + +} +\keyword{publish_update} +\keyword{update_object} diff --git a/man/update_physical.Rd b/man/update_physical.Rd new file mode 100644 index 0000000..26ebe8c --- /dev/null +++ b/man/update_physical.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{update_physical} +\alias{update_physical} +\title{Update physical of an updated data object} +\usage{ +update_physical(eml, mn, data_pid, new_data_pid) +} +\arguments{ +\item{eml}{(eml) An EML class object.} + +\item{mn}{(MNode) The Member Node of the data package.} + +\item{data_pid}{(character) The identifier of the data object to be updated.} + +\item{new_data_pid}{(character) The new identifier of the updated data object.} +} +\description{ +This function updates the EML with the new physical +of a data object once it has been updated. +This is a helper function for \code{\link{update_package_object}}. +} diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index da2b48c..7811796 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -300,3 +300,211 @@ test_that("publish_update errors if the non-current resource map or metadata pid expect_error(publish_update(mn, pkg1$metadata, pkg2$resource_map, pkg2$data)) expect_error(publish_update(mn, pkg2$metadata, pkg1$resource_map, pkg2$data)) }) + + +test_that("update_physical works", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package_full(mn, title = "Update physical check") + + file.create("dummy_object.csv") + + new_data_pid <- update_object(mn, + pid = pkg$data[2], + path = "dummy_object.csv", + format_id = "text/csv") + + file.remove("dummy_object.csv") + + pkg_new <- publish_update(mn, + resource_map_pid = pkg$resource_map, + metadata_pid = pkg$metadata, + data_pids = c(pkg$data[-2], new_data_pid)) + + eml_original <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) + + eml_new <- update_physical(eml_original, + mn, + data_pid = pkg$data[2], + new_data_pid = new_data_pid) + + url_original <- unlist(EML::eml_get(eml_original, "url")) + url_new <- unlist(EML::eml_get(eml_new, "url")) + + expect_equal(sum(stringr::str_detect(url_original, pkg$data[1])), 1) + expect_equal(sum(stringr::str_detect(url_original, pkg$data[2])), 1) + expect_equal(sum(stringr::str_detect(url_original, pkg$data[3])), 1) + expect_equal(sum(stringr::str_detect(url_original, pkg$data[4])), 1) + + expect_equal(sum(stringr::str_detect(url_new, new_data_pid)), 1) + expect_equal(sum(stringr::str_detect(url_new, pkg$data[1])), 1) + expect_equal(sum(stringr::str_detect(url_new, pkg$data[2])), 0) + expect_equal(sum(stringr::str_detect(url_new, pkg$data[3])), 1) + expect_equal(sum(stringr::str_detect(url_new, pkg$data[4])), 1) +}) + + +test_that("update_package_object changes specified data object and rest of package is intact", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package_full(mn, title = "Check update_package_object") + + new_data_path <- "test_file.csv" + file.create(new_data_path) + + data_pid <- pkg$data[2] + + pkg_new <- update_package_object(mn, + data_pid = data_pid, + new_data_path = new_data_path, + resource_map_pid = pkg$resource_map, + format_id = "text/csv") + + file.remove(new_data_path) + + # test: other objects are retained + expect_equal(all(pkg$data[-2] %in% pkg_new$data), TRUE) + + # test: metadata changes + expect_false(pkg$metadata == pkg_new$metadata) + + # test: new data PID is a version of old data PID + versions <- get_all_versions(mn, data_pid) + latest_version <- versions[length(versions)] + + new_data_pid <- pkg_new$data[!pkg_new$data %in% pkg$data] + + expect_equal(latest_version, new_data_pid) + + # test: EML is updated + eml_original <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) + + eml_new <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg_new$metadata))) + + url_original <- unlist(EML::eml_get(eml_original, "url")) + url_new <- unlist(EML::eml_get(eml_new, "url")) + + expect_true(url_original[2] != url_new[2]) + expect_equal(url_original[1], url_new[1]) + expect_equal(url_original[3], url_new[3]) + expect_equal(url_original[4], url_new[4]) + + expect_true(stringr::str_detect(url_new[2], new_data_pid)) +}) + + +test_that("update_package_object errors if wrong input", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + file_path <- tempfile(fileext = ".csv") + + expect_error(update_package_object(LETTERS, + data_pid = file_path, + new_data_path = "something", + rm_pid = "something")) + + expect_error(update_package_object(mn, + data_pid = c(1, 2), + new_data_path = "something", + rm_pid = "something")) + + expect_error(update_package_object(mn, + data_pid = "something", + new_data_path = "something", + rm_pid = "something")) + + expect_error(update_package_object(mn, + data_pid = "something", + new_data_path = TRUE, + rm_pid = "something")) + + expect_error(update_package_object(mn, + data_pid = file_path, + new_data_path = "something", + rm_pid = 1)) + file.remove(file_path) +}) + + +test_that("update_package_object updates EML", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package(mn, size = 4) + + attributes1 <- data.frame( + attributeName = c("col1", "col2"), + attributeDefinition = c("Numbers", "Letters in the alphabet"), + measurementScale = c("ratio", "nominal"), + domain = c("numericDomain", "textDomain"), + formatString = c(NA, NA), + definition = c(NA, "ABCDEFG..."), + unit = c("dimensionless", NA), + numberType = c("integer", NA), + missingValueCode = c(NA, NA), + missingValueCodeExplanation = c(NA, NA), + stringsAsFactors = FALSE) + + attributeList1 <- EML::set_attributes(attributes1) + phys <- pid_to_eml_physical(mn, pkg$data[1]) + + dummy_data_table <- new("dataTable", + entityName = "Dummy Data Table", + entityDescription = "Dummy Description", + physical = phys, + attributeList = attributeList1) + + eml <- EML::read_eml(rawToChar(getObject(mn, pkg$metadata))) + eml@dataset@dataTable <- c(dummy_data_table) + + otherEnts <- pid_to_eml_entity(mn, pkg$data[2:3], entityType = "otherEntity") + eml@dataset@otherEntity <- new("ListOfotherEntity", otherEnts) + + eml_path <- tempfile(fileext = ".xml") + EML::write_eml(eml, eml_path) + + pkg <- publish_update(mn, + metadata_pid = pkg$metadata, + resource_map_pid = pkg$resource_map, + data_pids = pkg$data, + metadata_path = eml_path, + public = TRUE, + use_doi = FALSE) + + dummy_data <- data.frame(col1 = 1:26, col2 = letters) + new_data_path <- tempfile(fileext = ".csv") + write.csv(dummy_data, new_data_path, row.names = FALSE) + + data_pid <- pkg$data[1] + + pkg_new <- update_package_object(mn, + data_pid, + new_data_path, + pkg$resource_map, + format_id = "text/csv", + public = TRUE, + use_doi = FALSE) + + url_initial <- unlist(EML::eml_get(eml, "url")) + expect_equal(sum(stringr::str_count(url_initial, data_pid)), 1) + + eml_new <- EML::read_eml(rawToChar(getObject(mn, pkg_new$metadata))) + url_final <- unlist(EML::eml_get(eml_new, "url")) + expect_equal(sum(stringr::str_count(url_final, data_pid)), 0) + + pid_matches <- lapply(seq_along(pkg_new$data), + function(i) {stringr::str_count(url_final, pkg_new$data[i])}) + + # confirm that URLs have a matching PID + # if new PID corresponds to a dataset that had a dataTable/otherEntity + # and has not been updated, expect_equal will error + expect_equal(sum(unlist(pid_matches)), + length(url_final)) +}) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index cc2f5d8..3fa732f 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -17,3 +17,12 @@ test_that("a dummy package can be created", { expect_true(object_exists(mn, result$data)) expect_true(object_exists(mn, result$resource_map)) }) + +test_that("create_dummy_package_full errors if wrong input", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + expect_error(create_dummy_package_full(mn, title = 11)) + expect_error(create_dummy_package_full("mn")) +}) From 6f9c79823aa32e1e87a337d5d1f0c08b694e9dd9 Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Wed, 12 Sep 2018 16:52:04 -0700 Subject: [PATCH 161/318] Check if getSystemMetadata returns class SystemMetadata Closes #38. --- DESCRIPTION | 1 + NAMESPACE | 1 + R/access.R | 188 +++++++++++++++++++++-------------- man/is_public_read.Rd | 12 +-- man/remove_public_read.Rd | 8 +- man/set_access.Rd | 16 +-- man/set_public_read.Rd | 11 +- man/set_rights_and_access.Rd | 18 ++-- man/set_rights_holder.Rd | 21 ++-- tests/testthat/test_access.R | 12 +++ 10 files changed, 173 insertions(+), 115 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6b73569..8cbee4d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,5 +39,6 @@ Suggests: rmarkdown, yaml, xslt +Encoding: UTF-8 RoxygenNote: 6.1.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index a073427..923464a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ import(EML) import(XML) import(dataone) import(datapack) +importFrom(httr,content) importFrom(methods,"slot<-") importFrom(methods,as) importFrom(methods,is) diff --git a/R/access.R b/R/access.R index ce54b61..92c6a14 100644 --- a/R/access.R +++ b/R/access.R @@ -1,21 +1,24 @@ -#' access.R -#' -#' High-level utility functions for getting and setting access rules for DataONE -#' objects. +# access.R +# +# High-level utility functions for getting and setting access rules for DataONE objects. -#' Set the rightsHolder field for a given PID. +#' Set the rights holder for an object #' -#' Update the rights holder to the provided subject for the object identified in -#' the provided system metadata document on the given Member Node. +#' Set the rights holder to the given subject for the given objects on the +#' given Member Node. This function checks if the rights holder is already set +#' and only updates the System Metadata when a change is needed. #' -#' @param mn (MNode) The MNode instance to be changed. -#' @param pids (character) The identifiers for the object to be changed. -#' @param subject (character) The identifier of the new rightsHolder, often an ORCID or DN. +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set the rights holder for. +#' @param subject (character) The identifier of the new rights holder, typially an ORCID or DN. #' #' @import dataone #' @import datapack +#' +#' @return (logical) Whether an update was needed. #' @export +#' #' @examples #'\dontrun{ #' cn <- CNode("STAGING2") @@ -30,17 +33,17 @@ set_rights_holder <- function(mn, pids, subject) { } if (!all(is.character(pids), - all(nchar(pids) > 0))){ + all(nchar(pids) > 0))) { stop("Argument 'pids' must be character class with non-zero number of characters.") } if (!all(is.character(subject), - nchar(subject) > 0)){ + nchar(subject) > 0)) { stop("Argument 'subject' must be character class with non-zero number of characters.") } if (grepl("^https:\\/\\/orcid\\.org", subject)) { - stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + stop("Argument 'subject' cannot contain 'https:', use 'http:' instead.") } @@ -50,7 +53,20 @@ set_rights_holder <- function(mn, pids, subject) { pid <- pids[i] # Get System Metadata - sysmeta <- dataone::getSystemMetadata(mn, pid) + sysmeta <- tryCatch({ + dataone::getSystemMetadata(mn, pid) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) + e + }) + + if (!inherits(sysmeta, "SystemMetadata")) { + stop("Failed to get System Metadata.") + } # Change rightsHolder (if needed) if (sysmeta@rightsHolder == subject) { @@ -84,17 +100,19 @@ set_rights_holder <- function(mn, pids, subject) { } -#' Set the access policy for a set of objects. +#' Set the access policy for an object #' -#' For each permission, this function checks if the permission is already set -#' and moves on. System Metadata are only updated when a change was needed. +#' Set the access policy for the given subjects for the given objects on the given Member Node. +#' For each type of permission, this function checks if the permission is already set +#' and only updates the System Metadata when a change is needed. #' #' @param mn (MNode) The Member Node. -#' @param pids (character) The object(s) to set the permissions on. -#' @param subjects (character) The subject(s) to set permissions for. -#' @param permissions (character) Optional. Vector of permissions. +#' @param pids (character) The PIDs of the objects to set permissions for. +#' @param subjects (character) The identifiers of the subjects to set permissions for, typially an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to +#' read, write, and changePermission. #' -#' @return (logical) Named +#' @return (logical) Whether an update was needed. #' @export #' #' @examples @@ -106,18 +124,18 @@ set_rights_holder <- function(mn, pids, subject) { #' set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", #' permissions = c("read", "write", "changePermission")) #'} -set_access <- function(mn, pids, subjects, permissions=c("read", "write", "changePermission")) { +set_access <- function(mn, pids, subjects, permissions = c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) } if (!all(is.character(pids), - all(nchar(pids) > 0))){ + all(nchar(pids) > 0))) { stop("Argument 'pids' must be character class with non-zero number of characters.") } if (!all(is.character(subjects), - all(nchar(subjects)) > 0)){ + all(nchar(subjects)) > 0)) { stop("Argument 'subjects' must be character class with non-zero number of characters.") } @@ -135,7 +153,20 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang for (pid in pids) { changed <- FALSE - sysmeta <- dataone::getSystemMetadata(mn, pid) + sysmeta <- tryCatch({ + dataone::getSystemMetadata(mn, pid) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) + e + }) + + if (!inherits(sysmeta, "SystemMetadata")) { + stop("Failed to get System Metadata.") + } for (subject in subjects) { for (permission in permissions) { @@ -163,11 +194,14 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang } -#' Set public access on a set of objects. +#' Set public read access for an object #' -#' @param mn (MNode) -#' @param pids (character) A vector of PIDs to set public access on +#' Set public read access for an object. #' +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set public read access for. +#' +#' @return (logical) Whether an update was needed. #' @export #' #' @examples @@ -182,10 +216,13 @@ set_public_read <- function(mn, pids) { set_access(mn, pids, "public", "read") } -#' Remove public access on a set of objects. + +#' Remove public read access for an object #' -#' @param mn (MNode) -#' @param pids (character) A vector of PIDs to set public access on +#' Remove public read access for an object. +#' +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to remove public read access for. #' #' @export #' @@ -203,7 +240,7 @@ remove_public_read <- function(mn, pids) { } if (!all(is.character(pids), - all(nchar(pids) > 0))){ + all(nchar(pids) > 0))) { stop("Argument 'pids' must be character class with non-zero number of characters.") } @@ -211,18 +248,20 @@ remove_public_read <- function(mn, pids) { # Store the results of each attempted update results <- c() - # Remove public access for each PID + # Remove public read access for each PID for (pid in pids) { sysmeta <- tryCatch({ dataone::getSystemMetadata(mn, pid) - }, - error = function(e) { - message(paste0("Failed to get system metadata for PID '", pid, "' on MN '", mn@endpoint, "'.\n")) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) message(e) e }) - if (inherits(sysmeta, "error")) { + if (!inherits(sysmeta, "SystemMetadata")) { stop("Failed to get System Metadata.") } @@ -237,7 +276,7 @@ remove_public_read <- function(mn, pids) { changed <- TRUE message(paste0("Removing public read access on ", pid, ".")) - sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)),] + sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)), ] # Update the sysmeta update_response <- tryCatch({ @@ -261,19 +300,19 @@ remove_public_read <- function(mn, pids) { } -#' Set the given subject as the rightsHolder and subject with write and -#' changePermission access for the given PID. +#' Set rights holder with access policy for an object #' -#' This function only updates the existing System Metadata if a change is -#' needed. +#' Set the given subject as the rights holder and with given permissions +#' for the given objects. This function only updates the existing +#' System Metadata when a change is needed. #' -#' @param mn (MNode) The Member Node to send the query. -#' @param pids (character) The PID(s) to set the access rule for. -#' @param subject (character)The subject of the rule(s). -#' @param permissions (character) The permissions for the rule. Defaults to +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set the rights holder and access policy for. +#' @param subject (character) The identifier of the new rights holder, typially an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to #' read, write, and changePermission. #' -#' @return Whether an update was needed. +#' @return (logical) Whether an update was needed. #' @export #' #' @examples @@ -285,18 +324,18 @@ remove_public_read <- function(mn, pids) { #' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", #' permissions = c("read", "write", "changePermission")) #'} -set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "write", "changePermission")) { +set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) } if (!all(is.character(pids), - all(nchar(pids) > 0))){ + all(nchar(pids) > 0))) { stop("Argument 'pids' must be character class with non-zero number of characters.") } if (!all(is.character(subject), - nchar(subject) > 0)){ + nchar(subject) > 0)) { stop("Argument 'subject' must be character class with non-zero number of characters.") } @@ -315,18 +354,20 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ for (pid in pids) { sysmeta <- tryCatch({ dataone::getSystemMetadata(mn, pid) - }, - error = function(e) { - message(paste0("Failed to get system metadata for PID '", pid, "' on MN '", mn@endpoint, "'.\n")) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) message(e) e }) - if (inherits(sysmeta, "error")) { + if (!inherits(sysmeta, "SystemMetadata")) { stop("Failed to get System Metadata.") } - # Track whether we have changed the record to avoid an uncessary update call + # Track whether we have changed the record to avoid an unnecessary update call changed <- FALSE # Set rights holder if needed @@ -380,16 +421,18 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ #' Check whether an object has public read access #' -#' Check whether DataOne objects have public read access set in their System Metadata. +#' Check whether objects have public read access. #' No token needs to be set to use this function. #' -#' @param mn (MNode) The Member Node to send the query to. -#' @param pids (character) The PID(s) to check for public read access. -#' @param use.names (logical) Optional. If set to `TRUE` (the deafult), pids will -#' be used as names for the result, unless pids has names already, in which case, +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to check for public read access. +#' @param use.names (logical) Optional. If `TRUE` (the default), PIDs will +#' be used as names for the result unless PIDs have names already, in which case #' those names will be used for the result. #' -#' @return A vector of class logical. +#' @importFrom httr content +#' +#' @return (logical) Whether an object has public read access. #' @export #' #' @examples @@ -400,38 +443,35 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ #' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' is_public_read(mn, pids) #'} -is_public_read <- function(mn, pids, use.names=TRUE){ - +is_public_read <- function(mn, pids, use.names = TRUE) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) } if (!all(is.character(pids), - all(nchar(pids) > 0))){ - stop("Argument 'pid' must be character class with non-zero number of characters.") + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") } - if(!is.logical(use.names)){ + if (!is.logical(use.names)) { stop(paste0("Argument 'use.names' must be logical class, but was a ", class(use.names), " instead.")) } - vapply(pids, USE.NAMES = use.names, FUN.VALUE = logical(1), FUN=function(pid){ + vapply(pids, USE.NAMES = use.names, FUN.VALUE = logical(1), FUN = function(pid) { - url <- paste(mn@endpoint, "meta", URLencode(pid, reserved=T), sep="/") - response <- dataone:::auth_get(url, node=mn) + url <- paste(mn@endpoint, "meta", utils::URLencode(pid, reserved = TRUE), sep = "/") + response <- dataone:::auth_get(url, node = mn) - if(response$status_code != "200") { + if (response$status_code != "200") { error_desc <- dataone:::getErrorDescription(response) - if(grepl("READ not allowed", error_desc, ignore.case = TRUE)){ + if (grepl("READ not allowed", error_desc, ignore.case = TRUE)) { return(FALSE) } else { stop(error_desc) } } - sysmeta <- datapack:::SystemMetadata(XML::xmlRoot(suppressMessages(XML::xmlParse((httr::content(response, as="text")))))) + sysmeta <- datapack:::SystemMetadata(XML::xmlRoot(suppressMessages(XML::xmlParse((httr::content(response, as = "text")))))) return(datapack::hasAccessRule(sysmeta, "public", "read")) - }) - } diff --git a/man/is_public_read.Rd b/man/is_public_read.Rd index 2d55a5a..fbee7b9 100644 --- a/man/is_public_read.Rd +++ b/man/is_public_read.Rd @@ -7,19 +7,19 @@ is_public_read(mn, pids, use.names = TRUE) } \arguments{ -\item{mn}{(MNode) The Member Node to send the query to.} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The PID(s) to check for public read access.} +\item{pids}{(character) The PIDs of the objects to check for public read access.} -\item{use.names}{(logical) Optional. If set to `TRUE` (the deafult), pids will -be used as names for the result, unless pids has names already, in which case, +\item{use.names}{(logical) Optional. If `TRUE` (the default), PIDs will +be used as names for the result unless PIDs have names already, in which case those names will be used for the result.} } \value{ -A vector of class logical. +(logical) Whether an object has public read access. } \description{ -Check whether DataOne objects have public read access set in their System Metadata. +Check whether objects have public read access. No token needs to be set to use this function. } \examples{ diff --git a/man/remove_public_read.Rd b/man/remove_public_read.Rd index 52045a8..b78480b 100644 --- a/man/remove_public_read.Rd +++ b/man/remove_public_read.Rd @@ -2,17 +2,17 @@ % Please edit documentation in R/access.R \name{remove_public_read} \alias{remove_public_read} -\title{Remove public access on a set of objects.} +\title{Remove public read access for an object} \usage{ remove_public_read(mn, pids) } \arguments{ -\item{mn}{(MNode)} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) A vector of PIDs to set public access on} +\item{pids}{(character) The PIDs of the objects to remove public read access for.} } \description{ -Remove public access on a set of objects. +Remove public read access for an object. } \examples{ \dontrun{ diff --git a/man/set_access.Rd b/man/set_access.Rd index 421cd2b..c22db50 100644 --- a/man/set_access.Rd +++ b/man/set_access.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/access.R \name{set_access} \alias{set_access} -\title{Set the access policy for a set of objects.} +\title{Set the access policy for an object} \usage{ set_access(mn, pids, subjects, permissions = c("read", "write", "changePermission")) @@ -10,18 +10,20 @@ set_access(mn, pids, subjects, permissions = c("read", "write", \arguments{ \item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The object(s) to set the permissions on.} +\item{pids}{(character) The PIDs of the objects to set permissions for.} -\item{subjects}{(character) The subject(s) to set permissions for.} +\item{subjects}{(character) The identifiers of the subjects to set permissions for, typially an ORCID or DN.} -\item{permissions}{(character) Optional. Vector of permissions.} +\item{permissions}{(character) Optional. The permissions to set. Defaults to +read, write, and changePermission.} } \value{ -(logical) Named +(logical) Whether an update was needed. } \description{ -For each permission, this function checks if the permission is already set -and moves on. System Metadata are only updated when a change was needed. +Set the access policy for the given subjects for the given objects on the given Member Node. +For each type of permission, this function checks if the permission is already set +and only updates the System Metadata when a change is needed. } \examples{ \dontrun{ diff --git a/man/set_public_read.Rd b/man/set_public_read.Rd index 5adbd8c..8cee05b 100644 --- a/man/set_public_read.Rd +++ b/man/set_public_read.Rd @@ -2,17 +2,20 @@ % Please edit documentation in R/access.R \name{set_public_read} \alias{set_public_read} -\title{Set public access on a set of objects.} +\title{Set public read access for an object} \usage{ set_public_read(mn, pids) } \arguments{ -\item{mn}{(MNode)} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) A vector of PIDs to set public access on} +\item{pids}{(character) The PIDs of the objects to set public read access for.} +} +\value{ +(logical) Whether an update was needed. } \description{ -Set public access on a set of objects. +Set public read access for an object. } \examples{ \dontrun{ diff --git a/man/set_rights_and_access.Rd b/man/set_rights_and_access.Rd index 31028d0..8567b84 100644 --- a/man/set_rights_and_access.Rd +++ b/man/set_rights_and_access.Rd @@ -2,28 +2,28 @@ % Please edit documentation in R/access.R \name{set_rights_and_access} \alias{set_rights_and_access} -\title{Set the given subject as the rightsHolder and subject with write and -changePermission access for the given PID.} +\title{Set rights holder with access policy for an object} \usage{ set_rights_and_access(mn, pids, subject, permissions = c("read", "write", "changePermission")) } \arguments{ -\item{mn}{(MNode) The Member Node to send the query.} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The PID(s) to set the access rule for.} +\item{pids}{(character) The PIDs of the objects to set the rights holder and access policy for.} -\item{subject}{(character)The subject of the rule(s).} +\item{subject}{(character) The identifier of the new rights holder, typially an ORCID or DN.} -\item{permissions}{(character) The permissions for the rule. Defaults to +\item{permissions}{(character) Optional. The permissions to set. Defaults to read, write, and changePermission.} } \value{ -Whether an update was needed. +(logical) Whether an update was needed. } \description{ -This function only updates the existing System Metadata if a change is -needed. +Set the given subject as the rights holder and with given permissions +for the given objects. This function only updates the existing +System Metadata when a change is needed. } \examples{ \dontrun{ diff --git a/man/set_rights_holder.Rd b/man/set_rights_holder.Rd index fb0e480..d09a74d 100644 --- a/man/set_rights_holder.Rd +++ b/man/set_rights_holder.Rd @@ -2,25 +2,24 @@ % Please edit documentation in R/access.R \name{set_rights_holder} \alias{set_rights_holder} -\title{access.R} +\title{Set the rights holder for an object} \usage{ set_rights_holder(mn, pids, subject) } \arguments{ -\item{mn}{(MNode) The MNode instance to be changed.} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The identifiers for the object to be changed.} +\item{pids}{(character) The PIDs of the objects to set the rights holder for.} -\item{subject}{(character) The identifier of the new rightsHolder, often an ORCID or DN.} +\item{subject}{(character) The identifier of the new rights holder, typially an ORCID or DN.} } -\description{ -High-level utility functions for getting and setting access rules for DataONE -objects. -Set the rightsHolder field for a given PID. +\value{ +(logical) Whether an update was needed. } -\details{ -Update the rights holder to the provided subject for the object identified in -the provided system metadata document on the given Member Node. +\description{ +Set the rights holder to the given subject for the given objects on the +given Member Node. This function checks if the rights holder is already set +and only updates the System Metadata when a change is needed. } \examples{ \dontrun{ diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index 321143e..ed3d865 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -66,6 +66,18 @@ test_that("get_package works the same when given a metadata pid as it does when expect_equal(a, b) }) +test_that("access functions stop if system metadata is not found", { + expect_error(set_rights_holder(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + + expect_error(set_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + + expect_error(set_public_read(mn, "test")) + + expect_error(remove_public_read(mn, "test")) + + expect_error(set_rights_and_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) +}) + test_that("is_public_read returns true for public packages and false for private packages", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") From d8293a524b044db9f99eeac55a9f533cdf295429 Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Wed, 12 Sep 2018 17:36:04 -0700 Subject: [PATCH 162/318] Fix update_package_object error message and tempfile --- R/editing.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index d7e55c5..f07d30f 100644 --- a/R/editing.R +++ b/R/editing.R @@ -835,7 +835,7 @@ update_physical <- function(eml, mn, data_pid, new_data_pid) { all_url <- unlist(EML::eml_get(eml, "url")) if (sum(stringr::str_detect(all_url, data_pid)) == 0) { - stop("The data PID does not match any physical sections, so the EML will not be updated.") + stop("The obsoleted data PID does not match any physical sections, so the EML will not be updated.") } dataTable_url <- unlist(EML::eml_get(eml@dataset@dataTable, "url")) @@ -897,6 +897,8 @@ update_physical <- function(eml, mn, data_pid, new_data_pid) { #' @import dataone #' @import EML #' +#' @export +#' #' @examples #' \dontrun{ #' cnTest <- dataone::CNode("STAGING") @@ -908,8 +910,6 @@ update_physical <- function(eml, mn, data_pid, new_data_pid) { #' update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv") #' file.remove("new_file.csv") #' } -#' -#' @export update_package_object <- function(mn, data_pid, new_data_path, @@ -940,12 +940,12 @@ update_package_object <- function(mn, data_pid = data_pid, new_data_pid = new_data_pid), error = function(e) { - print("The data PID does not match any physical sections, so the EML will not be updated.") + message("The obsoleted data PID does not match any physical sections, so the EML will not be updated.", + "\nCheck if the correct resource map PID was given.") return(eml) }) - eml_path <- "science-metadata.xml" - file.create(eml_path) + eml_path <- tempfile(fileext = ".xml") EML::write_eml(eml_new, eml_path) pkg_new <- publish_update(mn, From 86b506208920169c6df972c629ddd1bbcf84ea5f Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 13 Sep 2018 09:44:08 -0700 Subject: [PATCH 163/318] remove duplicate encoding line --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3054fd6..697d84c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,6 +40,5 @@ Suggests: rmarkdown, yaml, xslt -Encoding: UTF-8 RoxygenNote: 6.1.0 VignetteBuilder: knitr From ff023cb1cbc7caefc48ec03b09d5e4dbe27ddfb4 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Tue, 2 Oct 2018 13:07:49 -0700 Subject: [PATCH 164/318] fixed bug in pid_to_eml_physical --- R/eml.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index bb13e95..cd31040 100644 --- a/R/eml.R +++ b/R/eml.R @@ -41,7 +41,6 @@ pid_to_eml_entity <- function(mn, "otherEntity")) systmeta <- getSystemMetadata(mn, pid) - physical <- sysmeta_to_eml_physical(systmeta) # Create entity entity <- new(entityType, @@ -128,6 +127,7 @@ pid_to_eml_physical <- function(mn, pids) { stopifnot(is(mn, "MNode")) stopifnot(is.character(pids), all(nchar(pids)) > 0) + names(pids) <- '' # Named inputs produce a named output list - which is invalid in EML sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) sysmeta_to_eml_physical(sysmeta) @@ -995,6 +995,8 @@ eml_add_entities <- function(doc, #' #' @author Dominic Mullen dmullen17@@gmail.com #' +#' @importFrom magrittr '%>%' +#' #' @export #' #' @examples From 38d09b269907a14e2e8712f1c43618647cabc7b8 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 3 Oct 2018 14:48:26 -0700 Subject: [PATCH 165/318] eml_set_shared_attributes --- R/eml.R | 90 +++++++++++++++++++++++++++++++++++++++ tests/testthat/test_eml.R | 35 +++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/R/eml.R b/R/eml.R index cd31040..874192e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1124,3 +1124,93 @@ which_in_eml <- function(eml_list, element, test) { return(location) } + + +#' Set a reference to an EML object +#' +#' This function creates a new object with the same class as \code{element_to_replace} +#' using a reference to \code{element_to_reference} +#' +#' @param element_to_reference (S4) An EML object to reference +#' @param element_to_replace (S4) An EML object to replace with a reference +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' +#' # Set the first contact as a reference to the first creator +#' eml@@dataset@@contact[[1]] <- eml_set_reference(eml@@dataset@@creator[[1]], +#' eml@@dataset@@contact[[1]]) +#' +#' # This is also useful when we want to set references to a subset of 'dataTable' or 'otherEntity' objects +#' +#' +#' } +eml_set_reference <- function(element_to_reference, element_to_replace) { + if (length(element_to_reference@id) == 0) { + stop('No id detected at element_to_reference@id. Please add an id in order to use references.') + } + id <- element_to_reference@id + class <- class(element_to_replace)[1] + element_to_replace <- new(class, reference = id) + return(element_to_replace) +} + + +#' Set shared attribute references +#' +#' This function sets shared attributes using the attributes of the first \code{type} +#' selected and creates references for all remaining objects of equivalent \code{type}. +#' +#' @param eml (S4) An EML S4 object +#' @param attributeList (S4) Optional. An EML attributeList object. If not provided then it will default to the attributeList of the first \code{type} element +#' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList objects with references. Defaults to 'dataTable' +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' atts <- EML::set_attributes(EML::get_attributes(eml@@dataset@@dataTable[[1]]@@attributeList)$attributes) +#' +#' eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') +#' +#' } +eml_set_shared_attributes <- function(eml, attributeList = NULL, type = 'dataTable') { + stopifnot(methods::is(eml, 'eml')) + if(!is.null(attributeList)) { + stopifnot(methods::is(attributeList, 'attributeList')) + } + stopifnot(type %in% c('dataTable', 'otherEntity')) + + x <- slot(eml@dataset, type) + n <- length(x) + if (n <= 1) { + stop('1 or fewer entities') # add message + } + + # If a new attributeList is provided set it + if (!is.null(attributeList)) { + x[[1]]@attributeList <- attributeList + } + x[[1]]@id <- new('xml_attribute', uuid::UUIDgenerate(TRUE)) + # Apply references to all other elements + for (i in 2:n) { + x[[i]] <- eml_set_reference(x[[1]], x[[i]]) + } + + slot(eml@dataset, type) <- x + return(eml) +} diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 06e39b8..635c9f3 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -265,3 +265,38 @@ test_that("which_in_eml Returns correct locations", { expect_error(which_in_eml(eml@dataset@dataTable, "attributeName", "length_3")) expect_equal(c(1,3), which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "attributeName", function(x) {grepl("^length", x)})) }) + +test_that('eml_set_reference sets a reference', { + eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") + doc <- EML::read_eml(eml_path) + + expect_error(eml_set_reference(doc@dataset@creator[[1]], doc@dataset@contact[[1]])) + + # Add id to use references + doc@dataset@creator[[1]]@id <- new('xml_attribute', 'creator_id') + doc@dataset@contact[[1]] <- eml_set_reference(doc@dataset@creator[[1]], doc@dataset@contact[[1]]) + + expect_equal(doc@dataset@creator[[1]]@id[1], doc@dataset@contact[[1]]@references[1]) +}) + +test_that('eml_set_shared_attributes creates shared attribute references', { + eml_path <- file.path(system.file(package = 'arcticdatautils'), 'example-eml.xml') + doc <- EML::read_eml(eml_path) + + attributes <- data.frame(attributeName = 'length_1', attributeDefinition = 'def1', + formatString = NA, measurementScale = 'ratio', domain = 'numericDomain', + definition = NA, unit = 'meter', numberType = 'real', + stringsAsFactors = FALSE) + attributeList <- EML::set_attributes(attributes) + + dataTable_1 <- new('dataTable', + entityName = '2016_data.csv', + entityDescription = '2016 data', + attributeList = attributeList) + dataTable_2 <- dataTable_1 + doc@dataset@dataTable <- c(dataTable_1, dataTable_2) + + doc <- eml_set_shared_attributes(doc) + + expect_equal(doc@dataset@dataTable[[1]]@id[1], doc@dataset@dataTable[[2]]@references[1]) +}) From 278327ec0a83764a7fe6bd2477955a8554d011e0 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 3 Oct 2018 15:11:15 -0700 Subject: [PATCH 166/318] fixed incomplete example --- R/eml.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index 874192e..95ed7c7 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1150,8 +1150,16 @@ which_in_eml <- function(eml_list, element, test) { #' eml@@dataset@@contact[[1]]) #' #' # This is also useful when we want to set references to a subset of 'dataTable' or 'otherEntity' objects -#' -#' +#' # Add a few more objects to illustrate the use +#' eml@@dataset@@dataTable[[3]] <- eml@@dataset@@dataTable[[1]] +#' eml@@dataset@@dataTable[[4]] <- eml@@dataset@@dataTable[[1]] +#' # Add references to the second and third elements +#' for (i in 2:3) { +#' eml@@dataset@@dataTable[[i]] <- eml_set_reference(eml@@dataset@@dataTable[[1]], +#' eml@@dataset@@dataTable[[1]]) +#' } +#' # If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. +#' eml@@dataset@@dataTable #' } eml_set_reference <- function(element_to_reference, element_to_replace) { if (length(element_to_reference@id) == 0) { From 82e94c65e1b0d311fd4635550655fc31da15fa66 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Thu, 4 Oct 2018 11:30:24 -0700 Subject: [PATCH 167/318] fixed xml_attribute conversion bug --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 95ed7c7..818b722 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1165,7 +1165,7 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { if (length(element_to_reference@id) == 0) { stop('No id detected at element_to_reference@id. Please add an id in order to use references.') } - id <- element_to_reference@id + id <- element_to_reference@id[1] class <- class(element_to_replace)[1] element_to_replace <- new(class, reference = id) return(element_to_replace) From 60ceb11c69a8366885b0e61cdba2a88405019a74 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Mon, 8 Oct 2018 11:43:58 -0700 Subject: [PATCH 168/318] updated default rows arg in get_package --- R/util.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/util.R b/R/util.R index 34b0c48..29018da 100644 --- a/R/util.R +++ b/R/util.R @@ -785,7 +785,7 @@ get_all_versions <- function(node, pid) { #' @param pid (character) The the resource map PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only -#' useful to set if you are warned about the result set being truncated. +#' useful to set if you are warned about the result set being truncated. Defaults to 5000. #' #' @return (list) A structured list of the members of the package. #' @export @@ -799,7 +799,7 @@ get_all_versions <- function(node, pid) { #' #' ids <- get_package(mn, pid) #' } -get_package <- function(node, pid, file_names=FALSE, rows=1000) { +get_package <- function(node, pid, file_names=FALSE, rows=5000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), nchar(pid) > 0) @@ -840,10 +840,10 @@ get_package <- function(node, pid, file_names=FALSE, rows=1000) { #' @param pid (character) The the metadata PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only -#' useful to set if you are warned about the result set being truncated. +#' useful to set if you are warned about the result set being truncated. Defaults to 5000. #' -get_package_direct <- function(node, pid, file_names=FALSE, rows = 1000) { +get_package_direct <- function(node, pid, file_names=FALSE, rows = 5000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), nchar(pid) > 0) From 811a6529b0ee276304c5f0d183e5d3adc473e3c4 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Mon, 8 Oct 2018 13:42:29 -0700 Subject: [PATCH 169/318] updated docs --- NAMESPACE | 3 +++ man/eml_set_reference.Rd | 44 ++++++++++++++++++++++++++++++++ man/eml_set_shared_attributes.Rd | 35 +++++++++++++++++++++++++ man/get_package.Rd | 4 +-- man/get_package_direct.Rd | 4 +-- man/update_package_object.Rd | 1 - 6 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 man/eml_set_reference.Rd create mode 100644 man/eml_set_shared_attributes.Rd diff --git a/NAMESPACE b/NAMESPACE index 3135fe0..36ede20 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,8 @@ export(eml_otherEntity_to_dataTable) export(eml_party) export(eml_personnel) export(eml_project) +export(eml_set_reference) +export(eml_set_shared_attributes) export(eml_validate_attributes) export(env_get) export(find_newest_object) @@ -66,6 +68,7 @@ import(XML) import(dataone) import(datapack) importFrom(httr,content) +importFrom(magrittr,'%>%') importFrom(methods,"slot<-") importFrom(methods,as) importFrom(methods,is) diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd new file mode 100644 index 0000000..2d7fdf8 --- /dev/null +++ b/man/eml_set_reference.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_set_reference} +\alias{eml_set_reference} +\title{Set a reference to an EML object} +\usage{ +eml_set_reference(element_to_reference, element_to_replace) +} +\arguments{ +\item{element_to_reference}{(S4) An EML object to reference} + +\item{element_to_replace}{(S4) An EML object to replace with a reference} +} +\description{ +This function creates a new object with the same class as \code{element_to_replace} +using a reference to \code{element_to_reference} +} +\examples{ +\dontrun{ + +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) + +# Set the first contact as a reference to the first creator +eml@dataset@contact[[1]] <- eml_set_reference(eml@dataset@creator[[1]], +eml@dataset@contact[[1]]) + +# This is also useful when we want to set references to a subset of 'dataTable' or 'otherEntity' objects +# Add a few more objects to illustrate the use +eml@dataset@dataTable[[3]] <- eml@dataset@dataTable[[1]] +eml@dataset@dataTable[[4]] <- eml@dataset@dataTable[[1]] +# Add references to the second and third elements +for (i in 2:3) { + eml@dataset@dataTable[[i]] <- eml_set_reference(eml@dataset@dataTable[[1]], + eml@dataset@dataTable[[1]]) +} +# If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. +eml@dataset@dataTable +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/eml_set_shared_attributes.Rd b/man/eml_set_shared_attributes.Rd new file mode 100644 index 0000000..c3d6bf6 --- /dev/null +++ b/man/eml_set_shared_attributes.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_set_shared_attributes} +\alias{eml_set_shared_attributes} +\title{Set shared attribute references} +\usage{ +eml_set_shared_attributes(eml, attributeList = NULL, + type = "dataTable") +} +\arguments{ +\item{eml}{(S4) An EML S4 object} + +\item{attributeList}{(S4) Optional. An EML attributeList object. If not provided then it will default to the attributeList of the first \code{type} element} + +\item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList objects with references. Defaults to 'dataTable'} +} +\description{ +This function sets shared attributes using the attributes of the first \code{type} +selected and creates references for all remaining objects of equivalent \code{type}. +} +\examples{ +\dontrun{ + +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +atts <- EML::set_attributes(EML::get_attributes(eml@dataset@dataTable[[1]]@attributeList)$attributes) + +eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') + +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/get_package.Rd b/man/get_package.Rd index f77090f..ce7bb8c 100644 --- a/man/get_package.Rd +++ b/man/get_package.Rd @@ -4,7 +4,7 @@ \alias{get_package} \title{Get a structured list of PIDs for the objects in a package.} \usage{ -get_package(node, pid, file_names = FALSE, rows = 1000) +get_package(node, pid, file_names = FALSE, rows = 5000) } \arguments{ \item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} @@ -14,7 +14,7 @@ get_package(node, pid, file_names = FALSE, rows = 1000) \item{file_names}{(logical) Whether to return file names for all objects.} \item{rows}{(numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated.} +useful to set if you are warned about the result set being truncated. Defaults to 5000.} } \value{ (list) A structured list of the members of the package. diff --git a/man/get_package_direct.Rd b/man/get_package_direct.Rd index e3beebf..639cd9e 100644 --- a/man/get_package_direct.Rd +++ b/man/get_package_direct.Rd @@ -4,7 +4,7 @@ \alias{get_package_direct} \title{Get a structured list of PIDs for the objects in a package.} \usage{ -get_package_direct(node, pid, file_names = FALSE, rows = 1000) +get_package_direct(node, pid, file_names = FALSE, rows = 5000) } \arguments{ \item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} @@ -14,7 +14,7 @@ get_package_direct(node, pid, file_names = FALSE, rows = 1000) \item{file_names}{(logical) Whether to return file names for all objects.} \item{rows}{(numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated.} +useful to set if you are warned about the result set being truncated. Defaults to 5000.} } \description{ Get a structured list of PIDs for the objects in a package. diff --git a/man/update_package_object.Rd b/man/update_package_object.Rd index 16d7883..0668b9f 100644 --- a/man/update_package_object.Rd +++ b/man/update_package_object.Rd @@ -52,7 +52,6 @@ file.create("new_file.csv") update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv") file.remove("new_file.csv") } - } \keyword{publish_update} \keyword{update_object} From ed35fe45b3210b9a43aa5b54c2e752b42dab4e95 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Tue, 9 Oct 2018 13:49:02 -0700 Subject: [PATCH 170/318] updated eml_party argsto accept multiple inputs --- R/eml.R | 37 +++++++++++++++++++++++-------------- man/eml_contact.Rd | 3 +++ man/eml_creator.Rd | 3 +++ man/eml_party.Rd | 2 ++ man/eml_set_reference.Rd | 11 ++++++----- tests/testthat/test_eml.R | 12 ++++++++++++ 6 files changed, 49 insertions(+), 19 deletions(-) diff --git a/R/eml.R b/R/eml.R index 818b722..d943008 100644 --- a/R/eml.R +++ b/R/eml.R @@ -345,6 +345,8 @@ clear_methods <- function(doc) { #' @examples #' eml_party("creator", "Test", "User") #' eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +#' eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), +#' c("Data Scientist", "Programmer")) eml_party <- function(type="associatedParty", given_names=NULL, sur_name=NULL, @@ -369,12 +371,12 @@ eml_party <- function(type="associatedParty", # Organization Name if (!is.null(organization)) { - party@organizationName <- c(new("organizationName", .Data = organization)) + party@organizationName <- new('ListOforganizationName', lapply(organization, function(x) {new('organizationName', .Data = x)})) } # Position if (!is.null(position)) { - party@positionName <- c(new("positionName", .Data = position)) + party@positionName <- new('ListOfpositionName', lapply(position, function(x) {new('positionName', .Data = x)})) } # Email @@ -446,7 +448,10 @@ eml_party <- function(type="associatedParty", #' @export #' #' @examples -#' eml_creator("test", "user", email = "test@user.com") +#' eml_creator("test", "user", email = "test@@user.com") +#' eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +#' eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), +#' c("Data Scientist", "Programmer")) eml_creator <- function(...) { eml_party("creator", ...) } @@ -461,7 +466,10 @@ eml_creator <- function(...) { #' @export #' #' @examples -#' eml_contact("test", "user", email = "test@user.com") +#' eml_contact("test", "user", email = "test@@user.com") +#' eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +#' eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), +#' c("Data Scientist", "Programmer")) eml_contact <- function(...) { eml_party("contact", ...) } @@ -477,7 +485,7 @@ eml_contact <- function(...) { #' @export #' #' @examples -#' eml_metadata_provider("test", "user", email = "test@user.com") +#' eml_metadata_provider("test", "user", email = "test@@user.com") eml_metadata_provider <- function(...) { eml_party("metadataProvider", ...) } @@ -492,7 +500,7 @@ eml_metadata_provider <- function(...) { #' @export #' #' @examples -#' eml_associated_party("test", "user", email = "test@user.com", role = "Principal Investigator") +#' eml_associated_party("test", "user", email = "test@@user.com", role = "Principal Investigator") eml_associated_party <- function(...) { eml_party("associatedParty", ...) } @@ -507,7 +515,7 @@ eml_associated_party <- function(...) { #' @export #' #' @examples -#' eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator") +#' eml_personnel("test", "user", email = "test@@user.com", role = "principalInvestigator") eml_personnel <- function(role = NULL, ...) { if(is.null(role)) { stop(call. = FALSE, @@ -1149,14 +1157,15 @@ which_in_eml <- function(eml_list, element, test) { #' eml@@dataset@@contact[[1]] <- eml_set_reference(eml@@dataset@@creator[[1]], #' eml@@dataset@@contact[[1]]) #' -#' # This is also useful when we want to set references to a subset of 'dataTable' or 'otherEntity' objects -#' # Add a few more objects to illustrate the use +#' # This is also useful when we want to set references to a subset of 'dataTable' +#' or 'otherEntity' objects +#' # Add a few more objects first to illustrate the use: #' eml@@dataset@@dataTable[[3]] <- eml@@dataset@@dataTable[[1]] #' eml@@dataset@@dataTable[[4]] <- eml@@dataset@@dataTable[[1]] -#' # Add references to the second and third elements +#' # Add references to the second and third elements only (not the 4th): #' for (i in 2:3) { -#' eml@@dataset@@dataTable[[i]] <- eml_set_reference(eml@@dataset@@dataTable[[1]], -#' eml@@dataset@@dataTable[[1]]) +#' eml@@dataset@@dataTable[[i]]@@attributeList <- eml_set_reference(eml@@dataset@@dataTable[[1]]@@attributeList, +#' eml@@dataset@@dataTable[[i]]@@attributeList) #' } #' # If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. #' eml@@dataset@@dataTable @@ -1213,10 +1222,10 @@ eml_set_shared_attributes <- function(eml, attributeList = NULL, type = 'dataTab if (!is.null(attributeList)) { x[[1]]@attributeList <- attributeList } - x[[1]]@id <- new('xml_attribute', uuid::UUIDgenerate(TRUE)) + x[[1]]@attributeList@id <- new('xml_attribute', uuid::UUIDgenerate(TRUE)) # Apply references to all other elements for (i in 2:n) { - x[[i]] <- eml_set_reference(x[[1]], x[[i]]) + x[[i]]@attributeList <- eml_set_reference(x[[1]]@attributeList, x[[i]]@attributeList) } slot(eml@dataset, type) <- x diff --git a/man/eml_contact.Rd b/man/eml_contact.Rd index 7123874..e0c6a70 100644 --- a/man/eml_contact.Rd +++ b/man/eml_contact.Rd @@ -17,4 +17,7 @@ See \code{\link{eml_party}} for details. } \examples{ eml_contact("test", "user", email = "test@user.com") +eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), + c("Data Scientist", "Programmer")) } diff --git a/man/eml_creator.Rd b/man/eml_creator.Rd index bcde548..6fa9701 100644 --- a/man/eml_creator.Rd +++ b/man/eml_creator.Rd @@ -17,4 +17,7 @@ See \code{\link{eml_party}} for details. } \examples{ eml_creator("test", "user", email = "test@user.com") +eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), + c("Data Scientist", "Programmer")) } diff --git a/man/eml_party.Rd b/man/eml_party.Rd index f76b522..ab6369d 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -44,4 +44,6 @@ The \code{userId} argument assumes an ORCID so be sure to adjust for that. \examples{ eml_party("creator", "Test", "User") eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), + c("Data Scientist", "Programmer")) } diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd index 2d7fdf8..35e0989 100644 --- a/man/eml_set_reference.Rd +++ b/man/eml_set_reference.Rd @@ -26,14 +26,15 @@ eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) eml@dataset@contact[[1]] <- eml_set_reference(eml@dataset@creator[[1]], eml@dataset@contact[[1]]) -# This is also useful when we want to set references to a subset of 'dataTable' or 'otherEntity' objects -# Add a few more objects to illustrate the use +# This is also useful when we want to set references to a subset of 'dataTable' + or 'otherEntity' objects +# Add a few more objects first to illustrate the use: eml@dataset@dataTable[[3]] <- eml@dataset@dataTable[[1]] eml@dataset@dataTable[[4]] <- eml@dataset@dataTable[[1]] -# Add references to the second and third elements +# Add references to the second and third elements only (not the 4th): for (i in 2:3) { - eml@dataset@dataTable[[i]] <- eml_set_reference(eml@dataset@dataTable[[1]], - eml@dataset@dataTable[[1]]) + eml@dataset@dataTable[[i]]@attributeList <- eml_set_reference(eml@dataset@dataTable[[1]]@attributeList, + eml@dataset@dataTable[[i]]@attributeList) } # If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. eml@dataset@dataTable diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 635c9f3..1530520 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -277,6 +277,7 @@ test_that('eml_set_reference sets a reference', { doc@dataset@contact[[1]] <- eml_set_reference(doc@dataset@creator[[1]], doc@dataset@contact[[1]]) expect_equal(doc@dataset@creator[[1]]@id[1], doc@dataset@contact[[1]]@references[1]) + expect_true(EML::eml_validate(doc)) }) test_that('eml_set_shared_attributes creates shared attribute references', { @@ -299,4 +300,15 @@ test_that('eml_set_shared_attributes creates shared attribute references', { doc <- eml_set_shared_attributes(doc) expect_equal(doc@dataset@dataTable[[1]]@id[1], doc@dataset@dataTable[[2]]@references[1]) + expect_true(EML::eml_validate(doc)) +}) + +test_that('eml_party creates multiple giveName, organizationName, and positionName fields', { + creator <- eml_party('creator', c('John', 'and Jack'), 'Smith', c('NCEAS', 'UCSB'), + c('Programmers', 'brothers')) + + expect_is(creator, "creator") + expect_equal(unlist(EML::eml_get(creator, 'givenName')), c('John', 'and Jack')) + expect_equal(unlist(EML::eml_get(creator, 'organizationName')), c('NCEAS', 'UCSB')) + expect_equal(unlist(EML::eml_get(creator, 'positionName')), c('Programmers', 'brothers')) }) From 0c2975331d9420b68ef5e62eda684057f1505049 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Tue, 9 Oct 2018 14:17:45 -0700 Subject: [PATCH 171/318] travis fix - added dontrun wrapper to examples --- R/eml.R | 6 ++++++ man/eml_contact.Rd | 2 ++ man/eml_creator.Rd | 2 ++ man/eml_party.Rd | 2 ++ 4 files changed, 12 insertions(+) diff --git a/R/eml.R b/R/eml.R index d943008..fa1d7a6 100644 --- a/R/eml.R +++ b/R/eml.R @@ -343,10 +343,12 @@ clear_methods <- function(doc) { #' @export #' #' @examples +#' \dontrun{ #' eml_party("creator", "Test", "User") #' eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") #' eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), #' c("Data Scientist", "Programmer")) +#'} eml_party <- function(type="associatedParty", given_names=NULL, sur_name=NULL, @@ -448,10 +450,12 @@ eml_party <- function(type="associatedParty", #' @export #' #' @examples +#' \dontrun{ #' eml_creator("test", "user", email = "test@@user.com") #' eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") #' eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), #' c("Data Scientist", "Programmer")) +#'} eml_creator <- function(...) { eml_party("creator", ...) } @@ -466,10 +470,12 @@ eml_creator <- function(...) { #' @export #' #' @examples +#' \dontrun{ #' eml_contact("test", "user", email = "test@@user.com") #' eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") #' eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), #' c("Data Scientist", "Programmer")) +#'} eml_contact <- function(...) { eml_party("contact", ...) } diff --git a/man/eml_contact.Rd b/man/eml_contact.Rd index e0c6a70..2a88d60 100644 --- a/man/eml_contact.Rd +++ b/man/eml_contact.Rd @@ -16,8 +16,10 @@ eml_contact(...) See \code{\link{eml_party}} for details. } \examples{ +\dontrun{ eml_contact("test", "user", email = "test@user.com") eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), c("Data Scientist", "Programmer")) } +} diff --git a/man/eml_creator.Rd b/man/eml_creator.Rd index 6fa9701..5280013 100644 --- a/man/eml_creator.Rd +++ b/man/eml_creator.Rd @@ -16,8 +16,10 @@ eml_creator(...) See \code{\link{eml_party}} for details. } \examples{ +\dontrun{ eml_creator("test", "user", email = "test@user.com") eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), c("Data Scientist", "Programmer")) } +} diff --git a/man/eml_party.Rd b/man/eml_party.Rd index ab6369d..ba02a50 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -42,8 +42,10 @@ fine. The \code{userId} argument assumes an ORCID so be sure to adjust for that. } \examples{ +\dontrun{ eml_party("creator", "Test", "User") eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), c("Data Scientist", "Programmer")) } +} From eb4ffe3a6a895e0ee3c78538422bb8f1dd801a19 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Fri, 2 Nov 2018 16:00:23 -0700 Subject: [PATCH 172/318] list_submissions function --- NAMESPACE | 1 + R/helpers.R | 88 +++++++++++++++++++++++++++++++++++++++++ man/get_orcid_name.Rd | 22 +++++++++++ man/list_submissions.Rd | 34 ++++++++++++++++ 4 files changed, 145 insertions(+) create mode 100644 man/get_orcid_name.Rd create mode 100644 man/list_submissions.Rd diff --git a/NAMESPACE b/NAMESPACE index 82c402f..0123e04 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(is_obsolete) export(is_public_read) export(is_token_expired) export(is_token_set) +export(list_submissions) export(mdq_run) export(new_uuid) export(object_exists) diff --git a/R/helpers.R b/R/helpers.R index 3f53f86..65da411 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -554,3 +554,91 @@ get_all_sysmeta <- function(mn, resource_map_pid, nmax = 1000, child_packages = return(all) } + +#' Retrieve a name from an orcid ID URL +#' +#' Retrieve first and last name from an orcid ID URL by scraping the page. +#' +#' @param orcid_url (character) A valid orcid ID URL address +#' +#' @return character +#' +#' @examples +#' \dontrun{ +#' pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') +#' } +get_orcid_name <- function(orcid_url) { + req <- httr::GET(orcid_url) + if(req$status_code != 200) { + stop('Failed to read in ', orcid_url) + } + + name <- httr::content(req, "text") %>% + stringr::str_extract(".*<") %>% + stringr::str_split(" ") %>% + unlist() %>% + stringr::str_remove("<title>") + + return(paste(name[1], name[2])) +} + +#' List recent submissions to a DataOne Member Node +#' +#' List recent submissions to a DataOne Member Node from all submitters not present +#' in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +#' +#' @param mn (MNode) A DataOne Member Node +#' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format +#' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format +#' @param formatType (character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. +#' +#' @export +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @examples +#' \dontrun{ +#' library(lubridate) +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' # Return all submitted objects in the past month for the 'adc' node: +#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*')) +#' } +list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*') { + if (!is_token_set(mn)) { + stop('No token set') + } + stopifnot(methods::is(mn, 'MNode')) + if (!(is.Date(as.Date(from, '%Y/%M/%D')))){ + stop('"from" argument must be in YYYY/MM/DD format') + } + if (!(is.Date(as.Date(to, '%Y/%M/%D')))){ + stop('"to" argument must be in YYYY/MM/DD format') + } + if (!(formatType %in% c('RESOURCE', 'METADATA', 'DATA', '*'))) { + stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') + } + + q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT00:00:00Z"] AND formatType:%s', from, to, formatType) + results <- dataone::query(mn, list(q = q, + fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", + rows = 10000), + as = "data.frame") + + whitelist <- httr::GET("https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org") + if(whitelist$status_code != 200) { + warning('Failed to read in the whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org of orcid Ids. Results will include admin submissions / edits.') + } + whitelist <- httr::content(whitelist, "text") + + # Remove rows from admin whitelist members + results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] + + # Get orcid names + results$submitter_name <- purrr::map(results$submitter, get_orcid_name) + + # Arrange by dateUploaded + results <- dplyr::arrange(results, dateUploaded) + + return(results) +} diff --git a/man/get_orcid_name.Rd b/man/get_orcid_name.Rd new file mode 100644 index 0000000..3393bce --- /dev/null +++ b/man/get_orcid_name.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{get_orcid_name} +\alias{get_orcid_name} +\title{Retrieve a name from an orcid ID URL} +\usage{ +get_orcid_name(orcid_url) +} +\arguments{ +\item{orcid_url}{(character) A valid orcid ID URL address} +} +\value{ +character +} +\description{ +Retrieve first and last name from an orcid ID URL by scraping the page. +} +\examples{ +\dontrun{ +pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') +} +} diff --git a/man/list_submissions.Rd b/man/list_submissions.Rd new file mode 100644 index 0000000..d03646f --- /dev/null +++ b/man/list_submissions.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{list_submissions} +\alias{list_submissions} +\title{List recent submissions to a DataOne Member Node} +\usage{ +list_submissions(mn, from = Sys.Date(), to = Sys.Date(), + formatType = "*") +} +\arguments{ +\item{mn}{(MNode) A DataOne Member Node} + +\item{from}{(character) the date at which the query begins in 'YYYY/MM/DD' format} + +\item{to}{(character) the date at which the query ends in 'YYYY/MM/DD' format} + +\item{formatType}{(character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *.} +} +\description{ +List recent submissions to a DataOne Member Node from all submitters not present +in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +} +\examples{ +\dontrun{ +library(lubridate) +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +# Return all submitted objects in the past month for the 'adc' node: +View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*')) +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} From 506f344291eaa543b819698b491db3e25e5f4494 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Mon, 5 Nov 2018 11:13:01 -0800 Subject: [PATCH 173/318] list_submissions travis updates --- DESCRIPTION | 9 +++++--- R/helpers.R | 48 ++++++++++++++++++++++++++++------------- man/list_submissions.Rd | 18 ++++++++++++---- 3 files changed, 53 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 697d84c..e7f61b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,12 +33,15 @@ Encoding: UTF-8 LazyData: true Suggests: dplyr, - testthat, humaniformat, knitr, + lubridate, ncdf4, + RCurl, + purrr, rmarkdown, - yaml, - xslt + testthat, + xslt, + yaml RoxygenNote: 6.1.0 VignetteBuilder: knitr diff --git a/R/helpers.R b/R/helpers.R index 65da411..c712c6f 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -588,9 +588,12 @@ get_orcid_name <- function(orcid_url) { #' in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org #' #' @param mn (MNode) A DataOne Member Node -#' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format -#' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format +#' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} +#' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param formatType (character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. +#' @param whitelist (character) A list of admin orcid Identifiers. Can be a URL or a +#' character vector of length 1. +#' Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org #' #' @export #' @@ -598,43 +601,58 @@ get_orcid_name <- function(orcid_url) { #' #' @examples #' \dontrun{ -#' library(lubridate) #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' #' # Return all submitted objects in the past month for the 'adc' node: #' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*')) +#' +#' # Return all submitted objects except for one user +#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*'), +#' whitelist = 'http://orcid.org/0000-0002-2561-5840') +#' #' } -list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*') { +list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*', + whitelist = 'https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org') { + if (!requireNamespace('lubridate', 'purrr', 'RCurl')) { + stop(call. = FALSE, + 'The packages "lubridate", "purrr", and "RCurl" must be installed to run this function. ', + 'Please install them and try again.') + } + stopifnot(methods::is(mn, 'MNode')) if (!is_token_set(mn)) { stop('No token set') } - stopifnot(methods::is(mn, 'MNode')) - if (!(is.Date(as.Date(from, '%Y/%M/%D')))){ + if (!(lubridate::is.Date(as.Date(from, '%Y/%M/%D')))){ stop('"from" argument must be in YYYY/MM/DD format') } - if (!(is.Date(as.Date(to, '%Y/%M/%D')))){ + if (!(lubridate::is.Date(as.Date(to, '%Y/%M/%D')))){ stop('"to" argument must be in YYYY/MM/DD format') } if (!(formatType %in% c('RESOURCE', 'METADATA', 'DATA', '*'))) { stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') } + # Check if whitelist is a URL - if true then GET and return text content + if(RCurl::url.exists(whitelist)) { + req <- httr::GET(whitelist) + if(req$status_code != 200) { + warning('Failed to read in', whitelist, '. Results will include admin submissions / edits.') + } + whitelist <- httr::content(req, "text") + } + + # Construct query and return results q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT00:00:00Z"] AND formatType:%s', from, to, formatType) results <- dataone::query(mn, list(q = q, fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", rows = 10000), as = "data.frame") - whitelist <- httr::GET("https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org") - if(whitelist$status_code != 200) { - warning('Failed to read in the whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org of orcid Ids. Results will include admin submissions / edits.') - } - whitelist <- httr::content(whitelist, "text") - - # Remove rows from admin whitelist members + # Filter out rows where the submitter is in the whitelist results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] - # Get orcid names + # Return full names based on orcid Id results$submitter_name <- purrr::map(results$submitter, get_orcid_name) # Arrange by dateUploaded diff --git a/man/list_submissions.Rd b/man/list_submissions.Rd index d03646f..684d4be 100644 --- a/man/list_submissions.Rd +++ b/man/list_submissions.Rd @@ -5,16 +5,21 @@ \title{List recent submissions to a DataOne Member Node} \usage{ list_submissions(mn, from = Sys.Date(), to = Sys.Date(), - formatType = "*") + formatType = "*", + whitelist = "https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org") } \arguments{ \item{mn}{(MNode) A DataOne Member Node} -\item{from}{(character) the date at which the query begins in 'YYYY/MM/DD' format} +\item{from}{(character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}} -\item{to}{(character) the date at which the query ends in 'YYYY/MM/DD' format} +\item{to}{(character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}} \item{formatType}{(character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *.} + +\item{whitelist}{(character) A list of admin orcid Identifiers. Can be a URL or a +character vector of length 1. +Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} } \description{ List recent submissions to a DataOne Member Node from all submitters not present @@ -22,11 +27,16 @@ in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic- } \examples{ \dontrun{ -library(lubridate) cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') + # Return all submitted objects in the past month for the 'adc' node: View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*')) + +# Return all submitted objects except for one user +View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*'), + whitelist = 'http://orcid.org/0000-0002-2561-5840') + } } \author{ From 3d4ee8c09aa5b485b69db31c2dd09e71ee26f89f Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Mon, 5 Nov 2018 12:04:58 -0800 Subject: [PATCH 174/318] list_submissions unit tests + bug --- R/helpers.R | 19 +++++++------------ tests/testthat/test_helpers.R | 11 +++++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index c712c6f..a3eac40 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -591,9 +591,7 @@ get_orcid_name <- function(orcid_url) { #' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param formatType (character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. -#' @param whitelist (character) A list of admin orcid Identifiers. Can be a URL or a -#' character vector of length 1. -#' Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +#' @param whitelist (character) An xml list of admin orcid Identifiers. Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org #' #' @export #' @@ -633,14 +631,11 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') } - # Check if whitelist is a URL - if true then GET and return text content - if(RCurl::url.exists(whitelist)) { - req <- httr::GET(whitelist) - if(req$status_code != 200) { - warning('Failed to read in', whitelist, '. Results will include admin submissions / edits.') - } - whitelist <- httr::content(req, "text") + req <- httr::GET(whitelist) + if(req$status_code != 200) { + warning('Failed to read in', whitelist, '. Results will include admin submissions / edits.') } + whitelist <- httr::content(req, "text") # Construct query and return results q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT00:00:00Z"] AND formatType:%s', from, to, formatType) @@ -653,10 +648,10 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] # Return full names based on orcid Id - results$submitter_name <- purrr::map(results$submitter, get_orcid_name) + results$submitter_name <- purrr::map(results$submitter, get_orcid_name) %>% unlist() # Arrange by dateUploaded - results <- dplyr::arrange(results, dateUploaded) + #results <- dplyr::arrange(results, dateUploaded) return(results) } diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 4c1d7ef..4e0dcca 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -50,3 +50,14 @@ test_that("all system metadata is retrieved", { expect_message(get_all_sysmeta(adc_test, "resource_map_urn:uuid:924f81f6-2e68-4eb8-925f-53f5b66318ec")) }) + +test_that('list_submissions returns correct output', { + cn <- dataone::CNode('PROD') + adc <- dataone::getMNode(cn,'urn:node:ARCTIC') + if (!is_token_set(adc)) { + skip("No token set. Skipping test.") + } + + out <- list_submissions(adc, '2018-10-01', '2018-10-03') + expect_equal(out$submitter_name[1], 'Baptiste Vandecrux') +}) From 92b607e61e0f9386379838b36bdb915fb6fafe9c Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 6 Nov 2018 13:27:25 -0800 Subject: [PATCH 175/318] Update helpers.R --- R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/helpers.R b/R/helpers.R index a3eac40..d766e7d 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -651,7 +651,7 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType results$submitter_name <- purrr::map(results$submitter, get_orcid_name) %>% unlist() # Arrange by dateUploaded - #results <- dplyr::arrange(results, dateUploaded) + results <- dplyr::arrange(results, dateUploaded) return(results) } From 697ba85dfc9e48b559b6c9b2fed907ade2cbd311 Mon Sep 17 00:00:00 2001 From: Derek Strong <dstrong@nceas.ucsb.edu> Date: Wed, 7 Nov 2018 16:34:18 -0800 Subject: [PATCH 176/318] Update documentation --- .Rbuildignore | 1 + DESCRIPTION | 14 +- MAINTENANCE.md | 19 +- NAMESPACE | 2 + R/access.R | 285 +++++++------ R/arcticdatautils.R | 11 +- R/attributes.R | 19 +- R/dataone.R | 51 ++- R/dataone_formats.R | 41 -- R/editing.R | 221 ++++++----- R/eml.R | 375 +++++++++--------- R/environment.R | 17 +- R/formats.R | 224 ++++++++++- R/helpers.R | 197 ++------- R/inserting.R | 13 +- R/interactive.R | 14 +- R/inventory.R | 103 ++--- R/marking.R | 20 +- R/{modify_metadata.R => metadata.R} | 59 ++- R/packaging.R | 141 ++++--- R/quality.R | 15 +- R/sysmeta.R | 210 +++++++--- R/util.R | 370 +++++++---------- README.md | 6 +- man/add_access_rules.Rd | 22 - man/add_additional_identifiers.Rd | 19 - man/add_admin_group_access.Rd | 14 - man/add_methods_step.Rd | 6 +- man/add_string_to_title.Rd | 16 - man/arcticdatautils.Rd | 9 +- man/change_eml_name.Rd | 19 - man/check_format.Rd | 18 - man/clear_methods.Rd | 6 +- man/clear_replication_policy.Rd | 17 - man/convert_iso_to_eml.Rd | 2 +- man/create_dummy_attributes_dataframe.Rd | 8 +- ...create_dummy_enumeratedDomain_dataframe.Rd | 6 +- man/create_dummy_metadata.Rd | 7 +- man/create_dummy_object.Rd | 6 +- man/create_dummy_package.Rd | 6 +- man/create_dummy_package_full.Rd | 5 +- man/create_dummy_parent_package.Rd | 6 +- man/create_from_folder.Rd | 28 -- man/create_object.Rd | 21 - man/create_resource_map.Rd | 22 +- man/create_sysmeta.Rd | 25 -- man/determine_child_pids.Rd | 16 - man/eml_abstract.Rd | 10 +- man/eml_add_entities.Rd | 10 +- man/eml_address.Rd | 10 +- man/eml_associated_party.Rd | 6 +- man/eml_contact.Rd | 6 +- man/eml_creator.Rd | 6 +- man/eml_geographic_coverage.Rd | 30 ++ man/eml_individual_name.Rd | 4 +- man/eml_metadata_provider.Rd | 6 +- man/eml_otherEntity_to_dataTable.Rd | 13 +- man/eml_party.Rd | 29 +- man/eml_personnel.Rd | 8 +- man/eml_project.Rd | 5 +- man/eml_set_reference.Rd | 7 +- man/eml_set_shared_attributes.Rd | 13 +- man/eml_validate_attributes.Rd | 14 +- man/env_get.Rd | 5 +- man/env_load.Rd | 27 -- man/extract_local_identifier.Rd | 20 - man/filter_obsolete_pids.Rd | 20 - man/filter_packaging_statements.Rd | 23 -- man/find_newest_object.Rd | 8 +- man/find_newest_resource_map.Rd | 23 -- man/fix_bad_enum.Rd | 20 - man/fix_bad_topic.Rd | 21 - man/format_eml.Rd | 9 +- man/format_iso.Rd | 11 +- man/generate_resource_map.Rd | 21 +- man/generate_resource_map_pid.Rd | 14 - man/get_all_sysmeta.Rd | 2 +- man/get_all_versions.Rd | 5 +- man/get_current_version.Rd | 15 - man/get_doc_id.Rd | 17 - man/get_formats.Rd | 20 - man/get_identifier.Rd | 22 - man/get_latest_release.Rd | 14 - man/get_mn_base_url.Rd | 8 +- man/get_ncdf4_attributes.Rd | 6 +- man/get_netcdf_format_id.Rd | 17 - man/get_or_create_pid.Rd | 21 - man/get_package.Rd | 8 +- man/get_package_direct.Rd | 21 - man/get_token.Rd | 6 +- man/get_token_subject.Rd | 14 - man/guess_format_id.Rd | 9 +- man/insert_file.Rd | 20 - man/insert_package.Rd | 21 - man/inv_add_extra_columns.Rd | 19 - man/inv_add_parent_package_column.Rd | 17 - man/inv_init.Rd | 21 - man/inv_load_checksums.Rd | 23 -- man/inv_load_dois.Rd | 19 - man/inv_load_files.Rd | 24 -- man/inv_load_identifiers.Rd | 23 -- man/inv_load_sizes.Rd | 21 - man/inv_update.Rd | 16 - man/is_authorized.Rd | 6 +- man/is_format_id.Rd | 21 - man/is_obsolete.Rd | 4 +- man/is_public_read.Rd | 2 +- man/is_resource_map.Rd | 19 - man/is_token_expired.Rd | 4 +- man/is_token_set.Rd | 5 +- man/log_message.Rd | 19 - man/mdq_run.Rd | 11 +- man/new_uuid.Rd | 4 +- man/object_exists.Rd | 8 +- man/parse_resource_map.Rd | 8 +- man/path_join.Rd | 18 - man/pid_to_eml_datatable.Rd | 27 -- man/pid_to_eml_entity.Rd | 16 +- man/pid_to_eml_other_entity.Rd | 16 - man/pid_to_eml_physical.Rd | 6 +- man/pretty_print.Rd | 29 -- man/publish_object.Rd | 22 +- man/publish_update.Rd | 38 +- man/replace_package_id.Rd | 18 - man/replace_subject.Rd | 24 -- man/set_abstract.Rd | 10 +- man/set_access.Rd | 2 +- man/set_file_name.Rd | 6 +- man/set_other_entities.Rd | 18 - man/set_rights_and_access.Rd | 2 +- man/set_rights_holder.Rd | 2 +- man/show_indexing_status.Rd | 8 +- man/show_random_dataset.Rd | 21 - man/substitute_eml_party.Rd | 19 - man/sysmeta_to_eml_other_entity.Rd | 14 - man/sysmeta_to_eml_physical.Rd | 8 +- man/sysmeta_to_other_entity.Rd | 14 - man/test_has_abstract.Rd | 32 -- man/theme_packages.Rd | 46 --- man/update_object.Rd | 13 +- man/update_package.Rd | 41 -- man/update_package_object.Rd | 18 +- man/update_physical.Rd | 22 - man/update_resource_map.Rd | 39 +- man/validate_environment.Rd | 14 - man/validate_inventory.Rd | 14 - man/view_packages.Rd | 15 - man/view_profile.Rd | 18 +- man/warn_current_version.Rd | 13 - man/which_in_eml.Rd | 11 +- 150 files changed, 1618 insertions(+), 2687 deletions(-) delete mode 100644 R/dataone_formats.R rename R/{modify_metadata.R => metadata.R} (88%) delete mode 100644 man/add_access_rules.Rd delete mode 100644 man/add_additional_identifiers.Rd delete mode 100644 man/add_admin_group_access.Rd delete mode 100644 man/add_string_to_title.Rd delete mode 100644 man/change_eml_name.Rd delete mode 100644 man/check_format.Rd delete mode 100644 man/clear_replication_policy.Rd delete mode 100644 man/create_from_folder.Rd delete mode 100644 man/create_object.Rd delete mode 100644 man/create_sysmeta.Rd delete mode 100644 man/determine_child_pids.Rd create mode 100644 man/eml_geographic_coverage.Rd delete mode 100644 man/env_load.Rd delete mode 100644 man/extract_local_identifier.Rd delete mode 100644 man/filter_obsolete_pids.Rd delete mode 100644 man/filter_packaging_statements.Rd delete mode 100644 man/find_newest_resource_map.Rd delete mode 100644 man/fix_bad_enum.Rd delete mode 100644 man/fix_bad_topic.Rd delete mode 100644 man/generate_resource_map_pid.Rd delete mode 100644 man/get_current_version.Rd delete mode 100644 man/get_doc_id.Rd delete mode 100644 man/get_formats.Rd delete mode 100644 man/get_identifier.Rd delete mode 100644 man/get_latest_release.Rd delete mode 100644 man/get_netcdf_format_id.Rd delete mode 100644 man/get_or_create_pid.Rd delete mode 100644 man/get_package_direct.Rd delete mode 100644 man/get_token_subject.Rd delete mode 100644 man/insert_file.Rd delete mode 100644 man/insert_package.Rd delete mode 100644 man/inv_add_extra_columns.Rd delete mode 100644 man/inv_add_parent_package_column.Rd delete mode 100644 man/inv_init.Rd delete mode 100644 man/inv_load_checksums.Rd delete mode 100644 man/inv_load_dois.Rd delete mode 100644 man/inv_load_files.Rd delete mode 100644 man/inv_load_identifiers.Rd delete mode 100644 man/inv_load_sizes.Rd delete mode 100644 man/inv_update.Rd delete mode 100644 man/is_format_id.Rd delete mode 100644 man/is_resource_map.Rd delete mode 100644 man/log_message.Rd delete mode 100644 man/path_join.Rd delete mode 100644 man/pid_to_eml_datatable.Rd delete mode 100644 man/pid_to_eml_other_entity.Rd delete mode 100644 man/pretty_print.Rd delete mode 100644 man/replace_package_id.Rd delete mode 100644 man/replace_subject.Rd delete mode 100644 man/set_other_entities.Rd delete mode 100644 man/show_random_dataset.Rd delete mode 100644 man/substitute_eml_party.Rd delete mode 100644 man/sysmeta_to_eml_other_entity.Rd delete mode 100644 man/sysmeta_to_other_entity.Rd delete mode 100644 man/test_has_abstract.Rd delete mode 100644 man/theme_packages.Rd delete mode 100644 man/update_package.Rd delete mode 100644 man/update_physical.Rd delete mode 100644 man/validate_environment.Rd delete mode 100644 man/validate_inventory.Rd delete mode 100644 man/view_packages.Rd delete mode 100644 man/warn_current_version.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 795beb5..fc02950 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,4 @@ ^docs$ ^\.travis\.yml$ MAINTENANCE.md +^LICENSE$ diff --git a/DESCRIPTION b/DESCRIPTION index 697d84c..b19f2da 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,10 +8,16 @@ Authors@R: c( person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb"), - person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb") + person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), + person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb") ) -Description: A set of utilites for working with the Arctic Data Center +Description: A set of utilities for working with the Arctic Data Center (https://arcticdata.io). +License: Apache License (== 2.0) +URL: https://nceas.github.io/arcticdatautils/ +BugReports: https://github.com/NCEAS/arcticdatautils/issues +Encoding: UTF-8 +LazyData: true Depends: R (>= 3.2.3) Imports: @@ -28,9 +34,6 @@ Imports: uuid, xml2, XML -License: MIT + file LICENSE -Encoding: UTF-8 -LazyData: true Suggests: dplyr, testthat, @@ -40,5 +43,6 @@ Suggests: rmarkdown, yaml, xslt +Roxygen: list(markdown = TRUE) RoxygenNote: 6.1.0 VignetteBuilder: knitr diff --git a/MAINTENANCE.md b/MAINTENANCE.md index 34ca32c..1dcdf91 100644 --- a/MAINTENANCE.md +++ b/MAINTENANCE.md @@ -1,9 +1,9 @@ # Maintenance -TODO: Add reasons for these things! +This document serves as a guide for new maintainers of and contributors to the arcticdatautils package. + +*Note: This is a work-in-progress, so expect it to change and improve over time.* -This document should serve as a guide for new maintainers of this package. -It's a work in progress so expect it to change and, hopefully, get better and more complete over time. ## Releases @@ -72,9 +72,14 @@ remotes::install_github("nceas/arcticdatautils@*release") Note: `@*release` specifies that the latest release should be installed. -## Pull Requests -- Code style -- Commit style +## Pull Requests -TODO +- Follow the [tidyverse style conventions](http://style.tidyverse.org/), with the following specific style preferences: + - use underscore for all variable names unless referring to an EML object (e.g., otherEntity, publicationDate, etc.) + - include argument checks in the form of `stopifnot()` statements for all functions +- Before submitting a pull request, please update documentation, check package, and run tests: + - use `devtools::check()` + - fix any ERRORs and test failures to ensure the Travis CI build passes +- Commit messages should follow these [guidelines](https://chris.beams.io/posts/git-commit/) +- If fixing an issue, pull requests should reference that issue (e.g., "This update closes #93.") diff --git a/NAMESPACE b/NAMESPACE index 82c402f..d183809 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(eml_address) export(eml_associated_party) export(eml_contact) export(eml_creator) +export(eml_geographic_coverage) export(eml_individual_name) export(eml_metadata_provider) export(eml_otherEntity_to_dataTable) @@ -30,6 +31,7 @@ export(env_get) export(find_newest_object) export(format_eml) export(format_iso) +export(generate_resource_map) export(get_all_sysmeta) export(get_all_versions) export(get_mn_base_url) diff --git a/R/access.R b/R/access.R index 92c6a14..989a1fc 100644 --- a/R/access.R +++ b/R/access.R @@ -1,6 +1,52 @@ -# access.R -# -# High-level utility functions for getting and setting access rules for DataONE objects. +# High-level utility functions for getting and setting access rules for DataONE objects + + +#' Add access rules to the sysmeta object +#' +#' This is a function to add a standard set of access rules to +#' every object so that the access rules don't differ across objects. +#' +#' @param sysmeta (SystemMetadata) The SystemMetadata to add rules to. +#' +#' @return The modified SystemMetadata object. +#' +#' @noRd +add_access_rules <- function(sysmeta) { + if (!inherits(sysmeta, "SystemMetadata")) { + stop(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) + } + + # Add myself explicitly as changePermission/write so I can update objects + # in the dev environment + if (env_get() == "development") { + sysmeta <- datapack::addAccessRule(sysmeta, env_load(skip_mn = TRUE)$submitter, "changePermission") + } + + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") + + sysmeta +} + + +#' Add access to the given System Metadata for the arctic-data-admins group +#' +#' @param sysmeta (sysmeta) System Metadata object. +#' +#' @noRd +add_admin_group_access <- function(sysmeta) { + if (!inherits(sysmeta, "SystemMetadata")) { + message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) + return(sysmeta) + } + + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") + + sysmeta +} #' Set the rights holder for an object @@ -11,12 +57,13 @@ #' #' @param mn (MNode) The Member Node. #' @param pids (character) The PIDs of the objects to set the rights holder for. -#' @param subject (character) The identifier of the new rights holder, typially an ORCID or DN. +#' @param subject (character) The identifier of the new rights holder, typically an ORCID or DN. +#' +#' @return (logical) Whether an update was needed. #' #' @import dataone #' @import datapack #' -#' @return (logical) Whether an update was needed. #' @export #' #' @examples @@ -108,11 +155,12 @@ set_rights_holder <- function(mn, pids, subject) { #' #' @param mn (MNode) The Member Node. #' @param pids (character) The PIDs of the objects to set permissions for. -#' @param subjects (character) The identifiers of the subjects to set permissions for, typially an ORCID or DN. +#' @param subjects (character) The identifiers of the subjects to set permissions for, typically an ORCID or DN. #' @param permissions (character) Optional. The permissions to set. Defaults to -#' read, write, and changePermission. +#' read, write, and changePermission. #' #' @return (logical) Whether an update was needed. +#' #' @export #' #' @examples @@ -194,35 +242,19 @@ set_access <- function(mn, pids, subjects, permissions = c("read", "write", "cha } -#' Set public read access for an object +#' Set rights holder with access policy for an object #' -#' Set public read access for an object. +#' Set the given subject as the rights holder and with given permissions +#' for the given objects. This function only updates the existing +#' System Metadata when a change is needed. #' #' @param mn (MNode) The Member Node. -#' @param pids (character) The PIDs of the objects to set public read access for. +#' @param pids (character) The PIDs of the objects to set the rights holder and access policy for. +#' @param subject (character) The identifier of the new rights holder, typically an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to +#' read, write, and changePermission. #' #' @return (logical) Whether an update was needed. -#' @export -#' -#' @examples -#'\dontrun{ -#' cn <- CNode("STAGING2") -#' mn <- getMNode(cn,"urn:node:mnTestKNB") -#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", -#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -#' set_public_read(mn, pids) -#'} -set_public_read <- function(mn, pids) { - set_access(mn, pids, "public", "read") -} - - -#' Remove public read access for an object -#' -#' Remove public read access for an object. -#' -#' @param mn (MNode) The Member Node. -#' @param pids (character) The PIDs of the objects to remove public read access for. #' #' @export #' @@ -231,10 +263,11 @@ set_public_read <- function(mn, pids) { #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") #' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", -#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -#' remove_public_read(mn, pids) +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) #'} -remove_public_read <- function(mn, pids) { +set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "write", "changePermission")) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) } @@ -244,11 +277,23 @@ remove_public_read <- function(mn, pids) { stop("Argument 'pids' must be character class with non-zero number of characters.") } + if (!all(is.character(subject), + nchar(subject) > 0)) { + stop("Argument 'subject' must be character class with non-zero number of characters.") + } + + if (grepl("^https:\\/\\/orcid\\.org", subject)) { + stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } # Store the results of each attempted update results <- c() - # Remove public read access for each PID + # Set rights and access for each PID for (pid in pids) { sysmeta <- tryCatch({ dataone::getSystemMetadata(mn, pid) @@ -265,31 +310,48 @@ remove_public_read <- function(mn, pids) { stop("Failed to get System Metadata.") } - # Track whether we have changed the record to avoid an uncessary update call + # Track whether we have changed the record to avoid an unnecessary update call changed <- FALSE - if (!datapack::hasAccessRule(sysmeta, "public", "read")) { - message(paste0("Skipping setting public read because ", pid, " is not public.")) - next + # Set rights holder if needed + if (subject != sysmeta@rightsHolder) { + changed <- TRUE + + message(paste0("Setting rights holder to ", subject, ".")) + sysmeta@rightsHolder <- subject + } else { + message(paste0("Skipping setting rightsHolder as rightsHolder is already ", sysmeta@rightsHolder, ".\n")) } - changed <- TRUE + for (permission in permissions) { + if (datapack::hasAccessRule(sysmeta, subject, permission)) { + message(paste0("Skipping the addition of permission '", permission, "' for subject '", subject, "'\n")) + next + } - message(paste0("Removing public read access on ", pid, ".")) - sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)), ] + changed <- TRUE - # Update the sysmeta - update_response <- tryCatch({ - dataone::updateSystemMetadata(mn, pid, sysmeta) - }, - error = function(e) { - message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) - message(e) - e - }) + message(paste0("Adding permission '", permission, "' for subject '", subject, "'\n")) + sysmeta <- datapack::addAccessRule(sysmeta, subject, permission) + } - if (inherits(update_response, "error")) { - stop("Failed update.") + if (changed == TRUE) { + message(paste0("Updating System Metadata for ", pid, ".")) + + update_response <- tryCatch({ + dataone::updateSystemMetadata(mn, pid, sysmeta) + }, + error = function(e) { + message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) + message(e) + e + }) + + if (inherits(update_response, "error")) { + stop("Failed update.") + } + } else { + message(paste0("No changes needed for ", pid, ".")) } # Save the result for this PID @@ -300,19 +362,15 @@ remove_public_read <- function(mn, pids) { } -#' Set rights holder with access policy for an object +#' Set public read access for an object #' -#' Set the given subject as the rights holder and with given permissions -#' for the given objects. This function only updates the existing -#' System Metadata when a change is needed. +#' Set public read access for an object. #' #' @param mn (MNode) The Member Node. -#' @param pids (character) The PIDs of the objects to set the rights holder and access policy for. -#' @param subject (character) The identifier of the new rights holder, typially an ORCID or DN. -#' @param permissions (character) Optional. The permissions to set. Defaults to -#' read, write, and changePermission. +#' @param pids (character) The PIDs of the objects to set public read access for. #' #' @return (logical) Whether an update was needed. +#' #' @export #' #' @examples @@ -320,11 +378,32 @@ remove_public_read <- function(mn, pids) { #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") #' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", -#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") -#' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", -#' permissions = c("read", "write", "changePermission")) +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_public_read(mn, pids) #'} -set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "write", "changePermission")) { +set_public_read <- function(mn, pids) { + set_access(mn, pids, "public", "read") +} + + +#' Remove public read access for an object +#' +#' Remove public read access for an object. +#' +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to remove public read access for. +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' remove_public_read(mn, pids) +#'} +remove_public_read <- function(mn, pids) { if (!is(mn, "MNode")) { stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) } @@ -334,23 +413,11 @@ set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "wr stop("Argument 'pids' must be character class with non-zero number of characters.") } - if (!all(is.character(subject), - nchar(subject) > 0)) { - stop("Argument 'subject' must be character class with non-zero number of characters.") - } - - if (grepl("^https:\\/\\/orcid\\.org", subject)) { - stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") - } - - if (!all(permissions %in% c("read", "write", "changePermission"))) { - stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") - } # Store the results of each attempted update results <- c() - # Set rights and access for each PID + # Remove public read access for each PID for (pid in pids) { sysmeta <- tryCatch({ dataone::getSystemMetadata(mn, pid) @@ -367,48 +434,31 @@ set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "wr stop("Failed to get System Metadata.") } - # Track whether we have changed the record to avoid an unnecessary update call + # Track whether we have changed the record to avoid an uncessary update call changed <- FALSE - # Set rights holder if needed - if (subject != sysmeta@rightsHolder) { - changed <- TRUE - - message(paste0("Setting rights holder to ", subject, ".")) - sysmeta@rightsHolder <- subject - } else { - message(paste0("Skipping setting rightsHolder as rightsHolder is already ", sysmeta@rightsHolder, ".\n")) + if (!datapack::hasAccessRule(sysmeta, "public", "read")) { + message(paste0("Skipping setting public read because ", pid, " is not public.")) + next } - for (permission in permissions) { - if (datapack::hasAccessRule(sysmeta, subject, permission)) { - message(paste0("Skipping the addition of permission '", permission, "' for subject '", subject, "'\n")) - next - } - - changed <- TRUE - - message(paste0("Adding permission '", permission, "' for subject '", subject, "'\n")) - sysmeta <- datapack::addAccessRule(sysmeta, subject, permission) - } + changed <- TRUE - if (changed == TRUE) { - message(paste0("Updating System Metadata for ", pid, ".")) + message(paste0("Removing public read access on ", pid, ".")) + sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)), ] - update_response <- tryCatch({ - dataone::updateSystemMetadata(mn, pid, sysmeta) - }, - error = function(e) { - message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) - message(e) - e - }) + # Update the sysmeta + update_response <- tryCatch({ + dataone::updateSystemMetadata(mn, pid, sysmeta) + }, + error = function(e) { + message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) + message(e) + e + }) - if (inherits(update_response, "error")) { - stop("Failed update.") - } - } else { - message(paste0("No changes needed for ", pid, ".")) + if (inherits(update_response, "error")) { + stop("Failed update.") } # Save the result for this PID @@ -426,13 +476,14 @@ set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "wr #' #' @param mn (MNode) The Member Node. #' @param pids (character) The PIDs of the objects to check for public read access. -#' @param use.names (logical) Optional. If `TRUE` (the default), PIDs will -#' be used as names for the result unless PIDs have names already, in which case -#' those names will be used for the result. +#' @param use.names (logical) If `TRUE`, PIDs will +#' be used as names for the result unless PIDs have names already, in which case +#' those names will be used for the result. +#' +#' @return (logical) Whether an object has public read access. #' #' @importFrom httr content #' -#' @return (logical) Whether an object has public read access. #' @export #' #' @examples diff --git a/R/arcticdatautils.R b/R/arcticdatautils.R index d2320c5..0dd2de0 100644 --- a/R/arcticdatautils.R +++ b/R/arcticdatautils.R @@ -1,13 +1,14 @@ #' arcticdatautils: Utilities for the Arctic Data Center #' -#' The foo package provides three categories of important functions: -#' foo, bar and baz. -#' -#' @section Foo functions: -#' The foo functions ... +#' This package contains code for doing lots of useful stuff that's too specific for the +#' dataone package, primarily functions that streamline Arctic Data Center operations. #' #' @docType package #' @name arcticdatautils +#' +#' @import dataone +#' @import datapack +#' @import EML #' @importFrom methods as is new slot slot<- #' @importFrom stats na.omit #' @importFrom utils URLencode head read.csv read.delim setTxtProgressBar txtProgressBar diff --git a/R/attributes.R b/R/attributes.R index fed08e6..f2d77bc 100644 --- a/R/attributes.R +++ b/R/attributes.R @@ -1,8 +1,14 @@ +# Functions for editing metadata attributes + + #' Get a data.frame of attributes from a NetCDF object #' -#' @param nc (ncdf4 or character) Either a ncdf4 object or a file path +#' Get a data.frame of attributes from a NetCDF object. +#' +#' @param nc (ncdf4/character) Either a ncdf4 object or a file path. +#' +#' @return (data.frame) A data.frame of the attributes. #' -#' @return (data.frame) A data.frame of the attributes #' @export #' #' @examples @@ -13,11 +19,11 @@ get_ncdf4_attributes <- function(nc) { stopifnot(is(nc, "ncdf4") || file.exists(nc)) if (!requireNamespace("ncdf4")) { - stop(call. = FALSE, + stop(call. = FALSE, "The package 'ncdf4' must be installed to run this function. ", "Please install it and try again.") } - + # Read the file in if `nc` is a character vector if (is.character(nc)) { nc <- ncdf4::nc_open(nc) @@ -26,7 +32,7 @@ get_ncdf4_attributes <- function(nc) { unitlist <- c() - for (i in 1:length(dims)){ + for (i in 1:length(dims)) { unitlist[i] <- dims[[i]]$units } inds <- which(unitlist != '') @@ -35,7 +41,7 @@ get_ncdf4_attributes <- function(nc) { attributes <- c(names(nc$var), attributes(dims)$names) - result <- data.frame(attributeName=NA) + result <- data.frame(attributeName = NA) for (i in seq_along(attributes)) { result[i,"attributeName"] <- attributes[i] @@ -49,4 +55,3 @@ get_ncdf4_attributes <- function(nc) { result } - diff --git a/R/dataone.R b/R/dataone.R index 40ab5c9..3f6229b 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -1,12 +1,14 @@ -#' dataone.R -#' -#' Helpers for the DataONE R package. +# Helper functions for the DataONE R package + +#' Test whether a token is set +#' #' Test whether a token is set. #' -#' @param node (MNode|CNode) The CN or MN you want to find a token for. +#' @param node (MNode/CNode) The Member/Coordinating Node to query. #' #' @return (logical) +#' #' @export #' #' @examples @@ -27,11 +29,14 @@ is_token_set <- function(node) { } -#' Gets the currently set authentication token. +#' Get the currently set authentication token +#' +#' Get the currently set authentication token. #' -#' @param node (MNode|CNode) The CN or MN you want to find a token for. +#' @param node (MNode/CNode) The Member/Coordinating Node to query. #' #' @return (character) The token. +#' #' @export #' #' @examples @@ -59,17 +64,22 @@ get_token <- function(node) { } +#' Determine whether token is expired +#' #' Determine whether the set token is expired. -#' @param node (character) A member node instance +#' +#' @param node (character) The Member Node. +#' #' @return (logical) +#' #' @export #' #' @examples -#'\dontrun{ -#'cn <- CNode('STAGING2') -#'mn <- getMNode(cn,"urn:node:mnTestKNB") -#'is_token_expired(mn) -#'} +#' \dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' is_token_expired(mn) +#' } is_token_expired <- function(node) { token_name <- ifelse(node@env == "prod", "dataone_token", "dataone_test_token") @@ -102,11 +112,14 @@ is_token_expired <- function(node) { } -#' Get the base URL of the Member Node. +#' Get base URL of a Member Node +#' +#' Get the base URL of a Member Node. #' -#' @param mn (character) A mn instance +#' @param mn (character) The Member Node. +#' +#' @return (character) The URL. #' -#' @return (character) The URL #' @export #' #' @examples @@ -125,12 +138,16 @@ get_mn_base_url <- function(mn) { } +#' Check if user has authorization to perform an action on an object +#' #' Check if the user has authorization to perform an action on an object. #' -#' @param node (MNode|CNode) The Node to query. +#' @param node (MNode/CNode) The Member/Coordinating Node to query. #' @param ids (character) The PID or SID to check. #' @param action (character) One of read, write, or changePermission. -#' @return (boolean) +#' +#' @return (logical) +#' #' @export #' #' @examples diff --git a/R/dataone_formats.R b/R/dataone_formats.R deleted file mode 100644 index af87ae7..0000000 --- a/R/dataone_formats.R +++ /dev/null @@ -1,41 +0,0 @@ -#' dataone_formats.R -#' -#' A set of thin functions which return the DataONE format ID string. These are -#' to aid in filling in function arguments and can't remember or don't want to -#' type in the full format ID. By putting these format ID strings into -#' functions, a user's autocompletion routine in their editor can help them -#' fill in the format ID they want. - -#' Helper function to generate the ISO 19139 format ID.w -#' -#' @return (character) The format ID for ISO 19139. -#' @export -#' -#' @examples -#' format_iso() -#' \dontrun{ -#' # Upload a local ISO19139 XML file: -#' env <- env_load() -#' publish_object(env$mn, "path_to_some_EML_file", format_iso()) -#' } -format_iso <- function() { - "http://www.isotc211.org/2005/gmd" -} - - -#' Helper function to generate the EML 2.1.1 format ID. -#' -#' @return (character) The format ID for EML 2.1.1. -#' @export -#' -#' @examples -#' format_eml -#' -#' \dontrun{ -#' # Upload a local EML 2.1.1 file: -#' env <- env_load() -#' publish_object(env$mn, "path_to_some_EML_file", format_eml()) -#' } -format_eml <- function() { - "eml://ecoinformatics.org/eml-2.1.1" -} diff --git a/R/editing.R b/R/editing.R index f07d30f..be1647b 100644 --- a/R/editing.R +++ b/R/editing.R @@ -1,29 +1,31 @@ -#' editing.R -#' -#' High-level functions for managing content. +# High-level functions for managing content -#' Publish an object on a member node +#' Publish an object on a Member Node #' -#' Use sensible defaults to publish an object on a member node. If identifier is provided, -#' use it, otherwise generate a UUID. If clone_id is provided, then retrieve the +#' Use sensible defaults to publish an object on a Member Node. If identifier is provided, +#' use it, otherwise generate a UUID. If clone_id is provided, then retrieve the #' system metadata for that identifier and use it to provide rightsHolder, accessPolicy, #' and replicationPolicy metadata. Note that this function only uploads the object to #' the Member Node, and does not add it to a data package, which can be done separately. #' #' @param mn (MNode) The Member Node to publish the object to. -#' @param path the path to the file to be published -#' @param format_id (character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param path (character) The path to the file to be published. +#' @param format_id (character) Optional. The format ID to set for the object. +#' When not set, [guess_format_id()] will be used to guess the format ID. +#' Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. #' @param pid (character) Optional. The PID to use with the object. #' @param sid (character) Optional. The SID to use with the new object. -#' @param clone_pid (character) PID of objet to clone System Metadata from -#' @param public (logical) TRUE/FALSE Whether object should be given public read access. +#' @param clone_pid (character) PID of object to clone System Metadata from. +#' @param public (logical) Whether object should be given public read access. +#' +#' @return pid (character) The PID of the published object. #' #' @import dataone #' @import datapack -#' @return pid (character). The PID of the published object. #' #' @export +#' #' @examples #'\dontrun{ #' cn <- CNode("STAGING2") @@ -102,7 +104,7 @@ publish_object <- function(mn, } sysmeta <- add_admin_group_access(sysmeta) - if(public == TRUE){ + if (public == TRUE) { sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") } sysmeta@fileName <- basename(path) @@ -113,20 +115,25 @@ publish_object <- function(mn, sysmeta = sysmeta) } -#' Update an object with a new file. + +#' Update an object with a new file #' -#' This is a convenience wrapper around `dataone::updateObject` which copies in +#' This is a convenience wrapper around [dataone::updateObject()] which copies in #' fields from the old object's System Metadata such as the rightsHolder and #' accessPolicy and updates only what needs to be changed. #' #' @param mn (MNode) The Member Node to update the object on. #' @param pid (character) The PID of the object to update. #' @param path (character) The full path to the file to update with. -#' @param format_id (character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. -#' @param new_pid (character) Optional. Specify the PID for the new Object. Defaults to automatically generating a new, random UUID-style PID. -#' @param sid (character) Optiona. Specify a Series ID (SID) to use for the new Object. +#' @param format_id (character) Optional. The format ID to set for the object. +#' When not set, [guess_format_id()] will be used to guess the format ID. +#' Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param new_pid (character) Optional. Specify the PID for the new object. +#' Defaults to automatically generating a new, random UUID-style PID. +#' @param sid (character) Optional. Specify a Series ID (SID) to use for the new object. #' #' @return (character) The PID of the updated object. +#' #' @export #' #' @examples @@ -137,7 +144,7 @@ publish_object <- function(mn, #' my_path <- "/home/Documents/myfile.csv" #' new_pid <- update_object(mn, pid, my_path, format_id = "text/csv") #'} -update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) { +update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) { stopifnot(is(mn, "MNode")) stopifnot(object_exists(mn, pid)) stopifnot(file.exists(path)) @@ -201,7 +208,9 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) } -#' Publish an updated data package. +#' Publish an updated data package +#' +#' Publish an update to a data package after updating data files or metadata. #' #' This function can be used for a variety of tasks: #' @@ -214,11 +223,11 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' The metadata_pid and resource_map_pid provide the identifier of an EML metadata #' document and associated resource map, and the data_pids vector provides a list #' of PIDs of data objects in the package. Update the metadata file and resource map -#' by generating a new identifier (a DOI if use_doi is TRUE) and updating the Member +#' by generating a new identifier (a DOI if `use_doi = TRUE`) and updating the Member #' Node with a public version of the object. If metadata_file is not missing, it #' should be an edited version of the metadata to be used to update the original. If #' parent_resmap_pid is not missing, it indicates the PID of a parent package that -#' should be updated as well, using the parent_medata_pid, parent_data_pids, and +#' should be updated as well, using the parent_metadata_pid, parent_data_pids, and #' parent_child_pids as members of the updated package. In all cases, the objects #' are made publicly readable. #' @@ -228,23 +237,31 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' @param data_pids (character) PID(s) of data objects that will go in the updated package. #' @param identifier (character) Manually specify the identifier for the new metadata object. #' @param use_doi (logical) Generate and use a DOI as the identifier for the updated metadata object. -#' @param parent_resmap_pid (character) Optional. PID of a parent package to be updated. Not optional if a parent package exists. -#' @param parent_metadata_pid (character) Optional. Identifier for the metadata document of the parent package. Not optional if a parent package exists. -#' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. Not optional if the parent package contains data objects. -#' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages. +#' @param parent_resmap_pid (character) Optional. PID of a parent package to be updated. +#' Not optional if a parent package exists. +#' @param parent_metadata_pid (character) Optional. Identifier for the metadata document of the parent package. +#' Not optional if a parent package exists. +#' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. +#' Not optional if the parent package contains data objects. +#' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. +#' \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages. #' @param child_pids (character) Optional. Child packages resource map PIDs. -#' @param metadata_path (character or eml) Optional. An eml class object or a path to a metadata file to update with. If this is not set, the existing metadata document will be used. -#' @param public (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). -#' This applies to the new metadata PID and its resource map and data object. -#' access policies are not affected. -#' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. -#' @return pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. +#' @param metadata_path (character or eml) Optional. An eml class object or a path to a metadata file to update with. +#' If this is not set, the existing metadata document will be used. +#' @param public (logical) Optional. Make the update public. If `FALSE`, will set the metadata and resource map to private (but not the data objects). +#' This applies to the new metadata PID and its resource map and data object. +#' access policies are not affected. +#' @param check_first (logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. +#' Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. +#' +#' @return (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. #' #' @import dataone #' @import datapack #' @import EML #' #' @export +#' #' @examples #'\dontrun{ #' cn <- CNode("STAGING2") @@ -262,17 +279,17 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) publish_update <- function(mn, metadata_pid, resource_map_pid, - data_pids=NULL, - child_pids=NULL, - metadata_path=NULL, - identifier=NULL, - use_doi=FALSE, - parent_resmap_pid=NULL, - parent_metadata_pid=NULL, - parent_data_pids=NULL, - parent_child_pids=NULL, - public=TRUE, - check_first=TRUE) { + data_pids = NULL, + child_pids = NULL, + metadata_path = NULL, + identifier = NULL, + use_doi = FALSE, + parent_resmap_pid = NULL, + parent_metadata_pid = NULL, + parent_data_pids = NULL, + parent_child_pids = NULL, + public = TRUE, + check_first = TRUE) { # Don't allow setting a dataset to private when it uses a DOI if (use_doi && !public) { @@ -357,14 +374,14 @@ publish_update <- function(mn, # Check for obsoleted metadata_pid meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy if (!is.na(meta_obsoletedBy)) { - stop("The value passed in for the argument 'metadata_pid' of '", metadata_pid, "' is already obsoleted by a newer version with PID '", meta_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each Object series.") + stop("The value passed in for the argument 'metadata_pid' of '", metadata_pid, "' is already obsoleted by a newer version with PID '", meta_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each object series.") } } # Check for obsoleted resource_map_pid. The resource map and metadata can desassociate without this check. rm_obsoletedBy <- dataone::getSystemMetadata(mn, resource_map_pid)@obsoletedBy if (!is.na(rm_obsoletedBy)) { - stop("The value passed in for the argument 'resource_map_pid' of '", resource_map_pid, "' is already obsoleted by a newer version with PID '", rm_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each Object series.") + stop("The value passed in for the argument 'resource_map_pid' of '", resource_map_pid, "' is already obsoleted by a newer version with PID '", rm_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each object series.") } # Prepare the response object @@ -379,9 +396,9 @@ publish_update <- function(mn, message("Getting metadata from the MN.") eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) - } else if(class(metadata_path) == "eml") { + } else if (class(metadata_path) == "eml") { # If an eml object is provided, use it directly after validating - if(!eml_validate(metadata_path)){ + if (!eml_validate(metadata_path)) { stop("The EML object is not valid.") } @@ -548,28 +565,26 @@ publish_update <- function(mn, } -#' Create a resource map Object on a Member Node. +#' Create a resource map object on a Member Node #' #' This function first generates a new resource map RDF/XML document locally and -#' then uses the dataone::createObject function to create the Object on the +#' then uses the [dataone::createObject()] function to create the object on the #' specified MN. #' -#' If you only want to generate resource map RDF/XML, see -#' \code{\link{generate_resource_map}} +#' If you only want to generate resource map RDF/XML, see [generate_resource_map()]. #' #' @param mn (MNode) The Member Node -#' @param metadata_pid (character) The PID of the metadata object to go in the -#' package. -#' @param data_pids (character) The PID(s) of the data objects to go in the -#' package. +#' @param metadata_pid (character) The PID of the metadata object to go in the package. +#' @param data_pids (character) The PID(s) of the data objects to go in the package. #' @param child_pids (character) The resource map PIDs of the packages to be #' nested under the package. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as -#' aruments exist on the MN before continuing. This speeds up the function, +#' arguments exist on the MN before continuing. This speeds up the function, #' especially when `data_pids` has many elements. -#' @param ... Additional arguments that can be passed into \code{\link{publish_object}} +#' @param ... Additional arguments that can be passed into [publish_object()]. +#' +#' @return (character) The PID of the created resource map. #' -#' @return (character) The created resource map's PID #' @export #' #' @examples @@ -581,7 +596,6 @@ publish_update <- function(mn, #' dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', #' 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') #' -#' #' create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) #'} create_resource_map <- function(mn, @@ -624,48 +638,46 @@ create_resource_map <- function(mn, } -#' Update an existing resource map Object on a Member Node. +#' Update an existing resource map object on a Member Node #' #' This function first generates a new resource map RDF/XML document locally and -#' then uses the dataone::updateObject function to update an Object on the +#' then uses the [dataone::updateObject()] function to update an object on the #' specified MN. #' -#' If you only want to generate resource map RDF/XML, see -#' \code{\link{generate_resource_map}}. +#' If you only want to generate resource map RDF/XML, see [generate_resource_map()]. #' -#' This function also can be used to be used to add a new child packages to a -#' parent package. For exmaple, if you have: +#' This function also can be used to add a new child packages to a +#' parent package. For example, if you have: #' #' Parent A B #' -#' and want to add C as a sibling package to A and B, e.g. +#' and want to add C as a sibling package to A and B, e.g.: #' #' Parent A B C #' -#' you could use this function. +#' then you could use this function. #' -#' Note: This function currently replaces the rightsHolder on the Resource Map +#' Note: This function currently replaces the rightsHolder on the resource map #' temporarily to allow updating but sets it back to the rightsHolder that was #' in place before the update. #' -#' @param mn (MNode) The Member Node -#' @param metadata_pid (character) The PID of the metadata object to go in the -#' package. -#' @param data_pids (character) The PID(s) of the data objects to go in the -#' package. -#' @param child_pids child_pids (character) The resource map PIDs of the packages to be +#' @param mn (MNode) The Member Node. +#' @param metadata_pid (character) The PID of the metadata object to go in the package. +#' @param data_pids (character) The PID(s) of the data objects to go in the package. +#' @param child_pids (character) The resource map PIDs of the packages to be #' nested under the package. -#' @param public Whether or not to make the new resource map public read -#' (logical) +#' @param public (logical) Whether or not to make the new resource map public read. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as -#' aruments exist on the MN before continuing. This speeds up the function, +#' arguments exist on the MN before continuing. This speeds up the function, #' especially when `data_pids` has many elements. #' @param resource_map_pid (character) The PID of the resource map to be updated. -#' @param other_statements (data.frame) Extra statements to add to the Resource Map. +#' @param other_statements (data.frame) Extra statements to add to the resource map. #' @param identifier (character) Manually specify the identifier for the new metadata object. -#' @return pid (character) Updated resource map PID. +#' +#' @return (character) The PID of the updated resource map. #' #' @export +#' #' @examples #'\dontrun{ #' cn <- CNode('STAGING2') @@ -676,18 +688,17 @@ create_resource_map <- function(mn, #' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", #' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") #' -#' #' rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) #'} update_resource_map <- function(mn, resource_map_pid, metadata_pid, - data_pids=NULL, - child_pids=NULL, - other_statements=NULL, - identifier=NULL, - public=FALSE, - check_first=TRUE) { + data_pids = NULL, + child_pids = NULL, + other_statements = NULL, + identifier = NULL, + public = FALSE, + check_first = TRUE) { # Check arguments stopifnot(is(mn, "MNode")) @@ -779,13 +790,16 @@ update_resource_map <- function(mn, } -#' Set the file name on an object +#' Set the file name for an object +#' +#' Set the file name for an object. #' #' @param mn (MNode) The Member Node. #' @param pid (character) The PID of the object to set the file name on. #' @param name (character) The file name. #' -#' @return (logical) Whether the update succeeded, FALSE means there was an error. +#' @return (logical) Whether the update succeeded. +#' #' @export #' #' @examples @@ -819,7 +833,7 @@ set_file_name <- function(mn, pid, name) { #' #' This function updates the EML with the new physical #' of a data object once it has been updated. -#' This is a helper function for \code{\link{update_package_object}}. +#' This is a helper function for [update_package_object()]. #' #' @param eml (eml) An EML class object. #' @param mn (MNode) The Member Node of the data package. @@ -827,6 +841,8 @@ set_file_name <- function(mn, pid, name) { #' @param new_data_pid (character) The new identifier of the updated data object. #' #' @importFrom stringr str_detect +#' +#' @noRd update_physical <- function(eml, mn, data_pid, new_data_pid) { stopifnot(is(eml, "eml")) stopifnot(is(mn, "MNode")) @@ -870,35 +886,34 @@ update_physical <- function(eml, mn, data_pid, new_data_pid) { #' #' This function updates a data object and then automatically #' updates the package resource map with the new data PID. If an object -#' already has a \code{dataTable}, \code{otherEntity}, or \code{spatialVector} +#' already has a `dataTable`, `otherEntity`, or `spatialVector` #' with a working physical section, the EML will be updated with the new physical. -#' It is a convenience wrapper around \code{\link{update_object}} -#' and \code{\link{publish_update}}. +#' It is a convenience wrapper around [update_object()] and [publish_update()]. #' #' @param mn (MNode) The Member Node of the data package. #' @param data_pid (character) PID for data object to update. #' @param new_data_path (character) Path to new data object. #' @param resource_map_pid (character) PID for resource map to update. #' @param format_id (character) Optional. The format ID to set for the object. -#' When not set, \code{\link{guess_format_id}} will be used -#' to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. -#' @param public (logical) Optional. Make the update public. If FALSE, -#' will set the metadata and resource map to private (but not the data objects). -#' This applies to the new metadata PID and its resource map and data object. -#' Access policies are not affected. -#' @param use_doi (logical) Optional. If TRUE, a new DOI will be minted. -#' @param ... Other arguments to pass into \code{\link{publish_update}}. -#' -#' @return PIDs (character) Named character vector of PIDs in the data package, including PIDs +#' When not set, [guess_format_id()] will be used +#' to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param public (logical) Optional. Make the update public. If `FALSE`, +#' will set the metadata and resource map to private (but not the data objects). +#' This applies to the new metadata PID and its resource map and data object. +#' Access policies are not affected. +#' @param use_doi (logical) Optional. If `TRUE`, a new DOI will be minted. +#' @param ... Other arguments to pass into [publish_update()]. +#' +#' @return (character) Named character vector of PIDs in the data package, including PIDs #' for the metadata, resource map, and data objects. #' -#' @keywords update_object publish_update -#' #' @import dataone #' @import EML #' #' @export #' +#' @seealso [update_object()] [publish_update()] +#' #' @examples #' \dontrun{ #' cnTest <- dataone::CNode("STAGING") diff --git a/R/eml.R b/R/eml.R index fa1d7a6..f29a46c 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1,28 +1,26 @@ -#' eml.R -#' -#' Helpers for creating EML. +# Helper functions for creating EML metadata -#' Create EML entity from a DataONE pid +#' Create EML entity from a DataONE PID #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pid (character) The PID of the object to create the sub-tree for. -#' @param entityType (character) What kind of objects to create from the input. Either "dataTable", -#' "spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity". -#' @param ... (optional) Additional arguments to be passed to \code{new(entityType, ...)}. See example +#' @param entityType (character) What kind of objects to create from the input. One of "dataTable", +#' "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity". +#' @param ... (optional) Additional arguments to be passed to \code{new(entityType, ...)}. +#' +#' @return (list) The entity object. #' -#' @return (list) The entity object #' @export #' #' @examples #' \dontrun{ -#' #Generate EML otherEntity +#' # Generate EML otherEntity #' pid_to_eml_entity(mn, #' pid, #' entityType = "otherEntity", #' entityName = "Entity Name", #' entityDescription = "Description about entity") -#' #' } pid_to_eml_entity <- function(mn, pid, @@ -71,50 +69,16 @@ pid_to_eml_entity <- function(mn, } -#' This function is deprecated. See \link{pid_to_eml_entity}. -#' -#' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PID of the object to create the sub-tree for. -#' -pid_to_eml_other_entity <- function(mn, pids) { - .Deprecated(new = "pid_to_eml_entity", - package = "arcticdtautils", - old = "pid_to_eml_other_entity") -} - - -#' This function is deprecated. See \link{pid_to_eml_entity}. -#' -#' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pid (character) The PID of the object to create the \code{dataTable} for. -#' @param attributes (data.frame) Optional data frame of attributes. Follows the convention in \link[EML]{set_attributes}. -#' @param factors (data.frame) Optional data frame of enumerated attribute values (factors). Follows the convention in \link[EML]{set_attributes}. -#' @param name (character) Optional field to specify \code{entityName}, otherwise will be extracted from system metadata. -#' @param description (character) Optional field to specify \code{entityDescription}, otherwise will match name. -#' @param validateAttributes (logical) If set to FALSE or if attributes are not passed into the function, attribute validatio -#' -pid_to_eml_datatable <- function(mn, - pid, - attributes = NULL, - factors = NULL, - name = NULL, - description = NULL, - validateAttributes = TRUE) { - .Deprecated(new = "pid_to_eml_entity", - package = "arcticdtautils", - old = "pid_to_eml_other_entity") -} - - #' Create EML physical objects for the given set of PIDs #' -#' Note this is a wrapper around sysmeta_to_eml_physical which handles the task of -#' creating the EML physical +#' This is a wrapper around [sysmeta_to_eml_physical()] which handles the task of +#' creating the EML physical. #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pids (character) The PID of the object to create the sub-tree for. #' -#' @return (list of otherEntity) The otherEntity object(s) +#' @return (list) A list of otherEntity object(s). +#' #' @export #' #' @examples @@ -133,36 +97,17 @@ pid_to_eml_physical <- function(mn, pids) { sysmeta_to_eml_physical(sysmeta) } -#' This function is deprecated. See \link{pid_to_eml_entity}. -#' -#' @param sysmeta (SystemMetadata) One or more System Metadata objects -#' -sysmeta_to_eml_other_entity <- function(sysmeta) { - .Deprecated(new = "pid_to_eml_entity", - package = "arcticdtautils", - old = "sysmeta_to_other_entity") -} - - -#' This function is deprecated. See \link{sysmeta_to_eml_other_entity}. -#' -#' @param sysmeta (SystemMetadata) A SystemMetadata object -#' -sysmeta_to_other_entity <- function(sysmeta) { - .Deprecated("sysmeta_to_eml_other_entity", - package = "arcticdtautils", - old = "sysmeta_to_other_entity") -} -#' Create an EML physical object from System Metadata +#' Create an EML physical object from system metadata #' #' This function creates a pre-canned EML physical object from what's in the -#' System Metadata of an Object. Note that it sets an Online Distrubtion URL +#' System Metadata of an object. Note that it sets an Online Distribution URL #' of the DataONE v2 resolve service for the PID. #' -#' @param sysmeta (SystemMetadata) One or more System Metadata objects +#' @param sysmeta (SystemMetadata) One or more System Metadata objects. +#' +#' @return (list) A list of physical objects for each sysmeta. #' -#' @return (list of physical) The physical objects for each sysmeta #' @export #' #' @examples @@ -210,26 +155,16 @@ sysmeta_to_eml_physical <- function(sysmeta) { lapply(sysmeta, work) } -#' This function is deprecated. See \link{pid_to_eml_entity}. -#' -#' @param mn (MNode) The Member Node the objects exist on. -#' @param path (character) The location on disk of the EML file. -#' @param pids (character) One or more PIDs for the objects. -#' -set_other_entities <- function(mn, path, pids) { - .Deprecated(new = "pid_to_eml_entity", - package = "arcticdtautils", - old = "set_other_entities") -} #' Get the Metacat docid for the given identifier #' -#' @param sysmeta (SystemMetadata) The sysmeta of the object you want to find. -#' -#' @return (character) The docid +#' Get the Metacat docid for the given identifier. #' +#' @param sysmeta (SystemMetadata) The sysmeta of the object you want to find. #' +#' @return (character) The docid. #' +#' @noRd get_doc_id <- function(sysmeta) { stopifnot(is(sysmeta, "SystemMetadata")) @@ -266,13 +201,17 @@ get_doc_id <- function(sysmeta) { doc_id } -#' Adds a step to the methods document + +#' Add a methods step +#' +#' Add a methods step to an EML document. #' #' @param doc (eml) The EML document to add the method step to. #' @param title (character) The title of the method step. #' @param description (character) The description of the method. #' -#' @return (eml) The modified EML document +#' @return (eml) The modified EML document. +#' #' @export #' #' @examples @@ -299,11 +238,15 @@ add_methods_step <- function(doc, title, description) { doc } -#' Clear all methods from the document. + +#' Clear all methods +#' +#' Clear all methods from an EML document. #' #' @param doc (eml) The document to clear methods from. #' -#' @return (eml) The modified document. +#' @return (eml) The modified EML document. +#' #' @export #' #' @examples @@ -320,26 +263,27 @@ clear_methods <- function(doc) { doc } -#' Low-level helper for creating EML parties + +#' Create an EML party #' -#' You usually will want to use the high-level functions such as -#'\code{\link{eml_creator}} and \code{\link{eml_contact}} but using this is -#' fine. +#' You will usually want to use the high-level functions such as +#' [eml_creator()] and [eml_contact()] but using this is fine. #' -#' The \code{userId} argument assumes an ORCID so be sure to adjust for that. +#' The `userId` argument assumes an ORCID so be sure to adjust for that. #' -#' @param type (character) The type of party (e.g. 'contact') -#' @param given_names (character) The party's given name(s) -#' @param sur_name (character) The party's surname -#' @param organization (character) The party's organization name -#' @param position (character) The party's position -#' @param email (character) The party's email address(es) -#' @param phone (character) The party's phone number(s) -#' @param address (character) The party's address(es) -#' @param userId (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ -#' @param role (character) The party's role +#' @param type (character) The type of party (e.g. 'contact'). +#' @param given_names (character) The party's given name(s). +#' @param sur_name (character) The party's surname. +#' @param organization (character) The party's organization name. +#' @param position (character) The party's position. +#' @param email (character) The party's email address(es). +#' @param phone (character) The party's phone number(s). +#' @param address (character) The party's address(es). +#' @param userId (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ. +#' @param role (character) The party's role. +#' +#' @return (party) An instance of the party specified by the `type` argument. #' -#' @return An instance of the party specified by the in \code{type} argument #' @export #' #' @examples @@ -350,15 +294,15 @@ clear_methods <- function(doc) { #' c("Data Scientist", "Programmer")) #'} eml_party <- function(type="associatedParty", - given_names=NULL, - sur_name=NULL, - organization=NULL, - position=NULL, - email=NULL, - phone=NULL, - address=NULL, - userId=NULL, - role=NULL) { + given_names = NULL, + sur_name = NULL, + organization = NULL, + position = NULL, + email = NULL, + phone = NULL, + address = NULL, + userId = NULL, + role = NULL) { if (all(sapply(c(sur_name, organization, position), is.null))) { stop(call. = FALSE, "You must specify at least one of sur_name, organization, or position to make a valid creator") @@ -440,13 +384,15 @@ eml_party <- function(type="associatedParty", party } + #' Create an EML creator #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. #' -#' @param ... Arguments passed on to eml_party +#' @param ... Arguments passed on to [eml_party()]. +#' +#' @return (creator) The new creator. #' -#' @return (creator) The new creator #' @export #' #' @examples @@ -460,13 +406,15 @@ eml_creator <- function(...) { eml_party("creator", ...) } + #' Create an EML contact #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. #' -#' @param ... Arguments passed on to eml_party +#' @return (contact) The new contact. #' -#' @return (contact) The new contact #' @export #' #' @examples @@ -483,11 +431,12 @@ eml_contact <- function(...) { #' Create an EML metadataProvider #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. #' -#' @param ... Arguments passed on to eml_party +#' @param ... Arguments passed on to [eml_party()]. +#' +#' @return (metadataProvider) The new metadataProvider. #' -#' @return (metadataProvider) The new metadataProvider #' @export #' #' @examples @@ -496,13 +445,15 @@ eml_metadata_provider <- function(...) { eml_party("metadataProvider", ...) } + #' Create an EML associatedParty #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. #' -#' @param ... Arguments passed on to eml_party +#' @return (associatedParty) The new associatedParty. #' -#' @return (associatedParty) The new associatedParty #' @export #' #' @examples @@ -511,19 +462,22 @@ eml_associated_party <- function(...) { eml_party("associatedParty", ...) } + #' Create an EML personnel #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. +#' @param role (character) Personnel role, e.g. "principalInvestigator". +#' +#' @return (personnel) The new personnel. #' -#' @param ... Arguments passed on to eml_party -#' @param role (character) Personnel role, eg "principalInvestigator" -#' @return (personnel) The new personnel #' @export #' #' @examples #' eml_personnel("test", "user", email = "test@@user.com", role = "principalInvestigator") eml_personnel <- function(role = NULL, ...) { - if(is.null(role)) { + if (is.null(role)) { stop(call. = FALSE, "You must specify a role for a personnel.") } @@ -531,12 +485,16 @@ eml_personnel <- function(role = NULL, ...) { eml_party("personnel", role = role, ...) } + #' Create an EML individualName section #' +#' Create an EML individualName section. +#' #' @param given_names (character) One or more given names. #' @param sur_name (character) A sur (last) name. #' -#' @return (individualName) The new individualName section +#' @return (individualName) The new individualName section. +#' #' @export #' #' @examples @@ -565,7 +523,10 @@ eml_individual_name <- function(given_names=NULL, sur_name) { indiv_name } -#' Create an eml-project section. + +#' Create an EML project section +#' +#' Create an EML project section. #' #' Note - studyAreaDescription, designDescription, and relatedProject are not #' fully fleshed out. Need to pass these objects in directly if you want to use @@ -574,14 +535,15 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' @param title (character) Title of the project (Required). May have multiple titles. #' @param personnelList (list of personnel) Personnel involved with the project. #' @param abstract (character) Project abstract. Can pass as a character vector -#' for separate paragraphs. +#' for separate paragraphs. #' @param funding (character) Funding sources for the project such as grant and -#' contract numbers. Can pass as a character vector for separate paragraphs. +#' contract numbers. Can pass as a character vector for separate paragraphs. #' @param studyAreaDescription (studyAreaDescription) #' @param designDescription (designDescription) #' @param relatedProject (project) #' #' @return (project) The new project section. +#' #' @export #' #' @examples @@ -610,7 +572,7 @@ eml_project <- function(title, project@title <- as(titles, "ListOftitle") # Personnel - if(!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { + if (!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { stop(call. = FALSE, "All personnel in the list must be of type 'personnel'") } @@ -618,7 +580,7 @@ eml_project <- function(title, project@personnel <- as(personnelList, "ListOfpersonnel") # Abstract - if(!is.null(abstract)) { + if (!is.null(abstract)) { abstract_paras <- lapply(abstract, function(x) { as(list(xml2::xml_new_root("para", as.character(x))), "para") }) @@ -626,7 +588,7 @@ eml_project <- function(title, } # Funding - if(!is.null(funding)) { + if (!is.null(funding)) { funding_paras <- lapply(funding, function(x) { as(list(xml2::xml_new_root("para", as.character(x))), "para") }) @@ -634,17 +596,17 @@ eml_project <- function(title, } # Study area description - if(!is.null(studyAreaDescription)) { + if (!is.null(studyAreaDescription)) { project@studyAreaDescription <- studyAreaDescription } # Design description - if(!is.null(designDescription)) { + if (!is.null(designDescription)) { project@designDescription <- designDescription } # Related Project - if(!is.null(relatedProject)) { + if (!is.null(relatedProject)) { project@relatedProject <- relatedProject } @@ -652,6 +614,23 @@ eml_project <- function(title, } +#' Create an EML geographicCoverage section +#' +#' A simple way to create an EML geographicCoverage section. +#' +#' For a bounding box, all coordinates should be unique. +#' For a single point, the North and South bounding coordinates should be the same and +#' the East and West bounding coordinates should be the same. +#' +#' @param description (character) A textual description. +#' @param north (numeric) North bounding coordinate. +#' @param east (numeric) East bounding coordinate. +#' @param south (numeric) South bounding coordinate. +#' @param west (numeric) West bounding coordinate. +#' +#' @return (geographicCoverage) The new geographicCoverage section. +#' +#' @export eml_geographic_coverage <- function(description, north, east, south, west) { cov <- new("geographicCoverage") @@ -666,14 +645,17 @@ eml_geographic_coverage <- function(description, north, east, south, west) { } -#' Create an EML address element. +#' Create an EML address element +#' +#' A simple way to create an EML address element. #' #' @param delivery_points (character) One or more delivery points. -#' @param city (character) City -#' @param administrative_area (character) Administrative area -#' @param postal_code (character) Postal code +#' @param city (character) City. +#' @param administrative_area (character) Administrative area. +#' @param postal_code (character) Postal code. #' #' @return (address) An EML address object. +#' #' @export #' #' @examples @@ -715,16 +697,18 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) } - -#' Set the abstract on an EML document +#' Set the abstract for an EML document #' -#' @param doc (eml) An EML document +#' Set the abstract for an EML document. +#' +#' @param doc (eml) An EML document. #' @param text (character) The abstract text. If \code{text} is length one, an -#' abstract without \code{<para>} or \code{section} elements will be created. -#' If \code{text} is greater than one in length, \code{para} elementes will be -#' used for each element. +#' abstract without \code{<para>} or \code{<section>} elements will be created. +#' If \code{text} is greater than one in length, \code{para} elementes will be +#' used for each element. +#' +#' @return (eml) The modified EML document. #' -#' @return (eml) The modified EML document #' @export #' #' @examples @@ -750,12 +734,15 @@ set_abstract <- function(doc, text) { } -#' Minimalistic helper function to generate EML abstracts +#' Create an EML abstract #' -#' @param text (character) Paragraphs of text, one paragraph per element in the -#' character vector +#' Create an EML abstract. +#' +#' @param text (character) Paragraphs of text with one paragraph per element in the +#' character vector. +#' +#' @return (abstract) An EML abstract. #' -#' @return (abstract) An EML abstract #' @export #' #' @examples @@ -783,16 +770,14 @@ eml_abstract <- function(text) { #' #' The attributes passed into this function are validated one-by-one and the #' progress of going through each attribute is printed to the screen along -#' with any and all validation issues. +#' with any and all validation issues. This is done by, for each attribute in the list, +#' creating a minimum valid EML document and adding a new otherEntity with a new +#' attributeList containing the single attribute to be validated. #' -#' This is done by, for each attribute in the list, creating a minimum valid -#' EML document and adding a new otherEntity with a new attributeList containing -#' the single attribute to be validated. +#' @param attributes (attributeList) An attributeList. #' -#' @param attributes (attributeList) An attributeList +#' @return (logical) Named vector indicating which attributes are valid. #' -#' @return (boolean) Named vector of TRUE/FALSE indicating which attributes -#' are valid #' @export #' #' @examples @@ -856,16 +841,19 @@ eml_validate_attributes <- function(attributes) { } -#' Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table. +#' Add new entity elements to an EML document from a table +#' +#' Add new entity elements to an EML document from a table. #' -#' @param doc (eml) An EML document +#' @param doc (eml) An EML document. #' @param entities (data.frame) A data.frame with columns type, path, pid, and -#' format_id +#' format_id. #' @param resolve_base (character) Optional. Specify a DataONE CN resolve base -#' URI which will be used for serializing download URLs into the EML. Most users +#' URI which will be used for serializing download URLs into the EML. Most users #' should not override the default value. #' #' @return (eml) The modified EML document. +#' #' @export #' #' @examples @@ -915,7 +903,7 @@ eml_add_entities <- function(doc, } # Warn about existing entities - for(type in entity_types) { + for (type in entity_types) { if (type %in% entities$type && length(slot(doc@dataset, type)) > 0) { warning(paste0("You are adding one or more ", type, " elements. This function only adds entities and does not remove/replace them.")) } @@ -996,16 +984,15 @@ eml_add_entities <- function(doc, #' Convert otherEntities to dataTables #' -#' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an -#' otherEntity objectas currently constructed - it does not add a physical or add attributes. +#' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +#' otherEntity object as currently constructed - it does not add a physical or add attributes. #' However, if these are already in their respective slots, they will be retained. #' -#' @param eml (S4) An EML S4 object +#' @param eml (S4) An EML S4 object. #' @param otherEntity (S4 / integer) Either an EML otherEntity object or the index -#' of an otherEntity within a ListOfotherEntity. Integer input is recommended. -#' @param validate_eml (logical) Optional. Specify whether or not to validate the eml after -#' completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to -#' \code{FALSE} reduces execution time by ~ 50 percent. +#' of an otherEntity within a ListOfotherEntity. Integer input is recommended. +#' @param validate_eml (logical) Optional. Whether or not to validate the EML after +#' completion. Setting this to `FALSE` reduces execution time by ~50 percent. #' #' @author Dominic Mullen dmullen17@@gmail.com #' @@ -1068,14 +1055,17 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) #' Search through EMLs #' -#' This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information. +#' This function returns indices within an EML list that contain an instance where +#' `test == TRUE`. See examples for more information. +#' +#' @param eml_list (S4/List) An EML list object. +#' @param element (character) Element to evaluate. +#' @param test (function/character) A function to evaluate (see examples). If test is a character, +#' will evaluate if \code{element == test} (see example 1). #' #' @import EML -#' @param eml_list (S4/List) an EML list object -#' @param element (character) element to evaluate -#' @param test (function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1). #' -#' @keywords eml +#' @export #' #' @author Mitchell Maier mitchell.maier@@gmail.com #' @@ -1097,8 +1087,6 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) #' n <- which_in_eml(eml@@dataset@@dataTable, "numberType", function(x) {"natural" %in% x}) #' # Answer: eml@@dataset@@dataTable[n] #' } -#' @export -#' which_in_eml <- function(eml_list, element, test) { stopifnot(isS4(eml_list)) @@ -1143,10 +1131,10 @@ which_in_eml <- function(eml_list, element, test) { #' Set a reference to an EML object #' #' This function creates a new object with the same class as \code{element_to_replace} -#' using a reference to \code{element_to_reference} +#' using a reference to \code{element_to_reference}. #' -#' @param element_to_reference (S4) An EML object to reference -#' @param element_to_replace (S4) An EML object to replace with a reference +#' @param element_to_reference (S4) An EML object to reference. +#' @param element_to_replace (S4) An EML object to replace with a reference. #' #' @author Dominic Mullen dmullen17@@gmail.com #' @@ -1154,7 +1142,6 @@ which_in_eml <- function(eml_list, element, test) { #' #' @examples #' \dontrun{ -#' #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') #' eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) @@ -1192,9 +1179,13 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' This function sets shared attributes using the attributes of the first \code{type} #' selected and creates references for all remaining objects of equivalent \code{type}. #' -#' @param eml (S4) An EML S4 object -#' @param attributeList (S4) Optional. An EML attributeList object. If not provided then it will default to the attributeList of the first \code{type} element -#' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList objects with references. Defaults to 'dataTable' +#' @param eml (eml) An EML object. +#' @param attributeList (attributeList) Optional. An EML attributeList object. If not provided +#' then it will default to the attributeList of the first \code{type} element. +#' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList +#' objects with references. Defaults to 'dataTable'. +#' +#' @return (eml) The modified EML document. #' #' @author Dominic Mullen dmullen17@@gmail.com #' @@ -1202,18 +1193,16 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' #' @examples #' \dontrun{ -#' #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') #' eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) #' atts <- EML::set_attributes(EML::get_attributes(eml@@dataset@@dataTable[[1]]@@attributeList)$attributes) #' #' eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') -#' #' } eml_set_shared_attributes <- function(eml, attributeList = NULL, type = 'dataTable') { stopifnot(methods::is(eml, 'eml')) - if(!is.null(attributeList)) { + if (!is.null(attributeList)) { stopifnot(methods::is(attributeList, 'attributeList')) } stopifnot(type %in% c('dataTable', 'otherEntity')) diff --git a/R/environment.R b/R/environment.R index 66c32aa..ece3170 100644 --- a/R/environment.R +++ b/R/environment.R @@ -1,10 +1,9 @@ -#' environment.R -#' Author: Bryce Mecum <mecum@nceas.ucsb.edu> -#' -#' Functions related to loading configuriation based upon the environment -#' the code is being run under. +# Functions related to loading configuriation based upon the environment +# the code is being run under. +#' Get the current environment name +#' #' Get the current environment name. #' #' @return (character) The environment name. @@ -20,6 +19,9 @@ env_get <- function() { env } + +#' Load environmental variables from a YAML-formatted environment file +#' #' Load environmental variables from a YAML-formatted environment file. #' #' This file should be formatted in the following way: @@ -35,11 +37,10 @@ env_get <- function() { #' #' @return (list) A list of name-value pairs. #' -#' -#' +#' @noRd env_load <- function(name=NULL, path=NULL, skip_mn=FALSE) { if (!requireNamespace("yaml")) { - stop(call. = FALSE, + stop(call. = FALSE, "The package 'yaml' must be installed to run this function. ", "Please install it and try again.") } diff --git a/R/formats.R b/R/formats.R index cc98c8d..3fa01f2 100644 --- a/R/formats.R +++ b/R/formats.R @@ -1,13 +1,17 @@ +# Functions related to data object formats + + #' Get the list of valid formats from DataONE #' #' Note that this function is intended to return even if the request to the CN #' fails. This is so other functions can call continue even if the request #' fails. #' -#' @param url (character) The listFormats endpoint. Defaults to the production -#' CN +#' @param url (character) The listFormats endpoint. Defaults to the production CN. #' -#' @return (character) +#' @return (character) A vector of formats. +#' +#' @noRd get_formats <- function(url = "https://cn.dataone.org/cn/v2/formats") { req <- httr::GET(url) @@ -29,14 +33,17 @@ get_formats <- function(url = "https://cn.dataone.org/cn/v2/formats") { "") } + #' Check that the given format is valid #' -#' Validity is determined by the given format being found in the list on -#' \url{https://cn.dataone.org/cn/v2/formats}. +#' Check that the given format is valid. Validity is determined by the given format +#' being found in the list on <https://cn.dataone.org/cn/v2/formats>. #' #' @param format (character) The format ID to check. #' #' @return (logical) Whether or not the format was valid. +#' +#' @noRd check_format <- function(format) { formats <- get_formats() @@ -50,3 +57,210 @@ check_format <- function(format) { invisible(TRUE) } + + +#' Guess format from filename +#' +#' Guess format from filename for a vector of filenames. +#' +#' @param filenames (character) A vector of filenames. +#' +#' @return (character) DataONE format IDs. +#' +#' @export +#' +#' @examples +#' formatid <- guess_format_id("temperature_data.csv") +guess_format_id <- function(filenames) { + extensions <- tolower(tools::file_ext(filenames)) + filetypes <- vector(mode = "character", length = length(extensions)) + + for (i in seq_len(length(extensions))) { + extension <- extensions[i] + + if (extension %in% names(dataone_format_mappings)) { + filetypes[i] <- dataone_format_mappings[extension][[1]] + } else { + filetypes[i] <- "application/octet-stream" + } + } + + filetypes +} + + +# List of DataONE formats used in guess_format_id() +dataone_format_mappings <- list("avi" = "video/avi", + "bmp" = "image/bmp", + "bz2" = "application/x-bzip2", + "csv" = "text/csv", + "doc" = "application/msword", + "docx" = "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "fasta" = "application/x-fasta", + "gif" = "image/gif", + "gz" = "application/x-gzip", + "html" = "text/html", + "ipynb" = "application/json", + "jp2" = "image/jp2", + "jpg" = "image/jpeg", + "jpeg" = "image/jpeg", + "kml" = "application/vnd.google-earth.kml/xml", + "kmz" = "application/vnd.google-earth.kmz", + "md" = "text/markdown", + "mov" = "video/quicktime", + "mp3" = "audio/mpeg", + "mp4" = "video/mp4", + "mpg" = "video/mpeg", + "mpeg" = "video/mpeg", + "n3" = "text/n3", + "nc" = "netCDF-3", + "pdf" = "application/pdf", + "png" = "image/png", + "ppt" = "application/vnd.ms-powerpoint", + "pptx" = "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "py" = "application/x-python", + "qt" = "video/quicktime", + "r" = "application/R", + "rar" = "application/x-rar-compressed", + "rdf" = "application/rdf/xml", + "rmd" = "text/x-rmarkdown", + "sas" = "application/SAS", + "svg" = "image/svg/xml", + "tar" = "application/x-tar", + "tif" = "image/tiff", + "tiff" = "image/tiff", + "ttl" = "text/turtle", + "tsv" = "text/tsv", + "txt" = "text/plain", + "wav" = "audio/x-wav", + "wma" = "audio/x-ms-wma", + "wmv" = "video/x-ms-wmv", + "xls" = "application/vnd.ms-excel", + "xlsx" = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xml" = "application/xml", + "zip" = "application/zip") + + +#' Determine the format ID for a NetCDF file +#' +#' Determine the DataONE format ID for a NetCDF file provided by path. +#' +#' @param path (character) Full or relative path to the file in question. +#' +#' @return (character) The DataONE format ID. +#' +#' @noRd +get_netcdf_format_id <- function(path) { + stopifnot(is.character(path), + nchar(path) > 0, + file.exists(path)) + + if (!requireNamespace("ncdf4")) { + stop(call. = FALSE, + "The package 'ncdf4' must be installed to run this function. ", + "Please install it and try again.") + } + + # Try to open the file, capturing errors + cdf_file <- try({ + ncdf4::nc_open(path) + }) + + # If we failed to open the file, we can assume it's not a valid NetCDF file + # and we just return application/octet-stream as the format ID + if (inherits(cdf_file, "try-error")) { + return("application/octet-stream") + } + + # Since we got this far, continue detecting the format + stopifnot("format" %in% names(cdf_file)) + format_string <- cdf_file$format + stopifnot(is.character(format_string), + nchar(format_string) > 0) + format_id = "" + + if (format_string == "NC_FORMAT_CLASSIC") { + format_id = "netCDF-3" + } else if (format_string == "NC_FORMAT_NETCDF4") { + format_id = "netCDF-4" + } else { + stop("Unknown NetCDF format discovered.") + } + + return(format_id) +} + + +#' Test whether an object has a particular format ID +#' +#' Test whether an object has a particular format ID. +#' +#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. +#' @param pids (character) The PID(s) for objects. +#' @param format_id (character) The format IDs. +#' +#' @return (logical) +#' +#' @noRd +is_format_id <- function(node, pids, format_id) { + stopifnot(class(node) %in% c("MNode", "CNode")) + stopifnot(all(is.character(pids)), + all(lengths(pids) > 0)) + stopifnot(is.character(format_id), + nchar(format_id) > 0) + + result <- vector("logical", length(pids)) + + for (i in seq_along(pids)) { + result[i] <- dataone::getSystemMetadata(node, pids[i])@formatId == format_id + } + + result +} + + +# The following are a set of thin functions which return the DataONE format ID string. +# These are to aid in filling in function arguments and can't remember or don't want to +# type in the full format ID. By putting these format ID strings into +# functions, a user's autocompletion routine in their editor can help them +# fill in the format ID they want. + + +#' Generate the ISO 19139 format ID +#' +#' Returns the ISO 19139 format ID. +#' +#' @return (character) The format ID for ISO 19139. +#' +#' @export +#' +#' @examples +#' format_iso() +#' \dontrun{ +#' # Upload a local ISO19139 XML file: +#' env <- env_load() +#' publish_object(env$mn, "path_to_some_EML_file", format_iso()) +#' } +format_iso <- function() { + "http://www.isotc211.org/2005/gmd" +} + + +#' Generate the EML 2.1.1 format ID +#' +#' Returns the EML 2.1.1 format ID. +#' +#' @return (character) The format ID for EML 2.1.1. +#' +#' @export +#' +#' @examples +#' format_eml() +#' \dontrun{ +#' # Upload a local EML 2.1.1 file: +#' env <- env_load() +#' publish_object(env$mn, "path_to_some_EML_file", format_eml()) +#' } +format_eml <- function() { + "eml://ecoinformatics.org/eml-2.1.1" +} diff --git a/R/helpers.R b/R/helpers.R index 3f53f86..2b56eef 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,14 +1,17 @@ -#' helpers.R -#' -#' Various helper functions for things like testing the package. +# Various helper functions for things like testing a package -#' Create a test metadata object. +#' Create a test metadata object +#' +#' Create a test EML metadata object. #' #' @param mn (MNode) The Member Node. #' @param data_pids (character) Optional. PIDs for data objects the metadata documents. -#' @return pid (character) PID of published metadata document. +#' +#' @return (character) PID of published metadata document. +#' #' @export +#' #' @examples #'\dontrun{ #' # Set environment @@ -59,11 +62,14 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { } -#' Create a test object. +#' Create a test object +#' +#' Create a test data object. #' #' @param mn (MNode) The Member Node. #' -#' @return pid (character) The pid of the dummy object. +#' @return (character) The PID of the dummy object. +#' #' @export #' #' @examples @@ -113,12 +119,15 @@ create_dummy_object <- function(mn) { } -#' Create a test package. +#' Create a test package +#' +#' Create a test data package. #' #' @param mn (MNode) The Member Node. #' @param size (numeric) The number of files in the package, including the metadata file. #' -#' @return pids (character) A named character vector of the data pids in the package. +#' @return (character) A named character vector of the data PIDs in the package. +#' #' @export #' #' @examples @@ -184,12 +193,15 @@ create_dummy_package <- function(mn, size = 2) { } -#' Create a test parent package. +#' Create a test parent package +#' +#' Create a test parent data package. #' #' @param mn (MNode) The Member Node. #' @param children (character) Child package (resource maps) PIDs. #' -#' @return pid (character) Named character vector of PIDs including parent package and child package pids. +#' @return pid (character) A named character vector of PIDs, including parent package and child package PIDs. +#' #' @export #' #' @examples @@ -243,12 +255,15 @@ create_dummy_parent_package <- function(mn, children) { } -#' Create dummy attributes data frame +#' Create test attributes data.frame +#' +#' Create a test data.frame of attributes. #' -#' @param numberAttributes (integer) Number of attributes to be created in the table +#' @param numberAttributes (integer) Number of attributes to be created in the table. #' @param factors (character) Optional vector of factor names to include. #' -#' @return (data.frame) Data frame of attributes +#' @return (data.frame) A data.frame of attributes. +#' #' @export #' #' @examples @@ -281,11 +296,14 @@ create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) } -#' Create dummy enumeratedDomain data frame +#' Create test enumeratedDomain data.frame +#' +#' Create a test data.frame of enumeratedDomains. #' #' @param factors (character) Vector of factor names to include. #' -#' @return (data.frame) Data frame of factors +#' @return (data.frame) A data.frame of factors. +#' #' @export #' #' @examples @@ -305,7 +323,7 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { #' Create dummy package with fuller metadata #' -#' Creates a fuller package than \code{\link{create_dummy_package}} +#' Creates a fuller package than [create_dummy_package()] #' but is otherwise based on the same concept. This dummy #' package includes multiple data objects, responsible parties, #' geographic locations, method steps, etc. @@ -313,6 +331,8 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { #' @param mn (MNode) The Member Node. #' @param title (character) Optional. Title of package. Defaults to "A Dummy Package". #' +#' @return (list) A list of package PIDs, inluding for the resource map, metadata, and data objects. +#' #' @import EML #' @import dataone #' @@ -411,146 +431,3 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { metadata = pid_eml, data = data_pids)) } - - -#' Get system metadata for all elements of a data package -#' -#' This function retrieves the system metadata for all elements of a data package and returns them as a list. -#' It is useful for inspecting system metadata for an entire data package and identifying changes where needed. -#' -#' @param mn (MNode) The Member Node to query. -#' @param resource_map_pid (character) The PID for a resource map. -#' @param nmax (numeric) The maximum number of system metadata objects to return. -#' @param child_packages (logical) If parent package, whether or not to include child packages. -#' -#' @return (list) A list of system metadata objects. -#' -#' @import dataone -#' @importFrom methods is -#' @importFrom methods new -#' -#' @export -#' -#' @examples -#'\dontrun{ -#' cn_staging <- CNode("STAGING") -#' adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") -#' -#' rm_pid <- "resource_map_urn:uuid:..." -#' -#' all <- get_all_sysmeta(adc_test, rm_pid) -#' -#' # View in viewer to inspect -#' View(all) -#' -#' # Print specific elements to console -#' all[[1]]@rightsHolder -#' -#' # Create separate object -#' sysmeta_md <- all[[2]] -#' } -get_all_sysmeta <- function(mn, resource_map_pid, nmax = 1000, child_packages = FALSE) { - stopifnot(methods::is(mn, "MNode")) - stopifnot(is.character(resource_map_pid), nchar(resource_map_pid) > 0, length(resource_map_pid) == 1) - stopifnot(is_resource_map(mn, resource_map_pid)) - stopifnot(is.numeric(nmax), length(nmax) == 1 , nmax >= 0) - stopifnot(is.logical(child_packages), length(child_packages) == 1) - - query_params <- paste("identifier:", resource_map_pid, "+OR+resourceMap:", resource_map_pid, "", sep = "\"") - response <- dataone::query(mn, list(q = query_params, rows = as.character(nmax))) - - if (length(response) == 0) { - stop(paste0("No results were found when searching for a package with resource map '", resource_map_pid, - "'.\nThis could be caused by not having appropriate access to read the resource map.")) - } - - if (length(response) == nmax) { - warning(paste("Query returned the maximum number of objects. It is possible there are more to retrieve.", - "\nSpecify a larger number of objects with the 'nmax' argument.")) - } - - # Check if child package - if (response[[1]]$formatType == "RESOURCE" && !is.null(response[[1]]$resourceMap)) { - message("The data package with this resource map is a child package.") - } - # Check if parent package - if (any(unlist(lapply(response[2:length(response)], function(x) ifelse(x$formatType == "RESOURCE", TRUE, FALSE))))) { - message("The data package with this resource map is a parent package.") - if (child_packages == TRUE) { - children <- Filter(function(x) x$formatType == "RESOURCE", response[2:length(response)]) - children2 <- vector("list", length(children)) - for (i in seq_along(children)) { - child_resource_map_pid <- children[[i]]$identifier - query_params2 <- paste("identifier:", child_resource_map_pid, "+OR+resourceMap:", child_resource_map_pid, "", sep = "\"") - children2[[i]] <- dataone::query(mn, list(q = query_params2, rows = as.character(nmax))) - } - } - } - - # Translate fields from Solr query to formal class SystemMetadata - translate <- function(x) { - sysmeta <- methods::new("SystemMetadata") - - sysmeta@serialVersion <- sysmeta@serialVersion - sysmeta@identifier <- if (is.null(x$identifier)) {sysmeta@identifier} else {x$identifier} - sysmeta@formatId <- if (is.null(x$formatId)) {sysmeta@formatId} else {x$formatId} - sysmeta@size <- if (is.null(x$size)) {sysmeta@size} else {x$size} - sysmeta@checksum <- if (is.null(x$checksum)) {sysmeta@checksum} else {x$checksum} - sysmeta@checksumAlgorithm <- if (is.null(x$checksumAlgorithm)) {sysmeta@checksumAlgorithm} else {x$checksumAlgorithm} - sysmeta@submitter <- if (is.null(x$submitter)) {sysmeta@submitter} else {x$submitter} - sysmeta@rightsHolder <- if (is.null(x$rightsHolder)) {sysmeta@rightsHolder} else {x$rightsHolder} - read <- if (is.null(x$readPermission)) {} else {data.frame(subject = unlist(x$readPermission), - permission = "read")} - write <- if (is.null(x$writePermission)) {} else {data.frame(subject = unlist(x$writePermission), - permission = "write")} - change <- if (is.null(x$changePermission)) {} else {data.frame(subject = unlist(x$changePermission), - permission = "changePermission")} - sysmeta@accessPolicy <- rbind(read, write, change) - sysmeta@replicationAllowed <- if (is.null(x$replicationAllowed)) {sysmeta@replicationAllowed} else {x$replicationAllowed} - sysmeta@numberReplicas <- if (is.null(x$numberReplicas)) {sysmeta@numberReplicas} else {x$numberReplicas} - sysmeta@preferredNodes <- if (is.null(x$preferredReplicationMN)) {sysmeta@preferredNodes} else {x$preferredReplicationMN} - sysmeta@blockedNodes <- if (is.null(x$blockedReplicationMN)) {sysmeta@blockedNodes} else {x$blockedReplicationMN} - sysmeta@obsoletes <- if (is.null(x$obsoletes)) {sysmeta@obsoletes} else {x$obsoletes} - sysmeta@obsoletedBy <- if (is.null(x$obsoletedBy)) {sysmeta@obsoletedBy} else {x$obsoletedBy} - sysmeta@archived <- sysmeta@archived - sysmeta@dateUploaded <- if (is.null(x$dateUploaded)) {sysmeta@dateUploaded} else {as.character(x$dateUploaded)} - sysmeta@dateSysMetadataModified <- if (is.null(x$dateModified)) {sysmeta@dateSysMetadataModified} else {as.character(x$dateModified)} - sysmeta@originMemberNode <- if (is.null(x$datasource)) {sysmeta@originMemberNode} else {x$datasource} - sysmeta@authoritativeMemberNode <- if (is.null(x$authoritativeMN)) {sysmeta@authoritativeMemberNode} else {x$authoritativeMN} - sysmeta@seriesId <- if (is.null(x$seriesId)) {sysmeta@seriesId} else {x$seriesId} - sysmeta@mediaType <- if (is.null(x$mediaType)) {sysmeta@mediaType} else {x$mediaType} - sysmeta@fileName <- if (is.null(x$fileName)) {sysmeta@fileName} else {x$fileName} - sysmeta@mediaTypeProperty <- if (is.null(x$mediaTypeProperty)) {sysmeta@mediaTypeProperty} else {x$mediaTypeProperty} - - return(sysmeta) - } - - if (child_packages) { - other <- Filter(function(x) x$formatType != "RESOURCE", response[2:length(response)]) - response2 <- c(list(response[[1]]), other) - parent <- lapply(response2, translate) - names(parent) <- unlist(lapply(parent, function(x) {x@fileName})) - for (i in seq_along(parent)) { - if (is.na(names(parent)[i])) {names(parent)[i] <- paste0("missing_fileName", i)} - } - - child <- lapply(children2, function(x) {lapply(x, translate)}) - for (i in seq_along(child)) { - names(child[[i]]) <- unlist(lapply(child[[i]], function(x) {x@fileName})) - for (j in seq_along(child[[i]])) { - if (is.na(names(child[[i]])[j])) {names(child[[i]])[j] <- paste0("missing_fileName", j)} - } - } - names(child) <- paste0("child", seq_along(child)) - - all <- c(parent, child) - } else { - all <- lapply(response, translate) - names(all) <- unlist(lapply(all, function(x) {x@fileName})) - for (i in seq_along(all)) { - if (is.na(names(all)[i])) {names(all)[i] <- paste0("missing_fileName", i)} - } - } - - return(all) -} diff --git a/R/inserting.R b/R/inserting.R index 7d486b8..ba9fd43 100644 --- a/R/inserting.R +++ b/R/inserting.R @@ -1,9 +1,7 @@ -#' inserting.R -#' -#' A set of utilities for inserting packages from files and folders on disk. +# A set of utilities for inserting packages from files and folders on disk -#' Create a package from a folder containing an ISO package (legacy) +#' Create a package from a folder containing an ISO package #' #' This function handles the process of inserting the original ISO package #' and updating it with an EML package. @@ -12,9 +10,12 @@ #' #' @param mn (MNode) The Member Node to create the packages on. #' @param path (character) The path to the folder containing the files. -#' @param data_pids (character) Optional. Manually specify the PIDs of data. This is useful if data were inserted outside this function and you want to re-use those objects. +#' @param data_pids (character) Optional. Manually specify the PIDs of data objects. +#' This is useful if data were inserted outside this function and you want to re-use those objects. +#' +#' @return (list) A list of the PIDs created. #' -#' @return (list) All of the PIDs created. +#' @noRd create_from_folder <- function(mn, path, data_pids=NULL) { # Validate args stopifnot(file.exists(path)) diff --git a/R/interactive.R b/R/interactive.R index 828d651..a780cac 100644 --- a/R/interactive.R +++ b/R/interactive.R @@ -1,11 +1,11 @@ -#' interactive.R -#' Author: Bryce Mecum <mecum@nceas.ucsb.edu> -#' +# Functions for interactive viewing of the inventory and other objects + + +#' View packages #' -#' Functions for interactive viewing of the Inventory and other objects. +#' @param inventory (character) An inventory. #' -#' @param inventory (character) An inventory - +#' @noRd view_packages <- function(inventory) { stopifnot(is.data.frame(inventory), nrow(inventory) > 0) @@ -28,6 +28,7 @@ view_packages <- function(inventory) { } +# Helper function for view_packages() wait_for_key <- function() { response <- readline(prompt = "Press [S]top or [C]ontinue") response <- tolower(response) @@ -35,6 +36,7 @@ wait_for_key <- function() { } +# Helper function for view_packages() show_package <- function(inventory, package) { cat(paste0("Package: ", package, "\n")) diff --git a/R/inventory.R b/R/inventory.R index da090d4..00d7622 100644 --- a/R/inventory.R +++ b/R/inventory.R @@ -1,23 +1,21 @@ -#' inventory.R -#' Author: Bryce Mecum <mecum@nceas.ucsb.edu> -#' -#' Functions relating to keeping up an inventory of files that exist on the KNB -#' and may or may not be copied to another computer and untarred. -#' +# Functions relating to keeping up an inventory of files that exist on the KNB +# and may or may not be copied to another computer and untarred -#' Create an empty inventory data.frame. This doesn't need to be a function -#' but I'm making it one in case the initialization routine becomes more -#' complicated. +#' Create an empty inventory data.frame #' -#' @return An empty data frame - +#' @return (data.frame) An empty data.frame. +#' +#' @noRd inv_init <- function() { inventory <- data.frame(stringsAsFactors = FALSE) inventory } + +#' Load files into the inventory from a text file +#' #' Load files into the inventory from a text file. #' #' Files should be the output of the command: @@ -25,11 +23,12 @@ inv_init <- function() { #' you@server:/path/to/acadis$ find . -type f #' #' @param path (character) Path to a file containing a file listing. -#' @param inventory (character) A \code{data.frame}. -#' @param filter (logical) Filter out versioned datasets. Default is TRUE. +#' @param inventory (character) A data.frame. +#' @param filter (logical) Whether or not to filter out versioned datasets. #' -#' @return An inventory (data.frame) - +#' @return (data.frame) An inventory. +#' +#' @noRd inv_load_files <- function(inventory, path, filter=TRUE) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -55,7 +54,7 @@ inv_load_files <- function(inventory, path, filter=TRUE) { # Filter out versioned datasets if (filter) { size_before <- nrow(files) - files <- files[grep("v_\\d\\.\\d", files$file, invert = TRUE), "file", drop=FALSE] + files <- files[grep("v_\\d\\.\\d", files$file, invert = TRUE), "file", drop = FALSE] size_diff <- size_before - nrow(files) if (size_diff > 0) { cat("Removed", size_diff, "file(s) that were part of versioned datasets.\n") } @@ -95,15 +94,17 @@ inv_load_files <- function(inventory, path, filter=TRUE) { inventory } -#' Load file sizes into an inventory from a text file. Removes the column -#' 'size_bytes' from inventory before doing a left join. + +#' Load file sizes into an inventory from a text file +#' +#' Removes the column 'size_bytes' from inventory before doing a left join. #' #' @param path (character) Path to a file containing sizes. -#' @param inventory (data.frame) inventory A \code{data.frame}. +#' @param inventory (data.frame) A data.frame. #' -#' @return (data.frame) An inventory +#' @return (data.frame) An inventory. #' - +#' @noRd inv_load_sizes <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -134,16 +135,18 @@ inv_load_sizes <- function(inventory, path) { inventory } -#' Load checksums into the inventory file from a text file. This function -#' removes the column 'checksum_sha256' from inventory before doing a + +#' Load checksums into the inventory file from a text file +#' +#' This function removes the column 'checksum_sha256' from inventory before doing a #' left join. #' #' @param path (character) Path to a file containing sizes. #' @param inventory (data.frame) An inventory. #' -#' @return An inventory (data.frame) +#' @return (data.frame) An inventory. #' - +#' @noRd inv_load_checksums <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -182,14 +185,16 @@ inv_load_checksums <- function(inventory, path) { } -#' Load DOIs from a text file into the Inventory. +#' Load DOIs from a text file into the inventory #' -#' @param path Location of a text file with DOIs and file paths. (character) +#' Load DOIs from a text file into the inventory. +#' +#' @param path (character) Location of a text file with DOIs and file paths. #' @param inventory (data.frame) An inventory. #' -#' @return (data.frame) The modified Inventory. +#' @return (data.frame) The modified inventory. #' - +#' @noRd inv_load_dois <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot(is.data.frame(inventory), @@ -213,8 +218,10 @@ inv_load_dois <- function(inventory, path) { inventory } -#' Load identifiers into the inventory file(s) from a text file. This function -#' removes the column 'identifier' from inventory before doing a + +#' Load identifiers into the inventory file(s) from a text file +#' +#' This function removes the column 'identifier' from inventory before doing a #' left join. #' #' @param paths (character) Path(s) to files containing identifiers. @@ -222,7 +229,7 @@ inv_load_dois <- function(inventory, path) { #' #' @return (data.frame) An inventory. #' - +#' @noRd inv_load_identifiers <- function(inventory, paths) { stopifnot(file.exists(path)) stopifnot(is.data.frame(inventory), @@ -252,14 +259,17 @@ inv_load_identifiers <- function(inventory, paths) { inventory } -#' Adds a set of extra columsn to the inventory that are useful for working + +#' Add a set of extra columns to the inventory +#' +#' Add a set of extra columns to the inventory that are useful for working #' with them. #' #' @param inventory (data.frame) An inventory. #' -#' @return An inventory (data.frame) +#' @return (data.frame) An inventory. #' - +#' @noRd inv_add_extra_columns <- function(inventory) { stopifnot(is(inventory, "data.frame"), "file" %in% names(inventory)) @@ -345,15 +355,13 @@ inv_add_extra_columns <- function(inventory) { } - - - -#' Add a column for parent packages. +#' Add a column for parent packages #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' -#' @return inventory (data.frame) An Inventory. - +#' @return (data.frame) An inventory. +#' +#' @noRd inv_add_parent_package_column <- function(inventory) { stopifnot(all(c("file", "package", "is_metadata", "depth") %in% names(inventory))) @@ -417,13 +425,12 @@ inv_add_parent_package_column <- function(inventory) { } - -#' Update an Inventory with a new Inventory. +#' Update an inventory with a new inventory #' -#' @param inventory (data.frame) The old Inventory. -#' @param new_state (data.frame) The new Inventory. +#' @param inventory (data.frame) The old inventory. +#' @param new_state (data.frame) The new inventory. #' - +#' @noRd inv_update <- function(inventory, new_state) { stopifnot(is.data.frame(inventory), is.data.frame(new_state), @@ -451,5 +458,3 @@ inv_update <- function(inventory, new_state) { inventory } - - diff --git a/R/marking.R b/R/marking.R index c67670b..fd7db07 100644 --- a/R/marking.R +++ b/R/marking.R @@ -1,11 +1,9 @@ -#' marking.R -#' Author: Bryce Mecum <mecum@nceas.ucsb.edu> -#' -#' R commands for marking datasets before adding. +# Functions for marking datasets before adding -#' Divide packages and their files into themes. +#' Divide packages and their files into themes #' +#' @description #' Themes divide packages into groups based upon how the actions we will take #' to insert them. Packages are divided into one of three themes: #' @@ -25,17 +23,18 @@ #' All other packages not in the above themes. #' #' Note: Adds a 'theme' column to 'inventory'. -#' Note: Depeneds on the following columns: +#' Note: Depends on the following columns: #' #' - filename #' - package_nfiles #' #' -#' @param inventory (data.frame) An Inventory. -#' @param nfiles_cutoff (integer) Number of cutoff files +#' @param inventory (data.frame) An inventory. +#' @param nfiles_cutoff (integer) Number of cutoff files. #' -#' @return (data.frame) An Inventory. - +#' @return (data.frame) An inventory. +#' +#' @noRd theme_packages <- function(inventory, nfiles_cutoff=100) { stopifnot(is.data.frame(inventory), "package_nfiles" %in% names(inventory)) @@ -76,4 +75,3 @@ theme_packages <- function(inventory, nfiles_cutoff=100) { inventory } - diff --git a/R/modify_metadata.R b/R/metadata.R similarity index 88% rename from R/modify_metadata.R rename to R/metadata.R index 51c011e..7b87fdb 100644 --- a/R/modify_metadata.R +++ b/R/metadata.R @@ -1,27 +1,23 @@ -#' modify_metadata.R -#' Author: Bryce Mecum <mecum@nceas.ucsb.edu> -#' -#' Functions related to fixing invalid ISO metadata. -#' -#' Some functions just test whether a validation issue is present. These are -#' prefixed with the text "test". Exactly what they are testing should be -#' described in the docstrings. -#' -#' Other functons fix the bad metadata in place (modifying the original file) -#' and these functions are prefixed with "fix_". Exactly what they are fixing -#' should be described in the docstrings. -#' -#' Example usage: -#' -#' # Find and fix documents in 'mydir' that have extra whitespace in their -#' # topicCategory element(s) -#' -#' the_files <- dir(mydir) -#' bad_enums <- the_files[which(sapply(the_files, test_has_bad_enum))] -# sapply(bad_enums, fix_bad_enums) +# Functions related to fixing invalid ISO metadata + +# Some functions just test whether a validation issue is present. These are +# prefixed with the text "test". Exactly what they are testing should be +# described in the docstrings. +# +# Other functons fix the bad metadata in place (modifying the original file) +# and these functions are prefixed with "fix_". Exactly what they are fixing +# should be described in the docstrings. +# +# Example usage: +# +# Find and fix documents in 'mydir' that have extra whitespace in their +# topicCategory element(s) +# +# the_files <- dir(mydir) +# bad_enums <- the_files[which(sapply(the_files, test_has_bad_enum))] +# sapply(bad_enums, fix_bad_enums) -#' @param path (character) a path test_has_abstract <- function(path) { stopifnot(file.exists(path)) @@ -134,7 +130,7 @@ test_has_bad_enum <- function(path) { } -#' Fix a metadata record with a bad topicCategory. +#' Fix a metadata record with a bad topicCategory #' #' This is the case where the ISO schema says what's inside a #' gmd:MD_TopicCategoryCode element should match items from a controlled @@ -143,9 +139,9 @@ test_has_bad_enum <- function(path) { #' #' 'oceans' != ' oceans ' #' -#' @param path (character) a path +#' @param path (character) A file path. #' - +#' @noRd fix_bad_enum <- function(path) { stopifnot(file.exists(path)) @@ -184,9 +180,9 @@ fix_bad_enum <- function(path) { #' <gmd:MD_TopicCategoryCode>oceans</gmd:MD_TopicCategoryCode> #' </gmd:topicCategory> #' -#' @param path (character) Path +#' @param path (character) A file path. #' - +#' @noRd fix_bad_topic <- function(path) { stopifnot(file.exists(path)) @@ -276,7 +272,7 @@ fix_bad_topic <- function(path) { } -#' Uses XMLStarlet to pretty-print/beautify an XML document. +#' Use XMLStarlet to pretty-print an XML document #' #' This command just runs `xmlstarlet path > path`, doing a simple #' pretty-printing of the file located at `path`. @@ -291,12 +287,11 @@ fix_bad_topic <- function(path) { #' format` on the same file as you redirect to, you get a weird parse error from #' xmlstarlet. #' +#' @param path (character) A file path. #' -#' @param path Path to your file you want pretty-printed. (character) +#' @return The result of the `system` command (0 = success). #' -#' @return Returns the result of the `system` command (0 = success) -#' - +#' @noRd pretty_print <- function(path) { stopifnot(file.exists(path), file.info(path)$size > 0) diff --git a/R/packaging.R b/R/packaging.R index ee44a55..e6554ac 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -1,17 +1,13 @@ -#' package.R -#' Author: Bryce Mecum <mecum@nceas.ucsb.edu> -#' -#' Code related to inserting datasets as Data Packages. - +# Functions for inserting datasets as data packages -#' Insert a file from a single row of the Inventory. +#' Insert a file from a single row of the inventory #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' @param file (character) The fully-qualified relative path to the file. See examples. #' @param env (list) Optional. Specify an environment. #' - +#' @noRd insert_file <- function(inventory, file, env=NULL) { validate_inventory(inventory) stopifnot(is.character(file), nchar(file) > 0, file %in% inventory$file) @@ -79,15 +75,15 @@ insert_file <- function(inventory, file, env=NULL) { } -#' Create a single package Data Package from files in the Inventory. +#' Create a single data package from files in the inventory #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' @param package (character) The package identifier. #' @param env (list) Environment variables. #' -#' @return A list containing PIDs and whether objects were inserted. (list) +#' @return (list) A list containing PIDs and whether objects were inserted. #' - +#' @noRd insert_package <- function(inventory, package, env=NULL) { validate_inventory(inventory) stopifnot(is.character(package), nchar(package) > 0, package %in% inventory$package) @@ -295,19 +291,22 @@ insert_package <- function(inventory, package, env=NULL) { return(files) } -#' Create a resource map RDF/XML file and save is to a temporary path. + +#' Create a resource map RDF/XML file and save is to a temporary path +#' #' This is a convenience wrapper around the constructor of the `ResourceMap` #' class from `DataPackage`. #' -#' @param metadata_pid (character) PID of the metadata Object. -#' @param data_pids (character) PID(s) of the data Objects. -#' @param child_pids (character) Optional. PID(s) of child Resource Maps. -#' @param other_statements (data.frame) Extra statements to add to the Resource Map. -#' @param resource_map_pid (character) PID of resource map. +#' @param metadata_pid (character) PID of the metadata object. +#' @param data_pids (character) PID(s) of the data objects. +#' @param child_pids (character) Optional. PID(s) of child resource maps. +#' @param other_statements (data.frame) Extra statements to add to the resource map. +#' @param resource_map_pid (character) The PID of a resource map. #' @param resolve_base (character) Optional. The resolve service base URL. #' -#' @return Absolute path to the Resource Map on disk (character) +#' @return (character) Absolute path to the resource map on disk. #' +#' @export #' #' @examples #' \dontrun{ @@ -317,11 +316,11 @@ insert_package <- function(inventory, package, env=NULL) { #' object="http://example.com/bar")) #' } generate_resource_map <- function(metadata_pid, - data_pids=NULL, - child_pids=NULL, - other_statements=NULL, - resolve_base="https://cn.dataone.org/cn/v2/resolve", - resource_map_pid=NULL) { + data_pids = NULL, + child_pids = NULL, + other_statements = NULL, + resolve_base = "https://cn.dataone.org/cn/v2/resolve", + resource_map_pid = NULL) { stopifnot(length(metadata_pid) == 1) # Generate a PID if needed @@ -437,7 +436,7 @@ generate_resource_map <- function(metadata_pid, } if (length(setdiff(names(relationships), names(other_statements))) != 0) { - warning("The column names of the relationships and other_statements data frames do not match: ", paste(names(relationships), collapse =", "), " vs. ", paste(names(other_statements), collapse = ", "), ".") + warning("The column names of the relationships and other_statements data frames do not match: ", paste(names(relationships), collapse = ", "), " vs. ", paste(names(other_statements), collapse = ", "), ".") } message("Adding ", nrow(other_statements), " custom statement(s) to the Resource Map.") @@ -481,11 +480,12 @@ generate_resource_map <- function(metadata_pid, outfilepath } -#' Generate a PID for a new resource map by appending "resource_map_" to it. + +#' Generate a PID for a new resource map by appending "resource_map_" to it #' -#' @param metadata_pid (character) A metadata pid +#' @param metadata_pid (character) A metadata PID. #' - +#' @noRd generate_resource_map_pid <- function(metadata_pid) { stopifnot(is.character(metadata_pid), nchar(metadata_pid) > 0) @@ -497,15 +497,17 @@ generate_resource_map_pid <- function(metadata_pid) { paste0("resource_map_", metadata_pid) } -#' Get the already-minted PID from the inventory or mint a new one. + +#' Get the already-minted PID from the inventory or mint a new one #' #' @param file (data.frame) A single row from the inventory. #' @param mn (MNode) The Member Node that will mint the new PID, if needed. #' @param scheme (character) The identifier scheme to use. #' -#' @return The identifier (character) - -get_or_create_pid <- function(file, mn, scheme="UUID") { +#' @return (character) The PID. +#' +#' @noRd +get_or_create_pid <- function(file, mn, scheme = "UUID") { stopifnot(is.data.frame(file), nrow(file) == 1, "pid" %in% names(file)) @@ -546,19 +548,21 @@ get_or_create_pid <- function(file, mn, scheme="UUID") { pid } -#' Create a sysmeta object. + +#' Create a sysmeta object #' #' This is a wrapper function around the constructor for a -#' dataone::SystemMetadata object. +#' SystemMetadata object. #' #' @param file (data.frame) A single row from the inventory. -#' @param base_path (character) The path prefix to use with the contents of `file[1,"filename]` that -#' will be used to locate the file on disk. +#' @param base_path (character) The path prefix to use with the contents of `file[1,"filename"]` that +#' will be used to locate the file on disk. #' @param submitter (character) The submitter DN string for the object. #' @param rights_holder (character) The rights holder DN string for the object. #' -#' @return The sysmeta object (dataone::SystemMetadata) - +#' @return (SystemMetadata) The sysmeta object. +#' +#' @noRd create_sysmeta <- function(file, base_path, submitter, rights_holder) { stopifnot(is.data.frame(file), nrow(file) == 1) @@ -619,15 +623,15 @@ create_sysmeta <- function(file, base_path, submitter, rights_holder) { } -#' Create an object from a row of the inventory. +#' Create an object from a row of the inventory #' -#' @param file (data.frame)A row from the inventory. +#' @param file (data.frame) A row from the inventory. #' @param sysmeta (SystemMetadata) The file's sysmeta. -#' @param base_path (character) Base path, to be appended to the \code{file} -#' column to find the file to upload. +#' @param base_path (character) Base path, to be appended to the 'file' +#' column to find the file to upload. #' @param env (list) An environment. #' - +#' @noRd create_object <- function(file, sysmeta, base_path, env) { stopifnot(is.data.frame(file), nrow(file) == 1, @@ -692,11 +696,11 @@ create_object <- function(file, sysmeta, base_path, env) { } -#' Validate an Inventory. +#' Validate an inventory #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' - +#' @noRd validate_inventory <- function(inventory) { stopifnot(is.data.frame(inventory), nrow(inventory) > 0, @@ -711,11 +715,12 @@ validate_inventory <- function(inventory) { "ready") %in% names(inventory))) } -#' Validate an environment. + +#' Validate an environment #' -#' @param env (character) An environment +#' @param env (character) An environment. #' - +#' @noRd validate_environment <- function(env) { env_default_components <- c("base_path", "alternate_path", @@ -734,11 +739,12 @@ validate_environment <- function(env) { } -#' Calculate a set of child PIDs for a given package. +#' Calculate a set of child PIDs for the given package #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' @param package (character) The package identifier. - +#' +#' @noRd determine_child_pids <- function(inventory, package) { stopifnot(all(c("package", "parent_package", "is_metadata") %in% names(inventory))) @@ -757,8 +763,9 @@ determine_child_pids <- function(inventory, package) { } -#' Update a package with modified metadata. +#' Update a package with modified metadata #' +#' @description #' The modified metadata should be set in the `env` variable. For example, if #' your original metadata is: #' @@ -782,10 +789,11 @@ determine_child_pids <- function(inventory, package) { #' #' @param inventory (data.frame) An inventory. #' @param package (character) The package identifier. -#' @param env (character) Environment +#' @param env (character) Environment. #' -#' @return TRUE or FALSE depending on sucess (logical) - +#' @return (logical) +#' +#' @noRd update_package <- function(inventory, package, env = NULL) { @@ -1004,11 +1012,14 @@ update_package <- function(inventory, } -#' Parse a Resource Map into a data.frame +#' Parse a resource map into a data.frame +#' +#' Parse a resource map into a data.frame. #' -#' @param path (character) Path to the resource map (an RDF/XML file) +#' @param path (character) Path to the resource map (an RDF/XML file). +#' +#' @return (data.frame) The statements in the resource map. #' -#' @return (data.frame) The statements in the Resource Map #' @export #' #' @examples @@ -1034,17 +1045,18 @@ parse_resource_map <- function(path) { #' Filter statements related to packaging #' -#' This is intended to be called after `datapack::getTriples` has been called -#' on a ResourceMap. +#' This is intended to be called after [datapack::getTriples()] has been called +#' on a resource map. #' #' This function was written specifically for the case of updating a resource #' map while preserving any extra statements that have been added such as PROV #' statements. Statements are filtered according to these rules: #' -#' @param statements (data.frame) A set of Statements to be filtered +#' @param statements (data.frame) A set of statements to be filtered. #' -#' @return (data.frame) The filtered Statements - +#' @return (data.frame) The filtered statements. +#' +#' @noRd filter_packaging_statements <- function(statements) { stopifnot(is.data.frame(statements)) if (nrow(statements) == 0) return(statements) @@ -1057,4 +1069,3 @@ filter_packaging_statements <- function(statements) { statements } - diff --git a/R/quality.R b/R/quality.R index 21f590f..172a205 100644 --- a/R/quality.R +++ b/R/quality.R @@ -1,9 +1,16 @@ -#' Score a metadata document against a MetaDIG Suite +# Functions related to metadata quality + + +#' Score a metadata document against a MetaDIG suite +#' +#' This function scores a metadata document against a MetaDIG suite. +#' The default suite is for the Arctic Data Center. +#' +#' @param document (eml/character) Either an EML object or path to a file on disk. +#' @param suite_id (character) Specify a suite ID. Should be one of <https://quality.nceas.ucsb.edu/quality/suites>. #' -#' @param document (eml or character) Either an EML object or path to a file on disk. -#' @param suite_id (character) Optional. Specificy a suite ID. Should be one of https://quality.nceas.ucsb.edu/quality/suites +#' @return (data.frame) A sorted data.frame of check results. #' -#' @return (data.frame) A sorted table of Check results #' @export #' #' @examples diff --git a/R/sysmeta.R b/R/sysmeta.R index 698700a..47018b0 100644 --- a/R/sysmeta.R +++ b/R/sysmeta.R @@ -1,68 +1,21 @@ -#' sysmeta.R -#' -#' Utility functions for modifying System Metadata objects. - - -#' Add access rules to the sysmeta object -#' -#' This is a function because I add a set of standard set of access rules to -#' every object and the access rules don't differ across objects. -#' -#' @param sysmeta (SystemMetadata) The SystemMetadata to add rules to. -#' -#' @return The modified SystemMetadata object - -add_access_rules <- function(sysmeta) { - if (!inherits(sysmeta, "SystemMetadata")) { - stop(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) - } - - # Add myself explicitly as changePermission/write so I can update objects - # in the dev environment - if (env_get() == "development") { - sysmeta <- datapack::addAccessRule(sysmeta, env_load(skip_mn = TRUE)$submitter, "changePermission") - } - - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") - - sysmeta -} +# Utility functions for modifying System Metadata objects -#' Adds access to the given System Metadata for the arctic-data-admins group +#' Replace subjects in the accessPolicy section of a System Metadata entries #' -#' @param sysmeta (sysmeta) System Metadata object -#' -add_admin_group_access <- function(sysmeta) { - if (!inherits(sysmeta, "SystemMetadata")) { - message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) - return(sysmeta) - } - - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") - - sysmeta -} - - -#' Replace subjects in the accessPolicy section of a System Metadata entries. -#' -#' This function was written out to fix capitalization errors but in a set of +#' This function was written out to fix capitalization errors in a set of #' existing System Metadata entries but can be used to replace any subject. #' -#' #' @param sysmeta (SystemMetadata) The System Metadata object. #' @param from (character) The DN string to replace. -#' @param to (character) The DN string to put in place of `from`. +#' @param to (character) The DN string to put in place of 'from'. +#' +#' @return (SystemMetadata) The modified System Metadata. #' -#' @return The modified System Metadata (SystemMetadata) +#' @noRd replace_subject <- function(sysmeta, - from="cn=arctic-data-admins,dc=dataone,dc=org", - to="CN=arctic-data-admins,DC=dataone,DC=org") { + from = "cn=arctic-data-admins,dc=dataone,dc=org", + to = "CN=arctic-data-admins,DC=dataone,DC=org") { if (!inherits(sysmeta, "SystemMetadata")) { message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) return(sysmeta) @@ -89,6 +42,8 @@ replace_subject <- function(sysmeta, #' @param sysmeta (SystemMetadata) The System Metadata object to clear the replication policy of. #' #' @return (SystemMetadata) The modified System Metadata object. +#' +#' @noRd clear_replication_policy <- function(sysmeta) { if (!(is(sysmeta, "SystemMetadata"))) { stop("First argument was not of class SystemMetadata.") @@ -100,3 +55,146 @@ clear_replication_policy <- function(sysmeta) { sysmeta } + + +#' Get system metadata for all elements of a data package +#' +#' This function retrieves the system metadata for all elements of a data package and returns them as a list. +#' It is useful for inspecting system metadata for an entire data package and identifying changes where needed. +#' +#' @param mn (MNode) The Member Node to query. +#' @param resource_map_pid (character) The PID for a resource map. +#' @param nmax (numeric) The maximum number of system metadata objects to return. +#' @param child_packages (logical) If parent package, whether or not to include child packages. +#' +#' @return (list) A list of system metadata objects. +#' +#' @import dataone +#' @importFrom methods is +#' @importFrom methods new +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn_staging <- CNode("STAGING") +#' adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") +#' +#' rm_pid <- "resource_map_urn:uuid:..." +#' +#' all <- get_all_sysmeta(adc_test, rm_pid) +#' +#' # View in viewer to inspect +#' View(all) +#' +#' # Print specific elements to console +#' all[[1]]@rightsHolder +#' +#' # Create separate object +#' sysmeta_md <- all[[2]] +#' } +get_all_sysmeta <- function(mn, resource_map_pid, nmax = 1000, child_packages = FALSE) { + stopifnot(methods::is(mn, "MNode")) + stopifnot(is.character(resource_map_pid), nchar(resource_map_pid) > 0, length(resource_map_pid) == 1) + stopifnot(is_resource_map(mn, resource_map_pid)) + stopifnot(is.numeric(nmax), length(nmax) == 1 , nmax >= 0) + stopifnot(is.logical(child_packages), length(child_packages) == 1) + + query_params <- paste("identifier:", resource_map_pid, "+OR+resourceMap:", resource_map_pid, "", sep = "\"") + response <- dataone::query(mn, list(q = query_params, rows = as.character(nmax))) + + if (length(response) == 0) { + stop(paste0("No results were found when searching for a package with resource map '", resource_map_pid, + "'.\nThis could be caused by not having appropriate access to read the resource map.")) + } + + if (length(response) == nmax) { + warning(paste("Query returned the maximum number of objects. It is possible there are more to retrieve.", + "\nSpecify a larger number of objects with the 'nmax' argument.")) + } + + # Check if child package + if (response[[1]]$formatType == "RESOURCE" && !is.null(response[[1]]$resourceMap)) { + message("The data package with this resource map is a child package.") + } + # Check if parent package + if (any(unlist(lapply(response[2:length(response)], function(x) ifelse(x$formatType == "RESOURCE", TRUE, FALSE))))) { + message("The data package with this resource map is a parent package.") + if (child_packages == TRUE) { + children <- Filter(function(x) x$formatType == "RESOURCE", response[2:length(response)]) + children2 <- vector("list", length(children)) + for (i in seq_along(children)) { + child_resource_map_pid <- children[[i]]$identifier + query_params2 <- paste("identifier:", child_resource_map_pid, "+OR+resourceMap:", child_resource_map_pid, "", sep = "\"") + children2[[i]] <- dataone::query(mn, list(q = query_params2, rows = as.character(nmax))) + } + } + } + + # Translate fields from Solr query to formal class SystemMetadata + translate <- function(x) { + sysmeta <- methods::new("SystemMetadata") + + sysmeta@serialVersion <- sysmeta@serialVersion + sysmeta@identifier <- if (is.null(x$identifier)) {sysmeta@identifier} else {x$identifier} + sysmeta@formatId <- if (is.null(x$formatId)) {sysmeta@formatId} else {x$formatId} + sysmeta@size <- if (is.null(x$size)) {sysmeta@size} else {x$size} + sysmeta@checksum <- if (is.null(x$checksum)) {sysmeta@checksum} else {x$checksum} + sysmeta@checksumAlgorithm <- if (is.null(x$checksumAlgorithm)) {sysmeta@checksumAlgorithm} else {x$checksumAlgorithm} + sysmeta@submitter <- if (is.null(x$submitter)) {sysmeta@submitter} else {x$submitter} + sysmeta@rightsHolder <- if (is.null(x$rightsHolder)) {sysmeta@rightsHolder} else {x$rightsHolder} + read <- if (is.null(x$readPermission)) {} else {data.frame(subject = unlist(x$readPermission), + permission = "read")} + write <- if (is.null(x$writePermission)) {} else {data.frame(subject = unlist(x$writePermission), + permission = "write")} + change <- if (is.null(x$changePermission)) {} else {data.frame(subject = unlist(x$changePermission), + permission = "changePermission")} + sysmeta@accessPolicy <- rbind(read, write, change) + sysmeta@replicationAllowed <- if (is.null(x$replicationAllowed)) {sysmeta@replicationAllowed} else {x$replicationAllowed} + sysmeta@numberReplicas <- if (is.null(x$numberReplicas)) {sysmeta@numberReplicas} else {x$numberReplicas} + sysmeta@preferredNodes <- if (is.null(x$preferredReplicationMN)) {sysmeta@preferredNodes} else {x$preferredReplicationMN} + sysmeta@blockedNodes <- if (is.null(x$blockedReplicationMN)) {sysmeta@blockedNodes} else {x$blockedReplicationMN} + sysmeta@obsoletes <- if (is.null(x$obsoletes)) {sysmeta@obsoletes} else {x$obsoletes} + sysmeta@obsoletedBy <- if (is.null(x$obsoletedBy)) {sysmeta@obsoletedBy} else {x$obsoletedBy} + sysmeta@archived <- sysmeta@archived + sysmeta@dateUploaded <- if (is.null(x$dateUploaded)) {sysmeta@dateUploaded} else {as.character(x$dateUploaded)} + sysmeta@dateSysMetadataModified <- if (is.null(x$dateModified)) {sysmeta@dateSysMetadataModified} else {as.character(x$dateModified)} + sysmeta@originMemberNode <- if (is.null(x$datasource)) {sysmeta@originMemberNode} else {x$datasource} + sysmeta@authoritativeMemberNode <- if (is.null(x$authoritativeMN)) {sysmeta@authoritativeMemberNode} else {x$authoritativeMN} + sysmeta@seriesId <- if (is.null(x$seriesId)) {sysmeta@seriesId} else {x$seriesId} + sysmeta@mediaType <- if (is.null(x$mediaType)) {sysmeta@mediaType} else {x$mediaType} + sysmeta@fileName <- if (is.null(x$fileName)) {sysmeta@fileName} else {x$fileName} + sysmeta@mediaTypeProperty <- if (is.null(x$mediaTypeProperty)) {sysmeta@mediaTypeProperty} else {x$mediaTypeProperty} + + return(sysmeta) + } + + if (child_packages) { + other <- Filter(function(x) x$formatType != "RESOURCE", response[2:length(response)]) + response2 <- c(list(response[[1]]), other) + parent <- lapply(response2, translate) + names(parent) <- unlist(lapply(parent, function(x) {x@fileName})) + for (i in seq_along(parent)) { + if (is.na(names(parent)[i])) {names(parent)[i] <- paste0("missing_fileName", i)} + } + + child <- lapply(children2, function(x) {lapply(x, translate)}) + for (i in seq_along(child)) { + names(child[[i]]) <- unlist(lapply(child[[i]], function(x) {x@fileName})) + for (j in seq_along(child[[i]])) { + if (is.na(names(child[[i]])[j])) {names(child[[i]])[j] <- paste0("missing_fileName", j)} + } + } + names(child) <- paste0("child", seq_along(child)) + + all <- c(parent, child) + } else { + all <- lapply(response, translate) + names(all) <- unlist(lapply(all, function(x) {x@fileName})) + for (i in seq_along(all)) { + if (is.na(names(all)[i])) {names(all)[i] <- paste0("missing_fileName", i)} + } + } + + return(all) +} diff --git a/R/util.R b/R/util.R index 29018da..7228657 100644 --- a/R/util.R +++ b/R/util.R @@ -1,10 +1,15 @@ -#' Extract the local identifier for an ACADIS ISO metadata XML file. +# Various utility functions + + +#' Extract the local identifier for an ACADIS ISO metadata XML file #' #' @param type (character) A string, one of "gateway" or "field-projects". -#' @param file (character) A string, a connection, or raw vector -#' (same as \code{\link[xml2]{read_xml}}). +#' @param file (character) A string, connection, or raw vector +#' (same as [xml2::read_xml()]). #' -#' @return The identifier string. (character) +#' @return (character) The identifier string. +#' +#' @noRd extract_local_identifier <- function(type, file) { stopifnot(is.character(type), length(type) == 1) stopifnot(type %in% c("gateway", "field-projects")) @@ -37,139 +42,15 @@ extract_local_identifier <- function(type, file) { } -dataone_format_mappings <- list("avi" = "video/avi", - "bmp" = "image/bmp", - "bz2" = "application/x-bzip2", - "csv" = "text/csv", - "doc" = "application/msword", - "docx" = "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "fasta" = "application/x-fasta", - "gif" = "image/gif", - "gz" = "application/x-gzip", - "html" = "text/html", - "ipynb" = "application/json", - "jp2" = "image/jp2", - "jpg" = "image/jpeg", - "jpeg" = "image/jpeg", - "kml" = "application/vnd.google-earth.kml/xml", - "kmz" = "application/vnd.google-earth.kmz", - "md" = "text/markdown", - "mov" = "video/quicktime", - "mp3" = "audio/mpeg", - "mp4" = "video/mp4", - "mpg" = "video/mpeg", - "mpeg" = "video/mpeg", - "n3" = "text/n3", - "nc" = "netCDF-3", - "pdf" = "application/pdf", - "png" = "image/png", - "ppt" = "application/vnd.ms-powerpoint", - "pptx" = "application/vnd.openxmlformats-officedocument.presentationml.presentation", - "py" = "application/x-python", - "qt" = "video/quicktime", - "r" = "application/R", - "rar" = "application/x-rar-compressed", - "rdf" = "application/rdf/xml", - "rmd" = "text/x-rmarkdown", - "sas" = "application/SAS", - "svg" = "image/svg/xml", - "tar" = "application/x-tar", - "tif" = "image/tiff", - "tiff" = "image/tiff", - "ttl" = "text/turtle", - "tsv" = "text/tsv", - "txt" = "text/plain", - "wav" = "audio/x-wav", - "wma" = "audio/x-ms-wma", - "wmv" = "video/x-ms-wmv", - "xls" = "application/vnd.ms-excel", - "xlsx" = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xml" = "application/xml", - "zip" = "application/zip") - - -#' Guess format from filename for a vector of filenames. -#' -#' @param filenames (character) -#' -#' @return (character) DataOne format identifiers strings. -#' @export -#' -#' @examples -#'formatid <- guess_format_id("temperature_data.csv") -#' -guess_format_id <- function(filenames) { - extensions <- tolower(tools::file_ext(filenames)) - filetypes <- vector(mode = "character", length = length(extensions)) - - for (i in seq_len(length(extensions))) { - extension <- extensions[i] - - if (extension %in% names(dataone_format_mappings)) { - filetypes[i] <- dataone_format_mappings[extension][[1]] - } else { - filetypes[i] <- "application/octet-stream" - } - } - - filetypes -} - - -#' Determine the DataONE format ID for the NetCDF file provided by path. -#' -#' @param path (character) Full or relative path to the file in question. -#' -#' @return (character) The DataONE format ID. -get_netcdf_format_id <- function(path) { - stopifnot(is.character(path), - nchar(path) > 0, - file.exists(path)) - - if (!requireNamespace("ncdf4")) { - stop(call. = FALSE, - "The package 'ncdf4' must be installed to run this function. ", - "Please install it and try again.") - } - - # Try to open the file, capturing errors - cdf_file <- try({ - ncdf4::nc_open(path) - }) - - # If we failed to open the file, we can assume it's not a valid NetCDF file - # and we just return application/octet-stream as the format ID - if (inherits(cdf_file, "try-error")) { - return("application/octet-stream") - } - - # Since we got this far, continue detecting the format - stopifnot("format" %in% names(cdf_file)) - format_string <- cdf_file$format - stopifnot(is.character(format_string), - nchar(format_string) > 0) - format_id = "" - - if (format_string == "NC_FORMAT_CLASSIC") { - format_id = "netCDF-3" - } else if (format_string == "NC_FORMAT_NETCDF4") { - format_id = "netCDF-4" - } else { - stop("Unknown NetCDF format discovered.") - } - - return(format_id) -} - - -#' Print a random dataset. +#' Print a random dataset #' #' @param inventory (data.frame) An inventory. #' @param theme (character) Optional. A package theme name. #' @param n (numeric) Optional. The number of files to show. #' -#' @return Nothing. - +#' @return `NULL` +#' +#' @noRd show_random_dataset <- function(inventory, theme=NULL, n=10) { stopifnot(is.data.frame(inventory), all(c("file", "folder", "filename", "theme") %in% names(inventory))) @@ -193,7 +74,7 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { base_dir <- sampled_pkg[which(sampled_pkg$is_metadata == TRUE),"folder"] # startDebug - if (length(base_dir) != 0){ + if (length(base_dir) != 0) { browser() } # endDebug @@ -215,7 +96,7 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { } -#' Log a message to the console and to a logfile. +#' Log a message to the console and to a logfile #' #' Reads from the environment variable 'LOG_PATH' and uses the value set there #' to decide the location of the log file. If that envvar isn't set, it defaults @@ -223,8 +104,9 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { #' #' @param message (character) Your log message. #' -#' @return Nothing. - +#' @return `NULL` +#' +#' @noRd log_message <- function(message=NULL) { if (is.null(message) || !is.character(message) || nchar(message) < 1) { invisible(return(FALSE)) @@ -251,15 +133,16 @@ log_message <- function(message=NULL) { } -#' Check if an object exists on a Member Node. +#' Check if an object exists on a Member Node #' #' This is a simple check for the HTTP status of a /meta/{PID} call on the -#' provided member node. +#' provided Member Mode. #' -#' @param node (MNode|CNode) The Node to query. -#' @param pids (character) PID to check the existence of. +#' @param node (MNode) The Member Node to query. +#' @param pids (character) The PID(s) to check the existence of. #' #' @return (logical) Whether the object exists. +#' #' @export #' #' @examples @@ -297,13 +180,15 @@ object_exists <- function(node, pids) { } -#' Convert and ISO document to EML using an XSLT. +#' Convert an ISO document to EML using an XSLT #' #' Leave style=NA if you want to use the default ISO-to-EML stylesheet. +#' #' @param path (character) Path to the file to convert. #' @param style (xslt) The XSLT object to be used for transformation. #' #' @return (character) Location of the converted file. +#' #' @export #' #' @examples @@ -329,14 +214,18 @@ convert_iso_to_eml <- function(path, style=NA) { } -#' Extract the EML responsible-party blocks in a document, and parse the -#' surName field to create proper givenName/surName structure +#' Modify name structure for EML parties +#' +#' Extract the EML responsible-party blocks in a document and parse the +#' surName field to create proper givenName/surName structure. +#' +#' @param path (character) The path to the EML document to process. #' -#' @param path file path to the EML document to process (character) +#' @return (character) The path to the converted EML file. #' -#' @return path (character) Path to the converted EML file. #' @import XML - +#' +#' @noRd substitute_eml_party <- function(path) { # Read in the EML document doc = XML::xmlParse(path) @@ -365,15 +254,19 @@ substitute_eml_party <- function(path) { return(path) } + +#' Change EML name +#' #' Utility function to extract a name string from an XML individualName node, -#' parse it into tokens,and reformat the individualName with new children nodes +#' parse it into tokens,and reformat the individualName with new children nodes. #' -#' @param party the XML node containing a subclass of eml:ResponsibleParty +#' @param party The XML node containing a subclass of eml:ResponsibleParty. #' -#' @return the modified XML node +#' @return The modified XML node. #' #' @import XML - +#' +#' @noRd change_eml_name <- function(party) { # Check if there is an individualName element exists if (length(XML::getNodeSet(party, "./individualName")) == 0) { @@ -441,13 +334,15 @@ change_eml_name <- function(party) { } +#' Replace EML packageId with value +#' #' Replace the EML 'packageId' attribute on the root element with a #' certain value. #' #' @param path (character) Path to the XML file to edit. #' @param replacement (character) The new value. #' - +#' @noRd replace_package_id <- function(path, replacement) { stopifnot(file.exists(path)) stopifnot(is.character(replacement), @@ -464,12 +359,13 @@ replace_package_id <- function(path, replacement) { path } -#' Adds a string to the title element in the given file. + +#' Add a string to the title element in the given file #' #' @param path (character) Path to the XML file to edit. #' @param string (character) The new value. #' - +#' @noRd add_string_to_title <- function(path, string) { stopifnot(file.exists(path)) stopifnot(is.character(string), @@ -496,13 +392,14 @@ add_string_to_title <- function(path, string) { } -#' Add a set of additional identifiers to an EML document. +#' Add a set of additional identifiers to an EML document #' #' @param path (character) Path to the EML document. #' @param identifiers (character) Set of identifiers to add. #' #' @return (character) Path to the modified document. - +#' +#' @noRd add_additional_identifiers <- function(path, identifiers) { stopifnot(is.character(path), nchar(path) > 0, @@ -526,15 +423,16 @@ add_additional_identifiers <- function(path, identifiers) { } -#' (Intelligently) join (possibly redudant) path parts together. +#' Intelligently join possibly redundant path parts together #' #' Joins path strings like "./" to "./my/dir" as "./my/dir" instead of as #' "././my/dir. #' #' @param path_parts (character) #' -#' @return (character)The joined path string. - +#' @return (character) The joined path string. +#' +#' @noRd path_join <- function(path_parts=c("")) { result <- paste0(path_parts, collapse = "") @@ -554,48 +452,27 @@ path_join <- function(path_parts=c("")) { result } -#' Test whether an object is a particular format ID. -#' -#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. -#' @param pids (character) -#' @param format_id (character) -#' -#' @return (logical) - -is_format_id <- function(node, pids, format_id) { - stopifnot(class(node) %in% c("MNode", "CNode")) - stopifnot(all(is.character(pids)), - all(lengths(pids) > 0)) - stopifnot(is.character(format_id), - nchar(format_id) > 0) - - result <- vector("logical", length(pids)) - - for (i in seq_along(pids)) { - result[i] <- dataone::getSystemMetadata(node, pids[i])@formatId == format_id - } - - result -} -#' Determines whether the object with the given PID is a resource map. +#' Determine whether the object with the given PID is a resource map #' #' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. -#' @param pids (character) Vector of PIDs +#' @param pids (character) Vector of PIDs. #' -#' @return (logical) Whether or not the object(s) are resource maps - +#' @return (logical) Whether or not the object(s) are resource maps. +#' +#' @noRd is_resource_map <- function(node, pids) { is_format_id(node, pids, "http://www.openarchives.org/ore/terms") } -#' Test whether the object is obsoleted by another object. +#' Test whether the object is obsoleted by another object #' #' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. #' @param pids (character) One or more PIDs to query against. #' #' @return (logical) Whether or not the object is obsoleted by another object. +#' #' @export #' #' @examples @@ -623,11 +500,11 @@ is_obsolete <- function(node, pids) { } -#' Returns the subject of the set dataone_test_token +#' Return the subject of the set dataone_test_token #' #' @return (character) The token subject. #' -#' +#' @noRd get_token_subject <- function() { info <- dataone::getTokenInfo(dataone::AuthenticationManager()) @@ -645,27 +522,31 @@ get_token_subject <- function() { } -#' Get the identifier from a DataONE response. +#' Get the identifier from a DataONE response #' -#' Example resposne: +#' Example response: #' #' <d1:identifier xmlns:d1="http://ns.dataone.org/service/types/v1"> #' urn:uuid:12aaf494-5840-434d-9cdb-c2597d58543e #' </d1:identifier> #' -#' @param dataone_response ("XMLInternalDocument" "XMLAbstractDocument") +#' @param dataone_response ("XMLInternalDocument"/"XMLAbstractDocument") #' #' @return (character) The PID. - +#' +#' @noRd get_identifier <- function(dataone_response) { stopifnot("XMLInternalDocument" %in% class(dataone_response)) - XML::xmlValue(XML::getNodeSet(dataone_response, "//d1:identifier/text()", namespaces = c("d1"="http://ns.dataone.org/service/types/v1"))[[1]]) + XML::xmlValue(XML::getNodeSet(dataone_response, "//d1:identifier/text()", namespaces = c("d1" = "http://ns.dataone.org/service/types/v1"))[[1]]) } -#' Helper function to generate a new UUID PID. +#' Generate a new UUID PID +#' +#' Generate a new UUID PID. #' #' @return (character) A new UUID PID. +#' #' @export #' #' @examples @@ -675,12 +556,14 @@ new_uuid <- function() { } -#' Get the current package version. +#' Get the current package version #' #' This function parses the installed DESCRIPTION file to get the latest -#' version. +#' version of the package. #' #' @return (character) The current package version. +#' +#' @noRd get_current_version <- function() { desc_file <- file.path(system.file("DESCRIPTION", package = "arcticdatautils")) desc_lines <- readLines(desc_file) @@ -688,9 +571,11 @@ get_current_version <- function() { } -#' Use the GitHub API to find the latest release for the package. +#' Use the GitHub API to find the latest release for the package #' #' @return (character) The latest release. +#' +#' @noRd get_latest_release <- function() { req <- httr::GET("https://api.github.com/repos/NCEAS/arcticdatautils/releases") content <- httr::content(req) @@ -702,10 +587,12 @@ get_latest_release <- function() { } +#' Warn if package version is outdated +#' #' Warns if the currently-installed version of the package is not the same #' version as the latest release on GitHub. #' - +#' @noRd warn_current_version <- function() { current <- get_current_version() latest <- get_latest_release() @@ -716,17 +603,19 @@ warn_current_version <- function() { } +#' Get the PIDs of all versions of an object +#' #' Get the PIDs of all versions of an object. #' -#' @param node (MNode|CNode) The node to query. +#' @param node (MNode) The Member Node to query. #' @param pid (character) Any object in the chain. #' #' @return (character) A vector of PIDs in the chain, in order. +#' #' @export #' #' @examples #'\dontrun{ -#' #Set environment #' cn <- CNode("STAGING2") #' mn <- getMNode(cn,"urn:node:mnTestKNB") #' pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" @@ -776,18 +665,19 @@ get_all_versions <- function(node, pid) { } -#' Get a structured list of PIDs for the objects in a package. +#' Get a structured list of PIDs for the objects in a package #' -#' This is a wrapper function around `get_package_direct` which takes either -#' a resource map PID or a metadata PID as its `pid` argument. +#' Get a structured list of PIDs for the objects in a package, +#' including the resource map, metadata, and data objects. #' -#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. +#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on. #' @param pid (character) The the resource map PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only -#' useful to set if you are warned about the result set being truncated. Defaults to 5000. +#' useful to set if you are warned about the result set being truncated. Defaults to 5000. #' #' @return (list) A structured list of the members of the package. +#' #' @export #' #' @examples @@ -834,16 +724,18 @@ get_package <- function(node, pid, file_names=FALSE, rows=5000) { } -#' Get a structured list of PIDs for the objects in a package. +#' Get a structured list of PIDs for the objects in a package #' -#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. +#' This function is used within [get_package()]. +#' +#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on. #' @param pid (character) The the metadata PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only -#' useful to set if you are warned about the result set being truncated. Defaults to 5000. +#' useful to set if you are warned about the result set being truncated. Defaults to 5000. #' - -get_package_direct <- function(node, pid, file_names=FALSE, rows = 5000) { +#' @noRd +get_package_direct <- function(node, pid, file_names = FALSE, rows = 5000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), nchar(pid) > 0) @@ -909,16 +801,17 @@ get_package_direct <- function(node, pid, file_names=FALSE, rows = 5000) { response } -#' Get the resource map(s) for the given object. + +#' Get the resource map(s) for the given object #' -#' @param node (MNode|CNode) The Node to query. +#' @param node (MNode/CNode) The Member Node to query. #' @param pid (character) The object to get the resource map(s) for. -#' @param rows (numeric) Optional. The number of query results to return. This -#' shouldn't need to be modified and the default, 1000, is very likely to be -#' more than enough. +#' @param rows (numeric) Optional. The number of query results to return. +#' The default, 1000, is very likely to be more than enough. #' #' @return (character) The resource map(s) that contain `pid`. - +#' +#' @noRd find_newest_resource_map <- function(node, pid, rows = 1000) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(pid), @@ -962,14 +855,17 @@ find_newest_resource_map <- function(node, pid, rows = 1000) { find_newest_object(node, all_resource_map_pids) } -#' Find the newest (by dateUploaded) object within a given set of objects. + +#' Find the newest object within the given set of objects +#' +#' Find the newest object, based on dateUploaded, within the given set of objects. #' -#' @param node (MNode | CNode) The node to query -#' @param identifiers (character) One or more identifiers +#' @param node (MNode/CNode) The Member Node to query. +#' @param identifiers (character) One or more identifiers. #' @param rows (numeric) Optional. Specify the size of the query result set. #' #' @return (character) The PID of the newest object. In the case of a tie (very -#' unlikely) the first element, in natural order, is returned. +#' unlikely) the first element, in natural order, is returned. #' #' @export #' @@ -1004,16 +900,17 @@ find_newest_object <- function(node, identifiers, rows=1000) { } -#' Filters PIDs that are obsolete. +#' Filters PIDs that are obsolete #' #' Whether or not a PID is obsolete is determined by whether its "obsoletedBy" -#' property is set to another PID (TRUE) or is NA (FALSE). +#' property is set to another PID (`TRUE`) or is `NA` (`FALSE`). #' -#' @param node (MNode|CNode) The Node to query. +#' @param node (MNode|CNode) The Member Node to query. #' @param pids (character) PIDs to check the obsoletion state of. #' #' @return (character) PIDs that are not obsoleted by another PID. - +#' +#' @noRd filter_obsolete_pids <- function(node, pids) { pids[is.na(sapply(pids, function(pid) { dataone::getSystemMetadata(node, pid)@obsoletedBy }, USE.NAMES = FALSE))] } @@ -1021,11 +918,12 @@ filter_obsolete_pids <- function(node, pids) { #' Get an approximate list of the datasets in a user's profile #' -#' This function is intended to be (poorly) simulate what a user sees when they +#' This function is intended to (poorly) simulate what a user sees when they #' browse to their "My Data Sets" page (their #profile URL). It uses a similar -#' Solr to what Metacat UI uses to generate the list. The results of this -#' function may be the same as what's on the #profile page but may be missing -#' some of the user's datasets when: +#' Solr query to what Metacat UI uses to generate the list. +#' +#' The results of this function may be the same as what's on the #profile page +#' but may be missing some of the user's datasets when: #' #' - The user has any datasets in their #profile that the person running the #' query (you) can't \code{read}. This is rare on arcticdata.io but possible @@ -1036,10 +934,11 @@ filter_obsolete_pids <- function(node, pids) { #' #' @param mn (MNode) The Member Node to query. #' @param subject (character) The subject to find the datasets for. This is -#' likely going to be your ORCID, e.g. http://orcid.org.... +#' likely going to be an ORCID, e.g. http://orcid.org.... #' @param fields (character) A vector of Solr fields to return. #' -#' @return (data.frame) data.frame with the results. +#' @return (data.frame) A data.frame with the results. +#' #' @export #' #' @examples @@ -1096,12 +995,16 @@ view_profile <- function(mn, subject, fields=c("identifier", "title")) { results } + #' Show the indexing status of a set of PIDs #' -#' @param mn (MNode) The Member Node to query -#' @param pids (character|list) One or more PIDs (or list of PIDs) +#' Show the indexing status of a set of PIDs. +#' +#' @param mn (MNode) The Member Node to query. +#' @param pids (character/list) One or more PIDs. +#' +#' @return `NULL` #' -#' @return Nothing #' @export #' #' @examples @@ -1154,4 +1057,3 @@ show_indexing_status <- function(mn, pids) { close(pb) } - diff --git a/README.md b/README.md index cb5b3af..9ea0963 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# arcticadatautils +# arcticdatautils [![Travis build status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) -The `articadatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: +The `arcticadatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: - Inserting large numbers of files into a Metacat Member Node - High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations @@ -11,7 +11,7 @@ Note: The package is intended to be used by NCEAS staff and may not make much se ## Installing -I recommend installing from the latest [release](https://github.com/NCEAS/arcticdatautils/releases) (aka tag) instead of from `master`. Install the latest release with the [`remotes`](https://github.com/r-lib/remotes) package: +We recommend installing from the latest [release](https://github.com/NCEAS/arcticdatautils/releases) (aka tag) instead of from `master`. Install the latest release with the [`remotes`](https://github.com/r-lib/remotes) package: ```r remotes::install_github("nceas/arcticdatautils@*release") diff --git a/man/add_access_rules.Rd b/man/add_access_rules.Rd deleted file mode 100644 index 1823d62..0000000 --- a/man/add_access_rules.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{add_access_rules} -\alias{add_access_rules} -\title{sysmeta.R} -\usage{ -add_access_rules(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The SystemMetadata to add rules to.} -} -\value{ -The modified SystemMetadata object -} -\description{ -Utility functions for modifying System Metadata objects. -Add access rules to the sysmeta object -} -\details{ -This is a function because I add a set of standard set of access rules to -every object and the access rules don't differ across objects. -} diff --git a/man/add_additional_identifiers.Rd b/man/add_additional_identifiers.Rd deleted file mode 100644 index 56d2b65..0000000 --- a/man/add_additional_identifiers.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{add_additional_identifiers} -\alias{add_additional_identifiers} -\title{Add a set of additional identifiers to an EML document.} -\usage{ -add_additional_identifiers(path, identifiers) -} -\arguments{ -\item{path}{(character) Path to the EML document.} - -\item{identifiers}{(character) Set of identifiers to add.} -} -\value{ -(character) Path to the modified document. -} -\description{ -Add a set of additional identifiers to an EML document. -} diff --git a/man/add_admin_group_access.Rd b/man/add_admin_group_access.Rd deleted file mode 100644 index f2f3faa..0000000 --- a/man/add_admin_group_access.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{add_admin_group_access} -\alias{add_admin_group_access} -\title{Adds access to the given System Metadata for the arctic-data-admins group} -\usage{ -add_admin_group_access(sysmeta) -} -\arguments{ -\item{sysmeta}{(sysmeta) System Metadata object} -} -\description{ -Adds access to the given System Metadata for the arctic-data-admins group -} diff --git a/man/add_methods_step.Rd b/man/add_methods_step.Rd index 4ac43fb..3db34c1 100644 --- a/man/add_methods_step.Rd +++ b/man/add_methods_step.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{add_methods_step} \alias{add_methods_step} -\title{Adds a step to the methods document} +\title{Add a methods step} \usage{ add_methods_step(doc, title, description) } @@ -14,10 +14,10 @@ add_methods_step(doc, title, description) \item{description}{(character) The description of the method.} } \value{ -(eml) The modified EML document +(eml) The modified EML document. } \description{ -Adds a step to the methods document +Add a methods step to an EML document. } \examples{ \dontrun{ diff --git a/man/add_string_to_title.Rd b/man/add_string_to_title.Rd deleted file mode 100644 index 5279e23..0000000 --- a/man/add_string_to_title.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{add_string_to_title} -\alias{add_string_to_title} -\title{Adds a string to the title element in the given file.} -\usage{ -add_string_to_title(path, string) -} -\arguments{ -\item{path}{(character) Path to the XML file to edit.} - -\item{string}{(character) The new value.} -} -\description{ -Adds a string to the title element in the given file. -} diff --git a/man/arcticdatautils.Rd b/man/arcticdatautils.Rd index 0d3eb16..6a356b8 100644 --- a/man/arcticdatautils.Rd +++ b/man/arcticdatautils.Rd @@ -6,11 +6,6 @@ \alias{arcticdatautils-package} \title{arcticdatautils: Utilities for the Arctic Data Center} \description{ -The foo package provides three categories of important functions: -foo, bar and baz. +This package contains code for doing lots of useful stuff that's too specific for the +dataone package, primarily functions that streamline Arctic Data Center operations. } -\section{Foo functions}{ - -The foo functions ... -} - diff --git a/man/change_eml_name.Rd b/man/change_eml_name.Rd deleted file mode 100644 index 4458d8e..0000000 --- a/man/change_eml_name.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{change_eml_name} -\alias{change_eml_name} -\title{Utility function to extract a name string from an XML individualName node, -parse it into tokens,and reformat the individualName with new children nodes} -\usage{ -change_eml_name(party) -} -\arguments{ -\item{party}{the XML node containing a subclass of eml:ResponsibleParty} -} -\value{ -the modified XML node -} -\description{ -Utility function to extract a name string from an XML individualName node, -parse it into tokens,and reformat the individualName with new children nodes -} diff --git a/man/check_format.Rd b/man/check_format.Rd deleted file mode 100644 index 172197c..0000000 --- a/man/check_format.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/formats.R -\name{check_format} -\alias{check_format} -\title{Check that the given format is valid} -\usage{ -check_format(format) -} -\arguments{ -\item{format}{(character) The format ID to check.} -} -\value{ -(logical) Whether or not the format was valid. -} -\description{ -Validity is determined by the given format being found in the list on -\url{https://cn.dataone.org/cn/v2/formats}. -} diff --git a/man/clear_methods.Rd b/man/clear_methods.Rd index 370b3a7..2dc4b08 100644 --- a/man/clear_methods.Rd +++ b/man/clear_methods.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{clear_methods} \alias{clear_methods} -\title{Clear all methods from the document.} +\title{Clear all methods} \usage{ clear_methods(doc) } @@ -10,10 +10,10 @@ clear_methods(doc) \item{doc}{(eml) The document to clear methods from.} } \value{ -(eml) The modified document. +(eml) The modified EML document. } \description{ -Clear all methods from the document. +Clear all methods from an EML document. } \examples{ \dontrun{ diff --git a/man/clear_replication_policy.Rd b/man/clear_replication_policy.Rd deleted file mode 100644 index 9771474..0000000 --- a/man/clear_replication_policy.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{clear_replication_policy} -\alias{clear_replication_policy} -\title{Clear the replication policy from a System Metadata object} -\usage{ -clear_replication_policy(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The System Metadata object to clear the replication policy of.} -} -\value{ -(SystemMetadata) The modified System Metadata object. -} -\description{ -Clear the replication policy from a System Metadata object -} diff --git a/man/convert_iso_to_eml.Rd b/man/convert_iso_to_eml.Rd index 0efd0d7..a85eb5e 100644 --- a/man/convert_iso_to_eml.Rd +++ b/man/convert_iso_to_eml.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{convert_iso_to_eml} \alias{convert_iso_to_eml} -\title{Convert and ISO document to EML using an XSLT.} +\title{Convert an ISO document to EML using an XSLT} \usage{ convert_iso_to_eml(path, style = NA) } diff --git a/man/create_dummy_attributes_dataframe.Rd b/man/create_dummy_attributes_dataframe.Rd index 329836c..466d477 100644 --- a/man/create_dummy_attributes_dataframe.Rd +++ b/man/create_dummy_attributes_dataframe.Rd @@ -2,20 +2,20 @@ % Please edit documentation in R/helpers.R \name{create_dummy_attributes_dataframe} \alias{create_dummy_attributes_dataframe} -\title{Create dummy attributes data frame} +\title{Create test attributes data.frame} \usage{ create_dummy_attributes_dataframe(numberAttributes, factors = NULL) } \arguments{ -\item{numberAttributes}{(integer) Number of attributes to be created in the table} +\item{numberAttributes}{(integer) Number of attributes to be created in the table.} \item{factors}{(character) Optional vector of factor names to include.} } \value{ -(data.frame) Data frame of attributes +(data.frame) A data.frame of attributes. } \description{ -Create dummy attributes data frame +Create a test data.frame of attributes. } \examples{ \dontrun{ diff --git a/man/create_dummy_enumeratedDomain_dataframe.Rd b/man/create_dummy_enumeratedDomain_dataframe.Rd index 9cc8411..c58ccb3 100644 --- a/man/create_dummy_enumeratedDomain_dataframe.Rd +++ b/man/create_dummy_enumeratedDomain_dataframe.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_enumeratedDomain_dataframe} \alias{create_dummy_enumeratedDomain_dataframe} -\title{Create dummy enumeratedDomain data frame} +\title{Create test enumeratedDomain data.frame} \usage{ create_dummy_enumeratedDomain_dataframe(factors) } @@ -10,10 +10,10 @@ create_dummy_enumeratedDomain_dataframe(factors) \item{factors}{(character) Vector of factor names to include.} } \value{ -(data.frame) Data frame of factors +(data.frame) A data.frame of factors. } \description{ -Create dummy enumeratedDomain data frame +Create a test data.frame of enumeratedDomains. } \examples{ \dontrun{ diff --git a/man/create_dummy_metadata.Rd b/man/create_dummy_metadata.Rd index 2712ffd..9c016ee 100644 --- a/man/create_dummy_metadata.Rd +++ b/man/create_dummy_metadata.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_metadata} \alias{create_dummy_metadata} -\title{helpers.R} +\title{Create a test metadata object} \usage{ create_dummy_metadata(mn, data_pids = NULL) } @@ -12,11 +12,10 @@ create_dummy_metadata(mn, data_pids = NULL) \item{data_pids}{(character) Optional. PIDs for data objects the metadata documents.} } \value{ -pid (character) PID of published metadata document. +(character) PID of published metadata document. } \description{ -Various helper functions for things like testing the package. -Create a test metadata object. +Create a test EML metadata object. } \examples{ \dontrun{ diff --git a/man/create_dummy_object.Rd b/man/create_dummy_object.Rd index 8e6fea3..8083b7e 100644 --- a/man/create_dummy_object.Rd +++ b/man/create_dummy_object.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_object} \alias{create_dummy_object} -\title{Create a test object.} +\title{Create a test object} \usage{ create_dummy_object(mn) } @@ -10,10 +10,10 @@ create_dummy_object(mn) \item{mn}{(MNode) The Member Node.} } \value{ -pid (character) The pid of the dummy object. +(character) The PID of the dummy object. } \description{ -Create a test object. +Create a test data object. } \examples{ \dontrun{ diff --git a/man/create_dummy_package.Rd b/man/create_dummy_package.Rd index 97047e0..5ede34a 100644 --- a/man/create_dummy_package.Rd +++ b/man/create_dummy_package.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_package} \alias{create_dummy_package} -\title{Create a test package.} +\title{Create a test package} \usage{ create_dummy_package(mn, size = 2) } @@ -12,10 +12,10 @@ create_dummy_package(mn, size = 2) \item{size}{(numeric) The number of files in the package, including the metadata file.} } \value{ -pids (character) A named character vector of the data pids in the package. +(character) A named character vector of the data PIDs in the package. } \description{ -Create a test package. +Create a test data package. } \examples{ \dontrun{ diff --git a/man/create_dummy_package_full.Rd b/man/create_dummy_package_full.Rd index 9e87666..6c3d9b2 100644 --- a/man/create_dummy_package_full.Rd +++ b/man/create_dummy_package_full.Rd @@ -11,8 +11,11 @@ create_dummy_package_full(mn, title = "A Dummy Package") \item{title}{(character) Optional. Title of package. Defaults to "A Dummy Package".} } +\value{ +(list) A list of package PIDs, inluding for the resource map, metadata, and data objects. +} \description{ -Creates a fuller package than \code{\link{create_dummy_package}} +Creates a fuller package than \code{\link[=create_dummy_package]{create_dummy_package()}} but is otherwise based on the same concept. This dummy package includes multiple data objects, responsible parties, geographic locations, method steps, etc. diff --git a/man/create_dummy_parent_package.Rd b/man/create_dummy_parent_package.Rd index a2f1058..5cfa611 100644 --- a/man/create_dummy_parent_package.Rd +++ b/man/create_dummy_parent_package.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_parent_package} \alias{create_dummy_parent_package} -\title{Create a test parent package.} +\title{Create a test parent package} \usage{ create_dummy_parent_package(mn, children) } @@ -12,10 +12,10 @@ create_dummy_parent_package(mn, children) \item{children}{(character) Child package (resource maps) PIDs.} } \value{ -pid (character) Named character vector of PIDs including parent package and child package pids. +pid (character) A named character vector of PIDs, including parent package and child package PIDs. } \description{ -Create a test parent package. +Create a test parent data package. } \examples{ \dontrun{ diff --git a/man/create_from_folder.Rd b/man/create_from_folder.Rd deleted file mode 100644 index 4b7e933..0000000 --- a/man/create_from_folder.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inserting.R -\name{create_from_folder} -\alias{create_from_folder} -\title{inserting.R} -\usage{ -create_from_folder(mn, path, data_pids = NULL) -} -\arguments{ -\item{mn}{(MNode) The Member Node to create the packages on.} - -\item{path}{(character) The path to the folder containing the files.} - -\item{data_pids}{(character) Optional. Manually specify the PIDs of data. This is useful if data were inserted outside this function and you want to re-use those objects.} -} -\value{ -(list) All of the PIDs created. -} -\description{ -A set of utilities for inserting packages from files and folders on disk. -Create a package from a folder containing an ISO package (legacy) -} -\details{ -This function handles the process of inserting the original ISO package -and updating it with an EML package. - -Note: This only works for Gateway packages right now. -} diff --git a/man/create_object.Rd b/man/create_object.Rd deleted file mode 100644 index 21e629f..0000000 --- a/man/create_object.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{create_object} -\alias{create_object} -\title{Create an object from a row of the inventory.} -\usage{ -create_object(file, sysmeta, base_path, env) -} -\arguments{ -\item{file}{(data.frame)A row from the inventory.} - -\item{sysmeta}{(SystemMetadata) The file's sysmeta.} - -\item{base_path}{(character) Base path, to be appended to the \code{file} -column to find the file to upload.} - -\item{env}{(list) An environment.} -} -\description{ -Create an object from a row of the inventory. -} diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index 8ac7367..ae8a8ac 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{create_resource_map} \alias{create_resource_map} -\title{Create a resource map Object on a Member Node.} +\title{Create a resource map object on a Member Node} \usage{ create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL, check_first = TRUE, ...) @@ -10,32 +10,29 @@ create_resource_map(mn, metadata_pid, data_pids = NULL, \arguments{ \item{mn}{(MNode) The Member Node} -\item{metadata_pid}{(character) The PID of the metadata object to go in the -package.} +\item{metadata_pid}{(character) The PID of the metadata object to go in the package.} -\item{data_pids}{(character) The PID(s) of the data objects to go in the -package.} +\item{data_pids}{(character) The PID(s) of the data objects to go in the package.} \item{child_pids}{(character) The resource map PIDs of the packages to be nested under the package.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.} +arguments exist on the MN before continuing. This speeds up the function, +especially when \code{data_pids} has many elements.} -\item{...}{Additional arguments that can be passed into \code{\link{publish_object}}} +\item{...}{Additional arguments that can be passed into \code{\link[=publish_object]{publish_object()}}.} } \value{ -(character) The created resource map's PID +(character) The PID of the created resource map. } \description{ This function first generates a new resource map RDF/XML document locally and -then uses the dataone::createObject function to create the Object on the +then uses the \code{\link[dataone:createObject]{dataone::createObject()}} function to create the object on the specified MN. } \details{ -If you only want to generate resource map RDF/XML, see -\code{\link{generate_resource_map}} +If you only want to generate resource map RDF/XML, see \code{\link[=generate_resource_map]{generate_resource_map()}}. } \examples{ \dontrun{ @@ -46,7 +43,6 @@ meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') - create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) } } diff --git a/man/create_sysmeta.Rd b/man/create_sysmeta.Rd deleted file mode 100644 index 5e00721..0000000 --- a/man/create_sysmeta.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{create_sysmeta} -\alias{create_sysmeta} -\title{Create a sysmeta object.} -\usage{ -create_sysmeta(file, base_path, submitter, rights_holder) -} -\arguments{ -\item{file}{(data.frame) A single row from the inventory.} - -\item{base_path}{(character) The path prefix to use with the contents of `file[1,"filename]` that -will be used to locate the file on disk.} - -\item{submitter}{(character) The submitter DN string for the object.} - -\item{rights_holder}{(character) The rights holder DN string for the object.} -} -\value{ -The sysmeta object (dataone::SystemMetadata) -} -\description{ -This is a wrapper function around the constructor for a -dataone::SystemMetadata object. -} diff --git a/man/determine_child_pids.Rd b/man/determine_child_pids.Rd deleted file mode 100644 index 9cb8fee..0000000 --- a/man/determine_child_pids.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{determine_child_pids} -\alias{determine_child_pids} -\title{Calculate a set of child PIDs for a given package.} -\usage{ -determine_child_pids(inventory, package) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} - -\item{package}{(character) The package identifier.} -} -\description{ -Calculate a set of child PIDs for a given package. -} diff --git a/man/eml_abstract.Rd b/man/eml_abstract.Rd index c2a7190..e77b71e 100644 --- a/man/eml_abstract.Rd +++ b/man/eml_abstract.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/eml.R \name{eml_abstract} \alias{eml_abstract} -\title{Minimalistic helper function to generate EML abstracts} +\title{Create an EML abstract} \usage{ eml_abstract(text) } \arguments{ -\item{text}{(character) Paragraphs of text, one paragraph per element in the -character vector} +\item{text}{(character) Paragraphs of text with one paragraph per element in the +character vector.} } \value{ -(abstract) An EML abstract +(abstract) An EML abstract. } \description{ -Minimalistic helper function to generate EML abstracts +Create an EML abstract. } \examples{ # Set an abstract with a single paragraph diff --git a/man/eml_add_entities.Rd b/man/eml_add_entities.Rd index 1ef5a0c..227c182 100644 --- a/man/eml_add_entities.Rd +++ b/man/eml_add_entities.Rd @@ -2,26 +2,26 @@ % Please edit documentation in R/eml.R \name{eml_add_entities} \alias{eml_add_entities} -\title{Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table.} +\title{Add new entity elements to an EML document from a table} \usage{ eml_add_entities(doc, entities, resolve_base = "https://cn.dataone.org/cn/v2/resolve/") } \arguments{ -\item{doc}{(eml) An EML document} +\item{doc}{(eml) An EML document.} \item{entities}{(data.frame) A data.frame with columns type, path, pid, and -format_id} +format_id.} \item{resolve_base}{(character) Optional. Specify a DataONE CN resolve base URI which will be used for serializing download URLs into the EML. Most users - should not override the default value.} +should not override the default value.} } \value{ (eml) The modified EML document. } \description{ -Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table. +Add new entity elements to an EML document from a table. } \examples{ # Create entities from files on disk diff --git a/man/eml_address.Rd b/man/eml_address.Rd index 9cceeed..b10652a 100644 --- a/man/eml_address.Rd +++ b/man/eml_address.Rd @@ -2,24 +2,24 @@ % Please edit documentation in R/eml.R \name{eml_address} \alias{eml_address} -\title{Create an EML address element.} +\title{Create an EML address element} \usage{ eml_address(delivery_points, city, administrative_area, postal_code) } \arguments{ \item{delivery_points}{(character) One or more delivery points.} -\item{city}{(character) City} +\item{city}{(character) City.} -\item{administrative_area}{(character) Administrative area} +\item{administrative_area}{(character) Administrative area.} -\item{postal_code}{(character) Postal code} +\item{postal_code}{(character) Postal code.} } \value{ (address) An EML address object. } \description{ -Create an EML address element. +A simple way to create an EML address element. } \examples{ NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") diff --git a/man/eml_associated_party.Rd b/man/eml_associated_party.Rd index 8466cd4..99b15b4 100644 --- a/man/eml_associated_party.Rd +++ b/man/eml_associated_party.Rd @@ -7,13 +7,13 @@ eml_associated_party(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(associatedParty) The new associatedParty +(associatedParty) The new associatedParty. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ eml_associated_party("test", "user", email = "test@user.com", role = "Principal Investigator") diff --git a/man/eml_contact.Rd b/man/eml_contact.Rd index 2a88d60..7796621 100644 --- a/man/eml_contact.Rd +++ b/man/eml_contact.Rd @@ -7,13 +7,13 @@ eml_contact(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(contact) The new contact +(contact) The new contact. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ \dontrun{ diff --git a/man/eml_creator.Rd b/man/eml_creator.Rd index 5280013..491d7df 100644 --- a/man/eml_creator.Rd +++ b/man/eml_creator.Rd @@ -7,13 +7,13 @@ eml_creator(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(creator) The new creator +(creator) The new creator. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ \dontrun{ diff --git a/man/eml_geographic_coverage.Rd b/man/eml_geographic_coverage.Rd new file mode 100644 index 0000000..b44aafa --- /dev/null +++ b/man/eml_geographic_coverage.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_geographic_coverage} +\alias{eml_geographic_coverage} +\title{Create an EML geographicCoverage section} +\usage{ +eml_geographic_coverage(description, north, east, south, west) +} +\arguments{ +\item{description}{(character) A textual description.} + +\item{north}{(numeric) North bounding coordinate.} + +\item{east}{(numeric) East bounding coordinate.} + +\item{south}{(numeric) South bounding coordinate.} + +\item{west}{(numeric) West bounding coordinate.} +} +\value{ +(geographicCoverage) The new geographicCoverage section. +} +\description{ +A simple way to create an EML geographicCoverage section. +} +\details{ +For a bounding box, all coordinates should be unique. +For a single point, the North and South bounding coordinates should be the same and +the East and West bounding coordinates should be the same. +} diff --git a/man/eml_individual_name.Rd b/man/eml_individual_name.Rd index c59f165..409bdac 100644 --- a/man/eml_individual_name.Rd +++ b/man/eml_individual_name.Rd @@ -12,10 +12,10 @@ eml_individual_name(given_names = NULL, sur_name) \item{sur_name}{(character) A sur (last) name.} } \value{ -(individualName) The new individualName section +(individualName) The new individualName section. } \description{ -Create an EML individualName section +Create an EML individualName section. } \examples{ eml_individual_name("some", "user") diff --git a/man/eml_metadata_provider.Rd b/man/eml_metadata_provider.Rd index e85a878..19ca392 100644 --- a/man/eml_metadata_provider.Rd +++ b/man/eml_metadata_provider.Rd @@ -7,13 +7,13 @@ eml_metadata_provider(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(metadataProvider) The new metadataProvider +(metadataProvider) The new metadataProvider. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ eml_metadata_provider("test", "user", email = "test@user.com") diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd index 83a1164..875886a 100644 --- a/man/eml_otherEntity_to_dataTable.Rd +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -7,18 +7,17 @@ eml_otherEntity_to_dataTable(eml, otherEntity, validate_eml = TRUE) } \arguments{ -\item{eml}{(S4) An EML S4 object} +\item{eml}{(S4) An EML S4 object.} \item{otherEntity}{(S4 / integer) Either an EML otherEntity object or the index -of an otherEntity within a ListOfotherEntity. Integer input is recommended.} +of an otherEntity within a ListOfotherEntity. Integer input is recommended.} -\item{validate_eml}{(logical) Optional. Specify whether or not to validate the eml after -completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to -\code{FALSE} reduces execution time by ~ 50 percent.} +\item{validate_eml}{(logical) Optional. Whether or not to validate the EML after +completion. Setting this to \code{FALSE} reduces execution time by ~50 percent.} } \description{ -Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an -otherEntity objectas currently constructed - it does not add a physical or add attributes. +Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +otherEntity object as currently constructed - it does not add a physical or add attributes. However, if these are already in their respective slots, they will be retained. } \examples{ diff --git a/man/eml_party.Rd b/man/eml_party.Rd index ba02a50..bc8cef3 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{eml_party} \alias{eml_party} -\title{Low-level helper for creating EML parties} +\title{Create an EML party} \usage{ eml_party(type = "associatedParty", given_names = NULL, sur_name = NULL, organization = NULL, position = NULL, @@ -10,33 +10,32 @@ eml_party(type = "associatedParty", given_names = NULL, role = NULL) } \arguments{ -\item{type}{(character) The type of party (e.g. 'contact')} +\item{type}{(character) The type of party (e.g. 'contact').} -\item{given_names}{(character) The party's given name(s)} +\item{given_names}{(character) The party's given name(s).} -\item{sur_name}{(character) The party's surname} +\item{sur_name}{(character) The party's surname.} -\item{organization}{(character) The party's organization name} +\item{organization}{(character) The party's organization name.} -\item{position}{(character) The party's position} +\item{position}{(character) The party's position.} -\item{email}{(character) The party's email address(es)} +\item{email}{(character) The party's email address(es).} -\item{phone}{(character) The party's phone number(s)} +\item{phone}{(character) The party's phone number(s).} -\item{address}{(character) The party's address(es)} +\item{address}{(character) The party's address(es).} -\item{userId}{(character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ} +\item{userId}{(character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.} -\item{role}{(character) The party's role} +\item{role}{(character) The party's role.} } \value{ -An instance of the party specified by the in \code{type} argument +(party) An instance of the party specified by the \code{type} argument. } \description{ -You usually will want to use the high-level functions such as -\code{\link{eml_creator}} and \code{\link{eml_contact}} but using this is -fine. +You will usually want to use the high-level functions such as +\code{\link[=eml_creator]{eml_creator()}} and \code{\link[=eml_contact]{eml_contact()}} but using this is fine. } \details{ The \code{userId} argument assumes an ORCID so be sure to adjust for that. diff --git a/man/eml_personnel.Rd b/man/eml_personnel.Rd index 2fb6acd..18edae0 100644 --- a/man/eml_personnel.Rd +++ b/man/eml_personnel.Rd @@ -7,15 +7,15 @@ eml_personnel(role = NULL, ...) } \arguments{ -\item{role}{(character) Personnel role, eg "principalInvestigator"} +\item{role}{(character) Personnel role, e.g. "principalInvestigator".} -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(personnel) The new personnel +(personnel) The new personnel. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator") diff --git a/man/eml_project.Rd b/man/eml_project.Rd index f0a7b9f..1a0831e 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{eml_project} \alias{eml_project} -\title{Create an eml-project section.} +\title{Create an EML project section} \usage{ eml_project(title, personnelList, abstract = NULL, funding = NULL, studyAreaDescription = NULL, designDescription = NULL, @@ -29,6 +29,9 @@ contract numbers. Can pass as a character vector for separate paragraphs.} (project) The new project section. } \description{ +Create an EML project section. +} +\details{ Note - studyAreaDescription, designDescription, and relatedProject are not fully fleshed out. Need to pass these objects in directly if you want to use them. diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd index 35e0989..4082eaa 100644 --- a/man/eml_set_reference.Rd +++ b/man/eml_set_reference.Rd @@ -7,17 +7,16 @@ eml_set_reference(element_to_reference, element_to_replace) } \arguments{ -\item{element_to_reference}{(S4) An EML object to reference} +\item{element_to_reference}{(S4) An EML object to reference.} -\item{element_to_replace}{(S4) An EML object to replace with a reference} +\item{element_to_replace}{(S4) An EML object to replace with a reference.} } \description{ This function creates a new object with the same class as \code{element_to_replace} -using a reference to \code{element_to_reference} +using a reference to \code{element_to_reference}. } \examples{ \dontrun{ - cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) diff --git a/man/eml_set_shared_attributes.Rd b/man/eml_set_shared_attributes.Rd index c3d6bf6..6b0265c 100644 --- a/man/eml_set_shared_attributes.Rd +++ b/man/eml_set_shared_attributes.Rd @@ -8,11 +8,16 @@ eml_set_shared_attributes(eml, attributeList = NULL, type = "dataTable") } \arguments{ -\item{eml}{(S4) An EML S4 object} +\item{eml}{(eml) An EML object.} -\item{attributeList}{(S4) Optional. An EML attributeList object. If not provided then it will default to the attributeList of the first \code{type} element} +\item{attributeList}{(attributeList) Optional. An EML attributeList object. If not provided +then it will default to the attributeList of the first \code{type} element.} -\item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList objects with references. Defaults to 'dataTable'} +\item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList +objects with references. Defaults to 'dataTable'.} +} +\value{ +(eml) The modified EML document. } \description{ This function sets shared attributes using the attributes of the first \code{type} @@ -20,14 +25,12 @@ selected and creates references for all remaining objects of equivalent \code{ty } \examples{ \dontrun{ - cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) atts <- EML::set_attributes(EML::get_attributes(eml@dataset@dataTable[[1]]@attributeList)$attributes) eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') - } } \author{ diff --git a/man/eml_validate_attributes.Rd b/man/eml_validate_attributes.Rd index 89c8f9f..8788864 100644 --- a/man/eml_validate_attributes.Rd +++ b/man/eml_validate_attributes.Rd @@ -7,21 +7,17 @@ eml_validate_attributes(attributes) } \arguments{ -\item{attributes}{(attributeList) An attributeList} +\item{attributes}{(attributeList) An attributeList.} } \value{ -(boolean) Named vector of TRUE/FALSE indicating which attributes -are valid +(logical) Named vector indicating which attributes are valid. } \description{ The attributes passed into this function are validated one-by-one and the progress of going through each attribute is printed to the screen along -with any and all validation issues. -} -\details{ -This is done by, for each attribute in the list, creating a minimum valid -EML document and adding a new otherEntity with a new attributeList containing -the single attribute to be validated. +with any and all validation issues. This is done by, for each attribute in the list, +creating a minimum valid EML document and adding a new otherEntity with a new +attributeList containing the single attribute to be validated. } \examples{ \dontrun{ diff --git a/man/env_get.Rd b/man/env_get.Rd index a94169e..6cb434a 100644 --- a/man/env_get.Rd +++ b/man/env_get.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/environment.R \name{env_get} \alias{env_get} -\title{environment.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>} +\title{Get the current environment name} \usage{ env_get() } @@ -11,7 +10,5 @@ env_get() (character) The environment name. } \description{ -Functions related to loading configuriation based upon the environment -the code is being run under. Get the current environment name. } diff --git a/man/env_load.Rd b/man/env_load.Rd deleted file mode 100644 index c8e9d4a..0000000 --- a/man/env_load.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/environment.R -\name{env_load} -\alias{env_load} -\title{Load environmental variables from a YAML-formatted environment file.} -\usage{ -env_load(name = NULL, path = NULL, skip_mn = FALSE) -} -\arguments{ -\item{name}{(character) Optional. The environment name.} - -\item{path}{(character) Optional. Path to an environment file.} - -\item{skip_mn}{(logical) Optional. Skip contacting the MNode and filling in the $mn element of the environment.} -} -\value{ -(list) A list of name-value pairs. -} -\description{ -This file should be formatted in the following way: -} -\details{ -some_environment: - var_one: some value - var_two: some value - var_three: some value -} diff --git a/man/extract_local_identifier.Rd b/man/extract_local_identifier.Rd deleted file mode 100644 index f74ff62..0000000 --- a/man/extract_local_identifier.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{extract_local_identifier} -\alias{extract_local_identifier} -\title{Extract the local identifier for an ACADIS ISO metadata XML file.} -\usage{ -extract_local_identifier(type, file) -} -\arguments{ -\item{type}{(character) A string, one of "gateway" or "field-projects".} - -\item{file}{(character) A string, a connection, or raw vector -(same as \code{\link[xml2]{read_xml}}).} -} -\value{ -The identifier string. (character) -} -\description{ -Extract the local identifier for an ACADIS ISO metadata XML file. -} diff --git a/man/filter_obsolete_pids.Rd b/man/filter_obsolete_pids.Rd deleted file mode 100644 index 8d22bc8..0000000 --- a/man/filter_obsolete_pids.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{filter_obsolete_pids} -\alias{filter_obsolete_pids} -\title{Filters PIDs that are obsolete.} -\usage{ -filter_obsolete_pids(node, pids) -} -\arguments{ -\item{node}{(MNode|CNode) The Node to query.} - -\item{pids}{(character) PIDs to check the obsoletion state of.} -} -\value{ -(character) PIDs that are not obsoleted by another PID. -} -\description{ -Whether or not a PID is obsolete is determined by whether its "obsoletedBy" -property is set to another PID (TRUE) or is NA (FALSE). -} diff --git a/man/filter_packaging_statements.Rd b/man/filter_packaging_statements.Rd deleted file mode 100644 index 77e94e0..0000000 --- a/man/filter_packaging_statements.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{filter_packaging_statements} -\alias{filter_packaging_statements} -\title{Filter statements related to packaging} -\usage{ -filter_packaging_statements(statements) -} -\arguments{ -\item{statements}{(data.frame) A set of Statements to be filtered} -} -\value{ -(data.frame) The filtered Statements -} -\description{ -This is intended to be called after `datapack::getTriples` has been called -on a ResourceMap. -} -\details{ -This function was written specifically for the case of updating a resource -map while preserving any extra statements that have been added such as PROV -statements. Statements are filtered according to these rules: -} diff --git a/man/find_newest_object.Rd b/man/find_newest_object.Rd index ade4b1f..2d02ceb 100644 --- a/man/find_newest_object.Rd +++ b/man/find_newest_object.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/util.R \name{find_newest_object} \alias{find_newest_object} -\title{Find the newest (by dateUploaded) object within a given set of objects.} +\title{Find the newest object within the given set of objects} \usage{ find_newest_object(node, identifiers, rows = 1000) } \arguments{ -\item{node}{(MNode | CNode) The node to query} +\item{node}{(MNode/CNode) The Member Node to query.} -\item{identifiers}{(character) One or more identifiers} +\item{identifiers}{(character) One or more identifiers.} \item{rows}{(numeric) Optional. Specify the size of the query result set.} } @@ -18,7 +18,7 @@ find_newest_object(node, identifiers, rows = 1000) unlikely) the first element, in natural order, is returned. } \description{ -Find the newest (by dateUploaded) object within a given set of objects. +Find the newest object, based on dateUploaded, within the given set of objects. } \examples{ \dontrun{ diff --git a/man/find_newest_resource_map.Rd b/man/find_newest_resource_map.Rd deleted file mode 100644 index d96609e..0000000 --- a/man/find_newest_resource_map.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{find_newest_resource_map} -\alias{find_newest_resource_map} -\title{Get the resource map(s) for the given object.} -\usage{ -find_newest_resource_map(node, pid, rows = 1000) -} -\arguments{ -\item{node}{(MNode|CNode) The Node to query.} - -\item{pid}{(character) The object to get the resource map(s) for.} - -\item{rows}{(numeric) Optional. The number of query results to return. This -shouldn't need to be modified and the default, 1000, is very likely to be -more than enough.} -} -\value{ -(character) The resource map(s) that contain `pid`. -} -\description{ -Get the resource map(s) for the given object. -} diff --git a/man/fix_bad_enum.Rd b/man/fix_bad_enum.Rd deleted file mode 100644 index e170632..0000000 --- a/man/fix_bad_enum.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{fix_bad_enum} -\alias{fix_bad_enum} -\title{Fix a metadata record with a bad topicCategory.} -\usage{ -fix_bad_enum(path) -} -\arguments{ -\item{path}{(character) a path} -} -\description{ -This is the case where the ISO schema says what's inside a -gmd:MD_TopicCategoryCode element should match items from a controlled -vocabulary. But in the ISO metadata we have, there are newlines and spaces -around that text which causes a check for string equality to fail. i.e. -} -\details{ -'oceans' != ' oceans ' -} diff --git a/man/fix_bad_topic.Rd b/man/fix_bad_topic.Rd deleted file mode 100644 index 2d692d0..0000000 --- a/man/fix_bad_topic.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{fix_bad_topic} -\alias{fix_bad_topic} -\title{Fix a metadata record with multiple MD_TopicCategory children elements -inside a single topicCategory element.} -\usage{ -fix_bad_topic(path) -} -\arguments{ -\item{path}{(character) Path} -} -\description{ -Example: -} -\details{ -<gmd:topicCategory> - <gmd:MD_TopicCategoryCode>imageryBaseMapsEarthCover</gmd:MD_TopicCategoryCode> - <gmd:MD_TopicCategoryCode>oceans</gmd:MD_TopicCategoryCode> -</gmd:topicCategory> -} diff --git a/man/format_eml.Rd b/man/format_eml.Rd index 95dff20..da87e4d 100644 --- a/man/format_eml.Rd +++ b/man/format_eml.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dataone_formats.R +% Please edit documentation in R/formats.R \name{format_eml} \alias{format_eml} -\title{Helper function to generate the EML 2.1.1 format ID.} +\title{Generate the EML 2.1.1 format ID} \usage{ format_eml() } @@ -10,11 +10,10 @@ format_eml() (character) The format ID for EML 2.1.1. } \description{ -Helper function to generate the EML 2.1.1 format ID. +Returns the EML 2.1.1 format ID. } \examples{ -format_eml - +format_eml() \dontrun{ # Upload a local EML 2.1.1 file: env <- env_load() diff --git a/man/format_iso.Rd b/man/format_iso.Rd index ebe61b6..19443d7 100644 --- a/man/format_iso.Rd +++ b/man/format_iso.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dataone_formats.R +% Please edit documentation in R/formats.R \name{format_iso} \alias{format_iso} -\title{dataone_formats.R} +\title{Generate the ISO 19139 format ID} \usage{ format_iso() } @@ -10,12 +10,7 @@ format_iso() (character) The format ID for ISO 19139. } \description{ -A set of thin functions which return the DataONE format ID string. These are -to aid in filling in function arguments and can't remember or don't want to -type in the full format ID. By putting these format ID strings into -functions, a user's autocompletion routine in their editor can help them -fill in the format ID they want. -Helper function to generate the ISO 19139 format ID.w +Returns the ISO 19139 format ID. } \examples{ format_iso() diff --git a/man/generate_resource_map.Rd b/man/generate_resource_map.Rd index 1e5a3de..266c32d 100644 --- a/man/generate_resource_map.Rd +++ b/man/generate_resource_map.Rd @@ -2,9 +2,7 @@ % Please edit documentation in R/packaging.R \name{generate_resource_map} \alias{generate_resource_map} -\title{Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`.} +\title{Create a resource map RDF/XML file and save is to a temporary path} \usage{ generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL, other_statements = NULL, @@ -12,25 +10,24 @@ generate_resource_map(metadata_pid, data_pids = NULL, resource_map_pid = NULL) } \arguments{ -\item{metadata_pid}{(character) PID of the metadata Object.} +\item{metadata_pid}{(character) PID of the metadata object.} -\item{data_pids}{(character) PID(s) of the data Objects.} +\item{data_pids}{(character) PID(s) of the data objects.} -\item{child_pids}{(character) Optional. PID(s) of child Resource Maps.} +\item{child_pids}{(character) Optional. PID(s) of child resource maps.} -\item{other_statements}{(data.frame) Extra statements to add to the Resource Map.} +\item{other_statements}{(data.frame) Extra statements to add to the resource map.} \item{resolve_base}{(character) Optional. The resolve service base URL.} -\item{resource_map_pid}{(character) PID of resource map.} +\item{resource_map_pid}{(character) The PID of a resource map.} } \value{ -Absolute path to the Resource Map on disk (character) +(character) Absolute path to the resource map on disk. } \description{ -Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`. +This is a convenience wrapper around the constructor of the \code{ResourceMap} +class from \code{DataPackage}. } \examples{ \dontrun{ diff --git a/man/generate_resource_map_pid.Rd b/man/generate_resource_map_pid.Rd deleted file mode 100644 index b22d24c..0000000 --- a/man/generate_resource_map_pid.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{generate_resource_map_pid} -\alias{generate_resource_map_pid} -\title{Generate a PID for a new resource map by appending "resource_map_" to it.} -\usage{ -generate_resource_map_pid(metadata_pid) -} -\arguments{ -\item{metadata_pid}{(character) A metadata pid} -} -\description{ -Generate a PID for a new resource map by appending "resource_map_" to it. -} diff --git a/man/get_all_sysmeta.Rd b/man/get_all_sysmeta.Rd index dbc1bc2..4eee884 100644 --- a/man/get_all_sysmeta.Rd +++ b/man/get_all_sysmeta.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers.R +% Please edit documentation in R/sysmeta.R \name{get_all_sysmeta} \alias{get_all_sysmeta} \title{Get system metadata for all elements of a data package} diff --git a/man/get_all_versions.Rd b/man/get_all_versions.Rd index c16a2a6..87b4a75 100644 --- a/man/get_all_versions.Rd +++ b/man/get_all_versions.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/util.R \name{get_all_versions} \alias{get_all_versions} -\title{Get the PIDs of all versions of an object.} +\title{Get the PIDs of all versions of an object} \usage{ get_all_versions(node, pid) } \arguments{ -\item{node}{(MNode|CNode) The node to query.} +\item{node}{(MNode) The Member Node to query.} \item{pid}{(character) Any object in the chain.} } @@ -19,7 +19,6 @@ Get the PIDs of all versions of an object. } \examples{ \dontrun{ -#Set environment cn <- CNode("STAGING2") mn <- getMNode(cn,"urn:node:mnTestKNB") pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" diff --git a/man/get_current_version.Rd b/man/get_current_version.Rd deleted file mode 100644 index 39e3849..0000000 --- a/man/get_current_version.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_current_version} -\alias{get_current_version} -\title{Get the current package version.} -\usage{ -get_current_version() -} -\value{ -(character) The current package version. -} -\description{ -This function parses the installed DESCRIPTION file to get the latest -version. -} diff --git a/man/get_doc_id.Rd b/man/get_doc_id.Rd deleted file mode 100644 index d93bc9f..0000000 --- a/man/get_doc_id.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{get_doc_id} -\alias{get_doc_id} -\title{Get the Metacat docid for the given identifier} -\usage{ -get_doc_id(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The sysmeta of the object you want to find.} -} -\value{ -(character) The docid -} -\description{ -Get the Metacat docid for the given identifier -} diff --git a/man/get_formats.Rd b/man/get_formats.Rd deleted file mode 100644 index 179641d..0000000 --- a/man/get_formats.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/formats.R -\name{get_formats} -\alias{get_formats} -\title{Get the list of valid formats from DataONE} -\usage{ -get_formats(url = "https://cn.dataone.org/cn/v2/formats") -} -\arguments{ -\item{url}{(character) The listFormats endpoint. Defaults to the production -CN} -} -\value{ -(character) -} -\description{ -Note that this function is intended to return even if the request to the CN -fails. This is so other functions can call continue even if the request -fails. -} diff --git a/man/get_identifier.Rd b/man/get_identifier.Rd deleted file mode 100644 index cadcff4..0000000 --- a/man/get_identifier.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_identifier} -\alias{get_identifier} -\title{Get the identifier from a DataONE response.} -\usage{ -get_identifier(dataone_response) -} -\arguments{ -\item{dataone_response}{("XMLInternalDocument" "XMLAbstractDocument")} -} -\value{ -(character) The PID. -} -\description{ -Example resposne: -} -\details{ -<d1:identifier xmlns:d1="http://ns.dataone.org/service/types/v1"> - urn:uuid:12aaf494-5840-434d-9cdb-c2597d58543e -</d1:identifier> -} diff --git a/man/get_latest_release.Rd b/man/get_latest_release.Rd deleted file mode 100644 index ec17dfb..0000000 --- a/man/get_latest_release.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_latest_release} -\alias{get_latest_release} -\title{Use the GitHub API to find the latest release for the package.} -\usage{ -get_latest_release() -} -\value{ -(character) The latest release. -} -\description{ -Use the GitHub API to find the latest release for the package. -} diff --git a/man/get_mn_base_url.Rd b/man/get_mn_base_url.Rd index 6611501..46dd487 100644 --- a/man/get_mn_base_url.Rd +++ b/man/get_mn_base_url.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/dataone.R \name{get_mn_base_url} \alias{get_mn_base_url} -\title{Get the base URL of the Member Node.} +\title{Get base URL of a Member Node} \usage{ get_mn_base_url(mn) } \arguments{ -\item{mn}{(character) A mn instance} +\item{mn}{(character) The Member Node.} } \value{ -(character) The URL +(character) The URL. } \description{ -Get the base URL of the Member Node. +Get the base URL of a Member Node. } \examples{ \dontrun{ diff --git a/man/get_ncdf4_attributes.Rd b/man/get_ncdf4_attributes.Rd index cfbf874..87370e3 100644 --- a/man/get_ncdf4_attributes.Rd +++ b/man/get_ncdf4_attributes.Rd @@ -7,13 +7,13 @@ get_ncdf4_attributes(nc) } \arguments{ -\item{nc}{(ncdf4 or character) Either a ncdf4 object or a file path} +\item{nc}{(ncdf4/character) Either a ncdf4 object or a file path.} } \value{ -(data.frame) A data.frame of the attributes +(data.frame) A data.frame of the attributes. } \description{ -Get a data.frame of attributes from a NetCDF object +Get a data.frame of attributes from a NetCDF object. } \examples{ \dontrun{ diff --git a/man/get_netcdf_format_id.Rd b/man/get_netcdf_format_id.Rd deleted file mode 100644 index 9a51c68..0000000 --- a/man/get_netcdf_format_id.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_netcdf_format_id} -\alias{get_netcdf_format_id} -\title{Determine the DataONE format ID for the NetCDF file provided by path.} -\usage{ -get_netcdf_format_id(path) -} -\arguments{ -\item{path}{(character) Full or relative path to the file in question.} -} -\value{ -(character) The DataONE format ID. -} -\description{ -Determine the DataONE format ID for the NetCDF file provided by path. -} diff --git a/man/get_or_create_pid.Rd b/man/get_or_create_pid.Rd deleted file mode 100644 index 9836714..0000000 --- a/man/get_or_create_pid.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{get_or_create_pid} -\alias{get_or_create_pid} -\title{Get the already-minted PID from the inventory or mint a new one.} -\usage{ -get_or_create_pid(file, mn, scheme = "UUID") -} -\arguments{ -\item{file}{(data.frame) A single row from the inventory.} - -\item{mn}{(MNode) The Member Node that will mint the new PID, if needed.} - -\item{scheme}{(character) The identifier scheme to use.} -} -\value{ -The identifier (character) -} -\description{ -Get the already-minted PID from the inventory or mint a new one. -} diff --git a/man/get_package.Rd b/man/get_package.Rd index ce7bb8c..537d3dc 100644 --- a/man/get_package.Rd +++ b/man/get_package.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/util.R \name{get_package} \alias{get_package} -\title{Get a structured list of PIDs for the objects in a package.} +\title{Get a structured list of PIDs for the objects in a package} \usage{ get_package(node, pid, file_names = FALSE, rows = 5000) } \arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} +\item{node}{(MNode/CNode) The Coordinating/Member Node to run the query on.} \item{pid}{(character) The the resource map PID of the package.} @@ -20,8 +20,8 @@ useful to set if you are warned about the result set being truncated. Defaults t (list) A structured list of the members of the package. } \description{ -This is a wrapper function around `get_package_direct` which takes either -a resource map PID or a metadata PID as its `pid` argument. +Get a structured list of PIDs for the objects in a package, +including the resource map, metadata, and data objects. } \examples{ \dontrun{ diff --git a/man/get_package_direct.Rd b/man/get_package_direct.Rd deleted file mode 100644 index 639cd9e..0000000 --- a/man/get_package_direct.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_package_direct} -\alias{get_package_direct} -\title{Get a structured list of PIDs for the objects in a package.} -\usage{ -get_package_direct(node, pid, file_names = FALSE, rows = 5000) -} -\arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} - -\item{pid}{(character) The the metadata PID of the package.} - -\item{file_names}{(logical) Whether to return file names for all objects.} - -\item{rows}{(numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated. Defaults to 5000.} -} -\description{ -Get a structured list of PIDs for the objects in a package. -} diff --git a/man/get_token.Rd b/man/get_token.Rd index ac66603..b48ca1d 100644 --- a/man/get_token.Rd +++ b/man/get_token.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/dataone.R \name{get_token} \alias{get_token} -\title{Gets the currently set authentication token.} +\title{Get the currently set authentication token} \usage{ get_token(node) } \arguments{ -\item{node}{(MNode|CNode) The CN or MN you want to find a token for.} +\item{node}{(MNode/CNode) The Member/Coordinating Node to query.} } \value{ (character) The token. } \description{ -Gets the currently set authentication token. +Get the currently set authentication token. } \examples{ \dontrun{ diff --git a/man/get_token_subject.Rd b/man/get_token_subject.Rd deleted file mode 100644 index 47ebf56..0000000 --- a/man/get_token_subject.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_token_subject} -\alias{get_token_subject} -\title{Returns the subject of the set dataone_test_token} -\usage{ -get_token_subject() -} -\value{ -(character) The token subject. -} -\description{ -Returns the subject of the set dataone_test_token -} diff --git a/man/guess_format_id.Rd b/man/guess_format_id.Rd index f6e02ff..c637862 100644 --- a/man/guess_format_id.Rd +++ b/man/guess_format_id.Rd @@ -1,21 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R +% Please edit documentation in R/formats.R \name{guess_format_id} \alias{guess_format_id} -\title{Guess format from filename for a vector of filenames.} +\title{Guess format from filename} \usage{ guess_format_id(filenames) } \arguments{ -\item{filenames}{(character)} +\item{filenames}{(character) A vector of filenames.} } \value{ -(character) DataOne format identifiers strings. +(character) DataONE format IDs. } \description{ Guess format from filename for a vector of filenames. } \examples{ formatid <- guess_format_id("temperature_data.csv") - } diff --git a/man/insert_file.Rd b/man/insert_file.Rd deleted file mode 100644 index f2f7f2a..0000000 --- a/man/insert_file.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{insert_file} -\alias{insert_file} -\title{package.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>} -\usage{ -insert_file(inventory, file, env = NULL) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} - -\item{file}{(character) The fully-qualified relative path to the file. See examples.} - -\item{env}{(list) Optional. Specify an environment.} -} -\description{ -Code related to inserting datasets as Data Packages. -Insert a file from a single row of the Inventory. -} diff --git a/man/insert_package.Rd b/man/insert_package.Rd deleted file mode 100644 index 5a907ad..0000000 --- a/man/insert_package.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{insert_package} -\alias{insert_package} -\title{Create a single package Data Package from files in the Inventory.} -\usage{ -insert_package(inventory, package, env = NULL) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} - -\item{package}{(character) The package identifier.} - -\item{env}{(list) Environment variables.} -} -\value{ -A list containing PIDs and whether objects were inserted. (list) -} -\description{ -Create a single package Data Package from files in the Inventory. -} diff --git a/man/inv_add_extra_columns.Rd b/man/inv_add_extra_columns.Rd deleted file mode 100644 index 1caf78b..0000000 --- a/man/inv_add_extra_columns.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_add_extra_columns} -\alias{inv_add_extra_columns} -\title{Adds a set of extra columsn to the inventory that are useful for working -with them.} -\usage{ -inv_add_extra_columns(inventory) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} -} -\value{ -An inventory (data.frame) -} -\description{ -Adds a set of extra columsn to the inventory that are useful for working -with them. -} diff --git a/man/inv_add_parent_package_column.Rd b/man/inv_add_parent_package_column.Rd deleted file mode 100644 index 83ec12c..0000000 --- a/man/inv_add_parent_package_column.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_add_parent_package_column} -\alias{inv_add_parent_package_column} -\title{Add a column for parent packages.} -\usage{ -inv_add_parent_package_column(inventory) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} -} -\value{ -inventory (data.frame) An Inventory. -} -\description{ -Add a column for parent packages. -} diff --git a/man/inv_init.Rd b/man/inv_init.Rd deleted file mode 100644 index 17450b9..0000000 --- a/man/inv_init.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_init} -\alias{inv_init} -\title{inventory.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>} -\usage{ -inv_init() -} -\value{ -An empty data frame -} -\description{ -Functions relating to keeping up an inventory of files that exist on the KNB -and may or may not be copied to another computer and untarred. -} -\details{ -Create an empty inventory data.frame. This doesn't need to be a function -but I'm making it one in case the initialization routine becomes more -complicated. -} diff --git a/man/inv_load_checksums.Rd b/man/inv_load_checksums.Rd deleted file mode 100644 index f88a278..0000000 --- a/man/inv_load_checksums.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_checksums} -\alias{inv_load_checksums} -\title{Load checksums into the inventory file from a text file. This function -removes the column 'checksum_sha256' from inventory before doing a -left join.} -\usage{ -inv_load_checksums(inventory, path) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{path}{(character) Path to a file containing sizes.} -} -\value{ -An inventory (data.frame) -} -\description{ -Load checksums into the inventory file from a text file. This function -removes the column 'checksum_sha256' from inventory before doing a -left join. -} diff --git a/man/inv_load_dois.Rd b/man/inv_load_dois.Rd deleted file mode 100644 index c16bece..0000000 --- a/man/inv_load_dois.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_dois} -\alias{inv_load_dois} -\title{Load DOIs from a text file into the Inventory.} -\usage{ -inv_load_dois(inventory, path) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{path}{Location of a text file with DOIs and file paths. (character)} -} -\value{ -(data.frame) The modified Inventory. -} -\description{ -Load DOIs from a text file into the Inventory. -} diff --git a/man/inv_load_files.Rd b/man/inv_load_files.Rd deleted file mode 100644 index 7a53821..0000000 --- a/man/inv_load_files.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_files} -\alias{inv_load_files} -\title{Load files into the inventory from a text file.} -\usage{ -inv_load_files(inventory, path, filter = TRUE) -} -\arguments{ -\item{inventory}{(character) A \code{data.frame}.} - -\item{path}{(character) Path to a file containing a file listing.} - -\item{filter}{(logical) Filter out versioned datasets. Default is TRUE.} -} -\value{ -An inventory (data.frame) -} -\description{ -Files should be the output of the command: -} -\details{ -you@server:/path/to/acadis$ find . -type f -} diff --git a/man/inv_load_identifiers.Rd b/man/inv_load_identifiers.Rd deleted file mode 100644 index 655bf78..0000000 --- a/man/inv_load_identifiers.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_identifiers} -\alias{inv_load_identifiers} -\title{Load identifiers into the inventory file(s) from a text file. This function -removes the column 'identifier' from inventory before doing a -left join.} -\usage{ -inv_load_identifiers(inventory, paths) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{paths}{(character) Path(s) to files containing identifiers.} -} -\value{ -(data.frame) An inventory. -} -\description{ -Load identifiers into the inventory file(s) from a text file. This function -removes the column 'identifier' from inventory before doing a -left join. -} diff --git a/man/inv_load_sizes.Rd b/man/inv_load_sizes.Rd deleted file mode 100644 index e9a6dce..0000000 --- a/man/inv_load_sizes.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_sizes} -\alias{inv_load_sizes} -\title{Load file sizes into an inventory from a text file. Removes the column -'size_bytes' from inventory before doing a left join.} -\usage{ -inv_load_sizes(inventory, path) -} -\arguments{ -\item{inventory}{(data.frame) inventory A \code{data.frame}.} - -\item{path}{(character) Path to a file containing sizes.} -} -\value{ -(data.frame) An inventory -} -\description{ -Load file sizes into an inventory from a text file. Removes the column -'size_bytes' from inventory before doing a left join. -} diff --git a/man/inv_update.Rd b/man/inv_update.Rd deleted file mode 100644 index c98c210..0000000 --- a/man/inv_update.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_update} -\alias{inv_update} -\title{Update an Inventory with a new Inventory.} -\usage{ -inv_update(inventory, new_state) -} -\arguments{ -\item{inventory}{(data.frame) The old Inventory.} - -\item{new_state}{(data.frame) The new Inventory.} -} -\description{ -Update an Inventory with a new Inventory. -} diff --git a/man/is_authorized.Rd b/man/is_authorized.Rd index ff82f15..f502cd1 100644 --- a/man/is_authorized.Rd +++ b/man/is_authorized.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/dataone.R \name{is_authorized} \alias{is_authorized} -\title{Check if the user has authorization to perform an action on an object.} +\title{Check if user has authorization to perform an action on an object} \usage{ is_authorized(node, ids, action) } \arguments{ -\item{node}{(MNode|CNode) The Node to query.} +\item{node}{(MNode/CNode) The Member/Coordinating Node to query.} \item{ids}{(character) The PID or SID to check.} \item{action}{(character) One of read, write, or changePermission.} } \value{ -(boolean) +(logical) } \description{ Check if the user has authorization to perform an action on an object. diff --git a/man/is_format_id.Rd b/man/is_format_id.Rd deleted file mode 100644 index c74e975..0000000 --- a/man/is_format_id.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{is_format_id} -\alias{is_format_id} -\title{Test whether an object is a particular format ID.} -\usage{ -is_format_id(node, pids, format_id) -} -\arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} - -\item{pids}{(character)} - -\item{format_id}{(character)} -} -\value{ -(logical) -} -\description{ -Test whether an object is a particular format ID. -} diff --git a/man/is_obsolete.Rd b/man/is_obsolete.Rd index 18161be..ae55144 100644 --- a/man/is_obsolete.Rd +++ b/man/is_obsolete.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{is_obsolete} \alias{is_obsolete} -\title{Test whether the object is obsoleted by another object.} +\title{Test whether the object is obsoleted by another object} \usage{ is_obsolete(node, pids) } @@ -15,7 +15,7 @@ is_obsolete(node, pids) (logical) Whether or not the object is obsoleted by another object. } \description{ -Test whether the object is obsoleted by another object. +Test whether the object is obsoleted by another object } \examples{ \dontrun{ diff --git a/man/is_public_read.Rd b/man/is_public_read.Rd index fbee7b9..0f3851e 100644 --- a/man/is_public_read.Rd +++ b/man/is_public_read.Rd @@ -11,7 +11,7 @@ is_public_read(mn, pids, use.names = TRUE) \item{pids}{(character) The PIDs of the objects to check for public read access.} -\item{use.names}{(logical) Optional. If `TRUE` (the default), PIDs will +\item{use.names}{(logical) If \code{TRUE}, PIDs will be used as names for the result unless PIDs have names already, in which case those names will be used for the result.} } diff --git a/man/is_resource_map.Rd b/man/is_resource_map.Rd deleted file mode 100644 index 8be6761..0000000 --- a/man/is_resource_map.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{is_resource_map} -\alias{is_resource_map} -\title{Determines whether the object with the given PID is a resource map.} -\usage{ -is_resource_map(node, pids) -} -\arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} - -\item{pids}{(character) Vector of PIDs} -} -\value{ -(logical) Whether or not the object(s) are resource maps -} -\description{ -Determines whether the object with the given PID is a resource map. -} diff --git a/man/is_token_expired.Rd b/man/is_token_expired.Rd index 7a4c5a0..7df7e0e 100644 --- a/man/is_token_expired.Rd +++ b/man/is_token_expired.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/dataone.R \name{is_token_expired} \alias{is_token_expired} -\title{Determine whether the set token is expired.} +\title{Determine whether token is expired} \usage{ is_token_expired(node) } \arguments{ -\item{node}{(character) A member node instance} +\item{node}{(character) The Member Node.} } \value{ (logical) diff --git a/man/is_token_set.Rd b/man/is_token_set.Rd index 46b036c..022ec60 100644 --- a/man/is_token_set.Rd +++ b/man/is_token_set.Rd @@ -2,18 +2,17 @@ % Please edit documentation in R/dataone.R \name{is_token_set} \alias{is_token_set} -\title{dataone.R} +\title{Test whether a token is set} \usage{ is_token_set(node) } \arguments{ -\item{node}{(MNode|CNode) The CN or MN you want to find a token for.} +\item{node}{(MNode/CNode) The Member/Coordinating Node to query.} } \value{ (logical) } \description{ -Helpers for the DataONE R package. Test whether a token is set. } \examples{ diff --git a/man/log_message.Rd b/man/log_message.Rd deleted file mode 100644 index aac3eee..0000000 --- a/man/log_message.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{log_message} -\alias{log_message} -\title{Log a message to the console and to a logfile.} -\usage{ -log_message(message = NULL) -} -\arguments{ -\item{message}{(character) Your log message.} -} -\value{ -Nothing. -} -\description{ -Reads from the environment variable 'LOG_PATH' and uses the value set there -to decide the location of the log file. If that envvar isn't set, it defaults -to 'arcticdata-log.txt'. -} diff --git a/man/mdq_run.Rd b/man/mdq_run.Rd index 25afd34..7c06493 100644 --- a/man/mdq_run.Rd +++ b/man/mdq_run.Rd @@ -2,20 +2,21 @@ % Please edit documentation in R/quality.R \name{mdq_run} \alias{mdq_run} -\title{Score a metadata document against a MetaDIG Suite} +\title{Score a metadata document against a MetaDIG suite} \usage{ mdq_run(document, suite_id = "arctic.data.center.suite.1") } \arguments{ -\item{document}{(eml or character) Either an EML object or path to a file on disk.} +\item{document}{(eml/character) Either an EML object or path to a file on disk.} -\item{suite_id}{(character) Optional. Specificy a suite ID. Should be one of https://quality.nceas.ucsb.edu/quality/suites} +\item{suite_id}{(character) Specify a suite ID. Should be one of \url{https://quality.nceas.ucsb.edu/quality/suites}.} } \value{ -(data.frame) A sorted table of Check results +(data.frame) A sorted data.frame of check results. } \description{ -Score a metadata document against a MetaDIG Suite +This function scores a metadata document against a MetaDIG suite. +The default suite is for the Arctic Data Center. } \examples{ \dontrun{ diff --git a/man/new_uuid.Rd b/man/new_uuid.Rd index 1c497ea..608486e 100644 --- a/man/new_uuid.Rd +++ b/man/new_uuid.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{new_uuid} \alias{new_uuid} -\title{Helper function to generate a new UUID PID.} +\title{Generate a new UUID PID} \usage{ new_uuid() } @@ -10,7 +10,7 @@ new_uuid() (character) A new UUID PID. } \description{ -Helper function to generate a new UUID PID. +Generate a new UUID PID. } \examples{ id <- new_uuid() diff --git a/man/object_exists.Rd b/man/object_exists.Rd index 201b1ad..805db74 100644 --- a/man/object_exists.Rd +++ b/man/object_exists.Rd @@ -2,21 +2,21 @@ % Please edit documentation in R/util.R \name{object_exists} \alias{object_exists} -\title{Check if an object exists on a Member Node.} +\title{Check if an object exists on a Member Node} \usage{ object_exists(node, pids) } \arguments{ -\item{node}{(MNode|CNode) The Node to query.} +\item{node}{(MNode) The Member Node to query.} -\item{pids}{(character) PID to check the existence of.} +\item{pids}{(character) The PID(s) to check the existence of.} } \value{ (logical) Whether the object exists. } \description{ This is a simple check for the HTTP status of a /meta/{PID} call on the -provided member node. +provided Member Mode. } \examples{ \dontrun{ diff --git a/man/parse_resource_map.Rd b/man/parse_resource_map.Rd index b222499..906f8a1 100644 --- a/man/parse_resource_map.Rd +++ b/man/parse_resource_map.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/packaging.R \name{parse_resource_map} \alias{parse_resource_map} -\title{Parse a Resource Map into a data.frame} +\title{Parse a resource map into a data.frame} \usage{ parse_resource_map(path) } \arguments{ -\item{path}{(character) Path to the resource map (an RDF/XML file)} +\item{path}{(character) Path to the resource map (an RDF/XML file).} } \value{ -(data.frame) The statements in the Resource Map +(data.frame) The statements in the resource map. } \description{ -Parse a Resource Map into a data.frame +Parse a resource map into a data.frame. } \examples{ \dontrun{ diff --git a/man/path_join.Rd b/man/path_join.Rd deleted file mode 100644 index 8f74594..0000000 --- a/man/path_join.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{path_join} -\alias{path_join} -\title{(Intelligently) join (possibly redudant) path parts together.} -\usage{ -path_join(path_parts = c("")) -} -\arguments{ -\item{path_parts}{(character)} -} -\value{ -(character)The joined path string. -} -\description{ -Joins path strings like "./" to "./my/dir" as "./my/dir" instead of as -"././my/dir. -} diff --git a/man/pid_to_eml_datatable.Rd b/man/pid_to_eml_datatable.Rd deleted file mode 100644 index 55020c1..0000000 --- a/man/pid_to_eml_datatable.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{pid_to_eml_datatable} -\alias{pid_to_eml_datatable} -\title{This function is deprecated. See \link{pid_to_eml_entity}.} -\usage{ -pid_to_eml_datatable(mn, pid, attributes = NULL, factors = NULL, - name = NULL, description = NULL, validateAttributes = TRUE) -} -\arguments{ -\item{mn}{(MNode) Member Node where the PID is associated with an object.} - -\item{pid}{(character) The PID of the object to create the \code{dataTable} for.} - -\item{attributes}{(data.frame) Optional data frame of attributes. Follows the convention in \link[EML]{set_attributes}.} - -\item{factors}{(data.frame) Optional data frame of enumerated attribute values (factors). Follows the convention in \link[EML]{set_attributes}.} - -\item{name}{(character) Optional field to specify \code{entityName}, otherwise will be extracted from system metadata.} - -\item{description}{(character) Optional field to specify \code{entityDescription}, otherwise will match name.} - -\item{validateAttributes}{(logical) If set to FALSE or if attributes are not passed into the function, attribute validatio} -} -\description{ -This function is deprecated. See \link{pid_to_eml_entity}. -} diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd index 3d9e4e1..252ec24 100644 --- a/man/pid_to_eml_entity.Rd +++ b/man/pid_to_eml_entity.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_entity} \alias{pid_to_eml_entity} -\title{eml.R} +\title{Create EML entity from a DataONE PID} \usage{ pid_to_eml_entity(mn, pid, entityType = "otherEntity", ...) } @@ -11,26 +11,24 @@ pid_to_eml_entity(mn, pid, entityType = "otherEntity", ...) \item{pid}{(character) The PID of the object to create the sub-tree for.} -\item{entityType}{(character) What kind of objects to create from the input. Either "dataTable", -"spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity".} +\item{entityType}{(character) What kind of objects to create from the input. One of "dataTable", +"spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".} -\item{...}{(optional) Additional arguments to be passed to \code{new(entityType, ...)}. See example} +\item{...}{(optional) Additional arguments to be passed to \code{new(entityType, ...)}.} } \value{ -(list) The entity object +(list) The entity object. } \description{ -Helpers for creating EML. -Create EML entity from a DataONE pid +Create EML entity from a DataONE PID } \examples{ \dontrun{ -#Generate EML otherEntity +# Generate EML otherEntity pid_to_eml_entity(mn, pid, entityType = "otherEntity", entityName = "Entity Name", entityDescription = "Description about entity") - } } diff --git a/man/pid_to_eml_other_entity.Rd b/man/pid_to_eml_other_entity.Rd deleted file mode 100644 index ebc80fa..0000000 --- a/man/pid_to_eml_other_entity.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{pid_to_eml_other_entity} -\alias{pid_to_eml_other_entity} -\title{This function is deprecated. See \link{pid_to_eml_entity}.} -\usage{ -pid_to_eml_other_entity(mn, pids) -} -\arguments{ -\item{mn}{(MNode) Member Node where the PID is associated with an object.} - -\item{pids}{(character) The PID of the object to create the sub-tree for.} -} -\description{ -This function is deprecated. See \link{pid_to_eml_entity}. -} diff --git a/man/pid_to_eml_physical.Rd b/man/pid_to_eml_physical.Rd index 3b8d353..5ab4946 100644 --- a/man/pid_to_eml_physical.Rd +++ b/man/pid_to_eml_physical.Rd @@ -12,11 +12,11 @@ pid_to_eml_physical(mn, pids) \item{pids}{(character) The PID of the object to create the sub-tree for.} } \value{ -(list of otherEntity) The otherEntity object(s) +(list) A list of otherEntity object(s). } \description{ -Note this is a wrapper around sysmeta_to_eml_physical which handles the task of -creating the EML physical +This is a wrapper around \code{\link[=sysmeta_to_eml_physical]{sysmeta_to_eml_physical()}} which handles the task of +creating the EML physical. } \examples{ \dontrun{ diff --git a/man/pretty_print.Rd b/man/pretty_print.Rd deleted file mode 100644 index f000c7e..0000000 --- a/man/pretty_print.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{pretty_print} -\alias{pretty_print} -\title{Uses XMLStarlet to pretty-print/beautify an XML document.} -\usage{ -pretty_print(path) -} -\arguments{ -\item{path}{Path to your file you want pretty-printed. (character)} -} -\value{ -Returns the result of the `system` command (0 = success) -} -\description{ -This command just runs `xmlstarlet path > path`, doing a simple -pretty-printing of the file located at `path`. -} -\details{ -Note that this function is doing an in-place pretty printing instead of -returning the pretty-printed text. - -Note that this command uses a temporary file as an intermediate step in the -pretty-printing process. For some reason, when running xmlstarlet from within -R, the same file can't be used as the input to `xmlstarlet format` and as the -shell redirection file (`> somefile.txt`). If you try to run `xmlstarlet -format` on the same file as you redirect to, you get a weird parse error from -xmlstarlet. -} diff --git a/man/publish_object.Rd b/man/publish_object.Rd index 0c42f28..f779ff2 100644 --- a/man/publish_object.Rd +++ b/man/publish_object.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{publish_object} \alias{publish_object} -\title{editing.R} +\title{Publish an object on a Member Node} \usage{ publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL, clone_pid = NULL, public = TRUE) @@ -10,28 +10,26 @@ publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL, \arguments{ \item{mn}{(MNode) The Member Node to publish the object to.} -\item{path}{the path to the file to be published} +\item{path}{(character) The path to the file to be published.} -\item{format_id}{(character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} +\item{format_id}{(character) Optional. The format ID to set for the object. +When not set, \code{\link[=guess_format_id]{guess_format_id()}} will be used to guess the format ID. +Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} \item{pid}{(character) Optional. The PID to use with the object.} \item{sid}{(character) Optional. The SID to use with the new object.} -\item{clone_pid}{(character) PID of objet to clone System Metadata from} +\item{clone_pid}{(character) PID of object to clone System Metadata from.} -\item{public}{(logical) TRUE/FALSE Whether object should be given public read access.} +\item{public}{(logical) Whether object should be given public read access.} } \value{ -pid (character). The PID of the published object. +pid (character) The PID of the published object. } \description{ -High-level functions for managing content. -Publish an object on a member node -} -\details{ -Use sensible defaults to publish an object on a member node. If identifier is provided, -use it, otherwise generate a UUID. If clone_id is provided, then retrieve the +Use sensible defaults to publish an object on a Member Node. If identifier is provided, +use it, otherwise generate a UUID. If clone_id is provided, then retrieve the system metadata for that identifier and use it to provide rightsHolder, accessPolicy, and replicationPolicy metadata. Note that this function only uploads the object to the Member Node, and does not add it to a data package, which can be done separately. diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 4597dc5..49fbb69 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{publish_update} \alias{publish_update} -\title{Publish an updated data package.} +\title{Publish an updated data package} \usage{ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, child_pids = NULL, metadata_path = NULL, identifier = NULL, @@ -21,47 +21,55 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, \item{child_pids}{(character) Optional. Child packages resource map PIDs.} -\item{metadata_path}{(character or eml) Optional. An eml class object or a path to a metadata file to update with. If this is not set, the existing metadata document will be used.} +\item{metadata_path}{(character or eml) Optional. An eml class object or a path to a metadata file to update with. +If this is not set, the existing metadata document will be used.} \item{identifier}{(character) Manually specify the identifier for the new metadata object.} \item{use_doi}{(logical) Generate and use a DOI as the identifier for the updated metadata object.} -\item{parent_resmap_pid}{(character) Optional. PID of a parent package to be updated. Not optional if a parent package exists.} +\item{parent_resmap_pid}{(character) Optional. PID of a parent package to be updated. +Not optional if a parent package exists.} -\item{parent_metadata_pid}{(character) Optional. Identifier for the metadata document of the parent package. Not optional if a parent package exists.} +\item{parent_metadata_pid}{(character) Optional. Identifier for the metadata document of the parent package. +Not optional if a parent package exists.} -\item{parent_data_pids}{(character) Optional. Identifier for the data objects of the parent package. Not optional if the parent package contains data objects.} +\item{parent_data_pids}{(character) Optional. Identifier for the data objects of the parent package. +Not optional if the parent package contains data objects.} -\item{parent_child_pids}{(character) Optional. Resource map identifier(s) of child packages in the parent package. \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages.} +\item{parent_child_pids}{(character) Optional. Resource map identifier(s) of child packages in the parent package. +\code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages.} -\item{public}{(logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). +\item{public}{(logical) Optional. Make the update public. If \code{FALSE}, will set the metadata and resource map to private (but not the data objects). This applies to the new metadata PID and its resource map and data object. access policies are not affected.} -\item{check_first}{(logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements.} +\item{check_first}{(logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. +Checks that objects exist and are of the right format type. This speeds up the function, especially when \code{data_pids} has many elements.} } \value{ -pids (character) Named character vector of pids in the data package, including pids for the metadata, resource map, and data objects. +(character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. } \description{ -This function can be used for a variety of tasks: +Publish an update to a data package after updating data files or metadata. } \details{ +This function can be used for a variety of tasks: + \itemize{ - \item Publish an existing package with a DOI - \item Update a package with new data objects - \item Update a package with new metadata +\item Publish an existing package with a DOI +\item Update a package with new data objects +\item Update a package with new metadata } The metadata_pid and resource_map_pid provide the identifier of an EML metadata document and associated resource map, and the data_pids vector provides a list of PIDs of data objects in the package. Update the metadata file and resource map -by generating a new identifier (a DOI if use_doi is TRUE) and updating the Member +by generating a new identifier (a DOI if \code{use_doi = TRUE}) and updating the Member Node with a public version of the object. If metadata_file is not missing, it should be an edited version of the metadata to be used to update the original. If parent_resmap_pid is not missing, it indicates the PID of a parent package that -should be updated as well, using the parent_medata_pid, parent_data_pids, and +should be updated as well, using the parent_metadata_pid, parent_data_pids, and parent_child_pids as members of the updated package. In all cases, the objects are made publicly readable. } diff --git a/man/replace_package_id.Rd b/man/replace_package_id.Rd deleted file mode 100644 index fdd67b2..0000000 --- a/man/replace_package_id.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{replace_package_id} -\alias{replace_package_id} -\title{Replace the EML 'packageId' attribute on the root element with a -certain value.} -\usage{ -replace_package_id(path, replacement) -} -\arguments{ -\item{path}{(character) Path to the XML file to edit.} - -\item{replacement}{(character) The new value.} -} -\description{ -Replace the EML 'packageId' attribute on the root element with a -certain value. -} diff --git a/man/replace_subject.Rd b/man/replace_subject.Rd deleted file mode 100644 index 87c7618..0000000 --- a/man/replace_subject.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{replace_subject} -\alias{replace_subject} -\title{Replace subjects in the accessPolicy section of a System Metadata entries.} -\usage{ -replace_subject(sysmeta, - from = "cn=arctic-data-admins,dc=dataone,dc=org", - to = "CN=arctic-data-admins,DC=dataone,DC=org") -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The System Metadata object.} - -\item{from}{(character) The DN string to replace.} - -\item{to}{(character) The DN string to put in place of `from`.} -} -\value{ -The modified System Metadata (SystemMetadata) -} -\description{ -This function was written out to fix capitalization errors but in a set of -existing System Metadata entries but can be used to replace any subject. -} diff --git a/man/set_abstract.Rd b/man/set_abstract.Rd index 85a7349..cb30b5b 100644 --- a/man/set_abstract.Rd +++ b/man/set_abstract.Rd @@ -2,23 +2,23 @@ % Please edit documentation in R/eml.R \name{set_abstract} \alias{set_abstract} -\title{Set the abstract on an EML document} +\title{Set the abstract for an EML document} \usage{ set_abstract(doc, text) } \arguments{ -\item{doc}{(eml) An EML document} +\item{doc}{(eml) An EML document.} \item{text}{(character) The abstract text. If \code{text} is length one, an -abstract without \code{<para>} or \code{section} elements will be created. +abstract without \code{<para>} or \code{<section>} elements will be created. If \code{text} is greater than one in length, \code{para} elementes will be used for each element.} } \value{ -(eml) The modified EML document +(eml) The modified EML document. } \description{ -Set the abstract on an EML document +Set the abstract for an EML document. } \examples{ # Create a new EML document diff --git a/man/set_access.Rd b/man/set_access.Rd index c22db50..eff2dc5 100644 --- a/man/set_access.Rd +++ b/man/set_access.Rd @@ -12,7 +12,7 @@ set_access(mn, pids, subjects, permissions = c("read", "write", \item{pids}{(character) The PIDs of the objects to set permissions for.} -\item{subjects}{(character) The identifiers of the subjects to set permissions for, typially an ORCID or DN.} +\item{subjects}{(character) The identifiers of the subjects to set permissions for, typically an ORCID or DN.} \item{permissions}{(character) Optional. The permissions to set. Defaults to read, write, and changePermission.} diff --git a/man/set_file_name.Rd b/man/set_file_name.Rd index 3ae909b..d1166e9 100644 --- a/man/set_file_name.Rd +++ b/man/set_file_name.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{set_file_name} \alias{set_file_name} -\title{Set the file name on an object} +\title{Set the file name for an object} \usage{ set_file_name(mn, pid, name) } @@ -14,10 +14,10 @@ set_file_name(mn, pid, name) \item{name}{(character) The file name.} } \value{ -(logical) Whether the update succeeded, FALSE means there was an error. +(logical) Whether the update succeeded. } \description{ -Set the file name on an object +Set the file name for an object. } \examples{ \dontrun{ diff --git a/man/set_other_entities.Rd b/man/set_other_entities.Rd deleted file mode 100644 index e1c2ec4..0000000 --- a/man/set_other_entities.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{set_other_entities} -\alias{set_other_entities} -\title{This function is deprecated. See \link{pid_to_eml_entity}.} -\usage{ -set_other_entities(mn, path, pids) -} -\arguments{ -\item{mn}{(MNode) The Member Node the objects exist on.} - -\item{path}{(character) The location on disk of the EML file.} - -\item{pids}{(character) One or more PIDs for the objects.} -} -\description{ -This function is deprecated. See \link{pid_to_eml_entity}. -} diff --git a/man/set_rights_and_access.Rd b/man/set_rights_and_access.Rd index 8567b84..45266cf 100644 --- a/man/set_rights_and_access.Rd +++ b/man/set_rights_and_access.Rd @@ -12,7 +12,7 @@ set_rights_and_access(mn, pids, subject, permissions = c("read", "write", \item{pids}{(character) The PIDs of the objects to set the rights holder and access policy for.} -\item{subject}{(character) The identifier of the new rights holder, typially an ORCID or DN.} +\item{subject}{(character) The identifier of the new rights holder, typically an ORCID or DN.} \item{permissions}{(character) Optional. The permissions to set. Defaults to read, write, and changePermission.} diff --git a/man/set_rights_holder.Rd b/man/set_rights_holder.Rd index d09a74d..a162643 100644 --- a/man/set_rights_holder.Rd +++ b/man/set_rights_holder.Rd @@ -11,7 +11,7 @@ set_rights_holder(mn, pids, subject) \item{pids}{(character) The PIDs of the objects to set the rights holder for.} -\item{subject}{(character) The identifier of the new rights holder, typially an ORCID or DN.} +\item{subject}{(character) The identifier of the new rights holder, typically an ORCID or DN.} } \value{ (logical) Whether an update was needed. diff --git a/man/show_indexing_status.Rd b/man/show_indexing_status.Rd index ac652fd..45d828e 100644 --- a/man/show_indexing_status.Rd +++ b/man/show_indexing_status.Rd @@ -7,15 +7,15 @@ show_indexing_status(mn, pids) } \arguments{ -\item{mn}{(MNode) The Member Node to query} +\item{mn}{(MNode) The Member Node to query.} -\item{pids}{(character|list) One or more PIDs (or list of PIDs)} +\item{pids}{(character/list) One or more PIDs.} } \value{ -Nothing +\code{NULL} } \description{ -Show the indexing status of a set of PIDs +Show the indexing status of a set of PIDs. } \examples{ \dontrun{ diff --git a/man/show_random_dataset.Rd b/man/show_random_dataset.Rd deleted file mode 100644 index 429ecaa..0000000 --- a/man/show_random_dataset.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{show_random_dataset} -\alias{show_random_dataset} -\title{Print a random dataset.} -\usage{ -show_random_dataset(inventory, theme = NULL, n = 10) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{theme}{(character) Optional. A package theme name.} - -\item{n}{(numeric) Optional. The number of files to show.} -} -\value{ -Nothing. -} -\description{ -Print a random dataset. -} diff --git a/man/substitute_eml_party.Rd b/man/substitute_eml_party.Rd deleted file mode 100644 index ce89ef5..0000000 --- a/man/substitute_eml_party.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{substitute_eml_party} -\alias{substitute_eml_party} -\title{Extract the EML responsible-party blocks in a document, and parse the -surName field to create proper givenName/surName structure} -\usage{ -substitute_eml_party(path) -} -\arguments{ -\item{path}{file path to the EML document to process (character)} -} -\value{ -path (character) Path to the converted EML file. -} -\description{ -Extract the EML responsible-party blocks in a document, and parse the -surName field to create proper givenName/surName structure -} diff --git a/man/sysmeta_to_eml_other_entity.Rd b/man/sysmeta_to_eml_other_entity.Rd deleted file mode 100644 index 2cde59e..0000000 --- a/man/sysmeta_to_eml_other_entity.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{sysmeta_to_eml_other_entity} -\alias{sysmeta_to_eml_other_entity} -\title{This function is deprecated. See \link{pid_to_eml_entity}.} -\usage{ -sysmeta_to_eml_other_entity(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) One or more System Metadata objects} -} -\description{ -This function is deprecated. See \link{pid_to_eml_entity}. -} diff --git a/man/sysmeta_to_eml_physical.Rd b/man/sysmeta_to_eml_physical.Rd index f86001b..61633da 100644 --- a/man/sysmeta_to_eml_physical.Rd +++ b/man/sysmeta_to_eml_physical.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/eml.R \name{sysmeta_to_eml_physical} \alias{sysmeta_to_eml_physical} -\title{Create an EML physical object from System Metadata} +\title{Create an EML physical object from system metadata} \usage{ sysmeta_to_eml_physical(sysmeta) } \arguments{ -\item{sysmeta}{(SystemMetadata) One or more System Metadata objects} +\item{sysmeta}{(SystemMetadata) One or more System Metadata objects.} } \value{ -(list of physical) The physical objects for each sysmeta +(list) A list of physical objects for each sysmeta. } \description{ This function creates a pre-canned EML physical object from what's in the -System Metadata of an Object. Note that it sets an Online Distrubtion URL +System Metadata of an object. Note that it sets an Online Distribution URL of the DataONE v2 resolve service for the PID. } \examples{ diff --git a/man/sysmeta_to_other_entity.Rd b/man/sysmeta_to_other_entity.Rd deleted file mode 100644 index c0b1578..0000000 --- a/man/sysmeta_to_other_entity.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{sysmeta_to_other_entity} -\alias{sysmeta_to_other_entity} -\title{This function is deprecated. See \link{sysmeta_to_eml_other_entity}.} -\usage{ -sysmeta_to_other_entity(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) A SystemMetadata object} -} -\description{ -This function is deprecated. See \link{sysmeta_to_eml_other_entity}. -} diff --git a/man/test_has_abstract.Rd b/man/test_has_abstract.Rd deleted file mode 100644 index dd86895..0000000 --- a/man/test_has_abstract.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{test_has_abstract} -\alias{test_has_abstract} -\title{modify_metadata.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>} -\usage{ -test_has_abstract(path) -} -\arguments{ -\item{path}{(character) a path} -} -\description{ -Functions related to fixing invalid ISO metadata. -} -\details{ -Some functions just test whether a validation issue is present. These are -prefixed with the text "test". Exactly what they are testing should be -described in the docstrings. - -Other functons fix the bad metadata in place (modifying the original file) -and these functions are prefixed with "fix_". Exactly what they are fixing -should be described in the docstrings. - -Example usage: - -# Find and fix documents in 'mydir' that have extra whitespace in their -# topicCategory element(s) - -the_files <- dir(mydir) -bad_enums <- the_files[which(sapply(the_files, test_has_bad_enum))] -} diff --git a/man/theme_packages.Rd b/man/theme_packages.Rd deleted file mode 100644 index e95e947..0000000 --- a/man/theme_packages.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/marking.R -\name{theme_packages} -\alias{theme_packages} -\title{marking.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>} -\usage{ -theme_packages(inventory, nfiles_cutoff = 100) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} - -\item{nfiles_cutoff}{(integer) Number of cutoff files} -} -\value{ -(data.frame) An Inventory. -} -\description{ -R commands for marking datasets before adding. -Divide packages and their files into themes. -} -\details{ -Themes divide packages into groups based upon how the actions we will take -to insert them. Packages are divided into one of three themes: - -"many-files" - -The package has more files than we'd like to include in a Resource Map - and we will want to archive its contents before inserting. - -"has-versions" - -The package has version information embedded into its folder structure. - These packages will be hand-verified and inserted manually when a plan - is developed to insert them. - -"ready-to-go" - -All other packages not in the above themes. - -Note: Adds a 'theme' column to 'inventory'. -Note: Depeneds on the following columns: - -- filename - - package_nfiles -} diff --git a/man/update_object.Rd b/man/update_object.Rd index 7c525f4..edac8a0 100644 --- a/man/update_object.Rd +++ b/man/update_object.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{update_object} \alias{update_object} -\title{Update an object with a new file.} +\title{Update an object with a new file} \usage{ update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) @@ -14,17 +14,20 @@ update_object(mn, pid, path, format_id = NULL, new_pid = NULL, \item{path}{(character) The full path to the file to update with.} -\item{format_id}{(character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} +\item{format_id}{(character) Optional. The format ID to set for the object. +When not set, \code{\link[=guess_format_id]{guess_format_id()}} will be used to guess the format ID. +Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} -\item{new_pid}{(character) Optional. Specify the PID for the new Object. Defaults to automatically generating a new, random UUID-style PID.} +\item{new_pid}{(character) Optional. Specify the PID for the new object. +Defaults to automatically generating a new, random UUID-style PID.} -\item{sid}{(character) Optiona. Specify a Series ID (SID) to use for the new Object.} +\item{sid}{(character) Optional. Specify a Series ID (SID) to use for the new object.} } \value{ (character) The PID of the updated object. } \description{ -This is a convenience wrapper around `dataone::updateObject` which copies in +This is a convenience wrapper around \code{\link[dataone:updateObject]{dataone::updateObject()}} which copies in fields from the old object's System Metadata such as the rightsHolder and accessPolicy and updates only what needs to be changed. } diff --git a/man/update_package.Rd b/man/update_package.Rd deleted file mode 100644 index e30259e..0000000 --- a/man/update_package.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{update_package} -\alias{update_package} -\title{Update a package with modified metadata.} -\usage{ -update_package(inventory, package, env = NULL) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{package}{(character) The package identifier.} - -\item{env}{(character) Environment} -} -\value{ -TRUE or FALSE depending on sucess (logical) -} -\description{ -The modified metadata should be set in the `env` variable. For example, if -your original metadata is: -} -\details{ -/home/you/originals/dir/a.xml - -and your modified metadata is in - -/home/someone_else/modified/dir/a.xml - -Then your env should be: - -env$base_path <- "/home/you/" -env$alternate_path <- "/home/someone_else" - -Note that the data files are not updated either so all that's happening is -the metadata object and resource map are being updated. - -Note that this function checks if the old objects (metadata and resource map) -exist on the Member Node before doing their work and will call createObject() -instead of updateObject() if the object didn't already exist. -} diff --git a/man/update_package_object.Rd b/man/update_package_object.Rd index 0668b9f..10dda82 100644 --- a/man/update_package_object.Rd +++ b/man/update_package_object.Rd @@ -17,20 +17,20 @@ update_package_object(mn, data_pid, new_data_path, resource_map_pid, \item{resource_map_pid}{(character) PID for resource map to update.} \item{format_id}{(character) Optional. The format ID to set for the object. -When not set, \code{\link{guess_format_id}} will be used +When not set, \code{\link[=guess_format_id]{guess_format_id()}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} -\item{public}{(logical) Optional. Make the update public. If FALSE, +\item{public}{(logical) Optional. Make the update public. If \code{FALSE}, will set the metadata and resource map to private (but not the data objects). This applies to the new metadata PID and its resource map and data object. Access policies are not affected.} -\item{use_doi}{(logical) Optional. If TRUE, a new DOI will be minted.} +\item{use_doi}{(logical) Optional. If \code{TRUE}, a new DOI will be minted.} -\item{...}{Other arguments to pass into \code{\link{publish_update}}.} +\item{...}{Other arguments to pass into \code{\link[=publish_update]{publish_update()}}.} } \value{ -PIDs (character) Named character vector of PIDs in the data package, including PIDs +(character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. } \description{ @@ -38,8 +38,7 @@ This function updates a data object and then automatically updates the package resource map with the new data PID. If an object already has a \code{dataTable}, \code{otherEntity}, or \code{spatialVector} with a working physical section, the EML will be updated with the new physical. -It is a convenience wrapper around \code{\link{update_object}} -and \code{\link{publish_update}}. +It is a convenience wrapper around \code{\link[=update_object]{update_object()}} and \code{\link[=publish_update]{publish_update()}}. } \examples{ \dontrun{ @@ -53,5 +52,6 @@ update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, for file.remove("new_file.csv") } } -\keyword{publish_update} -\keyword{update_object} +\seealso{ +\code{\link[=update_object]{update_object()}} \code{\link[=publish_update]{publish_update()}} +} diff --git a/man/update_physical.Rd b/man/update_physical.Rd deleted file mode 100644 index 26ebe8c..0000000 --- a/man/update_physical.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/editing.R -\name{update_physical} -\alias{update_physical} -\title{Update physical of an updated data object} -\usage{ -update_physical(eml, mn, data_pid, new_data_pid) -} -\arguments{ -\item{eml}{(eml) An EML class object.} - -\item{mn}{(MNode) The Member Node of the data package.} - -\item{data_pid}{(character) The identifier of the data object to be updated.} - -\item{new_data_pid}{(character) The new identifier of the updated data object.} -} -\description{ -This function updates the EML with the new physical -of a data object once it has been updated. -This is a helper function for \code{\link{update_package_object}}. -} diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index 96e4ac4..e47a9da 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -2,61 +2,57 @@ % Please edit documentation in R/editing.R \name{update_resource_map} \alias{update_resource_map} -\title{Update an existing resource map Object on a Member Node.} +\title{Update an existing resource map object on a Member Node} \usage{ update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL, child_pids = NULL, other_statements = NULL, identifier = NULL, public = FALSE, check_first = TRUE) } \arguments{ -\item{mn}{(MNode) The Member Node} +\item{mn}{(MNode) The Member Node.} \item{resource_map_pid}{(character) The PID of the resource map to be updated.} -\item{metadata_pid}{(character) The PID of the metadata object to go in the -package.} +\item{metadata_pid}{(character) The PID of the metadata object to go in the package.} -\item{data_pids}{(character) The PID(s) of the data objects to go in the -package.} +\item{data_pids}{(character) The PID(s) of the data objects to go in the package.} -\item{child_pids}{child_pids (character) The resource map PIDs of the packages to be +\item{child_pids}{(character) The resource map PIDs of the packages to be nested under the package.} -\item{other_statements}{(data.frame) Extra statements to add to the Resource Map.} +\item{other_statements}{(data.frame) Extra statements to add to the resource map.} \item{identifier}{(character) Manually specify the identifier for the new metadata object.} -\item{public}{Whether or not to make the new resource map public read -(logical)} +\item{public}{(logical) Whether or not to make the new resource map public read.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.} +arguments exist on the MN before continuing. This speeds up the function, +especially when \code{data_pids} has many elements.} } \value{ -pid (character) Updated resource map PID. +(character) The PID of the updated resource map. } \description{ This function first generates a new resource map RDF/XML document locally and -then uses the dataone::updateObject function to update an Object on the +then uses the \code{\link[dataone:updateObject]{dataone::updateObject()}} function to update an object on the specified MN. } \details{ -If you only want to generate resource map RDF/XML, see -\code{\link{generate_resource_map}}. +If you only want to generate resource map RDF/XML, see \code{\link[=generate_resource_map]{generate_resource_map()}}. -This function also can be used to be used to add a new child packages to a -parent package. For exmaple, if you have: +This function also can be used to add a new child packages to a +parent package. For example, if you have: Parent A B -and want to add C as a sibling package to A and B, e.g. +and want to add C as a sibling package to A and B, e.g.: Parent A B C -you could use this function. +then you could use this function. -Note: This function currently replaces the rightsHolder on the Resource Map +Note: This function currently replaces the rightsHolder on the resource map temporarily to allow updating but sets it back to the rightsHolder that was in place before the update. } @@ -70,7 +66,6 @@ meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") - rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) } } diff --git a/man/validate_environment.Rd b/man/validate_environment.Rd deleted file mode 100644 index b0508c5..0000000 --- a/man/validate_environment.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{validate_environment} -\alias{validate_environment} -\title{Validate an environment.} -\usage{ -validate_environment(env) -} -\arguments{ -\item{env}{(character) An environment} -} -\description{ -Validate an environment. -} diff --git a/man/validate_inventory.Rd b/man/validate_inventory.Rd deleted file mode 100644 index 6b9b953..0000000 --- a/man/validate_inventory.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{validate_inventory} -\alias{validate_inventory} -\title{Validate an Inventory.} -\usage{ -validate_inventory(inventory) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} -} -\description{ -Validate an Inventory. -} diff --git a/man/view_packages.Rd b/man/view_packages.Rd deleted file mode 100644 index ccde86b..0000000 --- a/man/view_packages.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/interactive.R -\name{view_packages} -\alias{view_packages} -\title{interactive.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>} -\usage{ -view_packages(inventory) -} -\arguments{ -\item{inventory}{(character) An inventory} -} -\description{ -Functions for interactive viewing of the Inventory and other objects. -} diff --git a/man/view_profile.Rd b/man/view_profile.Rd index f044599..605fd21 100644 --- a/man/view_profile.Rd +++ b/man/view_profile.Rd @@ -10,28 +10,30 @@ view_profile(mn, subject, fields = c("identifier", "title")) \item{mn}{(MNode) The Member Node to query.} \item{subject}{(character) The subject to find the datasets for. This is -likely going to be your ORCID, e.g. http://orcid.org....} +likely going to be an ORCID, e.g. http://orcid.org....} \item{fields}{(character) A vector of Solr fields to return.} } \value{ -(data.frame) data.frame with the results. +(data.frame) A data.frame with the results. } \description{ -This function is intended to be (poorly) simulate what a user sees when they +This function is intended to (poorly) simulate what a user sees when they browse to their "My Data Sets" page (their #profile URL). It uses a similar -Solr to what Metacat UI uses to generate the list. The results of this -function may be the same as what's on the #profile page but may be missing -some of the user's datasets when: +Solr query to what Metacat UI uses to generate the list. } \details{ -- The user has any datasets in their #profile that the person running the +The results of this function may be the same as what's on the #profile page +but may be missing some of the user's datasets when: +\itemize{ +\item The user has any datasets in their #profile that the person running the query (you) can't \code{read}. This is rare on arcticdata.io but possible because arctic-data-admins usually has read/write/changePermission permissions on every object. -- The user has datasets owned by an Equivalent Identity of the \code{subject} +\item The user has datasets owned by an Equivalent Identity of the \code{subject} being queried. This is rare, especially on arcticdata.io. } +} \examples{ \dontrun{ options(...set...your...token....) diff --git a/man/warn_current_version.Rd b/man/warn_current_version.Rd deleted file mode 100644 index e9ff0a7..0000000 --- a/man/warn_current_version.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{warn_current_version} -\alias{warn_current_version} -\title{Warns if the currently-installed version of the package is not the same -version as the latest release on GitHub.} -\usage{ -warn_current_version() -} -\description{ -Warns if the currently-installed version of the package is not the same -version as the latest release on GitHub. -} diff --git a/man/which_in_eml.Rd b/man/which_in_eml.Rd index 4e56266..d1efe04 100644 --- a/man/which_in_eml.Rd +++ b/man/which_in_eml.Rd @@ -7,14 +7,16 @@ which_in_eml(eml_list, element, test) } \arguments{ -\item{eml_list}{(S4/List) an EML list object} +\item{eml_list}{(S4/List) An EML list object.} -\item{element}{(character) element to evaluate} +\item{element}{(character) Element to evaluate.} -\item{test}{(function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1).} +\item{test}{(function/character) A function to evaluate (see examples). If test is a character, +will evaluate if \code{element == test} (see example 1).} } \description{ -This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information. +This function returns indices within an EML list that contain an instance where +\code{test == TRUE}. See examples for more information. } \examples{ \dontrun{ @@ -38,4 +40,3 @@ n <- which_in_eml(eml@dataset@dataTable, "numberType", function(x) {"natural" \% \author{ Mitchell Maier mitchell.maier@gmail.com } -\keyword{eml} From 79468d70a783cf597418e9e2dd16f41a029fbb37 Mon Sep 17 00:00:00 2001 From: Derek Strong <dstrong@nceas.ucsb.edu> Date: Wed, 7 Nov 2018 17:03:42 -0800 Subject: [PATCH 177/318] Update tests --- R/dataone.R | 2 +- tests/testthat/test_access.R | 2 +- tests/testthat/test_dataone.R | 2 +- tests/testthat/test_dataone_formats.R | 7 ------- tests/testthat/test_editing.R | 5 +---- tests/testthat/test_eml.R | 12 ++++++------ tests/testthat/test_environment.R | 8 +------- tests/testthat/test_formats.R | 8 +++++++- tests/testthat/test_helpers.R | 26 -------------------------- tests/testthat/test_inventory.R | 9 +++------ tests/testthat/test_packaging.R | 4 +--- tests/testthat/test_sysmeta.R | 24 +++++++++++++++++++++++- tests/testthat/test_util.R | 4 +--- 13 files changed, 46 insertions(+), 67 deletions(-) delete mode 100644 tests/testthat/test_dataone_formats.R diff --git a/R/dataone.R b/R/dataone.R index 3f6229b..3a6cb77 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -1,4 +1,4 @@ -# Helper functions for the DataONE R package +# Helper functions for the dataone package #' Test whether a token is set diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index ed3d865..5430a6d 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -1,4 +1,4 @@ -context("access") +context("Access rules") mn <- env_load()$mn diff --git a/tests/testthat/test_dataone.R b/tests/testthat/test_dataone.R index 03622b3..6caa830 100644 --- a/tests/testthat/test_dataone.R +++ b/tests/testthat/test_dataone.R @@ -1,4 +1,4 @@ -context("dataone") +context("Helpers for the dataone package") node <- env_load()$mn diff --git a/tests/testthat/test_dataone_formats.R b/tests/testthat/test_dataone_formats.R deleted file mode 100644 index 5dc6ce0..0000000 --- a/tests/testthat/test_dataone_formats.R +++ /dev/null @@ -1,7 +0,0 @@ -context("formats") - -test_that("a format can be returned", { - fmt <- format_eml() - expect_is(fmt, "character") - expect_gt(length(fmt), 0) -}) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 7811796..bcda829 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -1,4 +1,4 @@ -context("editing") +context("Editing and managing data packages") mn <- env_load()$mn @@ -345,7 +345,6 @@ test_that("update_physical works", { expect_equal(sum(stringr::str_detect(url_new, pkg$data[4])), 1) }) - test_that("update_package_object changes specified data object and rest of package is intact", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") @@ -396,7 +395,6 @@ test_that("update_package_object changes specified data object and rest of packa expect_true(stringr::str_detect(url_new[2], new_data_pid)) }) - test_that("update_package_object errors if wrong input", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") @@ -431,7 +429,6 @@ test_that("update_package_object errors if wrong input", { file.remove(file_path) }) - test_that("update_package_object updates EML", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 1530520..9c7db2d 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -53,7 +53,7 @@ test_that("a contact can be created", { }) test_that("a personnel can be created", { - personnel <- eml_personnel(given_names="test", sur_name="user", role="principalInvestigator") + personnel <- eml_personnel(given_names = "test", sur_name = "user", role = "principalInvestigator") expect_is(personnel, "personnel") expect_equal(personnel@individualName[[1]]@givenName[[1]]@.Data, "test") @@ -62,7 +62,7 @@ test_that("a personnel can be created", { }) test_that("a project can be created", { - test_personnel_1 <- eml_personnel(given_names="A", sur_name="User", organization="NCEAS", role="originator") + test_personnel_1 <- eml_personnel(given_names = "A", sur_name = "User", organization = "NCEAS", role = "originator") project <- eml_project("some title", list(test_personnel_1), @@ -79,8 +79,8 @@ test_that("a project can be created", { }) test_that("a project can be created with multiple personnel, an abstract can be created with multiple paragraphs, awards with multiple awards", { - test_personnel_1 <- eml_personnel(given_names="A", sur_name="User", organization="NCEAS", role="originator") - test_personnel_2 <- eml_personnel(given_names="Testy", sur_name="Mactesterson", organization="A Test Org", role=c("user", "author")) + test_personnel_1 <- eml_personnel(given_names = "A", sur_name = "User", organization = "NCEAS", role = "originator") + test_personnel_2 <- eml_personnel(given_names = "Testy", sur_name = "Mactesterson", organization = "A Test Org", role = c("user", "author")) project <- eml_project("some title", list(test_personnel_1, test_personnel_2), @@ -185,7 +185,7 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { expect_equivalent(otherEntity@physical, eml@dataset@dataTable[[1]]@physical) }) -test_that("which_in_eml Returns correct locations", { +test_that("which_in_eml returns correct locations", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") } @@ -303,7 +303,7 @@ test_that('eml_set_shared_attributes creates shared attribute references', { expect_true(EML::eml_validate(doc)) }) -test_that('eml_party creates multiple giveName, organizationName, and positionName fields', { +test_that('eml_party creates multiple givenName, organizationName, and positionName fields', { creator <- eml_party('creator', c('John', 'and Jack'), 'Smith', c('NCEAS', 'UCSB'), c('Programmers', 'brothers')) diff --git a/tests/testthat/test_environment.R b/tests/testthat/test_environment.R index 20b45b9..ba42e7a 100644 --- a/tests/testthat/test_environment.R +++ b/tests/testthat/test_environment.R @@ -1,8 +1,4 @@ -#' test_environment.R -#' -#' Test functions related to loading handling application environment. - -context("environment") +context("Environment") test_that("can load a simple environment file", { x <- yaml::yaml.load_file(system.file("./environment.yml", package = "arcticdatautils")) @@ -11,14 +7,12 @@ test_that("can load a simple environment file", { expect_true(length(setdiff(c("development", "test", "production"), names(x))) == 0) }) - test_that("an environment string can be returned", { expect_is(env_get(), "character") expect_true(nchar(env_get()) > 0) }) test_that("can correctly load the environment", { - # Defaults to development if the env var isn't found Sys.setenv("ARCTICDATA_ENV" = "") expect_true(env_get() == "development") diff --git a/tests/testthat/test_formats.R b/tests/testthat/test_formats.R index 98d3aee..5988571 100644 --- a/tests/testthat/test_formats.R +++ b/tests/testthat/test_formats.R @@ -1,6 +1,12 @@ -context("formats") +context("Formats") test_that("valid formats are valid and invalid ones are not", { expect_true(check_format("text/csv")) expect_error(check_format("badformat")) }) + +test_that("a format can be returned", { + fmt <- format_eml() + expect_is(fmt, "character") + expect_gt(length(fmt), 0) +}) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 4c1d7ef..1c2472f 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -1,5 +1,3 @@ -#' test_helpers.R - context("Helpers") mn <- env_load()$mn @@ -18,7 +16,6 @@ test_that("a dummy package can be created", { expect_true(object_exists(mn, result$resource_map)) }) - test_that("create_dummy_package_full errors if wrong input", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") @@ -27,26 +24,3 @@ test_that("create_dummy_package_full errors if wrong input", { expect_error(create_dummy_package_full(mn, title = 11)) expect_error(create_dummy_package_full("mn")) }) - - -test_that("all system metadata is retrieved", { - cn_staging <- CNode("STAGING") - adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") - - rm_pid <- "resource_map_urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610" - - expect_error(get_all_sysmeta(7, "")) - expect_error(get_all_sysmeta(adc_test, "")) - expect_error(get_all_sysmeta(adc_test, "urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610")) - expect_error(get_all_sysmeta(adc_test, rm_pid, nmax = -7)) - expect_error(get_all_sysmeta(adc_test, rm_pid, child_packages = 7)) - - all <- get_all_sysmeta(adc_test, rm_pid) - - expect_message(get_all_sysmeta(adc_test, rm_pid)) - expect_type(all, "list") - expect_length(all, 5) - expect_equal(names(all)[1], "dummy_resource_map.xml") - - expect_message(get_all_sysmeta(adc_test, "resource_map_urn:uuid:924f81f6-2e68-4eb8-925f-53f5b66318ec")) -}) diff --git a/tests/testthat/test_inventory.R b/tests/testthat/test_inventory.R index f3269b4..cac897f 100644 --- a/tests/testthat/test_inventory.R +++ b/tests/testthat/test_inventory.R @@ -1,6 +1,4 @@ -#' test_inventory.R - -context("inventory") +context("Inventory") test_that("an inventory can be created correctly", { x <- inv_init() @@ -8,10 +6,9 @@ test_that("an inventory can be created correctly", { expect_true(nrow(x) == 0) }) - test_that("an inventory can be updated with new information", { - test_inv <- data.frame(file = "A", pid="", created = FALSE, stringsAsFactors = FALSE) - new_inv <- data.frame(file = "A", pid="pidA", created = TRUE, stringsAsFactors = FALSE) + test_inv <- data.frame(file = "A", pid = "", created = FALSE, stringsAsFactors = FALSE) + new_inv <- data.frame(file = "A", pid = "pidA", created = TRUE, stringsAsFactors = FALSE) result <- inv_update(test_inv, new_inv) expect_true(result[1,"pid"] == "pidA") diff --git a/tests/testthat/test_packaging.R b/tests/testthat/test_packaging.R index 6bb9891..b65a06b 100644 --- a/tests/testthat/test_packaging.R +++ b/tests/testthat/test_packaging.R @@ -1,6 +1,4 @@ -#' test_packaging.R - -context("packaging") +context("Packaging") test_that("child pids are correctly determined", { inventory <- data.frame(pid = c("A", "B", "C"), diff --git a/tests/testthat/test_sysmeta.R b/tests/testthat/test_sysmeta.R index 798c57d..fc1cac9 100644 --- a/tests/testthat/test_sysmeta.R +++ b/tests/testthat/test_sysmeta.R @@ -1,4 +1,4 @@ -context("sysmeta") +context("System metadata") test_that("the replication policy gets cleared", { library(datapack) @@ -22,3 +22,25 @@ test_that("the replication policy gets defaulted correctly", { expect_equal(sysmeta@numberReplicas, 0) expect_identical(sysmeta@blockedNodes, list("urn:node:KNB", "urn:node:mnUCSB1")) }) + +test_that("all system metadata is retrieved", { + cn_staging <- CNode("STAGING") + adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") + + rm_pid <- "resource_map_urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610" + + expect_error(get_all_sysmeta(7, "")) + expect_error(get_all_sysmeta(adc_test, "")) + expect_error(get_all_sysmeta(adc_test, "urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610")) + expect_error(get_all_sysmeta(adc_test, rm_pid, nmax = -7)) + expect_error(get_all_sysmeta(adc_test, rm_pid, child_packages = 7)) + + all <- get_all_sysmeta(adc_test, rm_pid) + + expect_message(get_all_sysmeta(adc_test, rm_pid)) + expect_type(all, "list") + expect_length(all, 5) + expect_equal(names(all)[1], "dummy_resource_map.xml") + + expect_message(get_all_sysmeta(adc_test, "resource_map_urn:uuid:924f81f6-2e68-4eb8-925f-53f5b66318ec")) +}) diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 5d70859..3ee1f61 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -1,6 +1,4 @@ -#' test_util.R - -context("util") +context("Utilities") test_that("paths can be joined", { expect_equal(path_join(""), "") From a1ee812abc45f3a92952269391b6e09ec8244bc9 Mon Sep 17 00:00:00 2001 From: Derek Strong <dstrong@nceas.ucsb.edu> Date: Wed, 7 Nov 2018 18:32:53 -0800 Subject: [PATCH 178/318] Update pkgdown site --- .Rbuildignore | 4 +- README.md | 2 +- _pkgdown.yml | 4 + docs/LICENSE | 201 --- .../{overview.html => a-overview.html} | 82 +- .../{basic-usage.html => b-basic-usage.html} | 91 +- docs/articles/index.html | 102 -- docs/authors.html | 87 +- docs/docsearch.css | 148 +++ docs/docsearch.js | 85 ++ docs/index.html | 108 +- docs/pkgdown.css | 153 ++- docs/pkgdown.js | 114 +- docs/pkgdown.yml | 7 + docs/reference/add_methods_step.html | 86 +- docs/reference/arcticdatautils.html | 155 +++ docs/reference/clear_methods.html | 85 +- docs/reference/convert_iso_to_eml.html | 81 +- .../create_dummy_attributes_dataframe.html | 181 +++ ...eate_dummy_enumeratedDomain_dataframe.html | 177 +++ docs/reference/create_dummy_metadata.html | 92 +- docs/reference/create_dummy_object.html | 92 +- docs/reference/create_dummy_package.html | 94 +- docs/reference/create_dummy_package_full.html | 180 +++ .../create_dummy_parent_package.html | 89 +- docs/reference/create_resource_map.html | 112 +- docs/reference/eml_abstract.html | 178 +++ docs/reference/eml_add_entities.html | 124 +- docs/reference/eml_address.html | 86 +- docs/reference/eml_associated_party.html | 105 +- docs/reference/eml_contact.html | 114 +- docs/reference/eml_creator.html | 110 +- docs/reference/eml_geographic_coverage.html | 194 +++ docs/reference/eml_individual_name.html | 81 +- docs/reference/eml_metadata_provider.html | 99 +- .../eml_otherEntity_to_dataTable.html | 195 +++ docs/reference/eml_party.html | 226 ++++ docs/reference/eml_personnel.html | 185 +++ docs/reference/eml_project.html | 127 +- docs/reference/eml_set_reference.html | 200 +++ docs/reference/eml_set_shared_attributes.html | 198 +++ docs/reference/eml_validate_attributes.html | 101 +- docs/reference/env_get.html | 76 +- docs/reference/find_newest_object.html | 92 +- docs/reference/format_eml.html | 86 +- docs/reference/format_iso.html | 87 +- docs/reference/generate_resource_map.html | 110 +- docs/reference/get_all_sysmeta.html | 205 +++ docs/reference/get_all_versions.html | 86 +- docs/reference/get_mn_base_url.html | 91 +- docs/reference/get_ncdf4_attributes.html | 80 +- docs/reference/get_package.html | 96 +- docs/reference/get_token.html | 86 +- docs/reference/guess_format_id.html | 82 +- docs/reference/index.html | 1157 +++++++---------- docs/reference/is_authorized.html | 92 +- docs/reference/is_obsolete.html | 87 +- docs/reference/is_public_read.html | 192 +++ docs/reference/is_token_expired.html | 98 +- docs/reference/is_token_set.html | 89 +- docs/reference/mdq_run.html | 187 +++ docs/reference/new_uuid.html | 78 +- docs/reference/object_exists.html | 95 +- docs/reference/parse_resource_map.html | 94 +- docs/reference/pid_to_eml_entity.html | 194 +++ docs/reference/pid_to_eml_physical.html | 184 +++ docs/reference/publish_object.html | 116 +- docs/reference/publish_update.html | 135 +- docs/reference/remove_public_read.html | 90 +- docs/reference/set_abstract.html | 93 +- docs/reference/set_access.html | 101 +- docs/reference/set_file_name.html | 88 +- docs/reference/set_public_read.html | 96 +- docs/reference/set_rights_and_access.html | 104 +- docs/reference/set_rights_holder.html | 105 +- docs/reference/show_indexing_status.html | 184 +++ docs/reference/sysmeta_to_eml_physical.html | 99 +- docs/reference/update_object.html | 102 +- docs/reference/update_package_object.html | 232 ++++ docs/reference/update_resource_map.html | 143 +- docs/reference/view_profile.html | 117 +- docs/reference/which_in_eml.html | 199 +++ index.md | 24 + vignettes/a-overview.Rmd | 23 + .../{basic-usage.Rmd => b-basic-usage.Rmd} | 0 vignettes/overview.Rmd | 23 - 86 files changed, 8595 insertions(+), 2138 deletions(-) create mode 100644 _pkgdown.yml delete mode 100644 docs/LICENSE rename docs/articles/{overview.html => a-overview.html} (50%) rename docs/articles/{basic-usage.html => b-basic-usage.html} (62%) delete mode 100644 docs/articles/index.html create mode 100644 docs/docsearch.css create mode 100644 docs/docsearch.js create mode 100644 docs/pkgdown.yml create mode 100644 docs/reference/arcticdatautils.html create mode 100644 docs/reference/create_dummy_attributes_dataframe.html create mode 100644 docs/reference/create_dummy_enumeratedDomain_dataframe.html create mode 100644 docs/reference/create_dummy_package_full.html create mode 100644 docs/reference/eml_abstract.html create mode 100644 docs/reference/eml_geographic_coverage.html create mode 100644 docs/reference/eml_otherEntity_to_dataTable.html create mode 100644 docs/reference/eml_party.html create mode 100644 docs/reference/eml_personnel.html create mode 100644 docs/reference/eml_set_reference.html create mode 100644 docs/reference/eml_set_shared_attributes.html create mode 100644 docs/reference/get_all_sysmeta.html create mode 100644 docs/reference/is_public_read.html create mode 100644 docs/reference/mdq_run.html create mode 100644 docs/reference/pid_to_eml_entity.html create mode 100644 docs/reference/pid_to_eml_physical.html create mode 100644 docs/reference/show_indexing_status.html create mode 100644 docs/reference/update_package_object.html create mode 100644 docs/reference/which_in_eml.html create mode 100644 index.md create mode 100644 vignettes/a-overview.Rmd rename vignettes/{basic-usage.Rmd => b-basic-usage.Rmd} (100%) delete mode 100644 vignettes/overview.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index fc02950..f71e5af 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,6 +2,8 @@ ^\.Rproj\.user$ ^etc$ ^docs$ +^\_pkgdown\.yml$ +^index\.md$ ^\.travis\.yml$ -MAINTENANCE.md ^LICENSE$ +^MAINTENANCE\.md$ diff --git a/README.md b/README.md index 9ea0963..6a6687f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Travis build status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) -The `arcticadatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: +The `arcticdatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: - Inserting large numbers of files into a Metacat Member Node - High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..3b06c25 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +navbar: + right: + - icon: fa-github + href: https://github.com/NCEAS/arcticdatautils diff --git a/docs/LICENSE b/docs/LICENSE deleted file mode 100644 index 5e0fd33..0000000 --- a/docs/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, -and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by -the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all -other entities that control, are controlled by, or are under common -control with that entity. For the purposes of this definition, -"control" means (i) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or -otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity -exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, -including but not limited to software source code, documentation -source, and configuration files. - -"Object" form shall mean any form resulting from mechanical -transformation or translation of a Source form, including but -not limited to compiled object code, generated documentation, -and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or -Object form, made available under the License, as indicated by a -copyright notice that is included in or attached to the work -(an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object -form, that is based on (or derived from) the Work and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. For the purposes -of this License, Derivative Works shall not include works that remain -separable from, or merely link (or bind by name) to the interfaces of, -the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including -the original version of the Work and any modifications or additions -to that Work or Derivative Works thereof, that is intentionally -submitted to Licensor for inclusion in the Work by the copyright owner -or by an individual or Legal Entity authorized to submit on behalf of -the copyright owner. For the purposes of this definition, "submitted" -means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, -and issue tracking systems that are managed by, or on behalf of, the -Licensor for the purpose of discussing and improving the Work, but -excluding communication that is conspicuously marked or otherwise -designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity -on behalf of whom a Contribution has been received by Licensor and -subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the -Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -(except as stated in this section) patent license to make, have made, -use, offer to sell, sell, import, and otherwise transfer the Work, -where such license applies only to those patent claims licensable -by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) -with the Work to which such Contribution(s) was submitted. If You -institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work -or a Contribution incorporated within the Work constitutes direct -or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate -as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the -Work or Derivative Works thereof in any medium, with or without -modifications, and in Source or Object form, provided that You -meet the following conditions: - -(a) You must give any other recipients of the Work or -Derivative Works a copy of this License; and - -(b) You must cause any modified files to carry prominent notices -stating that You changed the files; and - -(c) You must retain, in the Source form of any Derivative Works -that You distribute, all copyright, patent, trademark, and -attribution notices from the Source form of the Work, -excluding those notices that do not pertain to any part of -the Derivative Works; and - -(d) If the Work includes a "NOTICE" text file as part of its -distribution, then any Derivative Works that You distribute must -include a readable copy of the attribution notices contained -within such NOTICE file, excluding those notices that do not -pertain to any part of the Derivative Works, in at least one -of the following places: within a NOTICE text file distributed -as part of the Derivative Works; within the Source form or -documentation, if provided along with the Derivative Works; or, -within a display generated by the Derivative Works, if and -wherever such third-party notices normally appear. The contents -of the NOTICE file are for informational purposes only and -do not modify the License. You may add Your own attribution -notices within Derivative Works that You distribute, alongside -or as an addendum to the NOTICE text from the Work, provided -that such additional attribution notices cannot be construed -as modifying the License. - -You may add Your own copyright statement to Your modifications and -may provide additional or different license terms and conditions -for use, reproduction, or distribution of Your modifications, or -for any such Derivative Works as a whole, provided Your use, -reproduction, and distribution of the Work otherwise complies with -the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, -any Contribution intentionally submitted for inclusion in the Work -by You to the Licensor shall be under the terms and conditions of -this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify -the terms of any separate license agreement you may have executed -with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade -names, trademarks, service marks, or product names of the Licensor, -except as required for reasonable and customary use in describing the -origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or -agreed to in writing, Licensor provides the Work (and each -Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied, including, without limitation, any warranties or conditions -of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any -risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, -whether in tort (including negligence), contract, or otherwise, -unless required by applicable law (such as deliberate and grossly -negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, -incidental, or consequential damages of any character arising as a -result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, -work stoppage, computer failure or malfunction, or any and all -other commercial damages or losses), even if such Contributor -has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing -the Work or Derivative Works thereof, You may choose to offer, -and charge a fee for, acceptance of support, warranty, indemnity, -or other liability obligations and/or rights consistent with this -License. However, in accepting such obligations, You may act only -on Your own behalf and on Your sole responsibility, not on behalf -of any other Contributor, and only if You agree to indemnify, -defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason -of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following -boilerplate notice, with the fields enclosed by brackets "{}" -replaced with your own identifying information. (Don't include -the brackets!) The text should be enclosed in the appropriate -comment syntax for the file format. We also recommend that a -file or class name and description of purpose be included on the -same "printed page" as the copyright notice for easier -identification within third-party archives. - -Copyright {yyyy} {name of copyright owner} - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/docs/articles/overview.html b/docs/articles/a-overview.html similarity index 50% rename from docs/articles/overview.html rename to docs/articles/a-overview.html index 0ecb52a..38b50b0 100644 --- a/docs/articles/overview.html +++ b/docs/articles/a-overview.html @@ -8,14 +8,17 @@ <title>Overview • arcticdatautils - - + + + + -
+
-
+
+

2018-11-07

+ + + + +
-

-Abbreviated API overview:

+Abbreviated API overview
  • -publish_update:
  • +publish_update(): +
    • Mint a DOI for a package
    • -
    • Replace the metadata for a package, from a local file
    • +
    • Replace the metadata for a package
    • Add/remove data in a package
    • +
    +
  • -publish_object: Use before publish_update if you’re adding new data to a package.
  • +publish_object(): Use before publish_update() if you’re adding new data to a package
  • -update_resource_map: Edit the set of child packages for a package
  • +update_resource_map(): Edit the set of child packages for a package
  • -create_resource_map: Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages.
  • +create_resource_map(): Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages
  • -set_rights_and_access: Use this to give a user edit rights to a package
  • +set_rights_and_access(): Use this to give a user edit rights to a package

The package does way more than this but the above are the most common tasks.

-
@@ -96,11 +134,13 @@

Contents

-

Site built with pkgdown.

+

Site built with pkgdown.

+ + diff --git a/docs/articles/basic-usage.html b/docs/articles/b-basic-usage.html similarity index 62% rename from docs/articles/basic-usage.html rename to docs/articles/b-basic-usage.html index e4f54ec..c7193ed 100644 --- a/docs/articles/basic-usage.html +++ b/docs/articles/b-basic-usage.html @@ -8,14 +8,17 @@ Basic Usage • arcticdatautils - - + + + + -
+
-
+
+

2018-11-07

+ + + + +
-

Usage scenarios

For a lot of editing tasks, we’ll first want to get some variables set up. For the following use cases, we’re going to be doing something to a package, which has a metadata file with the PID ‘X’ in it. Here’s how we set that up:

# Set up your environment first
 options(dataone_test_token = "...") # Set your token here
-env <- env_load("production")
+env <- env_load("production")
 
 # Set up some variables for later
 my_eml_file <- "/path/to/the/file/on/disk/eml.xml"
-pkg <- get_package(env$mn, metadata_pid)
+pkg <- get_package(env$mn, metadata_pid)

At this point, we can do a number of things.

Use: I want to update the metadata in a package with an edited EML file I have on my computer

-
publish_update(env$mn,
-               metadata_pid = pkg$metadata,
-               resource_map_pid = pkg$resource_map,
-               data_pids = pkg$data,
+
publish_update(env$mn,
+               metadata_pid = pkg$metadata,
+               resource_map_pid = pkg$resource_map,
+               data_pids = pkg$data,
                metadata_file_path = my_eml_file)

Use: Mint a DOI for the package

-
publish_update(env$mn,
-               metadata_pid = pkg$metadata,
-               resource_map_pid = pkg$resource_map,
-               data_pids = pkg$data,
+
publish_update(env$mn,
+               metadata_pid = pkg$metadata,
+               resource_map_pid = pkg$resource_map,
+               data_pids = pkg$data,
                use_doi = TRUE)
@@ -93,18 +128,18 @@

new_data_object_pid <- publish_object(filepath = new_object_path, format_id = "text/csv") -publish_update(env$mn, - metadata_pid = pkg$metadata, - resource_map_pid = pkg$resource_map, - data_pids = c(pkg$data, newnew_data_object_pid))

-
+publish_update(env$mn, + metadata_pid = pkg$metadata, + resource_map_pid = pkg$resource_map, + data_pids = c(pkg$data, newnew_data_object_pid))
+ + diff --git a/docs/articles/index.html b/docs/articles/index.html deleted file mode 100644 index 7e02f50..0000000 --- a/docs/articles/index.html +++ /dev/null @@ -1,102 +0,0 @@ - - - - - - - - -Articles • arcticdatautils - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - -
- - - -
-
-
-

All vignettes

-

- - -
-
-
- -
- - -
-

Site built with pkgdown.

-
- -
-
- - - diff --git a/docs/authors.html b/docs/authors.html index b2479ba..0e91c9f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -18,14 +18,24 @@ + + + + + - + + + + + + - +
@@ -68,19 +106,43 @@ -
-
+
+
  • -

    Bryce Mecum. Author, maintainer. +

    Bryce Mecum. Author, maintainer.

  • -

    Matt Jones. Contributor. +

    Matt Jones. Contributor. +

    +
  • +
  • +

    Jesse Goldstein. Contributor. +
    Maintainer

    +
  • +
  • +

    Jeanette Clark. Contributor. +
    Maintainer

    +
  • +
  • +

    Dominic Mullen. Contributor. +

    +
  • +
  • +

    Emily O'Dean. Contributor. +

    +
  • +
  • +

    Robyn Thiessen-Bock. Contributor. +

    +
  • +
  • +

    Derek Strong. Contributor.

@@ -96,11 +158,14 @@

Authors

-

Site built with pkgdown.

+

Site built with pkgdown.

+ + + diff --git a/docs/docsearch.css b/docs/docsearch.css new file mode 100644 index 0000000..e5f1fe1 --- /dev/null +++ b/docs/docsearch.css @@ -0,0 +1,148 @@ +/* Docsearch -------------------------------------------------------------- */ +/* + Source: https://github.com/algolia/docsearch/ + License: MIT +*/ + +.algolia-autocomplete { + display: block; + -webkit-box-flex: 1; + -ms-flex: 1; + flex: 1 +} + +.algolia-autocomplete .ds-dropdown-menu { + width: 100%; + min-width: none; + max-width: none; + padding: .75rem 0; + background-color: #fff; + background-clip: padding-box; + border: 1px solid rgba(0, 0, 0, .1); + box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); +} + +@media (min-width:768px) { + .algolia-autocomplete .ds-dropdown-menu { + width: 175% + } +} + +.algolia-autocomplete .ds-dropdown-menu::before { + display: none +} + +.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { + padding: 0; + background-color: rgb(255,255,255); + border: 0; + max-height: 80vh; +} + +.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { + margin-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion { + padding: 0; + overflow: visible +} + +.algolia-autocomplete .algolia-docsearch-suggestion--category-header { + padding: .125rem 1rem; + margin-top: 0; + font-size: 1.3em; + font-weight: 500; + color: #00008B; + border-bottom: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { + float: none; + padding-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { + float: none; + width: auto; + padding: 0; + text-align: left +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content { + float: none; + width: auto; + padding: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content::before { + display: none +} + +.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { + padding-top: .75rem; + margin-top: .75rem; + border-top: 1px solid rgba(0, 0, 0, .1) +} + +.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { + display: block; + padding: .1rem 1rem; + margin-bottom: 0.1; + font-size: 1.0em; + font-weight: 400 + /* display: none */ +} + +.algolia-autocomplete .algolia-docsearch-suggestion--title { + display: block; + padding: .25rem 1rem; + margin-bottom: 0; + font-size: 0.9em; + font-weight: 400 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--text { + padding: 0 1rem .5rem; + margin-top: -.25rem; + font-size: 0.8em; + font-weight: 400; + line-height: 1.25 +} + +.algolia-autocomplete .algolia-docsearch-footer { + width: 110px; + height: 20px; + z-index: 3; + margin-top: 10.66667px; + float: right; + font-size: 0; + line-height: 0; +} + +.algolia-autocomplete .algolia-docsearch-footer--logo { + background-image: url("data:image/svg+xml;utf8,"); + background-repeat: no-repeat; + background-position: 50%; + background-size: 100%; + overflow: hidden; + text-indent: -9000px; + width: 100%; + height: 100%; + display: block; + transform: translate(-8px); +} + +.algolia-autocomplete .algolia-docsearch-suggestion--highlight { + color: #FF8C00; + background: rgba(232, 189, 54, 0.1) +} + + +.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { + box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) +} + +.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { + background-color: rgba(192, 192, 192, .15) +} diff --git a/docs/docsearch.js b/docs/docsearch.js new file mode 100644 index 0000000..b35504c --- /dev/null +++ b/docs/docsearch.js @@ -0,0 +1,85 @@ +$(function() { + + // register a handler to move the focus to the search bar + // upon pressing shift + "/" (i.e. "?") + $(document).on('keydown', function(e) { + if (e.shiftKey && e.keyCode == 191) { + e.preventDefault(); + $("#search-input").focus(); + } + }); + + $(document).ready(function() { + // do keyword highlighting + /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ + var mark = function() { + + var referrer = document.URL ; + var paramKey = "q" ; + + if (referrer.indexOf("?") !== -1) { + var qs = referrer.substr(referrer.indexOf('?') + 1); + var qs_noanchor = qs.split('#')[0]; + var qsa = qs_noanchor.split('&'); + var keyword = ""; + + for (var i = 0; i < qsa.length; i++) { + var currentParam = qsa[i].split('='); + + if (currentParam.length !== 2) { + continue; + } + + if (currentParam[0] == paramKey) { + keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); + } + } + + if (keyword !== "") { + $(".contents").unmark({ + done: function() { + $(".contents").mark(keyword); + } + }); + } + } + }; + + mark(); + }); +}); + +/* Search term highlighting ------------------------------*/ + +function matchedWords(hit) { + var words = []; + + var hierarchy = hit._highlightResult.hierarchy; + // loop to fetch from lvl0, lvl1, etc. + for (var idx in hierarchy) { + words = words.concat(hierarchy[idx].matchedWords); + } + + var content = hit._highlightResult.content; + if (content) { + words = words.concat(content.matchedWords); + } + + // return unique words + var words_uniq = [...new Set(words)]; + return words_uniq; +} + +function updateHitURL(hit) { + + var words = matchedWords(hit); + var url = ""; + + if (hit.anchor) { + url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; + } else { + url = hit.url + '?q=' + escape(words.join(" ")); + } + + return url; +} diff --git a/docs/index.html b/docs/index.html index e08a7fe..478ed03 100644 --- a/docs/index.html +++ b/docs/index.html @@ -5,11 +5,15 @@ -Arctic Data Utilities • arcticdatautils +Utilities for the Arctic Data Center • arcticdatautils - - + + + + @@ -24,18 +28,47 @@ - arcticdatautils + + arcticdatautils + 0.6.4 +
+
@@ -46,50 +79,55 @@
-
+
-

The articadatautils R package contains code for:

+arcticdatautils
+ +

The arcticdatautils package contains code for doing lots of useful stuff that’s too specific for the dataone package:

    -
  • Inserting large numbers of files into Metacat
  • -
  • High-level rdataone wrappers for editing objects and Data Packages
  • +
  • Inserting large numbers of files into a Metacat Member Node
  • +
  • High-level dataone wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations

Note: The package is intended to be used by NCEAS staff and may not make much sense to others.

Installing

-

I recommend installing from the latest release (aka tag) instead of from master. Install from release with:

-
devtools::install_github("NCEAS/arcticdatautils", ref = "{TAG_NAME_HERE}")
+

We recommend installing from the latest release (aka tag) instead of from master. Install the latest release with the remotes package:

+
remotes::install_github("nceas/arcticdatautils@*release")

If you’re feeling adventurous, you can install from the bleeding edge:

-
devtools::install_github("NCEAS/arcticdatautils")
-
-
-

-Contributing

-

Please submit suggestions or bugs as Issues.

-
-
-

-Testing

-

Some tests are dependent on an authentication token being set and be skipped if one is not set.

-
# Skips tests that depend on a Metacat instance:
-devtools::test()
-
-# Set a token to run skipped tests:
-options(dataone_test_token = "...")
-devtools::test()
+
remotes::install_github("nceas/arcticdatautils")
@@ -99,11 +137,13 @@

Developers

-

Site built with pkgdown.

+

Site built with pkgdown.

+ + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index e704e06..6ca2f37 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -1,13 +1,32 @@ -/* Sticker footer */ +/* Sticky footer */ + +/** + * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ + * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css + * + * .Site -> body > .container + * .Site-content -> body > .container .row + * .footer -> footer + * + * Key idea seems to be to ensure that .container and __all its parents__ + * have height set to 100% + * + */ + +html, body { + height: 100%; +} + body > .container { display: flex; - padding-top: 60px; - min-height: calc(100vh); + height: 100%; flex-direction: column; + + padding-top: 60px; } body > .container .row { - flex: 1; + flex: 1 0 auto; } footer { @@ -16,6 +35,7 @@ footer { border-top: 1px solid #e5e5e5; color: #666; display: flex; + flex-shrink: 0; } footer p { margin-bottom: 0; @@ -34,13 +54,20 @@ img.icon { float: right; } -/* Section anchors ---------------------------------*/ +img { + max-width: 100%; +} -.hasAnchor { - margin-left: -30px; +/* Typographic tweaking ---------------------------------*/ + +.contents h1.page-header { + margin-top: calc(-60px + 1em); } +/* Section anchors ---------------------------------*/ + a.anchor { + margin-left: -30px; display:inline-block; width: 30px; height: 30px; @@ -56,13 +83,31 @@ a.anchor { visibility: visible; } +@media (max-width: 767px) { + .hasAnchor:hover a.anchor { + visibility: hidden; + } +} + + /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { padding-top: 60px; - margin-top: -60px; + margin-top: -40px; +} + +/* Static header placement on mobile devices */ +@media (max-width: 767px) { + .navbar-fixed-top { + position: absolute; + } + .navbar { + padding: 0; + } } + /* Sidebar --------------------------*/ #sidebar { @@ -81,10 +126,14 @@ a.anchor { margin-bottom: 0.5em; } +.orcid { + height: 16px; + vertical-align: middle; +} + /* Reference index & topics ----------------------------------------------- */ .ref-index th {font-weight: normal;} -.ref-index h2 {font-size: 20px;} .ref-index td {vertical-align: top;} .ref-index .alias {width: 40%;} @@ -107,31 +156,77 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -code { - background-color: #f7f7f7; - color: #333; +pre { + word-wrap: normal; + word-break: normal; + border: 1px solid #eee; } -code a { - color: #375f84; + +pre, code { + background-color: #f8f8f8; + color: #333; } -.warning { color: red; } -.message { font-weight: bolder; } -.error { color: red; font-weight: bolder; } +pre code { + overflow: auto; + word-wrap: normal; + white-space: pre; +} -.fl,.number {color:rgb(21,20,181);} -.fu,.functioncall {color:#264D66 ;} -.ch,.st,.string {color:#375D81 ;} -.kw,.keyword {color:black;} -.argument {color:#264D66 ;} -.co,.comment {color: #777;} -.formalargs {color: #264D66;} -.eqformalargs {color:#264D66;} -.slot {font-style:italic;} -.symbol {color:black ;} -.prompt {color:black ;} +pre .img { + margin: 5px 0; +} -pre img { +pre .img img { background-color: #fff; display: block; + height: auto; +} + +code a, pre a { + color: #375f84; +} + +a.sourceLine:hover { + text-decoration: none; +} + +.fl {color: #1514b5;} +.fu {color: #000000;} /* function */ +.ch,.st {color: #036a07;} /* string */ +.kw {color: #264D66;} /* keyword */ +.co {color: #888888;} /* comment */ + +.message { color: black; font-weight: bolder;} +.error { color: orange; font-weight: bolder;} +.warning { color: #6A0366; font-weight: bolder;} + +/* Clipboard --------------------------*/ + +.hasCopyButton { + position: relative; +} + +.btn-copy-ex { + position: absolute; + right: 0; + top: 0; + visibility: hidden; +} + +.hasCopyButton:hover button.btn-copy-ex { + visibility: visible; +} + +/* mark.js ----------------------------*/ + +mark { + background-color: rgba(255, 255, 51, 0.5); + border-bottom: 2px solid rgba(255, 153, 51, 0.3); + padding: 1px; +} + +/* vertical spacing after htmlwidgets */ +.html-widget { + margin-bottom: 10px; } diff --git a/docs/pkgdown.js b/docs/pkgdown.js index c8b38c4..de9bd72 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -1,8 +1,110 @@ -$(function() { - $("#sidebar").stick_in_parent({offset_top: 40}); - $('body').scrollspy({ - target: '#sidebar', - offset: 60 +/* http://gregfranko.com/blog/jquery-best-practices/ */ +(function($) { + $(function() { + + $("#sidebar") + .stick_in_parent({offset_top: 40}) + .on('sticky_kit:bottom', function(e) { + $(this).parent().css('position', 'static'); + }) + .on('sticky_kit:unbottom', function(e) { + $(this).parent().css('position', 'relative'); + }); + + $('body').scrollspy({ + target: '#sidebar', + offset: 60 + }); + + $('[data-toggle="tooltip"]').tooltip(); + + var cur_path = paths(location.pathname); + var links = $("#navbar ul li a"); + var max_length = -1; + var pos = -1; + for (var i = 0; i < links.length; i++) { + if (links[i].getAttribute("href") === "#") + continue; + var path = paths(links[i].pathname); + + var length = prefix_length(cur_path, path); + if (length > max_length) { + max_length = length; + pos = i; + } + } + + // Add class to parent
  • , and enclosing
  • if in dropdown + if (pos >= 0) { + var menu_anchor = $(links[pos]); + menu_anchor.parent().addClass("active"); + menu_anchor.closest("li.dropdown").addClass("active"); + } }); -}); + function paths(pathname) { + var pieces = pathname.split("/"); + pieces.shift(); // always starts with / + + var end = pieces[pieces.length - 1]; + if (end === "index.html" || end === "") + pieces.pop(); + return(pieces); + } + + function prefix_length(needle, haystack) { + if (needle.length > haystack.length) + return(0); + + // Special case for length-0 haystack, since for loop won't run + if (haystack.length === 0) { + return(needle.length === 0 ? 1 : 0); + } + + for (var i = 0; i < haystack.length; i++) { + if (needle[i] != haystack[i]) + return(i); + } + + return(haystack.length); + } + + /* Clipboard --------------------------*/ + + function changeTooltipMessage(element, msg) { + var tooltipOriginalTitle=element.getAttribute('data-original-title'); + element.setAttribute('data-original-title', msg); + $(element).tooltip('show'); + element.setAttribute('data-original-title', tooltipOriginalTitle); + } + + if(Clipboard.isSupported()) { + $(document).ready(function() { + var copyButton = ""; + + $(".examples, div.sourceCode").addClass("hasCopyButton"); + + // Insert copy buttons: + $(copyButton).prependTo(".hasCopyButton"); + + // Initialize tooltips: + $('.btn-copy-ex').tooltip({container: 'body'}); + + // Initialize clipboard: + var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { + text: function(trigger) { + return trigger.parentNode.textContent; + } + }); + + clipboardBtnCopies.on('success', function(e) { + changeTooltipMessage(e.trigger, 'Copied!'); + e.clearSelection(); + }); + + clipboardBtnCopies.on('error', function() { + changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); + }); + }); + } +})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml new file mode 100644 index 0000000..a346e19 --- /dev/null +++ b/docs/pkgdown.yml @@ -0,0 +1,7 @@ +pandoc: 1.19.2.1 +pkgdown: 1.1.0 +pkgdown_sha: ~ +articles: + a-overview: a-overview.html + b-basic-usage: b-basic-usage.html + diff --git a/docs/reference/add_methods_step.html b/docs/reference/add_methods_step.html index 1c33a86..656e147 100644 --- a/docs/reference/add_methods_step.html +++ b/docs/reference/add_methods_step.html @@ -6,7 +6,7 @@ -Adds a step to the methods document — add_methods_step • arcticdatautils +Add a methods step — add_methods_step • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Adds a step to the methods document

    +

    Add a methods step to an EML document.

    +
    -
    add_methods_step(doc, title, description)
    +
    add_methods_step(doc, title, description)
    -

    Arguments

    +

    Arguments

    @@ -99,9 +144,15 @@

    Ar

    Value

    -

    (eml) The modified EML document

    +

    (eml) The modified EML document.

    +

    Examples

    +
    # NOT RUN {
    +eml <- read_eml("~/Documents/metadata.xml")
    +eml <- add_methods_step(eml, "Field Sampling", "Samples were
    +collected using a niskin water sampler.")
    +# }
    @@ -120,11 +173,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/arcticdatautils.html b/docs/reference/arcticdatautils.html new file mode 100644 index 0000000..501ff50 --- /dev/null +++ b/docs/reference/arcticdatautils.html @@ -0,0 +1,155 @@ + + + + + + + + +arcticdatautils: Utilities for the Arctic Data Center — arcticdatautils • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This package contains code for doing lots of useful stuff that's too specific for the +dataone package, primarily functions that streamline Arctic Data Center operations.

    + +
    + + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/clear_methods.html b/docs/reference/clear_methods.html index d3af406..d6e8a08 100644 --- a/docs/reference/clear_methods.html +++ b/docs/reference/clear_methods.html @@ -6,7 +6,7 @@ -Clear all methods from the document. — clear_methods • arcticdatautils +Clear all methods — clear_methods • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Clear all methods from the document.

    +

    Clear all methods from an EML document.

    +
    -
    clear_methods(doc)
    +
    clear_methods(doc)
    -

    Arguments

    +

    Arguments

    @@ -91,9 +136,14 @@

    Ar

    Value

    -

    (eml) The modified document.

    +

    (eml) The modified EML document.

    +

    Examples

    +
    # NOT RUN {
    +eml <- read_eml("~/Documents/metadata.xml")
    +eml <- clear_methods(eml)
    +# }
    @@ -112,11 +164,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/convert_iso_to_eml.html b/docs/reference/convert_iso_to_eml.html index 64e1b01..55ab348 100644 --- a/docs/reference/convert_iso_to_eml.html +++ b/docs/reference/convert_iso_to_eml.html @@ -6,7 +6,7 @@ -Convert and ISO document to EML using an XSLT. — convert_iso_to_eml • arcticdatautils +Convert an ISO document to EML using an XSLT — convert_iso_to_eml • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +

    Leave style=NA if you want to use the default ISO-to-EML stylesheet.

    +
    -
    convert_iso_to_eml(path, style = NA)
    +
    convert_iso_to_eml(path, style = NA)
    -

    Arguments

    +

    Arguments

    @@ -98,6 +143,11 @@

    Value

    (character) Location of the converted file.

    +

    Examples

    +
    # NOT RUN {
    +iso_path <- "~/Docuements/ISO_metadata.xml"
    +eml_path <- convert_iso_to_eml(iso_path)
    +# }
    @@ -116,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_attributes_dataframe.html b/docs/reference/create_dummy_attributes_dataframe.html new file mode 100644 index 0000000..cfe18eb --- /dev/null +++ b/docs/reference/create_dummy_attributes_dataframe.html @@ -0,0 +1,181 @@ + + + + + + + + +Create test attributes data.frame — create_dummy_attributes_dataframe • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create a test data.frame of attributes.

    + +
    + +
    create_dummy_attributes_dataframe(numberAttributes, factors = NULL)
    + +

    Arguments

    +
    + + + + + + + + + +
    numberAttributes

    (integer) Number of attributes to be created in the table.

    factors

    (character) Optional vector of factor names to include.

    + +

    Value

    + +

    (data.frame) A data.frame of attributes.

    + + +

    Examples

    +
    # NOT RUN {
    +# Create dummy attribute dataframe with 6 attributes and 1 factor
    +attributes <- create_dummy_attributes_dataframe(6, c("Factor1", "Factor2"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/create_dummy_enumeratedDomain_dataframe.html b/docs/reference/create_dummy_enumeratedDomain_dataframe.html new file mode 100644 index 0000000..1a1ffc7 --- /dev/null +++ b/docs/reference/create_dummy_enumeratedDomain_dataframe.html @@ -0,0 +1,177 @@ + + + + + + + + +Create test enumeratedDomain data.frame — create_dummy_enumeratedDomain_dataframe • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create a test data.frame of enumeratedDomains.

    + +
    + +
    create_dummy_enumeratedDomain_dataframe(factors)
    + +

    Arguments

    + + + + + + +
    factors

    (character) Vector of factor names to include.

    + +

    Value

    + +

    (data.frame) A data.frame of factors.

    + + +

    Examples

    +
    # NOT RUN {
    +# Create dummy dataframe of 2 factors/enumerated domains
    +attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/create_dummy_metadata.html b/docs/reference/create_dummy_metadata.html index 06d315f..7c2efab 100644 --- a/docs/reference/create_dummy_metadata.html +++ b/docs/reference/create_dummy_metadata.html @@ -6,7 +6,7 @@ -helpers.R — create_dummy_metadata • arcticdatautils +Create a test metadata object — create_dummy_metadata • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,20 +109,23 @@ -
    +
    +
    -

    Various helper functions for things like testing the package. -Create a test metadata object.

    +

    Create a test EML metadata object.

    +
    -
    create_dummy_metadata(mn, data_pids = NULL)
    +
    create_dummy_metadata(mn, data_pids = NULL)
    -

    Arguments

    +

    Arguments

    @@ -94,13 +138,28 @@

    Ar

    +

    Value

    +

    (character) PID of published metadata document.

    + + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- create_dummy_metadata(mn)
    +# }
  • Value
  • + +
  • Examples
  • + @@ -111,11 +170,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_object.html b/docs/reference/create_dummy_object.html index 9b7af7d..0b7490f 100644 --- a/docs/reference/create_dummy_object.html +++ b/docs/reference/create_dummy_object.html @@ -6,7 +6,7 @@ -Create a test object. — create_dummy_object • arcticdatautils +Create a test object — create_dummy_object • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create a test object.

    +

    Create a test data object.

    +
    -
    create_dummy_object(mn)
    +
    create_dummy_object(mn)
    -

    Arguments

    +

    Arguments

    @@ -89,13 +134,29 @@

    Ar

    +

    Value

    + +

    (character) The PID of the dummy object.

    + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +pid <- create_dummy_object(mn)
    +# }
    @@ -106,11 +167,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_package.html b/docs/reference/create_dummy_package.html index 8dc3f51..c6025cb 100644 --- a/docs/reference/create_dummy_package.html +++ b/docs/reference/create_dummy_package.html @@ -6,7 +6,7 @@ -Create a test package. — create_dummy_package • arcticdatautils +Create a test package — create_dummy_package • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create a test package.

    +

    Create a test data package.

    +
    -
    create_dummy_package(mn, size = 2)
    +
    create_dummy_package(mn, size = 2)
    -

    Arguments

    +

    Arguments

    @@ -89,17 +134,33 @@

    Ar

    - +
    size

    (numeric) The number of files in the package.

    (numeric) The number of files in the package, including the metadata file.

    +

    Value

    +

    (character) A named character vector of the data PIDs in the package.

    + + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +#Create dummy package with 5 data objects and 1 metadata object
    +pids <- create_dummy_package(mn, 6)
    +# }
    @@ -110,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_package_full.html b/docs/reference/create_dummy_package_full.html new file mode 100644 index 0000000..de2b87e --- /dev/null +++ b/docs/reference/create_dummy_package_full.html @@ -0,0 +1,180 @@ + + + + + + + + +Create dummy package with fuller metadata — create_dummy_package_full • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Creates a fuller package than create_dummy_package() +but is otherwise based on the same concept. This dummy +package includes multiple data objects, responsible parties, +geographic locations, method steps, etc.

    + +
    + +
    create_dummy_package_full(mn, title = "A Dummy Package")
    + +

    Arguments

    + + + + + + + + + + +
    mn

    (MNode) The Member Node.

    title

    (character) Optional. Title of package. Defaults to "A Dummy Package".

    + +

    Value

    + +

    (list) A list of package PIDs, inluding for the resource map, metadata, and data objects.

    + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/create_dummy_parent_package.html b/docs/reference/create_dummy_parent_package.html index 5e97b41..cd47a79 100644 --- a/docs/reference/create_dummy_parent_package.html +++ b/docs/reference/create_dummy_parent_package.html @@ -6,7 +6,7 @@ -Create a test parent package. — create_dummy_parent_package • arcticdatautils +Create a test parent package — create_dummy_parent_package • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create a test parent package.

    +

    Create a test parent data package.

    +
    -
    create_dummy_parent_package(mn, children)
    +
    create_dummy_parent_package(mn, children)
    -

    Arguments

    +

    Arguments

    @@ -93,13 +138,26 @@

    Ar

    +

    Value

    +

    pid (character) A named character vector of PIDs, including parent package and child package PIDs.

    + + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +# }
    +
    @@ -110,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_resource_map.html b/docs/reference/create_resource_map.html index 683785d..3cd7788 100644 --- a/docs/reference/create_resource_map.html +++ b/docs/reference/create_resource_map.html @@ -6,7 +6,7 @@ -Create a resource map Object on a Member Node. — create_resource_map • arcticdatautils +Create a resource map object on a Member Node — create_resource_map • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,22 +111,26 @@ -
    +
    +

    This function first generates a new resource map RDF/XML document locally and -then uses the dataone::createObject function to create the Object on the +then uses the dataone::createObject() function to create the object on the specified MN.

    +
    -
    create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL,
    -  check_first = TRUE)
    +
    create_resource_map(mn, metadata_pid, data_pids = NULL,
    +  child_pids = NULL, check_first = TRUE, ...)
    -

    Arguments

    +

    Arguments

    @@ -92,13 +139,11 @@

    Ar

    - + - + @@ -108,21 +153,35 @@

    Ar

    +arguments exist on the MN before continuing. This speeds up the function, +especially when data_pids has many elements.

    + + + +
    metadata_pid

    (character) The PID of the metadata object to go in the -package.

    (character) The PID of the metadata object to go in the package.

    data_pids

    (character) The PID(s) of the data objects to go in the -package.

    (character) The PID(s) of the data objects to go in the package.

    child_pids
    check_first

    (logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.

    ...

    Additional arguments that can be passed into publish_object().

    Value

    -

    (character) The created resource map's PID

    +

    (character) The PID of the created resource map.

    Details

    -

    If you only want to generate resource map RDF/XML, see -generate_resource_map

    +

    If you only want to generate resource map RDF/XML, see generate_resource_map().

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe'
    +dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1',
    +'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe')
    +
    +create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid)
    +# }
    @@ -143,11 +204,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_abstract.html b/docs/reference/eml_abstract.html new file mode 100644 index 0000000..16caca3 --- /dev/null +++ b/docs/reference/eml_abstract.html @@ -0,0 +1,178 @@ + + + + + + + + +Create an EML abstract — eml_abstract • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create an EML abstract.

    + +
    + +
    eml_abstract(text)
    + +

    Arguments

    + + + + + + +
    text

    (character) Paragraphs of text with one paragraph per element in the +character vector.

    + +

    Value

    + +

    (abstract) An EML abstract.

    + + +

    Examples

    +
    # Set an abstract with a single paragraph +eml_abstract("Test abstract...")
    #> <abstract>hi</abstract>
    +# Or one with multiple paragraphs +eml_abstract(c("First para...", "second para..."))
    #> <abstract/>
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_add_entities.html b/docs/reference/eml_add_entities.html index 227f2d2..158f3ea 100644 --- a/docs/reference/eml_add_entities.html +++ b/docs/reference/eml_add_entities.html @@ -6,7 +6,7 @@ -Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table. — eml_add_entities • arcticdatautils +Add new entity elements to an EML document from a table — eml_add_entities • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,36 +109,40 @@ -
    +
    +
    -

    Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table.

    +

    Add new entity elements to an EML document from a table.

    +
    -
    eml_add_entities(doc, entities,
    +    
    eml_add_entities(doc, entities,
       resolve_base = "https://cn.dataone.org/cn/v2/resolve/")
    -

    Arguments

    +

    Arguments

    - + - + +should not override the default value.

    doc

    (eml) An EML document

    (eml) An EML document.

    entities

    (data.frame) A data.frame with columns path, pid, and -format_id

    (data.frame) A data.frame with columns type, path, pid, and +format_id.

    resolve_base

    (character) Optional. Specify a DataONE CN resolve base URI which will be used for serializing download URLs into the EML. Most users - should not override the default value.

    @@ -107,22 +152,32 @@

    Value

    Examples

    -
    # Create entities from files on disk -## Not run: ------------------------------------ -# types <- c("dataTable") -# paths <- list.files(., full.names = TRUE) # Get full paths to some files -# pids <- vapply(paths, function(x) { paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs -# format_ids <- guess_format_id(paths) # Try to guess format IDs, you should check this afterwards -# -# entity_df <- data.frame(type = types, -# path = paths, -# pid = pids, -# format_id = format_ids, -# stringsAsFactors = FALSE) -# -# doc <- new("eml") -# doc <- eml_add_entities(doc, entity_df) -## ---------------------------------------------
    +
    # Create entities from files on disk +
    # NOT RUN { + types <- c("dataTable") + paths <- list.files(., full.names = TRUE) # Get full paths to some files + pids <- vapply(paths, function(x) { + paste0("urn:uuid:", uuid::UUIDgenerate()) + }, "") # Generate some UUID PIDs +Try to guess format IDs, you should check this afterwards + format_ids <- guess_format_id(paths) + + entity_df <- data.frame(type = types, + path = paths, + pid = pids, + format_id = format_ids, + stringsAsFactors = FALSE) + + doc <- new("eml") + doc <- eml_add_entities(doc, entity_df) +# } +
    +# Read in a CSV containing the info about files on disk +
    # NOT RUN { + entity_df <- read.csv("./my_entities.csv", stringsAsFactors = FALSE) + doc <- new("eml") + doc <- eml_add_entities(doc, entity_df) +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_address.html b/docs/reference/eml_address.html index 93ee051..5daf6e7 100644 --- a/docs/reference/eml_address.html +++ b/docs/reference/eml_address.html @@ -6,7 +6,7 @@ -Create an EML address element. — eml_address • arcticdatautils +Create an EML address element — eml_address • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create an EML address element.

    +

    A simple way to create an EML address element.

    +
    -
    eml_address(delivery_points, city, administrative_area, postal_code)
    +
    eml_address(delivery_points, city, administrative_area, postal_code)
    -

    Arguments

    +

    Arguments

    @@ -89,15 +134,15 @@

    Ar

    - + - + - +
    city

    (character) City

    (character) City.

    administrative_area

    (character) Administrative area

    (character) Administrative area.

    postal_code

    (character) Postal code

    (character) Postal code.

    @@ -106,6 +151,8 @@

    Value

    (address) An EML address object.

    +

    Examples

    +
    NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101")
    @@ -124,11 +173,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_associated_party.html b/docs/reference/eml_associated_party.html index 3a73689..f957bb8 100644 --- a/docs/reference/eml_associated_party.html +++ b/docs/reference/eml_associated_party.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,64 +109,43 @@ -
    +
    +
    -

    Create an EML associatedParty

    +

    See eml_party() for details.

    +
    -
    eml_associated_party(given_names, sur_name, organization = NULL,
    -  email = NULL, phone = NULL, address = NULL, role)
    +
    eml_associated_party(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    role

    (character) A role

    ...

    Arguments passed on to eml_party().

    Value

    -

    (associatedParty) The new associatedParty

    +

    (associatedParty) The new associatedParty.

    Examples

    -
    eml_associated_party("test", "user", "test@user.com", role = "Principal Investigator")
    #> <associatedParty system="uuid"> +
    eml_associated_party("test", "user", email = "test@user.com", role = "Principal Investigator")
    #> <associatedParty system="uuid"> #> <individualName> #> <givenName>test</givenName> #> <surName>user</surName> #> </individualName> -#> <organizationName>test@user.com</organizationName> +#> <electronicMailAddress>test@user.com</electronicMailAddress> #> <role>Principal Investigator</role> #> </associatedParty>
    @@ -148,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_contact.html b/docs/reference/eml_contact.html index 6057737..060384c 100644 --- a/docs/reference/eml_contact.html +++ b/docs/reference/eml_contact.html @@ -6,7 +6,7 @@ -Create an EML contact. — eml_contact • arcticdatautils +Create an EML contact — eml_contact • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,61 +109,43 @@ -
    +
    +
    -

    Create an EML contact.

    +

    See eml_party() for details.

    +
    -
    eml_contact(given_names, sur_name, organization = NULL, email = NULL,
    -  phone = NULL, address = NULL)
    +
    eml_contact(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    ...

    Arguments passed on to eml_party().

    Value

    -

    (contact) The new contact

    +

    (contact) The new contact.

    Examples

    -
    eml_contact("test", "user", "test@user.com")
    #> <contact system="uuid"> -#> <individualName> -#> <givenName>test</givenName> -#> <surName>user</surName> -#> </individualName> -#> <organizationName>test@user.com</organizationName> -#> </contact>
    +
    # NOT RUN {
    +eml_contact("test", "user", email = "test@user.com")
    +eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766")
    +eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"),
    +            c("Data Scientist", "Programmer"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_creator.html b/docs/reference/eml_creator.html index a1003fd..fa35872 100644 --- a/docs/reference/eml_creator.html +++ b/docs/reference/eml_creator.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,61 +109,43 @@ -
    +
    +
    -

    Create an EML creator

    +

    See eml_party() for details.

    +
    -
    eml_creator(given_names, sur_name, organization = NULL, email = NULL,
    -  phone = NULL, address = NULL)
    +
    eml_creator(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    ...

    Arguments passed on to eml_party().

    Value

    -

    (creator) The new creator

    +

    (creator) The new creator.

    Examples

    -
    eml_creator("test", "user", "test@user.com")
    #> <creator system="uuid"> -#> <individualName> -#> <givenName>test</givenName> -#> <surName>user</surName> -#> </individualName> -#> <organizationName>test@user.com</organizationName> -#> </creator>
    +
    # NOT RUN {
    +eml_creator("test", "user", email = "test@user.com")
    +eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766")
    +eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"),
    +            c("Data Scientist", "Programmer"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_geographic_coverage.html b/docs/reference/eml_geographic_coverage.html new file mode 100644 index 0000000..eeb7953 --- /dev/null +++ b/docs/reference/eml_geographic_coverage.html @@ -0,0 +1,194 @@ + + + + + + + + +Create an EML geographicCoverage section — eml_geographic_coverage • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    A simple way to create an EML geographicCoverage section.

    + +
    + +
    eml_geographic_coverage(description, north, east, south, west)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    description

    (character) A textual description.

    north

    (numeric) North bounding coordinate.

    east

    (numeric) East bounding coordinate.

    south

    (numeric) South bounding coordinate.

    west

    (numeric) West bounding coordinate.

    + +

    Value

    + +

    (geographicCoverage) The new geographicCoverage section.

    + +

    Details

    + +

    For a bounding box, all coordinates should be unique. +For a single point, the North and South bounding coordinates should be the same and +the East and West bounding coordinates should be the same.

    + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_individual_name.html b/docs/reference/eml_individual_name.html index 871b2f8..437815d 100644 --- a/docs/reference/eml_individual_name.html +++ b/docs/reference/eml_individual_name.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create an EML individualName section

    +

    Create an EML individualName section.

    +
    -
    eml_individual_name(given_names, sur_name)
    +
    eml_individual_name(given_names = NULL, sur_name)
    -

    Arguments

    +

    Arguments

    @@ -95,9 +140,14 @@

    Ar

    Value

    -

    (individualName) The new individualName section

    +

    (individualName) The new individualName section.

    +

    Examples

    +
    eml_individual_name("some", "user")
    #> <individualName> +#> <givenName>some</givenName> +#> <surName>user</surName> +#> </individualName>
    @@ -116,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_metadata_provider.html b/docs/reference/eml_metadata_provider.html index c8bd5ee..9ffd574 100644 --- a/docs/reference/eml_metadata_provider.html +++ b/docs/reference/eml_metadata_provider.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,45 +109,28 @@ -
    +
    +
    -

    Create an EML metadataProvider

    +

    See eml_party() for details.

    +
    -
    eml_metadata_provider(given_names, sur_name, organization = NULL,
    -  email = NULL, phone = NULL, address = NULL)
    +
    eml_metadata_provider(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    ...

    Arguments passed on to eml_party().

    @@ -116,12 +140,12 @@

    Value

    Examples

    -
    eml_metadata_provider("test", "user", "test@user.com")
    #> <metadataProvider system="uuid"> +
    eml_metadata_provider("test", "user", email = "test@user.com")
    #> <metadataProvider system="uuid"> #> <individualName> #> <givenName>test</givenName> #> <surName>user</surName> #> </individualName> -#> <organizationName>test@user.com</organizationName> +#> <electronicMailAddress>test@user.com</electronicMailAddress> #> </metadataProvider>
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_otherEntity_to_dataTable.html b/docs/reference/eml_otherEntity_to_dataTable.html new file mode 100644 index 0000000..019994a --- /dev/null +++ b/docs/reference/eml_otherEntity_to_dataTable.html @@ -0,0 +1,195 @@ + + + + + + + + +Convert otherEntities to dataTables — eml_otherEntity_to_dataTable • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +otherEntity object as currently constructed - it does not add a physical or add attributes. +However, if these are already in their respective slots, they will be retained.

    + +
    + +
    eml_otherEntity_to_dataTable(eml, otherEntity, validate_eml = TRUE)
    + +

    Arguments

    + + + + + + + + + + + + + + +
    eml

    (S4) An EML S4 object.

    otherEntity

    (S4 / integer) Either an EML otherEntity object or the index +of an otherEntity within a ListOfotherEntity. Integer input is recommended.

    validate_eml

    (logical) Optional. Whether or not to validate the EML after +completion. Setting this to FALSE reduces execution time by ~50 percent.

    + + +

    Examples

    +
    # NOT RUN {
    +eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils"))
    +
    +# The following two calls are equivalent:
    +eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]])
    +eml <- eml_otherEntity_to_dataTable(eml, 1)
    +
    +# Integer input is recommended:
    +eml <- eml_otherEntity_to_dataTable(eml, 1)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_party.html b/docs/reference/eml_party.html new file mode 100644 index 0000000..87c3a18 --- /dev/null +++ b/docs/reference/eml_party.html @@ -0,0 +1,226 @@ + + + + + + + + +Create an EML party — eml_party • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    You will usually want to use the high-level functions such as +eml_creator() and eml_contact() but using this is fine.

    + +
    + +
    eml_party(type = "associatedParty", given_names = NULL,
    +  sur_name = NULL, organization = NULL, position = NULL,
    +  email = NULL, phone = NULL, address = NULL, userId = NULL,
    +  role = NULL)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    type

    (character) The type of party (e.g. 'contact').

    given_names

    (character) The party's given name(s).

    sur_name

    (character) The party's surname.

    organization

    (character) The party's organization name.

    position

    (character) The party's position.

    email

    (character) The party's email address(es).

    phone

    (character) The party's phone number(s).

    address

    (character) The party's address(es).

    userId

    (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.

    role

    (character) The party's role.

    + +

    Value

    + +

    (party) An instance of the party specified by the type argument.

    + +

    Details

    + +

    The userId argument assumes an ORCID so be sure to adjust for that.

    + + +

    Examples

    +
    # NOT RUN {
    +eml_party("creator", "Test", "User")
    +eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766")
    +eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"),
    +          c("Data Scientist", "Programmer"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_personnel.html b/docs/reference/eml_personnel.html new file mode 100644 index 0000000..dd69951 --- /dev/null +++ b/docs/reference/eml_personnel.html @@ -0,0 +1,185 @@ + + + + + + + + +Create an EML personnel — eml_personnel • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    See eml_party() for details.

    + +
    + +
    eml_personnel(role = NULL, ...)
    + +

    Arguments

    + + + + + + + + + + +
    role

    (character) Personnel role, e.g. "principalInvestigator".

    ...

    Arguments passed on to eml_party().

    + +

    Value

    + +

    (personnel) The new personnel.

    + + +

    Examples

    +
    eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator")
    #> <personnel system="uuid"> +#> <individualName> +#> <givenName>test</givenName> +#> <surName>user</surName> +#> </individualName> +#> <electronicMailAddress>test@user.com</electronicMailAddress> +#> <role>principalInvestigator</role> +#> </personnel>
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_project.html b/docs/reference/eml_project.html index 114e5b5..686c5ba 100644 --- a/docs/reference/eml_project.html +++ b/docs/reference/eml_project.html @@ -6,7 +6,7 @@ -Create an eml-project section. — eml_project • arcticdatautils +Create an EML project section — eml_project • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,45 +109,56 @@ -
    +
    +
    -

    Note: This is super-limited right now.

    +

    Create an EML project section.

    +
    -
    eml_project(title, awards, first, last, organizations = NULL,
    -  role = "originator")
    +
    eml_project(title, personnelList, abstract = NULL, funding = NULL,
    +  studyAreaDescription = NULL, designDescription = NULL,
    +  relatedProject = NULL)
    -

    Arguments

    +

    Arguments

    - + + + + + - - + + - - + + - - + + - - + + - - + +
    title

    (character) Title of the project.

    (character) Title of the project (Required). May have multiple titles.

    personnelList

    (list of personnel) Personnel involved with the project.

    awards

    (character) One or more awards for the project.

    abstract

    (character) Project abstract. Can pass as a character vector +for separate paragraphs.

    first

    (character) First name of the person with role `role`.

    funding

    (character) Funding sources for the project such as grant and +contract numbers. Can pass as a character vector for separate paragraphs.

    last

    (character) Last name of the person with role `role`.

    studyAreaDescription

    (studyAreaDescription)

    organizations

    (character) Optional. One or more organization strings.

    designDescription

    (designDescription)

    role

    (character) Optional. Specify an alternate role.

    relatedProject

    (project)

    @@ -114,18 +166,18 @@

    Value

    (project) The new project section.

    +

    Details

    + +

    Note - studyAreaDescription, designDescription, and relatedProject are not +fully fleshed out. Need to pass these objects in directly if you want to use +them.

    +

    Examples

    -
    eml_project("Some title", "51231", "Some", "User")
    #> <project system="uuid"> -#> <title>Some title</title> -#> <personnel> -#> <individualName> -#> <givenName>Some</givenName> -#> <surName>User</surName> -#> </individualName> -#> <role>originator</role> -#> </personnel> -#> </project>
    +
    proj <- eml_project(c("Some title", "A second title if needed"), + c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), + c("Abstract paragraph 1", "Abstract paragraph 2"), + "Funding Agency: Award Number 12345")
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_set_reference.html b/docs/reference/eml_set_reference.html new file mode 100644 index 0000000..f34dee5 --- /dev/null +++ b/docs/reference/eml_set_reference.html @@ -0,0 +1,200 @@ + + + + + + + + +Set a reference to an EML object — eml_set_reference • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function creates a new object with the same class as element_to_replace +using a reference to element_to_reference.

    + +
    + +
    eml_set_reference(element_to_reference, element_to_replace)
    + +

    Arguments

    + + + + + + + + + + +
    element_to_reference

    (S4) An EML object to reference.

    element_to_replace

    (S4) An EML object to replace with a reference.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- dataone::CNode('PROD')
    +adc <- dataone::getMNode(cn,'urn:node:ARCTIC')
    +eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M'))
    +
    +# Set the first contact as a reference to the first creator
    +eml@dataset@contact[[1]] <- eml_set_reference(eml@dataset@creator[[1]],
    +eml@dataset@contact[[1]])
    +
    +# This is also useful when we want to set references to a subset of 'dataTable'
    +  or 'otherEntity' objects
    +# Add a few more objects first to illustrate the use:
    +eml@dataset@dataTable[[3]] <- eml@dataset@dataTable[[1]]
    +eml@dataset@dataTable[[4]] <- eml@dataset@dataTable[[1]]
    +# Add references to the second and third elements only (not the 4th):
    +for (i in 2:3) {
    +    eml@dataset@dataTable[[i]]@attributeList <- eml_set_reference(eml@dataset@dataTable[[1]]@attributeList,
    +                                                      eml@dataset@dataTable[[i]]@attributeList)
    +}
    +# If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not.
    +eml@dataset@dataTable
    +# }
    +
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_set_shared_attributes.html b/docs/reference/eml_set_shared_attributes.html new file mode 100644 index 0000000..9d610de --- /dev/null +++ b/docs/reference/eml_set_shared_attributes.html @@ -0,0 +1,198 @@ + + + + + + + + +Set shared attribute references — eml_set_shared_attributes • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function sets shared attributes using the attributes of the first type +selected and creates references for all remaining objects of equivalent type.

    + +
    + +
    eml_set_shared_attributes(eml, attributeList = NULL,
    +  type = "dataTable")
    + +

    Arguments

    + + + + + + + + + + + + + + +
    eml

    (eml) An EML object.

    attributeList

    (attributeList) Optional. An EML attributeList object. If not provided +then it will default to the attributeList of the first type element.

    type

    (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList +objects with references. Defaults to 'dataTable'.

    + +

    Value

    + +

    (eml) The modified EML document.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- dataone::CNode('PROD')
    +adc <- dataone::getMNode(cn,'urn:node:ARCTIC')
    +eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M'))
    +atts <- EML::set_attributes(EML::get_attributes(eml@dataset@dataTable[[1]]@attributeList)$attributes)
    +
    +eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable')
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_validate_attributes.html b/docs/reference/eml_validate_attributes.html index e836533..e6cd92e 100644 --- a/docs/reference/eml_validate_attributes.html +++ b/docs/reference/eml_validate_attributes.html @@ -18,14 +18,31 @@ + + + + + - + + + + + + + + + - +
    @@ -68,48 +113,47 @@ -
    +
    +

    The attributes passed into this function are validated one-by-one and the progress of going through each attribute is printed to the screen along -with any and all validation issues.

    +with any and all validation issues. This is done by, for each attribute in the list, +creating a minimum valid EML document and adding a new otherEntity with a new +attributeList containing the single attribute to be validated.

    +
    -
    eml_validate_attributes(attributes)
    +
    eml_validate_attributes(attributes)
    -

    Arguments

    +

    Arguments

    - +
    attributes

    (attributeList) An attributeList

    (attributeList) An attributeList.

    Value

    -

    (boolean) Named vector of TRUE/FALSE indicating which attributes -are valid

    - -

    Details

    - -

    This is done by, for each attribute in the list, creating a minimum valid -EML document and adding a new otherEntity with a new attributeList containing -the single attribute to be validated.

    +

    (logical) Named vector indicating which attributes are valid.

    Examples

    -
    ## Not run: ------------------------------------ -# atts_df <- read.csv('attributes_table.csv', stringsAsFactors = F) -# enum_domain <- read.csv('enumerated_domain.csv') # optional -# attributes <- EML::set_attributes(atts_df, factor = enum_domain) -# eml_validate_attributes(attributes) -## ---------------------------------------------
    +
    # NOT RUN {
    +atts_df <- read.csv('attributes_table.csv', stringsAsFactors = F)
    +enum_domain <- read.csv('enumerated_domain.csv') # optional
    +attributes <- EML::set_attributes(atts_df, factor = enum_domain)
    +eml_validate_attributes(attributes)
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/env_get.html b/docs/reference/env_get.html index 76e4c73..3a3adbe 100644 --- a/docs/reference/env_get.html +++ b/docs/reference/env_get.html @@ -6,8 +6,7 @@ -environment.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu> — env_get • arcticdatautils +Get the current environment name — env_get • arcticdatautils @@ -19,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -69,20 +109,21 @@ -
    +
    +
    -

    Functions related to loading configuriation based upon the environment -the code is being run under. -Get the current environment name.

    +

    Get the current environment name.

    +
    -
    env_get()
    +
    env_get()

    Value

    @@ -106,11 +147,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/find_newest_object.html b/docs/reference/find_newest_object.html index 0dc25d3..bd3c004 100644 --- a/docs/reference/find_newest_object.html +++ b/docs/reference/find_newest_object.html @@ -6,7 +6,7 @@ -Find the newest (by dateUploaded) object within a given set of objects. — find_newest_object • arcticdatautils +Find the newest object within the given set of objects — find_newest_object • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,32 +109,36 @@ -
    +
    +
    -

    Find the newest (by dateUploaded) object within a given set of objects.

    +

    Find the newest object, based on dateUploaded, within the given set of objects.

    +
    -
    find_newest_object(node, identifiers, rows = 1000)
    +
    find_newest_object(node, identifiers, rows = 1000)
    -

    Arguments

    +

    Arguments

    - + - - + + - - + +
    node

    (MNode | CNode) The node to query

    (MNode/CNode) The Member Node to query.

    rows

    (numeric) Optional. Specify the size of the query result set.

    identifiers

    (character) One or more identifiers.

    identiifers

    (character) One or more identifiers

    rows

    (numeric) Optional. Specify the size of the query result set.

    @@ -104,10 +149,10 @@

    Value

    Examples

    -
    ## Not run: ------------------------------------ -# mn <- MNode(...) -# find_newest_object(mn, c("PIDX", "PIDY", "PIDZ")) -## ---------------------------------------------
    +
    # NOT RUN {
    +mn <- MNode(...)
    +find_newest_object(mn, c("PIDX", "PIDY", "PIDZ"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/format_eml.html b/docs/reference/format_eml.html index 092895a..15d9c74 100644 --- a/docs/reference/format_eml.html +++ b/docs/reference/format_eml.html @@ -6,7 +6,7 @@ -Helper function to generate the EML 2.1.1 format ID. — format_eml • arcticdatautils +Generate the EML 2.1.1 format ID — format_eml • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,17 +109,21 @@ -
    +
    +
    -

    Helper function to generate the EML 2.1.1 format ID.

    +

    Returns the EML 2.1.1 format ID.

    +
    -
    format_eml()
    +
    format_eml()

    Value

    @@ -86,15 +131,11 @@

    Value

    Examples

    -
    format_eml
    #> function() { -#> "eml://ecoinformatics.org/eml-2.1.1" -#> } -#> <environment: namespace:arcticdatautils>
    -## Not run: ------------------------------------ -# # Upload a local EML 2.1.1 file: -# env <- env_load() -# publish_object(env$mn, "path_to_some_EML_file", format_eml()) -## ---------------------------------------------
    +
    format_eml()
    #> [1] "eml://ecoinformatics.org/eml-2.1.1"
    # NOT RUN { +# Upload a local EML 2.1.1 file: +env <- env_load() +publish_object(env$mn, "path_to_some_EML_file", format_eml()) +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/format_iso.html b/docs/reference/format_iso.html index a1763b5..f5d68c8 100644 --- a/docs/reference/format_iso.html +++ b/docs/reference/format_iso.html @@ -6,7 +6,7 @@ -dataone_formats.R — format_iso • arcticdatautils +Generate the ISO 19139 format ID — format_iso • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,22 +109,21 @@ -
    +
    +
    -

    A set of thin functions which return the DataONE format ID string. These are -to aid in filling in function arguments and can't remember or don't want to -type in the full format ID. By putting these format ID strings into -functions, a user's autocompletion routine in their editor can help them -fill in the format ID they want. -Helper function to generate the ISO 19139 format ID.w

    +

    Returns the ISO 19139 format ID.

    +
    -
    format_iso()
    +
    format_iso()

    Value

    @@ -91,11 +131,11 @@

    Value

    Examples

    -
    format_iso()
    #> [1] "http://www.isotc211.org/2005/gmd"
    ## Not run: ------------------------------------ -# # Upload a local ISO19139 XML file: -# env <- env_load() -# publish_object(env$mn, "path_to_some_EML_file", format_iso()) -## ---------------------------------------------
    +
    format_iso()
    #> [1] "http://www.isotc211.org/2005/gmd"
    # NOT RUN { +# Upload a local ISO19139 XML file: +env <- env_load() +publish_object(env$mn, "path_to_some_EML_file", format_iso()) +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/generate_resource_map.html b/docs/reference/generate_resource_map.html index 5af2e1f..a3ed02d 100644 --- a/docs/reference/generate_resource_map.html +++ b/docs/reference/generate_resource_map.html @@ -6,9 +6,7 @@ -Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`. — generate_resource_map • arcticdatautils +Create a resource map RDF/XML file and save is to a temporary path — generate_resource_map • arcticdatautils @@ -20,14 +18,28 @@ + + + + + - + + + + + + + + + - +
    @@ -70,62 +110,67 @@ -
    +
    +
    -

    Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`.

    +

    This is a convenience wrapper around the constructor of the ResourceMap +class from DataPackage.

    +
    -
    generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL,
    -  other_statements = NULL,
    +    
    generate_resource_map(metadata_pid, data_pids = NULL,
    +  child_pids = NULL, other_statements = NULL,
       resolve_base = "https://cn.dataone.org/cn/v2/resolve",
       resource_map_pid = NULL)
    -

    Arguments

    +

    Arguments

    - + - + - + - + + + + +
    metadata_pid

    (character) PID of the metadata Object.

    (character) PID of the metadata object.

    data_pids

    (character) PID(s) of the data Objects.

    (character) PID(s) of the data objects.

    child_pids

    (character) Optional. PID(s) of child Resource Maps.

    (character) Optional. PID(s) of child resource maps.

    other_statements

    (data.frame) Extra statements to add to the Resource Map.

    (data.frame) Extra statements to add to the resource map.

    resolve_base

    (character) Optional. The resolve service base URL.

    resource_map_pid

    (character) The PID of a resource map.

    Value

    -

    Absolute path to the Resource Map on disk (character)

    +

    (character) Absolute path to the resource map on disk.

    Examples

    -
    ## Not run: ------------------------------------ -# generate_resource_map("X", "Y", "Z", -# other_statements = data.frame(subject="http://example.com/me", -# predicate="http://example.com/foo", -# object="http://example.com/bar")) -## ---------------------------------------------
    +
    # NOT RUN {
    +generate_resource_map("X", "Y", "Z",
    +                      other_statements = data.frame(subject="http://example.com/me",
    +                                                    predicate="http://example.com/foo",
    +                                                    object="http://example.com/bar"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_all_sysmeta.html b/docs/reference/get_all_sysmeta.html new file mode 100644 index 0000000..39be3c6 --- /dev/null +++ b/docs/reference/get_all_sysmeta.html @@ -0,0 +1,205 @@ + + + + + + + + +Get system metadata for all elements of a data package — get_all_sysmeta • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function retrieves the system metadata for all elements of a data package and returns them as a list. +It is useful for inspecting system metadata for an entire data package and identifying changes where needed.

    + +
    + +
    get_all_sysmeta(mn, resource_map_pid, nmax = 1000,
    +  child_packages = FALSE)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    mn

    (MNode) The Member Node to query.

    resource_map_pid

    (character) The PID for a resource map.

    nmax

    (numeric) The maximum number of system metadata objects to return.

    child_packages

    (logical) If parent package, whether or not to include child packages.

    + +

    Value

    + +

    (list) A list of system metadata objects.

    + + +

    Examples

    +
    # NOT RUN {
    +cn_staging <- CNode("STAGING")
    +adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC")
    +
    +rm_pid <- "resource_map_urn:uuid:..."
    +
    +all <- get_all_sysmeta(adc_test, rm_pid)
    +
    +# View in viewer to inspect
    +View(all)
    +
    +# Print specific elements to console
    +all[[1]]@rightsHolder
    +
    +# Create separate object
    +sysmeta_md <- all[[2]]
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/get_all_versions.html b/docs/reference/get_all_versions.html index 79e7286..ae5c09d 100644 --- a/docs/reference/get_all_versions.html +++ b/docs/reference/get_all_versions.html @@ -6,7 +6,7 @@ -Get the PIDs of all versions of an object. — get_all_versions • arcticdatautils +Get the PIDs of all versions of an object — get_all_versions • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,24 +109,28 @@ -
    +
    +

    Get the PIDs of all versions of an object.

    +
    -
    get_all_versions(node, pid)
    +
    get_all_versions(node, pid)
    -

    Arguments

    +

    Arguments

    - + @@ -98,6 +143,14 @@

    Value

    (character) A vector of PIDs in the chain, in order.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1"
    +
    +ids <- get_all_versions(mn, pid)
    +# }
    @@ -116,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_mn_base_url.html b/docs/reference/get_mn_base_url.html index 7c07e6d..2fb6ef5 100644 --- a/docs/reference/get_mn_base_url.html +++ b/docs/reference/get_mn_base_url.html @@ -6,7 +6,7 @@ -Get the base URL of the Member Node. — get_mn_base_url • arcticdatautils +Get base URL of a Member Node — get_mn_base_url • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,34 +109,51 @@ -
    +
    +
    -

    Get the base URL of the Member Node.

    +

    Get the base URL of a Member Node.

    +
    -
    get_mn_base_url(mn)
    +
    get_mn_base_url(mn)
    -

    Arguments

    +

    Arguments

    node

    (MNode|CNode) The node to query.

    (MNode) The Member Node to query.

    pid
    - +
    mn

    (character) The Member Node.

    +

    Value

    +

    (character) The URL.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +# }
    @@ -106,11 +164,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_ncdf4_attributes.html b/docs/reference/get_ncdf4_attributes.html index d8b5dd6..c0dbc54 100644 --- a/docs/reference/get_ncdf4_attributes.html +++ b/docs/reference/get_ncdf4_attributes.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,36 +109,40 @@ -
    +
    +
    -

    Get a data.frame of attributes from a NetCDF object

    +

    Get a data.frame of attributes from a NetCDF object.

    +
    -
    get_ncdf4_attributes(nc)
    +
    get_ncdf4_attributes(nc)
    -

    Arguments

    +

    Arguments

    - +
    nc

    (ncdf4 or character) Either a ncdf4 object or a file path

    (ncdf4/character) Either a ncdf4 object or a file path.

    Value

    -

    (data.frame) A data.frame of the attributes

    +

    (data.frame) A data.frame of the attributes.

    Examples

    -
    ## Not run: ------------------------------------ -# get_ncdf4_attributes("./path/to/my.nc") -## ---------------------------------------------
    +
    # NOT RUN {
    +get_ncdf4_attributes("./path/to/my.nc")
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_package.html b/docs/reference/get_package.html index f7a7e89..7091f7c 100644 --- a/docs/reference/get_package.html +++ b/docs/reference/get_package.html @@ -6,7 +6,7 @@ -Get a structured list of PIDs for the objects in a package. — get_package • arcticdatautils +Get a structured list of PIDs for the objects in a package — get_package • arcticdatautils @@ -18,14 +18,28 @@ + + + + + - + + + + + + + + + - +
    @@ -68,29 +110,33 @@ -
    +
    +
    -

    This is a wrapper function around `get_package_direct` which takes either -a resource map PID or a metadata PID as its `pid` argument.

    +

    Get a structured list of PIDs for the objects in a package, +including the resource map, metadata, and data objects.

    +
    -
    get_package(node, pid, file_names = FALSE, rows = 1000)
    +
    get_package(node, pid, file_names = FALSE, rows = 5000)
    -

    Arguments

    +

    Arguments

    - + - + @@ -99,7 +145,7 @@

    Ar

    +useful to set if you are warned about the result set being truncated. Defaults to 5000.

    node

    (MNode|CNode) The Coordinating/Member Node to run the query on.

    (MNode/CNode) The Coordinating/Member Node to run the query on.

    pid

    (character) The the metadata PID of the package.

    (character) The the resource map PID of the package.

    file_names
    rows

    (numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated.

    @@ -108,6 +154,15 @@

    Value

    (list) A structured list of the members of the package.

    +

    Examples

    +
    # NOT RUN {
    +#Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1"
    +
    +ids <- get_package(mn, pid)
    +# }
    @@ -126,11 +183,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_token.html b/docs/reference/get_token.html index bb02aed..ab6c47e 100644 --- a/docs/reference/get_token.html +++ b/docs/reference/get_token.html @@ -6,7 +6,7 @@ -Gets the currently set authentication token. — get_token • arcticdatautils +Get the currently set authentication token — get_token • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,24 +109,28 @@ -
    +
    +
    -

    Gets the currently set authentication token.

    +

    Get the currently set authentication token.

    +
    -
    get_token(node)
    +
    get_token(node)
    -

    Arguments

    +

    Arguments

    - +
    node

    (MNode|CNode) The CN or MN you want to find a token for.

    (MNode/CNode) The Member/Coordinating Node to query.

    @@ -94,6 +139,12 @@

    Value

    (character) The token.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +get_token(mn)
    +# }
    @@ -112,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/guess_format_id.html b/docs/reference/guess_format_id.html index c66849b..061ba66 100644 --- a/docs/reference/guess_format_id.html +++ b/docs/reference/guess_format_id.html @@ -6,7 +6,7 @@ -Guess format from filename for a vector of filenames. — guess_format_id • arcticdatautils +Guess format from filename — guess_format_id • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,32 +109,38 @@ -
    +
    +

    Guess format from filename for a vector of filenames.

    +
    -
    guess_format_id(filenames)
    +
    guess_format_id(filenames)
    -

    Arguments

    +

    Arguments

    - +
    filenames

    (character)

    (character) A vector of filenames.

    Value

    -

    (character) DataOne format identifiers strings.

    +

    (character) DataONE format IDs.

    +

    Examples

    +
    formatid <- guess_format_id("temperature_data.csv")
    @@ -112,11 +161,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 90e4f06..1e7dd43 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -18,14 +18,24 @@ + + + + + - + + + + + + - + @@ -68,692 +106,432 @@ -
    -
    +
    +
    -
    - +
    - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -

    All functions

    -

    -
    -

    add_access_rules

    -

    sysmeta.R

    -

    add_additional_identifiers

    -

    Add a set of additional identifiers to an EML document.

    -

    add_admin_group_access

    -

    Adds access to the given System Metadata for the arctic-data-admins group

    -

    add_methods_step

    -

    Adds a step to the methods document

    -

    add_string_to_title

    -

    Adds a string to the title element in the given file.

    -

    change_eml_name

    -

    Utility function to extract a name string from an XML individualName node, -parse it into tokens,and reformat the individualName with new children nodes

    -

    clear_methods

    -

    Clear all methods from the document.

    -

    clear_replication_policy

    -

    Clear the replication policy from a System Metadata object

    -

    convert_iso_to_eml

    -

    Convert and ISO document to EML using an XSLT.

    -

    create_dummy_metadata

    -

    helpers.R

    -

    create_dummy_object

    -

    Create a test object.

    -

    create_dummy_package

    -

    Create a test package.

    -

    create_dummy_parent_package

    -

    Create a test parent package.

    -

    create_from_folder

    -

    inserting.R

    -

    create_object

    -

    Create an object from a row of the inventory.

    -

    create_resource_map

    -

    Create a resource map Object on a Member Node.

    -

    create_sysmeta

    -

    Create a sysmeta object.

    -

    determine_child_pids

    -

    Calculate a set of child PIDs for a given package.

    -

    eml_add_entities

    -

    Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table.

    -

    eml_address

    -

    Create an EML address element.

    -

    eml_associated_party

    -

    Create an EML associatedParty

    -

    eml_contact

    -

    Create an EML contact.

    -

    eml_creator

    -

    Create an EML creator

    -

    eml_individual_name

    -

    Create an EML individualName section

    -

    eml_metadata_provider

    -

    Create an EML metadataProvider

    -

    eml_project

    -

    Create an eml-project section.

    -

    eml_validate_attributes

    -

    Validate an EML attributeList attribute-by-attribute

    -

    env_get

    -

    environment.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    env_load

    -

    Load environmental variables from a YAML-formatted environment file.

    -

    extract_local_identifier

    -

    util.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    filter_obsolete_pids

    -

    Filters PIDs that are obsolete.

    -

    filter_packaging_statements

    -

    Filter statements related to packaging

    -

    find_newest_object

    -

    Find the newest (by dateUploaded) object within a given set of objects.

    -

    find_newest_resource_map

    -

    Get the resource map(s) for the given object.

    -

    fix_bad_enum

    -

    Fix a metadata record with a bad topicCategory.

    -

    fix_bad_topic

    -

    Fix a metadata record with multiple MD_TopicCategory children elements -inside a single topicCategory element.

    -

    format_eml

    -

    Helper function to generate the EML 2.1.1 format ID.

    -

    format_iso

    -

    dataone_formats.R

    -

    generate_resource_map_pid

    -

    Generate a PID for a new resource map by appending "resource_map_" to it.

    -

    generate_resource_map

    -

    Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`.

    -

    get_all_versions

    -

    Get the PIDs of all versions of an object.

    -

    get_chain_neighbors

    -

    Get the obsoleted/obsoletedBy properties of an object as a named list.

    -

    get_current_version

    -

    Get the current package version.

    -

    get_doc_id

    -

    Get the Metacat docid for the given identifier

    -

    get_identifier

    -

    Get the identifier from a DataONE response.

    -

    get_latest_release

    -

    Use the GitHub API to find the latest release for the package.

    -

    get_mn_base_url

    -

    Get the base URL of the Member Node.

    -

    get_ncdf4_attributes

    -

    Get a data.frame of attributes from a NetCDF object

    -

    get_netcdf_format_id

    -

    Determine the DataONE format ID for the NetCDF file provided by path.

    -

    get_or_create_pid

    -

    Get the already-minted PID from the inventory or mint a new one.

    -

    get_package_direct

    -

    Get a structured list of PIDs for the objects in a package.

    -

    get_package

    -

    Get a structured list of PIDs for the objects in a package.

    -

    get_token_subject

    -

    Returns the subject of the set dataone_test_token

    -

    get_token

    -

    Gets the currently set authentication token.

    -

    guess_format_id

    -

    Guess format from filename for a vector of filenames.

    -

    insert_file

    -

    package.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    insert_package

    -

    Create a single package Data Package from files in the Inventory.

    -

    inv_add_extra_columns

    -

    Adds a set of extra columsn to the inventory that are useful for working -with them.

    -

    inv_add_parent_package_column

    -

    Add a column for parent packages.

    -

    inv_init

    -

    inventory.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    inv_load_checksums

    -

    Load checksums into the inventory file from a text file. This function -removes the column 'checksum_sha256' from inventory before doing a -left join.

    -

    inv_load_dois

    -

    Load DOIs from a text file into the Inventory.

    -

    inv_load_files

    -

    Load files into the inventory from a text file.

    -

    inv_load_identifiers

    -

    Load identifiers into the inventory file(s) from a text file. This function -removes the column 'identifier' from inventory before doing a -left join.

    -

    inv_load_sizes

    -

    Load file sizes into an inventory from a text file. Removes the column -'size_bytes' from inventory before doing a left join.

    -

    inv_update

    -

    Update an Inventory with a new Inventory.

    -

    is_authorized

    -

    Check if the user has authorization to perform an action on an object.

    -

    is_format_id

    -

    Test whether an object is a particular format ID.

    -

    is_obsolete

    -

    Test whether the object is obsoleted by another object.

    -

    is_resource_map

    -

    Determines whether the object with the given PID is a resource map.

    -

    is_token_expired

    -

    Determine whether the set token is expired.

    -

    is_token_set

    -

    dataone.R

    -

    log_message

    -

    Log a message to the console and to a logfile.

    -

    new_uuid

    -

    Helper function to generate a new UUID PID.

    -

    object_exists

    -

    Check if an object exists on a Member Node.

    -

    parse_resource_map

    -

    Parse a Resource Map into a data.frame

    -

    path_join

    -

    (Intelligently) join (possibly redudant) path parts together.

    -

    pid_to_other_entity

    -

    eml.R

    -

    pretty_print

    -

    Uses XMLStarlet to pretty-print/beautify an XML document.

    -

    publish_object

    -

    editing.R

    -

    publish_update

    -

    Publish an updated data package.

    -

    remove_public_access

    -

    Remove all public read access rules from a System Metadata document

    -

    remove_public_read

    -

    Remove public access on a set of objects.

    -

    replace_package_id

    -

    Replace the EML 'packageId' attribute on the root element with a -certain value.

    -

    replace_subject

    -

    Replace subjects in the accessPolicy section of a System Metadata entries.

    -

    set_abstract

    -

    Set the abstract on an EML document

    -

    set_access

    -

    Set the access policy for a set of objects.

    -

    set_file_name

    -

    Set the file name on an object

    -

    set_other_entities

    -

    Creates and sets EML otherEntity elements to an existing EML document, -replacing any existing otherEntities

    -

    set_public_read

    -

    Set public access on a set of objects.

    -

    set_rights_and_access

    -

    Set the given subject as the rightsHolder and subject with write and -changePermission access for the given PID.

    -

    set_rights_holder

    -

    access.R

    -

    show_random_dataset

    -

    Print a random dataset.

    -

    substitute_eml_party

    -

    Extract the EML responsible-party blocks in a document, and parse the -surName field to create proper givenName/surName structure

    -

    sysmeta_to_eml_physical

    -

    Create an EML physical subtree from a System Metadata instance

    -

    sysmeta_to_other_entity

    -

    Create an EML otherEntity sub-tree for the given object.

    -

    test_has_abstract

    -

    modify_metadata.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    theme_packages

    -

    marking.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    update_object

    -

    Update an object with a new file.

    -

    update_package

    -

    Update a package with modified metadata.

    -

    update_resource_map

    -

    Update an existing resource map Object on a Member Node.

    -

    validate_environment

    -

    Validate an environment.

    -

    validate_inventory

    -

    Validate an Inventory.

    -

    view_packages

    -

    interactive.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    view_profile

    -

    Get an approximate list of the datasets in a user's profile

    -

    warn_current_version

    -

    Warns if the currently-installed version of the package is not the same -version as the latest release on GitHub.

    -
    + + + +

    All functions

    +

    + + + + + +

    add_methods_step()

    + +

    Add a methods step

    + + + +

    arcticdatautils

    + +

    arcticdatautils: Utilities for the Arctic Data Center

    + + + +

    clear_methods()

    + +

    Clear all methods

    + + + +

    convert_iso_to_eml()

    + +

    Convert an ISO document to EML using an XSLT

    + + + +

    create_dummy_attributes_dataframe()

    + +

    Create test attributes data.frame

    + + + +

    create_dummy_enumeratedDomain_dataframe()

    + +

    Create test enumeratedDomain data.frame

    + + + +

    create_dummy_metadata()

    + +

    Create a test metadata object

    + + + +

    create_dummy_object()

    + +

    Create a test object

    + + + +

    create_dummy_package()

    + +

    Create a test package

    + + + +

    create_dummy_package_full()

    + +

    Create dummy package with fuller metadata

    + + + +

    create_dummy_parent_package()

    + +

    Create a test parent package

    + + + +

    create_resource_map()

    + +

    Create a resource map object on a Member Node

    + + + +

    eml_abstract()

    + +

    Create an EML abstract

    + + + +

    eml_add_entities()

    + +

    Add new entity elements to an EML document from a table

    + + + +

    eml_address()

    + +

    Create an EML address element

    + + + +

    eml_associated_party()

    + +

    Create an EML associatedParty

    + + + +

    eml_contact()

    + +

    Create an EML contact

    + + + +

    eml_creator()

    + +

    Create an EML creator

    + + + +

    eml_geographic_coverage()

    + +

    Create an EML geographicCoverage section

    + + + +

    eml_individual_name()

    + +

    Create an EML individualName section

    + + + +

    eml_metadata_provider()

    + +

    Create an EML metadataProvider

    + + + +

    eml_otherEntity_to_dataTable()

    + +

    Convert otherEntities to dataTables

    + + + +

    eml_party()

    + +

    Create an EML party

    + + + +

    eml_personnel()

    + +

    Create an EML personnel

    + + + +

    eml_project()

    + +

    Create an EML project section

    + + + +

    eml_set_reference()

    + +

    Set a reference to an EML object

    + + + +

    eml_set_shared_attributes()

    + +

    Set shared attribute references

    + + + +

    eml_validate_attributes()

    + +

    Validate an EML attributeList attribute-by-attribute

    + + + +

    env_get()

    + +

    Get the current environment name

    + + + +

    find_newest_object()

    + +

    Find the newest object within the given set of objects

    + + + +

    format_eml()

    + +

    Generate the EML 2.1.1 format ID

    + + + +

    format_iso()

    + +

    Generate the ISO 19139 format ID

    + + + +

    generate_resource_map()

    + +

    Create a resource map RDF/XML file and save is to a temporary path

    + + + +

    get_all_sysmeta()

    + +

    Get system metadata for all elements of a data package

    + + + +

    get_all_versions()

    + +

    Get the PIDs of all versions of an object

    + + + +

    get_mn_base_url()

    + +

    Get base URL of a Member Node

    + + + +

    get_ncdf4_attributes()

    + +

    Get a data.frame of attributes from a NetCDF object

    + + + +

    get_package()

    + +

    Get a structured list of PIDs for the objects in a package

    + + + +

    get_token()

    + +

    Get the currently set authentication token

    + + + +

    guess_format_id()

    + +

    Guess format from filename

    + + + +

    is_authorized()

    + +

    Check if user has authorization to perform an action on an object

    + + + +

    is_obsolete()

    + +

    Test whether the object is obsoleted by another object

    + + + +

    is_public_read()

    + +

    Check whether an object has public read access

    + + + +

    is_token_expired()

    + +

    Determine whether token is expired

    + + + +

    is_token_set()

    + +

    Test whether a token is set

    + + + +

    mdq_run()

    + +

    Score a metadata document against a MetaDIG suite

    + + + +

    new_uuid()

    + +

    Generate a new UUID PID

    + + + +

    object_exists()

    + +

    Check if an object exists on a Member Node

    + + + +

    parse_resource_map()

    + +

    Parse a resource map into a data.frame

    + + + +

    pid_to_eml_entity()

    + +

    Create EML entity from a DataONE PID

    + + + +

    pid_to_eml_physical()

    + +

    Create EML physical objects for the given set of PIDs

    + + + +

    publish_object()

    + +

    Publish an object on a Member Node

    + + + +

    publish_update()

    + +

    Publish an updated data package

    + + + +

    remove_public_read()

    + +

    Remove public read access for an object

    + + + +

    set_abstract()

    + +

    Set the abstract for an EML document

    + + + +

    set_access()

    + +

    Set the access policy for an object

    + + + +

    set_file_name()

    + +

    Set the file name for an object

    + + + +

    set_public_read()

    + +

    Set public read access for an object

    + + + +

    set_rights_and_access()

    + +

    Set rights holder with access policy for an object

    + + + +

    set_rights_holder()

    + +

    Set the rights holder for an object

    + + + +

    show_indexing_status()

    + +

    Show the indexing status of a set of PIDs

    + + + +

    sysmeta_to_eml_physical()

    + +

    Create an EML physical object from system metadata

    + + + +

    update_object()

    + +

    Update an object with a new file

    + + + +

    update_package_object()

    + +

    Update a data object and associated resource map and metadata

    + + + +

    update_resource_map()

    + +

    Update an existing resource map object on a Member Node

    + + + +

    view_profile()

    + +

    Get an approximate list of the datasets in a user's profile

    + + + +

    which_in_eml()

    + +

    Search through EMLs

    + + +
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_authorized.html b/docs/reference/is_authorized.html index 7c712a9..734b739 100644 --- a/docs/reference/is_authorized.html +++ b/docs/reference/is_authorized.html @@ -6,7 +6,7 @@ -Check if the user has authorization to perform an action on an object. — is_authorized • arcticdatautils +Check if user has authorization to perform an action on an object — is_authorized • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,24 +109,28 @@ -
    +
    +

    Check if the user has authorization to perform an action on an object.

    +
    -
    is_authorized(node, ids, action)
    +
    is_authorized(node, ids, action)
    -

    Arguments

    +

    Arguments

    - + @@ -97,13 +142,29 @@

    Ar

    node

    (MNode|CNode) The Node to query.

    (MNode/CNode) The Member/Coordinating Node to query.

    ids
    +

    Value

    +

    (logical)

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +is_authorized(mn, pids, "write")
    +# }
    @@ -114,11 +175,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_obsolete.html b/docs/reference/is_obsolete.html index ad326ca..7b07aa0 100644 --- a/docs/reference/is_obsolete.html +++ b/docs/reference/is_obsolete.html @@ -6,7 +6,7 @@ -Test whether the object is obsoleted by another object. — is_obsolete • arcticdatautils +Test whether the object is obsoleted by another object — is_obsolete • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Test whether the object is obsoleted by another object.

    +

    Test whether the object is obsoleted by another object

    +
    -
    is_obsolete(node, pids)
    +
    is_obsolete(node, pids)
    -

    Arguments

    +

    Arguments

    @@ -98,6 +143,15 @@

    Value

    (logical) Whether or not the object is obsoleted by another object.

    +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1"
    +
    +is_obsolete(mn, pid)
    +# }
    @@ -116,11 +172,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_public_read.html b/docs/reference/is_public_read.html new file mode 100644 index 0000000..8e3630b --- /dev/null +++ b/docs/reference/is_public_read.html @@ -0,0 +1,192 @@ + + + + + + + + +Check whether an object has public read access — is_public_read • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Check whether objects have public read access. +No token needs to be set to use this function.

    + +
    + +
    is_public_read(mn, pids, use.names = TRUE)
    + +

    Arguments

    +
    + + + + + + + + + + + + + +
    mn

    (MNode) The Member Node.

    pids

    (character) The PIDs of the objects to check for public read access.

    use.names

    (logical) If TRUE, PIDs will +be used as names for the result unless PIDs have names already, in which case +those names will be used for the result.

    + +

    Value

    + +

    (logical) Whether an object has public read access.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +    "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +is_public_read(mn, pids)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/is_token_expired.html b/docs/reference/is_token_expired.html index 856e1d2..c637b07 100644 --- a/docs/reference/is_token_expired.html +++ b/docs/reference/is_token_expired.html @@ -6,7 +6,7 @@ -Determine whether the set token is expired. — is_token_expired • arcticdatautils +Determine whether token is expired — is_token_expired • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,24 +109,52 @@ -
    +
    +

    Determine whether the set token is expired.

    +
    + +
    is_token_expired(node)
    + +

    Arguments

    + + + + + + +
    node

    (character) The Member Node.

    + +

    Value

    -
    is_token_expired(node)
    - +

    (logical)

    + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +is_token_expired(mn)
    +# }
    @@ -96,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_token_set.html b/docs/reference/is_token_set.html index b40f86f..dae1161 100644 --- a/docs/reference/is_token_set.html +++ b/docs/reference/is_token_set.html @@ -6,7 +6,7 @@ -dataone.R — is_token_set • arcticdatautils +Test whether a token is set — is_token_set • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,33 +109,42 @@ -
    +
    +
    -

    Helpers for the DataONE R package. -Test whether a token is set.

    +

    Test whether a token is set.

    +
    -
    is_token_set(node)
    +
    is_token_set(node)
    -

    Arguments

    +

    Arguments

    - +
    node

    (MNode|CNode) The CN or MN you want to find a token for.

    (MNode/CNode) The Member/Coordinating Node to query.

    Value

    -

    (boolean)

    +

    (logical)

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +is_token_set(mn)
    +# }
    @@ -113,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/mdq_run.html b/docs/reference/mdq_run.html new file mode 100644 index 0000000..8d29372 --- /dev/null +++ b/docs/reference/mdq_run.html @@ -0,0 +1,187 @@ + + + + + + + + +Score a metadata document against a MetaDIG suite — mdq_run • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function scores a metadata document against a MetaDIG suite. +The default suite is for the Arctic Data Center.

    + +
    + +
    mdq_run(document, suite_id = "arctic.data.center.suite.1")
    + +

    Arguments

    + + + + + + + + + + +
    document

    (eml/character) Either an EML object or path to a file on disk.

    suite_id

    (character) Specify a suite ID. Should be one of https://quality.nceas.ucsb.edu/quality/suites.

    + +

    Value

    + +

    (data.frame) A sorted data.frame of check results.

    + + +

    Examples

    +
    # NOT RUN {
    +# Check an EML document you are authoring
    +library(EML)
    +mdq_run(new("eml"))
    +
    +# Check an EML document that is saved to disk
    +mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/new_uuid.html b/docs/reference/new_uuid.html index f90734b..4c11a79 100644 --- a/docs/reference/new_uuid.html +++ b/docs/reference/new_uuid.html @@ -6,7 +6,7 @@ -Helper function to generate a new UUID PID. — new_uuid • arcticdatautils +Generate a new UUID PID — new_uuid • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,30 +109,38 @@ -
    +
    +
    -

    Helper function to generate a new UUID PID.

    +

    Generate a new UUID PID.

    +
    -
    new_uuid()
    +
    new_uuid()

    Value

    (character) A new UUID PID.

    +

    Examples

    +
    id <- new_uuid()
    @@ -102,11 +151,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/object_exists.html b/docs/reference/object_exists.html index 59b5b2d..7dc52db 100644 --- a/docs/reference/object_exists.html +++ b/docs/reference/object_exists.html @@ -6,7 +6,7 @@ -Check if an object exists on a Member Node. — object_exists • arcticdatautils +Check if an object exists on a Member Node — object_exists • arcticdatautils @@ -18,14 +18,28 @@ + + + + + - + + + + + + + + + - + @@ -68,29 +110,33 @@ -
    +
    +

    This is a simple check for the HTTP status of a /meta/PID call on the -provided member node.

    +provided Member Mode.

    +
    -
    object_exists(node, pids)
    +
    object_exists(node, pids)
    -

    Arguments

    +

    Arguments

    - + - - + +
    node

    (MNode|CNode) The Node to query.

    (MNode) The Member Node to query.

    pid

    (character) PID to check the existence of.

    pids

    (character) The PID(s) to check the existence of.

    @@ -99,6 +145,16 @@

    Value

    (logical) Whether the object exists.

    +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +
    +object_exists(mn, pids)
    +# }
    @@ -117,11 +175,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/parse_resource_map.html b/docs/reference/parse_resource_map.html index 7c80498..1b210d3 100644 --- a/docs/reference/parse_resource_map.html +++ b/docs/reference/parse_resource_map.html @@ -6,7 +6,7 @@ -Parse a Resource Map into a data.frame — parse_resource_map • arcticdatautils +Parse a resource map into a data.frame — parse_resource_map • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,32 +109,48 @@ -
    +
    +
    -

    Parse a Resource Map into a data.frame

    +

    Parse a resource map into a data.frame.

    +
    -
    parse_resource_map(path)
    +
    parse_resource_map(path)
    -

    Arguments

    +

    Arguments

    - +
    path

    (character) Path to the resource map (an RDF/XML file)

    (character) Path to the resource map (an RDF/XML file).

    Value

    -

    (data.frame) The statements in the Resource Map

    +

    (data.frame) The statements in the resource map.

    +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +rm_pid <- "resource_map_urn:uuid:6b2e5753-4a94-4e6f-971c-36420a446ecb"
    +
    +# Write resource map to file
    +writeBin(getObject(mn, rm_pid), "~/Documents/resource_map.rdf")
    +df <- parse_resource_map("~/Documents/resource_map.rdf")
    +# }
    @@ -112,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/pid_to_eml_entity.html b/docs/reference/pid_to_eml_entity.html new file mode 100644 index 0000000..022e66b --- /dev/null +++ b/docs/reference/pid_to_eml_entity.html @@ -0,0 +1,194 @@ + + + + + + + + +Create EML entity from a DataONE PID — pid_to_eml_entity • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create EML entity from a DataONE PID

    + +
    + +
    pid_to_eml_entity(mn, pid, entityType = "otherEntity", ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    mn

    (MNode) Member Node where the PID is associated with an object.

    pid

    (character) The PID of the object to create the sub-tree for.

    entityType

    (character) What kind of objects to create from the input. One of "dataTable", +"spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".

    ...

    (optional) Additional arguments to be passed to new(entityType, ...).

    + +

    Value

    + +

    (list) The entity object.

    + + +

    Examples

    +
    # NOT RUN {
    +# Generate EML otherEntity
    +pid_to_eml_entity(mn,
    +                  pid,
    +                  entityType = "otherEntity",
    +                  entityName = "Entity Name",
    +                  entityDescription = "Description about entity")
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/pid_to_eml_physical.html b/docs/reference/pid_to_eml_physical.html new file mode 100644 index 0000000..938d4ef --- /dev/null +++ b/docs/reference/pid_to_eml_physical.html @@ -0,0 +1,184 @@ + + + + + + + + +Create EML physical objects for the given set of PIDs — pid_to_eml_physical • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This is a wrapper around sysmeta_to_eml_physical() which handles the task of +creating the EML physical.

    + +
    + +
    pid_to_eml_physical(mn, pids)
    + +

    Arguments

    + + + + + + + + + + +
    mn

    (MNode) Member Node where the PID is associated with an object.

    pids

    (character) The PID of the object to create the sub-tree for.

    + +

    Value

    + +

    (list) A list of otherEntity object(s).

    + + +

    Examples

    +
    # NOT RUN {
    +# Generate EML physical objects for all the data in a package
    +pkg <- get_package(mn, pid)
    +pid_to_eml_physical(mn, pkg$data)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/publish_object.html b/docs/reference/publish_object.html index b379574..281acf8 100644 --- a/docs/reference/publish_object.html +++ b/docs/reference/publish_object.html @@ -6,7 +6,7 @@ -editing.R — publish_object • arcticdatautils +Publish an object on a Member Node — publish_object • arcticdatautils @@ -18,14 +18,31 @@ + + + + + - + + + + + + + + + - + @@ -68,21 +113,28 @@ -
    +
    +
    -

    High-level functions for managing content. -Publish an object on a member node

    +

    Use sensible defaults to publish an object on a Member Node. If identifier is provided, +use it, otherwise generate a UUID. If clone_id is provided, then retrieve the +system metadata for that identifier and use it to provide rightsHolder, accessPolicy, +and replicationPolicy metadata. Note that this function only uploads the object to +the Member Node, and does not add it to a data package, which can be done separately.

    +
    -
    publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL,
    +    
    publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL,
       clone_pid = NULL, public = TRUE)
    -

    Arguments

    +

    Arguments

    @@ -91,11 +143,13 @@

    Ar

    - + - + @@ -107,27 +161,36 @@

    Ar

    - + + + + +
    path

    the path to the file to be published

    (character) The path to the file to be published.

    format_id

    (character) Optional. The format ID to set for the object. When not set, guess_format_id will be used to guess the format ID. Should be a DataONE format ID.

    (character) Optional. The format ID to set for the object. +When not set, guess_format_id() will be used to guess the format ID. +Should be a DataONE format ID.

    pid
    clone_pid

    (character) PID of objet to clone System Metadata from

    (character) PID of object to clone System Metadata from.

    public

    (logical) Whether object should be given public read access.

    -

    Details

    +

    Value

    -

    Use sensible defaults to publish an object on a member node. If identifier is provided, -use it, otherwise generate a UUID. If clone_id is provided, then retrieve the -system metadata for that identifier and use it to provide rightsHolder, accessPolicy, -and replicationPolicy metadata. Note that this function only uploads the object to -the Member Node, and does not add it to a data package, which can be done separately.

    +

    pid (character) The PID of the published object.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +my_path <- "/home/Documents/myfile.csv"
    +pid <- publish_object(mn, path = my_path, format_id = "text/csv", public = FALSE)
    +# }
    @@ -138,11 +201,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/publish_update.html b/docs/reference/publish_update.html index c4e08aa..20e07d9 100644 --- a/docs/reference/publish_update.html +++ b/docs/reference/publish_update.html @@ -6,7 +6,7 @@ -Publish an updated data package. — publish_update • arcticdatautils +Publish an updated data package — publish_update • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,23 +109,27 @@ -
    +
    +
    -

    This function can be used for a variety of tasks:

    +

    Publish an update to a data package after updating data files or metadata.

    +
    -
    publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL,
    +    
    publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL,
       child_pids = NULL, metadata_path = NULL, identifier = NULL,
    -  use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL,
    -  parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE,
    -  check_first = TRUE)
    + use_doi = FALSE, parent_resmap_pid = NULL, + parent_metadata_pid = NULL, parent_data_pids = NULL, + parent_child_pids = NULL, public = TRUE, check_first = TRUE)
    -

    Arguments

    +

    Arguments

    @@ -109,7 +154,8 @@

    Ar

    - + @@ -121,39 +167,45 @@

    Ar

    - + - + - + - + - - - - - - +
    metadata_path

    (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used.

    (character or eml) Optional. An eml class object or a path to a metadata file to update with. +If this is not set, the existing metadata document will be used.

    identifier
    parent_resmap_pid

    (character) Optional. PID of a parent package to be updated.

    (character) Optional. PID of a parent package to be updated. +Not optional if a parent package exists.

    parent_metadata_pid

    (character) Optional. Identifier for the metadata document of the parent package.

    (character) Optional. Identifier for the metadata document of the parent package. +Not optional if a parent package exists.

    parent_data_pids

    (character) Optional. Identifier for the data objects of the parent package.

    (character) Optional. Identifier for the data objects of the parent package. +Not optional if the parent package contains data objects.

    parent_child_pids

    (character) Optional. Resource map identifier(s) of child packages in the parent package.

    (character) Optional. Resource map identifier(s) of child packages in the parent package. +resource_map_pid should not be included. Not optional if the parent package contains other child packages.

    public

    (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). +

    (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). This applies to the new metadata PID and its resource map and data object. access policies are not affected.

    check_first

    (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements.

    parent_data_pids

    (logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. +Checks that objects exist and are of the right format type. This speeds up the function, especially when data_pids has many elements.

    +

    Value

    + +

    (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects.

    +

    Details

    -
      +

      This function can be used for a variety of tasks:

      +
      • Publish an existing package with a DOI

      • Update a package with new data objects

      • Update a package with new metadata

      • @@ -161,23 +213,41 @@

        Details

        The metadata_pid and resource_map_pid provide the identifier of an EML metadata document and associated resource map, and the data_pids vector provides a list of PIDs of data objects in the package. Update the metadata file and resource map -by generating a new identifier (a DOI if use_doi is TRUE) and updating the Member +by generating a new identifier (a DOI if use_doi = TRUE) and updating the Member Node with a public version of the object. If metadata_file is not missing, it should be an edited version of the metadata to be used to update the original. If parent_resmap_pid is not missing, it indicates the PID of a parent package that -should be updated as well, using the parent_medata_pid, parent_data_pids, and +should be updated as well, using the parent_metadata_pid, parent_data_pids, and parent_child_pids as members of the updated package. In all cases, the objects are made publicly readable.

        +

        Examples

        +
        # NOT RUN {
        +cn <- CNode("STAGING2")
        +mn <- getMNode(cn,"urn:node:mnTestKNB")
        +
        +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
        +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
        +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
        +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
        +
        +meta_path <- "/home/Documents/myMetadata.xml"
        +
        +publish_update(mn, meta_pid, rm_pid, data_pids, meta_path, public = TRUE)
        +# }
    @@ -188,11 +258,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/remove_public_read.html b/docs/reference/remove_public_read.html index 8024207..03f549c 100644 --- a/docs/reference/remove_public_read.html +++ b/docs/reference/remove_public_read.html @@ -6,7 +6,7 @@ -Remove public access on a set of objects. — remove_public_read • arcticdatautils +Remove public read access for an object — remove_public_read • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,38 +109,52 @@ -
    +
    +
    -

    Remove public access on a set of objects.

    +

    Remove public read access for an object.

    +
    -
    remove_public_read(mn, pids)
    +
    remove_public_read(mn, pids)
    -

    Arguments

    +

    Arguments

    - + - +
    mn

    (MNode)

    (MNode) The Member Node.

    pids

    (character) A vector of PIDs to set public access on

    (character) The PIDs of the objects to remove public read access for.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +remove_public_read(mn, pids)
    +# }
    @@ -110,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_abstract.html b/docs/reference/set_abstract.html index 188cbf6..2ad556c 100644 --- a/docs/reference/set_abstract.html +++ b/docs/reference/set_abstract.html @@ -6,7 +6,7 @@ -Set the abstract on an EML document — set_abstract • arcticdatautils +Set the abstract for an EML document — set_abstract • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,29 +109,33 @@ -
    +
    +
    -

    Set the abstract on an EML document

    +

    Set the abstract for an EML document.

    +
    -
    set_abstract(doc, text)
    +
    set_abstract(doc, text)
    -

    Arguments

    +

    Arguments

    - + @@ -98,11 +143,22 @@

    Ar

    Value

    -

    (eml) The modified EML document

    +

    (eml) The modified EML document.

    Examples

    -
    set_abstract(doc, c("Test abstract..."))
    #> Error in is(doc, "eml"): object 'doc' not found
    set_abstract(doc, c("First para", "second para"))
    #> Error in is(doc, "eml"): object 'doc' not found
    +
    # Create a new EML document +library(EML) +doc <- new("eml") + +# Set an abstract with a single paragraph +set_abstract(doc, c("Test abstract..."))
    #> <eml packageId="d187f1c6-9241-420d-904d-16608c0f5402" system="uuid" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 eml.xsd"> +#> <dataset> +#> <abstract>hi</abstract> +#> </dataset> +#> </eml>
    +# Or one with multiple paragraphs +set_abstract(doc, c("First para...", "second para..."))
    #> <eml packageId="eb9651d3-575e-4b61-a577-0b8c63a53f4f" system="uuid" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 eml.xsd"/>
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_access.html b/docs/reference/set_access.html index 780ea11..0d36aac 100644 --- a/docs/reference/set_access.html +++ b/docs/reference/set_access.html @@ -6,7 +6,7 @@ -Set the access policy for a set of objects. — set_access • arcticdatautils +Set the access policy for an object — set_access • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,21 +111,26 @@ -
    +
    +
    -

    For each permission, this function checks if the permission is already set -and moves on. System Metadata are only updated when a change was needed.

    +

    Set the access policy for the given subjects for the given objects on the given Member Node. +For each type of permission, this function checks if the permission is already set +and only updates the System Metadata when a change is needed.

    +
    -
    set_access(mn, pids, subjects, permissions = c("read", "write",
    +    
    set_access(mn, pids, subjects, permissions = c("read", "write",
       "changePermission"))
    -

    Arguments

    +

    Arguments

    doc

    (eml) An EML document

    (eml) An EML document.

    text

    (character) The abstract text. If text is length one, an -abstract without <para> or section elements will be created. +abstract without <para> or <section> elements will be created. If text is greater than one in length, para elementes will be used for each element.

    @@ -91,23 +139,33 @@

    Ar

    - + - + - +
    pids

    (character) The object(s) to set the permissions on.

    (character) The PIDs of the objects to set permissions for.

    subjects

    (character) The subject(s) to set permissions for.

    (character) The identifiers of the subjects to set permissions for, typically an ORCID or DN.

    permissions

    (character) Optional. Vector of permissions.

    (character) Optional. The permissions to set. Defaults to +read, write, and changePermission.

    Value

    -

    (logical) Named

    +

    (logical) Whether an update was needed.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +   "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX",
    +   permissions = c("read", "write", "changePermission"))
    +# }
    @@ -126,11 +186,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_file_name.html b/docs/reference/set_file_name.html index e20ff77..fbf82a8 100644 --- a/docs/reference/set_file_name.html +++ b/docs/reference/set_file_name.html @@ -6,7 +6,7 @@ -Set the file name on an object — set_file_name • arcticdatautils +Set the file name for an object — set_file_name • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Set the file name on an object

    +

    Set the file name for an object.

    +
    -
    set_file_name(mn, pid, name)
    +
    set_file_name(mn, pid, name)
    -

    Arguments

    +

    Arguments

    @@ -99,9 +144,17 @@

    Ar

    Value

    -

    (logical) Whether the update succeeded, FALSE means there was an error.

    +

    (logical) Whether the update succeeded.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn, "urn:node:mnTestKNB")
    +
    +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +set_file_name(mn, pid, "myfile.csv")
    +# }
    @@ -120,11 +175,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_public_read.html b/docs/reference/set_public_read.html index dec18e8..e66ae45 100644 --- a/docs/reference/set_public_read.html +++ b/docs/reference/set_public_read.html @@ -6,7 +6,7 @@ -Set public access on a set of objects. — set_public_read • arcticdatautils +Set public read access for an object — set_public_read • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,38 +109,58 @@ -
    +
    +
    -

    Set public access on a set of objects.

    +

    Set public read access for an object.

    +
    -
    set_public_read(mn, pids)
    +
    set_public_read(mn, pids)
    -

    Arguments

    +

    Arguments

    - + - +
    mn

    (MNode)

    (MNode) The Member Node.

    pids

    (character) A vector of PIDs to set public access on

    (character) The PIDs of the objects to set public read access for.

    +

    Value

    +

    (logical) Whether an update was needed.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +   "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_public_read(mn, pids)
    +# }
    @@ -110,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_rights_and_access.html b/docs/reference/set_rights_and_access.html index 6657f5b..d026c73 100644 --- a/docs/reference/set_rights_and_access.html +++ b/docs/reference/set_rights_and_access.html @@ -6,8 +6,7 @@ -Set the given subject as the rightsHolder and subject with write and -changePermission access for the given PID. — set_rights_and_access • arcticdatautils +Set rights holder with access policy for an object — set_rights_and_access • arcticdatautils @@ -19,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -69,48 +111,61 @@ -
    +
    +
    -

    This function only updates the existing System Metadata if a change is -needed.

    +

    Set the given subject as the rights holder and with given permissions +for the given objects. This function only updates the existing +System Metadata when a change is needed.

    +
    -
    set_rights_and_access(mn, pids, subject, permissions = c("read", "write",
    +    
    set_rights_and_access(mn, pids, subject, permissions = c("read", "write",
       "changePermission"))
    -

    Arguments

    +

    Arguments

    - + - + - + -
    mn

    (MNode) The Member Node to send the query.

    (MNode) The Member Node.

    pids

    (character) The PID(s) to set the access rule for.

    (character) The PIDs of the objects to set the rights holder and access policy for.

    subject

    (character)The subject of the rule(s).

    (character) The identifier of the new rights holder, typically an ORCID or DN.

    permissions

    (character) The permissions for the rule. Defaults to +

    (character) Optional. The permissions to set. Defaults to read, write, and changePermission.

    Value

    -

    Whether an update was needed.

    +

    (logical) Whether an update was needed.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +    "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX",
    +    permissions = c("read", "write", "changePermission"))
    +# }
    @@ -129,11 +186,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_rights_holder.html b/docs/reference/set_rights_holder.html index 8ccc98e..8963e51 100644 --- a/docs/reference/set_rights_holder.html +++ b/docs/reference/set_rights_holder.html @@ -6,7 +6,7 @@ -access.R — set_rights_holder • arcticdatautils +Set the rights holder for an object — set_rights_holder • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,51 +111,64 @@ -
    +
    +
    -

    High-level utility functions for getting and setting access rules for DataONE -objects. -Set the rightsHolder field for a given PID.

    +

    Set the rights holder to the given subject for the given objects on the +given Member Node. This function checks if the rights holder is already set +and only updates the System Metadata when a change is needed.

    +
    -
    set_rights_holder(mn, pids, subject)
    +
    set_rights_holder(mn, pids, subject)
    -

    Arguments

    +

    Arguments

    - + - + - +
    mn

    (MNode) The MNode instance to be changed.

    (MNode) The Member Node.

    pids

    (character) The identifiers for the object to be changed.

    (character) The PIDs of the objects to set the rights holder for.

    subject

    (character) The identifier of the new rightsHolder, often an ORCID or DN.

    (character) The identifier of the new rights holder, typically an ORCID or DN.

    -

    Details

    +

    Value

    -

    Update the rights holder to the provided subject for the object identified in -the provided system metadata document on the given Member Node.

    +

    (logical) Whether an update was needed.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_rights_holder(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX")
    +# }
    @@ -123,11 +179,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/show_indexing_status.html b/docs/reference/show_indexing_status.html new file mode 100644 index 0000000..3babec5 --- /dev/null +++ b/docs/reference/show_indexing_status.html @@ -0,0 +1,184 @@ + + + + + + + + +Show the indexing status of a set of PIDs — show_indexing_status • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Show the indexing status of a set of PIDs.

    + +
    + +
    show_indexing_status(mn, pids)
    + +

    Arguments

    + + + + + + + + + + +
    mn

    (MNode) The Member Node to query.

    pids

    (character/list) One or more PIDs.

    + +

    Value

    + +

    NULL

    + + +

    Examples

    +
    # NOT RUN {
    +# Create a package then check its indexing status
    +library(dataone)
    +mn <- MNode(...)
    +pkg <- create_dummy_package(mn)
    +show_indexing_status(mn, pkg)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/sysmeta_to_eml_physical.html b/docs/reference/sysmeta_to_eml_physical.html index aff001b..2f7b3f6 100644 --- a/docs/reference/sysmeta_to_eml_physical.html +++ b/docs/reference/sysmeta_to_eml_physical.html @@ -6,7 +6,7 @@ -Create an EML physical subtree from a System Metadata instance — sysmeta_to_eml_physical • arcticdatautils +Create an EML physical object from system metadata — sysmeta_to_eml_physical • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,36 +111,57 @@ -
    +
    +
    -

    This function creates a pre-canned EML physical subtree from what's in the -System Metadata of an Object. Note that it sets an Online Distrubtion URL +

    This function creates a pre-canned EML physical object from what's in the +System Metadata of an object. Note that it sets an Online Distribution URL of the DataONE v2 resolve service for the PID.

    +
    -
    sysmeta_to_eml_physical(sysmeta)
    +
    sysmeta_to_eml_physical(sysmeta)
    -

    Arguments

    +

    Arguments

    - +
    sysmeta

    (SystemMetadata) The System Metadata of the object.

    (SystemMetadata) One or more System Metadata objects.

    +

    Value

    +

    (list) A list of physical objects for each sysmeta.

    + + +

    Examples

    +
    # NOT RUN {
    +# Generate EML physical objects for all the data in a package
    +pkg <- get_package(mn, pid)
    +sm <- lapply(pkg$data, function(pid) {
    +  getSystemMetadata(mn, pid)
    +})
    +sysmeta_to_eml_physical(sm)
    +# }
    @@ -108,11 +172,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/update_object.html b/docs/reference/update_object.html index 34b3048..20f511f 100644 --- a/docs/reference/update_object.html +++ b/docs/reference/update_object.html @@ -6,7 +6,7 @@ -Update an object with a new file. — update_object • arcticdatautils +Update an object with a new file — update_object • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,21 +111,26 @@ -
    +
    +
    -

    This is a convenience wrapper around `dataone::updateObject` which copies in +

    This is a convenience wrapper around dataone::updateObject() which copies in fields from the old object's System Metadata such as the rightsHolder and accessPolicy and updates only what needs to be changed.

    +
    -
    update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL)
    +
    update_object(mn, pid, path, format_id = NULL, new_pid = NULL,
    +  sid = NULL)
    -

    Arguments

    +

    Arguments

    @@ -99,7 +147,18 @@

    Ar

    - + + + + + + + + +
    format_id

    (character) Optional. The format ID to set for the object. When not set, guess_format_id will be used to guess the format ID. Should be a DataONE format ID.

    (character) Optional. The format ID to set for the object. +When not set, guess_format_id() will be used to guess the format ID. +Should be a DataONE format ID.

    new_pid

    (character) Optional. Specify the PID for the new object. +Defaults to automatically generating a new, random UUID-style PID.

    sid

    (character) Optional. Specify a Series ID (SID) to use for the new object.

    @@ -108,6 +167,14 @@

    Value

    (character) The PID of the updated object.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +my_path <- "/home/Documents/myfile.csv"
    +new_pid <- update_object(mn, pid, my_path, format_id = "text/csv")
    +# }
    @@ -126,11 +195,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/update_package_object.html b/docs/reference/update_package_object.html new file mode 100644 index 0000000..29f3eeb --- /dev/null +++ b/docs/reference/update_package_object.html @@ -0,0 +1,232 @@ + + + + + + + + +Update a data object and associated resource map and metadata — update_package_object • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function updates a data object and then automatically +updates the package resource map with the new data PID. If an object +already has a dataTable, otherEntity, or spatialVector +with a working physical section, the EML will be updated with the new physical. +It is a convenience wrapper around update_object() and publish_update().

    + +
    + +
    update_package_object(mn, data_pid, new_data_path, resource_map_pid,
    +  format_id = NULL, public = TRUE, use_doi = FALSE, ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    mn

    (MNode) The Member Node of the data package.

    data_pid

    (character) PID for data object to update.

    new_data_path

    (character) Path to new data object.

    resource_map_pid

    (character) PID for resource map to update.

    format_id

    (character) Optional. The format ID to set for the object. +When not set, guess_format_id() will be used +to guess the format ID. Should be a DataONE format ID.

    public

    (logical) Optional. Make the update public. If FALSE, +will set the metadata and resource map to private (but not the data objects). +This applies to the new metadata PID and its resource map and data object. +Access policies are not affected.

    use_doi

    (logical) Optional. If TRUE, a new DOI will be minted.

    ...

    Other arguments to pass into publish_update().

    + +

    Value

    + +

    (character) Named character vector of PIDs in the data package, including PIDs +for the metadata, resource map, and data objects.

    + +

    See also

    + + + + +

    Examples

    +
    # NOT RUN {
    +cnTest <- dataone::CNode("STAGING")
    +mnTest <- dataone::getMNode(cnTest,"urn:node:mnTestARCTIC")
    +
    +pkg <- create_dummy_package_full(mnTest, title = "My package")
    +
    +file.create("new_file.csv")
    +update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv")
    +file.remove("new_file.csv")
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/update_resource_map.html b/docs/reference/update_resource_map.html index 366fd1f..691e962 100644 --- a/docs/reference/update_resource_map.html +++ b/docs/reference/update_resource_map.html @@ -6,7 +6,7 @@ -Update an existing resource map Object on a Member Node. — update_resource_map • arcticdatautils +Update an existing resource map object on a Member Node — update_resource_map • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,69 +111,112 @@ -
    +
    +

    This function first generates a new resource map RDF/XML document locally and -then uses the dataone::updateObject function to update an Object on the +then uses the dataone::updateObject() function to update an object on the specified MN.

    +
    -
    update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL,
    +    
    update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL,
       child_pids = NULL, other_statements = NULL, identifier = NULL,
       public = FALSE, check_first = TRUE)
    -

    Arguments

    +

    Arguments

    + + + + + + + + + + + + + + + + + + + + - + - + - + +arguments exist on the MN before continuing. This speeds up the function, +especially when data_pids has many elements.

    mn

    (MNode) The Member Node.

    resource_map_pid

    (character) The PID of the resource map to be updated.

    metadata_pid

    (character) The PID of the metadata object to go in the package.

    data_pids

    (character) The PID(s) of the data objects to go in the package.

    child_pids

    (character) The resource map PIDs of the packages to be +nested under the package.

    other_statements

    (data.frame) Extra statements to add to the Resource Map.

    (data.frame) Extra statements to add to the resource map.

    identifier

    (character) Manually specify the identifier for the new metadata object.

    public

    Whether or not to make the new resource map public read -(logical)

    (logical) Whether or not to make the new resource map public read.

    check_first

    (logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.

    +

    Value

    + +

    (character) The PID of the updated resource map.

    +

    Details

    -

    If you only want to generate resource map RDF/XML, see -generate_resource_map.

    -

    This function also can be used to be used to add a new child packages to a -parent package. For exmaple, if you have:

    +

    If you only want to generate resource map RDF/XML, see generate_resource_map().

    +

    This function also can be used to add a new child packages to a +parent package. For example, if you have:

    Parent A B

    -

    and want to add C as a sibling package to A and B, e.g.

    +

    and want to add C as a sibling package to A and B, e.g.:

    Parent A B C

    -

    you could use this function.

    -

    Note: This function currently replaces the rightsHolder on the Resource Map +

    then you could use this function.

    +

    Note: This function currently replaces the rightsHolder on the resource map temporarily to allow updating but sets it back to the rightsHolder that was in place before the update.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +
    +rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids)
    +# }
    @@ -141,11 +227,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/view_profile.html b/docs/reference/view_profile.html index 9245607..d542095 100644 --- a/docs/reference/view_profile.html +++ b/docs/reference/view_profile.html @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,23 +111,25 @@ -
    +
    +
    -

    This function is intended to be (poorly) simulate what a user sees when they +

    This function is intended to (poorly) simulate what a user sees when they browse to their "My Data Sets" page (their #profile URL). It uses a similar -Solr to what Metacat UI uses to generate the list. The results of this -function may be the same as what's on the #profile page but may be missing -some of the user's datasets when:

    +Solr query to what Metacat UI uses to generate the list.

    +
    -
    view_profile(mn, subject, fields = c("identifier", "title"))
    +
    view_profile(mn, subject, fields = c("identifier", "title"))
    -

    Arguments

    +

    Arguments

    @@ -94,7 +139,7 @@

    Ar

    +likely going to be an ORCID, e.g. http://orcid.org....

    @@ -104,28 +149,39 @@

    Ar

    Value

    -

    (data.frame) data.frame with the results.

    +

    (data.frame) A data.frame with the results.

    Details

    -

    - The user has any datasets in their #profile that the person running the +

    The results of this function may be the same as what's on the #profile page +but may be missing some of the user's datasets when:

      +
    • The user has any datasets in their #profile that the person running the query (you) can't read. This is rare on arcticdata.io but possible because arctic-data-admins usually has read/write/changePermission -permissions on every object. -- The user has datasets owned by an Equivalent Identity of the subject -being queried. This is rare, especially on arcticdata.io.

      +permissions on every object.

    • +
    • The user has datasets owned by an Equivalent Identity of the subject +being queried. This is rare, especially on arcticdata.io.

    • +

    Examples

    -
    ## Not run: ------------------------------------ -# options(...set...your...token....) -# mn <- env_load('production')$mn -# me <- get_token_subject() -# profile(mn, me) -# -# // Get a custom set of fields -# view_profile(mn, me, "origin") -## ---------------------------------------------
    +
    # NOT RUN {
    +options(...set...your...token....)
    +mn <- env_load('production')$mn
    +me <- get_token_subject()
    +profile(mn, me)
    +
    +// Get a custom set of fields
    +view_profile(mn, me, "origin")
    +
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +package_df <- view_profile(mn, "http://orcid.org/0000-0003-4703-1974", fields = c("title"))
    +
    +# }
    +
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/which_in_eml.html b/docs/reference/which_in_eml.html new file mode 100644 index 0000000..a6b1ab4 --- /dev/null +++ b/docs/reference/which_in_eml.html @@ -0,0 +1,199 @@ + + + + + + + + +Search through EMLs — which_in_eml • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function returns indices within an EML list that contain an instance where +test == TRUE. See examples for more information.

    + +
    + +
    which_in_eml(eml_list, element, test)
    + +

    Arguments

    +
    subject

    (character) The subject to find the datasets for. This is -likely going to be your ORCID, e.g. http://orcid.org....

    fields
    + + + + + + + + + + + + + +
    eml_list

    (S4/List) An EML list object.

    element

    (character) Element to evaluate.

    test

    (function/character) A function to evaluate (see examples). If test is a character, +will evaluate if element == test (see example 1).

    + + +

    Examples

    +
    # NOT RUN {
    +# Question: Which creators have a surName "Smith"?
    +n <- which_in_eml(eml@dataset@creator, "surName", "Smith")
    +# Answer: eml@dataset@creator[n]
    +
    +# Question: Which dataTables have an entityName that begins with "2016"
    +n <- which_in_eml(eml@dataset@dataTable, "entityName", function(x) {grepl("^2016", x)})
    +# Answer: eml@dataset@dataTable[n]
    +
    +# Question: Which attributes in dataTable[[1]] have a numberType "natural"?
    +n <- which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "numberType", "natural")
    +# Answer: eml@dataset@dataTable[[1]]@attributeList@attribute[n]
    +
    +#' # Question: Which dataTables have at least one attribute with a numberType "natural"?
    +n <- which_in_eml(eml@dataset@dataTable, "numberType", function(x) {"natural" %in% x})
    +# Answer: eml@dataset@dataTable[n]
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/index.md b/index.md new file mode 100644 index 0000000..f6f90e1 --- /dev/null +++ b/index.md @@ -0,0 +1,24 @@ +# arcticdatautils + +[![Travis build status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) + +The `arcticdatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: + +- Inserting large numbers of files into a Metacat Member Node +- High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations + +Note: The package is intended to be used by NCEAS staff and may not make much sense to others. + +## Installing + +We recommend installing from the latest [release](https://github.com/NCEAS/arcticdatautils/releases) (aka tag) instead of from `master`. Install the latest release with the [`remotes`](https://github.com/r-lib/remotes) package: + +```r +remotes::install_github("nceas/arcticdatautils@*release") +``` + +If you're feeling adventurous, you can install from the bleeding edge: + +```r +remotes::install_github("nceas/arcticdatautils") +``` diff --git a/vignettes/a-overview.Rmd b/vignettes/a-overview.Rmd new file mode 100644 index 0000000..232c585 --- /dev/null +++ b/vignettes/a-overview.Rmd @@ -0,0 +1,23 @@ +--- +title: "Overview" +author: "Bryce Mecum" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Overview} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +## Abbreviated API overview + +- `publish_update()`: + - Mint a DOI for a package + - Replace the metadata for a package + - Add/remove data in a package +- `publish_object()`: Use before `publish_update()` if you're adding new data to a package +- `update_resource_map()`: Edit the set of child packages for a package +- `create_resource_map()`: Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages +- `set_rights_and_access()`: Use this to give a user edit rights to a package + +The package does way more than this but the above are the most common tasks. diff --git a/vignettes/basic-usage.Rmd b/vignettes/b-basic-usage.Rmd similarity index 100% rename from vignettes/basic-usage.Rmd rename to vignettes/b-basic-usage.Rmd diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd deleted file mode 100644 index dddc18c..0000000 --- a/vignettes/overview.Rmd +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: "Overview" -author: "Bryce Mecum" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Overview} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -## Abbreviated API overview: - -- `publish_update`: - - Mint a DOI for a package - - Replace the metadata for a package, from a local file - - Add/remove data in a package -- `publish_object`: Use before `publish_update` if you're adding new data to a package. -- `update_resource_map`: Edit the set of child packages for a package -- `create_resource_map`: Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages. -- `set_rights_and_access`: Use this to give a user edit rights to a package - -The package does way more than this but the above are the most common tasks. From a879d982e09a54594c5cccb876ce584bf57a20e5 Mon Sep 17 00:00:00 2001 From: Derek Strong Date: Thu, 8 Nov 2018 11:11:20 -0800 Subject: [PATCH 179/318] Fix merge conflicts --- DESCRIPTION | 13 +++-- NAMESPACE | 1 + R/helpers.R | 107 ++++++++++++++++++++++++++++++++++ tests/testthat/test_helpers.R | 11 ++++ 4 files changed, 127 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b19f2da..f64a852 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Authors@R: c( person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), - person("Emily", "O'Dean", email="eodean10@gmail.com", role = "ctb"), + person("Emily", "O'Dean", email = "eodean10@gmail.com", role = "ctb"), person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb") ) @@ -36,13 +36,16 @@ Imports: XML Suggests: dplyr, - testthat, humaniformat, knitr, + lubridate, ncdf4, + RCurl, + purrr, rmarkdown, - yaml, - xslt -Roxygen: list(markdown = TRUE) + testthat, + xslt, + yaml RoxygenNote: 6.1.0 +Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index d183809..8e3c3b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(is_obsolete) export(is_public_read) export(is_token_expired) export(is_token_set) +export(list_submissions) export(mdq_run) export(new_uuid) export(object_exists) diff --git a/R/helpers.R b/R/helpers.R index 2b56eef..58af560 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -431,3 +431,110 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { metadata = pid_eml, data = data_pids)) } + + +#' Retrieve a name from an ORCID URL +#' +#' Retrieve first and last name from an ORCID URL by scraping the page. +#' +#' @param orcid_url (character) A valid ORCID URL address. +#' +#' @return (character) First and last name. +#' +#' @noRd +#' +#' @examples +#' \dontrun{ +#' pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') +#' } +get_orcid_name <- function(orcid_url) { + req <- httr::GET(orcid_url) + if (req$status_code != 200) { + stop('Failed to read in ', orcid_url) + } + + name <- httr::content(req, "text") %>% + stringr::str_extract(".*<") %>% + stringr::str_split(" ") %>% + unlist() %>% + stringr::str_remove("<title>") + + return(paste(name[1], name[2])) +} + + +#' List recent submissions to a DataONE Member Node +#' +#' List recent submissions to a DataONE Member Node from all submitters not present +#' in the administrator whitelist: <https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org> +#' +#' @param mn (MNode) A DataONE Member Node. +#' @param from (character) The date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}. +#' @param to (character) The date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}. +#' @param formatType (character) The format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. +#' @param whitelist (character) An xml list of admin orcid Identifiers. +#' Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org. +#' +#' @return (data.frame) +#' +#' @export +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' +#' # Return all submitted objects in the past month for the 'adc' node: +#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*')) +#' +#' # Return all submitted objects except for one user +#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*'), +#' whitelist = 'http://orcid.org/0000-0002-2561-5840') +#' } +list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*', + whitelist = 'https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org') { + if (!requireNamespace('lubridate', 'purrr', 'RCurl')) { + stop(call. = FALSE, + 'The packages "lubridate", "purrr", and "RCurl" must be installed to run this function. ', + 'Please install them and try again.') + } + stopifnot(methods::is(mn, 'MNode')) + if (!is_token_set(mn)) { + stop('No token set') + } + if (!(lubridate::is.Date(as.Date(from, '%Y/%M/%D')))) { + stop('"from" argument must be in YYYY/MM/DD format') + } + if (!(lubridate::is.Date(as.Date(to, '%Y/%M/%D')))) { + stop('"to" argument must be in YYYY/MM/DD format') + } + if (!(formatType %in% c('RESOURCE', 'METADATA', 'DATA', '*'))) { + stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') + } + + req <- httr::GET(whitelist) + if (req$status_code != 200) { + warning('Failed to read in', whitelist, '. Results will include admin submissions / edits.') + } + whitelist <- httr::content(req, "text") + + # Construct query and return results + q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT00:00:00Z"] AND formatType:%s', from, to, formatType) + results <- dataone::query(mn, list(q = q, + fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", + rows = 10000), + as = "data.frame") + + # Filter out rows where the submitter is in the whitelist + results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] + + # Return full names based on orcid Id + results$submitter_name <- purrr::map(results$submitter, get_orcid_name) %>% unlist() + + # Arrange by dateUploaded + results <- dplyr::arrange(results, dateUploaded) + + return(results) +} diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 1c2472f..6f56aa2 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -24,3 +24,14 @@ test_that("create_dummy_package_full errors if wrong input", { expect_error(create_dummy_package_full(mn, title = 11)) expect_error(create_dummy_package_full("mn")) }) + +test_that("list_submissions returns correct output", { + cn <- dataone::CNode('PROD') + adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') + if (!is_token_set(adc)) { + skip("No token set. Skipping test.") + } + + out <- list_submissions(adc, '2018-10-01', '2018-10-03') + expect_equal(out$submitter_name[1], 'Baptiste Vandecrux') +}) From e5a254db3d74c3fc5c79448ba4ce71b2ae5647da Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Fri, 9 Nov 2018 15:38:16 -0800 Subject: [PATCH 180/318] list_submissions paramter fix --- R/helpers.R | 6 +++++- man/list_submissions.Rd | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index a3eac40..9c9c3f8 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -602,10 +602,14 @@ get_orcid_name <- function(orcid_url) { #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') #' +#' View(arcticdatautils::list_submissions(adc, '2018-10-01', '2018-10-07')) +#' #' # Return all submitted objects in the past month for the 'adc' node: +#' library(lubridate) #' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*')) #' #' # Return all submitted objects except for one user +#' library(lubridate) #' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*'), #' whitelist = 'http://orcid.org/0000-0002-2561-5840') #' @@ -638,7 +642,7 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType whitelist <- httr::content(req, "text") # Construct query and return results - q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT00:00:00Z"] AND formatType:%s', from, to, formatType) + q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT23:59:59Z"] AND formatType:%s', from, to, formatType) results <- dataone::query(mn, list(q = q, fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", rows = 10000), diff --git a/man/list_submissions.Rd b/man/list_submissions.Rd index 684d4be..84145ae 100644 --- a/man/list_submissions.Rd +++ b/man/list_submissions.Rd @@ -17,9 +17,7 @@ list_submissions(mn, from = Sys.Date(), to = Sys.Date(), \item{formatType}{(character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *.} -\item{whitelist}{(character) A list of admin orcid Identifiers. Can be a URL or a -character vector of length 1. -Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} +\item{whitelist}{(character) An xml list of admin orcid Identifiers. Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} } \description{ List recent submissions to a DataOne Member Node from all submitters not present @@ -30,10 +28,14 @@ in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic- cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +View(arcticdatautils::list_submissions(adc, '2018-10-01', '2018-10-07')) + # Return all submitted objects in the past month for the 'adc' node: +library(lubridate) View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*')) # Return all submitted objects except for one user +library(lubridate) View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*'), whitelist = 'http://orcid.org/0000-0002-2561-5840') From 3cae9e1fdde14c6b0bbf92d2baa6663def101ebf Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Thu, 15 Nov 2018 16:54:25 -0800 Subject: [PATCH 181/318] set_public_read_all_versions --- R/util.R | 35 +++++++++++++++++++++++++++++++++++ tests/testthat/test_util.R | 17 +++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/R/util.R b/R/util.R index 7228657..5b6e736 100644 --- a/R/util.R +++ b/R/util.R @@ -1057,3 +1057,38 @@ show_indexing_status <- function(mn, pids) { close(pb) } + +#' Set public READ access on all versions of PIDs in data package. +#' +#' Set public READ access on all versions of PIDs in data package. +#' +#' @param mn (MNode) The Member Node to query. +#' @param resource_map_pid (character) The resource map identifier (PID). +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' cn_staging <- CNode('STAGING') +#' adc_test <- getMNode(cn_staging,'urn:node:mnTestARCTIC') +#' # Create a dummy package then create another version with 'publish_update()' +#' pkg <- create_dummy_package(adc_test) +#' remove_public_read(mn, unlist(pkg)) +#' pkg_v2 <- publish_update(adc_test, pkg$metadata, pkg$resource_map, pkg$data, public = FALSE) +#' # Set public read on all versions +#' set_public_read_all_versions(adc_test, pkg$resource_map) +#' } +set_public_read_all_versions <- function(mn, resource_map_pid) { + stopifnot(is(mn, 'MNode')) + stopifnot(is_token_set(mn)) + stopifnot(is.character(resource_map_pid)) + stopifnot(arcticdatautils:::is_resource_map(mn, resource_map_pid)) + + versions <- get_all_versions(mn, resource_map_pid) + pids <- map(versions, get_package, node = mn) %>% + unlist() %>% + unique() + set_public_read(mn, pids) + + return(invisible()) +} diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 3ee1f61..676e500 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -15,3 +15,20 @@ test_that("paths can be joined", { # Other tests expect_equal(path_join("~/src/arcticdata./inst/asdf"), "~/src/arcticdata/inst/asdf") }) + +test_that('we can set public READ on all versions of a data package', { + cn <- CNode('STAGING') + mn <- getMNode(mn,'urn:node:mnTestARCTIC') + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package(mn) + remove_public_read(mn, unlist(pkg)) + pkg_v2 <- publish_update(mn, pkg$metadata, pkg$resource_map, pkg$data, public = FALSE) + # Set public read on all versions + set_public_read_all_versions(mn, pkg$resource_map) + pids <- c(unlist(pkg), unlist(pkg_v2)) + + expect_true(all(is_public_read(mn, pids))) +}) From b3c32378757e80830a25507213d2d6c7b4e8c725 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Mon, 19 Nov 2018 10:03:40 -0800 Subject: [PATCH 182/318] fixed typo in set_public_read_all_versions unit test --- tests/testthat/test_util.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 676e500..1fe1855 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -18,7 +18,7 @@ test_that("paths can be joined", { test_that('we can set public READ on all versions of a data package', { cn <- CNode('STAGING') - mn <- getMNode(mn,'urn:node:mnTestARCTIC') + mn <- getMNode(cn,'urn:node:mnTestARCTIC') if (!is_token_set(mn)) { skip("No token set. Skipping test.") } From a6a96ed31ab8a814a21ed2d540bfb17becf66341 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Mon, 19 Nov 2018 17:02:02 -0800 Subject: [PATCH 183/318] updates to set_public_read_all_versions from derek code review --- R/util.R | 4 ++-- tests/testthat/test_util.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/util.R b/R/util.R index 5b6e736..ac1e152 100644 --- a/R/util.R +++ b/R/util.R @@ -1082,10 +1082,10 @@ set_public_read_all_versions <- function(mn, resource_map_pid) { stopifnot(is(mn, 'MNode')) stopifnot(is_token_set(mn)) stopifnot(is.character(resource_map_pid)) - stopifnot(arcticdatautils:::is_resource_map(mn, resource_map_pid)) + stopifnot(is_resource_map(mn, resource_map_pid)) versions <- get_all_versions(mn, resource_map_pid) - pids <- map(versions, get_package, node = mn) %>% + pids <- lapply(versions, get_package, node = mn) %>% unlist() %>% unique() set_public_read(mn, pids) diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 1fe1855..5f0fd2a 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -17,8 +17,8 @@ test_that("paths can be joined", { }) test_that('we can set public READ on all versions of a data package', { - cn <- CNode('STAGING') - mn <- getMNode(cn,'urn:node:mnTestARCTIC') + cn <- dataone::CNode('STAGING') + mn <- dataone::getMNode(cn,'urn:node:mnTestARCTIC') if (!is_token_set(mn)) { skip("No token set. Skipping test.") } From 5563ed05d74dd09b769bf1908d688d4dd6f9e714 Mon Sep 17 00:00:00 2001 From: Derek Strong <dstrong@nceas.ucsb.edu> Date: Mon, 19 Nov 2018 18:02:01 -0800 Subject: [PATCH 184/318] Update docs --- DESCRIPTION | 2 +- R/editing.R | 2 +- R/eml.R | 4 +- R/helpers.R | 117 ++--------------------------- README.md | 1 + man/create_dummy_metadata.Rd | 2 +- man/create_dummy_package.Rd | 2 +- man/create_dummy_package_full.Rd | 2 +- man/create_dummy_parent_package.Rd | 2 +- man/get_orcid_name.Rd | 22 ------ man/pid_to_eml_physical.Rd | 2 +- man/sysmeta_to_eml_physical.Rd | 2 +- 12 files changed, 18 insertions(+), 142 deletions(-) delete mode 100644 man/get_orcid_name.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f64a852..35beec3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,6 +46,6 @@ Suggests: testthat, xslt, yaml -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/R/editing.R b/R/editing.R index be1647b..c501617 100644 --- a/R/editing.R +++ b/R/editing.R @@ -293,7 +293,7 @@ publish_update <- function(mn, # Don't allow setting a dataset to private when it uses a DOI if (use_doi && !public) { - stop("You cannot use a DOI and set public=FALSE as the same time.") + stop("You cannot use a DOI and set 'public = FALSE' at the same time.") } # Do a simple sanity check on the PIDs passed in diff --git a/R/eml.R b/R/eml.R index f29a46c..889e29e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -77,7 +77,7 @@ pid_to_eml_entity <- function(mn, #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pids (character) The PID of the object to create the sub-tree for. #' -#' @return (list) A list of otherEntity object(s). +#' @return (list) A list of physical objects. #' #' @export #' @@ -106,7 +106,7 @@ pid_to_eml_physical <- function(mn, pids) { #' #' @param sysmeta (SystemMetadata) One or more System Metadata objects. #' -#' @return (list) A list of physical objects for each sysmeta. +#' @return (list) A list of physical objects. #' #' @export #' diff --git a/R/helpers.R b/R/helpers.R index f52266e..28e5cf4 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -8,7 +8,7 @@ #' @param mn (MNode) The Member Node. #' @param data_pids (character) Optional. PIDs for data objects the metadata documents. #' -#' @return (character) PID of published metadata document. +#' @return (character) The PID of the published metadata document. #' #' @export #' @@ -19,7 +19,7 @@ #' mn <- getMNode(cn,"urn:node:mnTestKNB") #' pid <- create_dummy_metadata(mn) #' } -create_dummy_metadata <- function(mn, data_pids=NULL) { +create_dummy_metadata <- function(mn, data_pids = NULL) { # Make sure the node is not a production node if (mn@env == "prod") { @@ -126,7 +126,7 @@ create_dummy_object <- function(mn) { #' @param mn (MNode) The Member Node. #' @param size (numeric) The number of files in the package, including the metadata file. #' -#' @return (character) A named character vector of the data PIDs in the package. +#' @return (list) The PIDs for all elements in the data package. #' #' @export #' @@ -200,7 +200,7 @@ create_dummy_package <- function(mn, size = 2) { #' @param mn (MNode) The Member Node. #' @param children (character) Child package (resource maps) PIDs. #' -#' @return pid (character) A named character vector of PIDs, including parent package and child package PIDs. +#' @return (list) The resource map PIDs for both the parent and child packages. #' #' @export #' @@ -331,7 +331,7 @@ create_dummy_enumeratedDomain_dataframe <- function(factors) { #' @param mn (MNode) The Member Node. #' @param title (character) Optional. Title of package. Defaults to "A Dummy Package". #' -#' @return (list) A list of package PIDs, inluding for the resource map, metadata, and data objects. +#' @return (list) The PIDs for all elements in the data package. #' #' @import EML #' @import dataone @@ -463,109 +463,6 @@ get_orcid_name <- function(orcid_url) { } -#' List recent submissions to a DataONE Member Node -#' -#' List recent submissions to a DataONE Member Node from all submitters not present -#' in the administrator whitelist: <https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org> -#' -#' @param mn (MNode) A DataONE Member Node. -#' @param from (character) The date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}. -#' @param to (character) The date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}. -#' @param formatType (character) The format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. -#' @param whitelist (character) An xml list of admin orcid Identifiers. -#' Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org. -#' -#' @return (data.frame) -#' -#' @export -#' -#' @author Dominic Mullen dmullen17@@gmail.com -#' -#' @examples -#' \dontrun{ -#' cn <- dataone::CNode('PROD') -#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') -#' -#' # Return all submitted objects in the past month for the 'adc' node: -#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*')) -#' -#' # Return all submitted objects except for one user -#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*'), -#' whitelist = 'http://orcid.org/0000-0002-2561-5840') -#' } -list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*', - whitelist = 'https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org') { - if (!requireNamespace('lubridate', 'purrr', 'RCurl')) { - stop(call. = FALSE, - 'The packages "lubridate", "purrr", and "RCurl" must be installed to run this function. ', - 'Please install them and try again.') - } - stopifnot(methods::is(mn, 'MNode')) - if (!is_token_set(mn)) { - stop('No token set') - } - if (!(lubridate::is.Date(as.Date(from, '%Y/%M/%D')))) { - stop('"from" argument must be in YYYY/MM/DD format') - } - if (!(lubridate::is.Date(as.Date(to, '%Y/%M/%D')))) { - stop('"to" argument must be in YYYY/MM/DD format') - } - if (!(formatType %in% c('RESOURCE', 'METADATA', 'DATA', '*'))) { - stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') - } - - req <- httr::GET(whitelist) - if (req$status_code != 200) { - warning('Failed to read in', whitelist, '. Results will include admin submissions / edits.') - } - whitelist <- httr::content(req, "text") - - # Construct query and return results - q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT00:00:00Z"] AND formatType:%s', from, to, formatType) - results <- dataone::query(mn, list(q = q, - fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", - rows = 10000), - as = "data.frame") - - # Filter out rows where the submitter is in the whitelist - results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] - - # Return full names based on orcid Id - results$submitter_name <- purrr::map(results$submitter, get_orcid_name) %>% unlist() - - # Arrange by dateUploaded - results <- dplyr::arrange(results, dateUploaded) - - return(results) -} - -#' Retrieve a name from an orcid ID URL -#' -#' Retrieve first and last name from an orcid ID URL by scraping the page. -#' -#' @param orcid_url (character) A valid orcid ID URL address -#' -#' @return character -#' -#' @examples -#' \dontrun{ -#' pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') -#' } -get_orcid_name <- function(orcid_url) { - req <- httr::GET(orcid_url) - if(req$status_code != 200) { - stop('Failed to read in ', orcid_url) - } - - name <- httr::content(req, "text") %>% - stringr::str_extract("<title>.*<") %>% - stringr::str_split(" ") %>% - unlist() %>% - stringr::str_remove("<title>") - - return(paste(name[1], name[2])) -} - #' List recent submissions to a DataOne Member Node #' #' List recent submissions to a DataOne Member Node from all submitters not present @@ -628,8 +525,8 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType # Construct query and return results q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT23:59:59Z"] AND formatType:%s', from, to, formatType) results <- dataone::query(mn, list(q = q, - fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", - rows = 10000), + fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", + rows = 10000), as = "data.frame") # Filter out rows where the submitter is in the whitelist diff --git a/README.md b/README.md index 6a6687f..b5e15a8 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ remotes::install_github("nceas/arcticdatautils") ## Support +- Explore the pkgdown site for documentation: https://nceas.github.io/arcticdatautils/ - Please submit bugs or other comments as [Issues](https://github.com/NCEAS/arcticdatautils/issues) - Maintainers of the package are @jeanetteclark and @jagoldstein diff --git a/man/create_dummy_metadata.Rd b/man/create_dummy_metadata.Rd index 9c016ee..4cebc97 100644 --- a/man/create_dummy_metadata.Rd +++ b/man/create_dummy_metadata.Rd @@ -12,7 +12,7 @@ create_dummy_metadata(mn, data_pids = NULL) \item{data_pids}{(character) Optional. PIDs for data objects the metadata documents.} } \value{ -(character) PID of published metadata document. +(character) The PID of the published metadata document. } \description{ Create a test EML metadata object. diff --git a/man/create_dummy_package.Rd b/man/create_dummy_package.Rd index 5ede34a..032eacb 100644 --- a/man/create_dummy_package.Rd +++ b/man/create_dummy_package.Rd @@ -12,7 +12,7 @@ create_dummy_package(mn, size = 2) \item{size}{(numeric) The number of files in the package, including the metadata file.} } \value{ -(character) A named character vector of the data PIDs in the package. +(list) The PIDs for all elements in the data package. } \description{ Create a test data package. diff --git a/man/create_dummy_package_full.Rd b/man/create_dummy_package_full.Rd index 6c3d9b2..c8bf203 100644 --- a/man/create_dummy_package_full.Rd +++ b/man/create_dummy_package_full.Rd @@ -12,7 +12,7 @@ create_dummy_package_full(mn, title = "A Dummy Package") \item{title}{(character) Optional. Title of package. Defaults to "A Dummy Package".} } \value{ -(list) A list of package PIDs, inluding for the resource map, metadata, and data objects. +(list) The PIDs for all elements in the data package. } \description{ Creates a fuller package than \code{\link[=create_dummy_package]{create_dummy_package()}} diff --git a/man/create_dummy_parent_package.Rd b/man/create_dummy_parent_package.Rd index 5cfa611..7dd7ef2 100644 --- a/man/create_dummy_parent_package.Rd +++ b/man/create_dummy_parent_package.Rd @@ -12,7 +12,7 @@ create_dummy_parent_package(mn, children) \item{children}{(character) Child package (resource maps) PIDs.} } \value{ -pid (character) A named character vector of PIDs, including parent package and child package PIDs. +(list) The resource map PIDs for both the parent and child packages. } \description{ Create a test parent data package. diff --git a/man/get_orcid_name.Rd b/man/get_orcid_name.Rd deleted file mode 100644 index 3393bce..0000000 --- a/man/get_orcid_name.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers.R -\name{get_orcid_name} -\alias{get_orcid_name} -\title{Retrieve a name from an orcid ID URL} -\usage{ -get_orcid_name(orcid_url) -} -\arguments{ -\item{orcid_url}{(character) A valid orcid ID URL address} -} -\value{ -character -} -\description{ -Retrieve first and last name from an orcid ID URL by scraping the page. -} -\examples{ -\dontrun{ -pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') -} -} diff --git a/man/pid_to_eml_physical.Rd b/man/pid_to_eml_physical.Rd index 5ab4946..e89eeb0 100644 --- a/man/pid_to_eml_physical.Rd +++ b/man/pid_to_eml_physical.Rd @@ -12,7 +12,7 @@ pid_to_eml_physical(mn, pids) \item{pids}{(character) The PID of the object to create the sub-tree for.} } \value{ -(list) A list of otherEntity object(s). +(list) A list of physical objects. } \description{ This is a wrapper around \code{\link[=sysmeta_to_eml_physical]{sysmeta_to_eml_physical()}} which handles the task of diff --git a/man/sysmeta_to_eml_physical.Rd b/man/sysmeta_to_eml_physical.Rd index 61633da..45e89aa 100644 --- a/man/sysmeta_to_eml_physical.Rd +++ b/man/sysmeta_to_eml_physical.Rd @@ -10,7 +10,7 @@ sysmeta_to_eml_physical(sysmeta) \item{sysmeta}{(SystemMetadata) One or more System Metadata objects.} } \value{ -(list) A list of physical objects for each sysmeta. +(list) A list of physical objects. } \description{ This function creates a pre-canned EML physical object from what's in the From 24e03a28ab7e676fea6bd1c2323a58b589a6376c Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 29 Nov 2018 17:29:46 -0800 Subject: [PATCH 185/318] starting refactor process --- R/eml.R | 60 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/R/eml.R b/R/eml.R index f29a46c..a9e92ff 100644 --- a/R/eml.R +++ b/R/eml.R @@ -40,29 +40,33 @@ pid_to_eml_entity <- function(mn, systmeta <- getSystemMetadata(mn, pid) - # Create entity - entity <- new(entityType, - physical = pid_to_eml_physical(mn, pid), - ...) + if (entityType == "otherEntity"){ + entity <- eml$otherEntity(physical = pid_to_eml_physical(mn, pid)) + } + else if (entityType == "dataTable"){ + entity <- eml$dataTable(physical = pid_to_eml_physical(mn, pid)) + } # Set entity slots - if (length(slot(entity, "id")) == 0) { - entity@id <- new("xml_attribute", systmeta@identifier) + if (length(entity$id) == 0) { + # entity$id <- list(xml_attribute = systmeta@identifier) + entity$id <- systmeta@identifier } - if (length(slot(entity, "scope")) == 0) { - entity@scope <- new("xml_attribute", "document") + if (length(entity$scope) == 0) { + #entity$scope <- list(xml_attribute = "document") + entity$scope <- "document" } - if (length(slot(entity, "entityName")) == 0) { + if (length(entity$entityName) == 0) { if (!is.na(systmeta@fileName)) { - entity@entityName <- new("entityName", systmeta@fileName) + entity$entityName <- systmeta@fileName } } - if (entityType == "otherEntity" && length(slot(entity, "entityType")) == 0) { - entity@entityType <- "Other" + if (entityType == "otherEntity" && length(entity$entityType) == 0) { + entity$entityType <- "Other" } return(entity) @@ -94,7 +98,8 @@ pid_to_eml_physical <- function(mn, pids) { names(pids) <- '' # Named inputs produce a named output list - which is invalid in EML sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) - sysmeta_to_eml_physical(sysmeta) + + eml$physical(sysmeta_to_eml_physical(sysmeta)) } @@ -121,31 +126,26 @@ pid_to_eml_physical <- function(mn, pids) { #' } sysmeta_to_eml_physical <- function(sysmeta) { work <- function(x) { - phys <- new("physical") - phys@scope <- new("xml_attribute", "document") + phys <- eml$physical() + phys$scope <- "document" if (is.na(x@fileName)) { - phys@objectName <- new("objectName", "NA") + ob_name <- "NA" } else { - phys@objectName <- new("objectName", x@fileName) + ob_name <- x@fileName } - phys@size <- new("size", format(x@size, scientific = FALSE)) - phys@size@unit <- new("xml_attribute", "bytes") - - phys@authentication <- new("ListOfauthentication", list(new("authentication", x@checksum))) - phys@authentication[[1]]@method <- new("xml_attribute", x@checksumAlgorithm) - phys@dataFormat <- new("dataFormat") - phys@dataFormat@externallyDefinedFormat <- new("externallyDefinedFormat") - phys@dataFormat@externallyDefinedFormat@formatName <- x@formatId + phys <- set_physical(objectName = ob_name, + id = x@identifier, + size = format(x@size, scientific = FALSE), + sizeUnit = "bytes", + authentication = x@checksum, + authMethod = x@checksumAlgorithm, + url = paste0("https://cn.dataone.org/cn/v2/resolve/", x@identifier)) - phys@distribution <- new("ListOfdistribution", list(new("distribution"))) - phys@distribution[[1]]@scope <- new("xml_attribute", "document") - phys@distribution[[1]]@online <- new("online") - phys@distribution[[1]]@online@url <- new("url", paste0("https://cn.dataone.org/cn/v2/resolve/", x@identifier)) - slot(phys@distribution[[1]]@online@url, "function") <- new("xml_attribute", "download") + phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = x@formatId)) phys } From d29429131bf8decf8bb11532a6e3e288c8770021 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 29 Nov 2018 17:48:42 -0800 Subject: [PATCH 186/318] fixed last bug, now pid_to_eml_entity produces valid eml! --- R/eml.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/eml.R b/R/eml.R index a9e92ff..4533d64 100644 --- a/R/eml.R +++ b/R/eml.R @@ -97,9 +97,11 @@ pid_to_eml_physical <- function(mn, pids) { all(nchar(pids)) > 0) names(pids) <- '' # Named inputs produce a named output list - which is invalid in EML - sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) + #it really doesn't like this in a liset + #sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) + sysmeta <- getSystemMetadata(mn, pids) - eml$physical(sysmeta_to_eml_physical(sysmeta)) + sysmeta_to_eml_physical(sysmeta) } @@ -126,8 +128,6 @@ pid_to_eml_physical <- function(mn, pids) { #' } sysmeta_to_eml_physical <- function(sysmeta) { work <- function(x) { - phys <- eml$physical() - phys$scope <- "document" if (is.na(x@fileName)) { ob_name <- "NA" @@ -137,14 +137,13 @@ sysmeta_to_eml_physical <- function(sysmeta) { phys <- set_physical(objectName = ob_name, - id = x@identifier, size = format(x@size, scientific = FALSE), sizeUnit = "bytes", authentication = x@checksum, authMethod = x@checksumAlgorithm, url = paste0("https://cn.dataone.org/cn/v2/resolve/", x@identifier)) - + phys$scope <- "document" phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = x@formatId)) phys From e75640d66eef7a0400bc174961ba9dfebe25e927 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 10:00:47 -0800 Subject: [PATCH 187/318] initial commit to branch --- R/eml.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/eml.R b/R/eml.R index f29a46c..ef71b6f 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1,6 +1,8 @@ # Helper functions for creating EML metadata + + #' Create EML entity from a DataONE PID #' #' @param mn (MNode) Member Node where the PID is associated with an object. From b93e583eca3a0ff884c719fce4d6f1f200b865b8 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 10:14:16 -0800 Subject: [PATCH 188/318] remove ability to pass a list of pids to `physical` related functions --- R/eml.R | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/R/eml.R b/R/eml.R index 4533d64..daa0f0c 100644 --- a/R/eml.R +++ b/R/eml.R @@ -29,7 +29,8 @@ pid_to_eml_entity <- function(mn, stopifnot(is(mn, "MNode")) stopifnot(is.character(pid), - nchar(pid) > 0) + nchar(pid) > 0, + length(pid) == 1) stopifnot(entityType %in% c("dataTable", "spatialRaster", @@ -87,19 +88,17 @@ pid_to_eml_entity <- function(mn, #' #' @examples #' \dontrun{ -#' # Generate EML physical objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' pid_to_eml_physical(mn, pkg$data) +#' # Generate EML physical sections for an object in a data package +#' pid_to_eml_physical(mn, pid) #' } -pid_to_eml_physical <- function(mn, pids) { +pid_to_eml_physical <- function(mn, pid) { stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - all(nchar(pids)) > 0) - names(pids) <- '' # Named inputs produce a named output list - which is invalid in EML + stopifnot(is.character(pid), + all(nchar(pid)) > 0, + length(pid) == 1) + names(pid) <- '' # Named inputs produce a named output list - which is invalid in EML - #it really doesn't like this in a liset - #sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) - sysmeta <- getSystemMetadata(mn, pids) + sysmeta <- getSystemMetadata(mn, pid) sysmeta_to_eml_physical(sysmeta) } @@ -119,15 +118,12 @@ pid_to_eml_physical <- function(mn, pids) { #' #' @examples #' \dontrun{ -#' # Generate EML physical objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' sm <- lapply(pkg$data, function(pid) { -#' getSystemMetadata(mn, pid) -#' }) +#' # Generate EML physical object from a system metadata object +#' sm <- getSystemMetadata(mn, pid) #' sysmeta_to_eml_physical(sm) #' } sysmeta_to_eml_physical <- function(sysmeta) { - work <- function(x) { + stopifnot(is(sysmeta, "SystemMetadata")) if (is.na(x@fileName)) { ob_name <- "NA" @@ -145,13 +141,7 @@ sysmeta_to_eml_physical <- function(sysmeta) { phys$scope <- "document" phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = x@formatId)) - phys - } - - if (!is(sysmeta, "list")) sysmeta <- list(sysmeta) - - lapply(sysmeta, work) } From 5a11b94587efad8ff317609c4ca7c48d0387ea32 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 10:40:19 -0800 Subject: [PATCH 189/318] enable additional arguments to be passed where appropriate --- R/eml.R | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index daa0f0c..3ef550e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -42,10 +42,22 @@ pid_to_eml_entity <- function(mn, systmeta <- getSystemMetadata(mn, pid) if (entityType == "otherEntity"){ - entity <- eml$otherEntity(physical = pid_to_eml_physical(mn, pid)) + entity <- eml$otherEntity(physical = pid_to_eml_physical(mn, pid), ...) } else if (entityType == "dataTable"){ - entity <- eml$dataTable(physical = pid_to_eml_physical(mn, pid)) + entity <- eml$dataTable(physical = pid_to_eml_physical(mn, pid), ...) + } + else if (entityType == "spatialRaster"){ + entity <- eml$spatialRaster(physical = pid_to_eml_physical(mn, pid), ...) + } + else if (entityType == "spatialVector"){ + entity <- eml$spatialVector(physical = pid_to_eml_physical(mn, pid), ...) + } + else if (entityType == "storedProcedure"){ + entity <- eml$storedProcedure(physical = pid_to_eml_physical(mn, pid), ...) + } + else if (entityType == "view"){ + entity <- eml$view(physical = pid_to_eml_physical(mn, pid), ...) } # Set entity slots From 48e10ac291a28bfb1139fd5b8b130dab01261ad4 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 14:12:38 -0800 Subject: [PATCH 190/318] renaming entity type argument to entity_type since entityType is a slot in EML and it makes the code confusing otherwise --- R/eml.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/R/eml.R b/R/eml.R index 3ef550e..3d3b566 100644 --- a/R/eml.R +++ b/R/eml.R @@ -24,7 +24,7 @@ #' } pid_to_eml_entity <- function(mn, pid, - entityType = "otherEntity", + entity_type = "otherEntity", ...) { stopifnot(is(mn, "MNode")) @@ -32,7 +32,7 @@ pid_to_eml_entity <- function(mn, nchar(pid) > 0, length(pid) == 1) - stopifnot(entityType %in% c("dataTable", + stopifnot(entity_type %in% c("dataTable", "spatialRaster", "spatialVector", "storedProcedure", @@ -41,28 +41,28 @@ pid_to_eml_entity <- function(mn, systmeta <- getSystemMetadata(mn, pid) - if (entityType == "otherEntity"){ + if (entity_type == "otherEntity"){ entity <- eml$otherEntity(physical = pid_to_eml_physical(mn, pid), ...) } - else if (entityType == "dataTable"){ + else if (entity_type == "dataTable"){ entity <- eml$dataTable(physical = pid_to_eml_physical(mn, pid), ...) } - else if (entityType == "spatialRaster"){ + else if (entity_type == "spatialRaster"){ entity <- eml$spatialRaster(physical = pid_to_eml_physical(mn, pid), ...) } - else if (entityType == "spatialVector"){ + else if (entity_type == "spatialVector"){ entity <- eml$spatialVector(physical = pid_to_eml_physical(mn, pid), ...) } - else if (entityType == "storedProcedure"){ + else if (entity_type == "storedProcedure"){ entity <- eml$storedProcedure(physical = pid_to_eml_physical(mn, pid), ...) } - else if (entityType == "view"){ + else if (entity_type == "view"){ entity <- eml$view(physical = pid_to_eml_physical(mn, pid), ...) } # Set entity slots if (length(entity$id) == 0) { - # entity$id <- list(xml_attribute = systmeta@identifier) + # entity$id <- list(xml_attribute = systmeta@identifier) entity$id <- systmeta@identifier } @@ -78,7 +78,7 @@ pid_to_eml_entity <- function(mn, } } - if (entityType == "otherEntity" && length(entity$entityType) == 0) { + if (entity_type == "otherEntity" && length(entity$entity_type) == 0) { entity$entityType <- "Other" } @@ -151,8 +151,8 @@ sysmeta_to_eml_physical <- function(sysmeta) { authMethod = x@checksumAlgorithm, url = paste0("https://cn.dataone.org/cn/v2/resolve/", x@identifier)) - phys$scope <- "document" phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = x@formatId)) + phys } From a1cc0f93a34b0daa56b527ff07e16bc395e77b20 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 14:15:08 -0800 Subject: [PATCH 191/318] rewrite pid_to_entity test --- tests/testthat/test_eml.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 9c7db2d..8efd1c5 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -104,7 +104,8 @@ test_that("a dataTable and otherEntity can be added from a pid", { data_path <- tempfile() writeLines(LETTERS, data_path) - pid <- publish_object(mn, data_path, "text/csv") + pid1 <- publish_object(mn, data_path, "text/csv") + pid2 <- publish_object(mn, data_path, "text/csv") eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") @@ -119,28 +120,28 @@ test_that("a dataTable and otherEntity can be added from a pid", { dummy_entityDescription <- "Test_Description" # Create an otherEntity - OE <- pid_to_eml_entity(mn, pid, + OE <- pid_to_eml_entity(mn, pid1, entityName = dummy_entityName, entityDescription = dummy_entityDescription, attributeList = dummy_attributeList) - expect_s4_class(OE, "otherEntity") - expect_true(slot(OE, "entityName") == dummy_entityName) - expect_true(slot(OE, "entityDescription") == dummy_entityDescription) + + expect_true(OE$entityName == dummy_entityName) + expect_true(OE$entityDescription == dummy_entityDescription) # Create a dataTable - DT <- pid_to_eml_entity(mn, pid, + DT <- pid_to_eml_entity(mn, pid2, entityType = "dataTable", entityName = dummy_entityName, entityDescription = dummy_entityDescription, attributeList = dummy_attributeList) - expect_s4_class(DT, "dataTable") - expect_true(slot(DT, "entityName") == dummy_entityName) - expect_true(slot(DT, "entityDescription") == dummy_entityDescription) - doc@dataset@otherEntity[[1]] <- OE + expect_true(DT$entityName == dummy_entityName) + expect_true(DT$entityDescription == dummy_entityDescription) + + doc$dataset$otherEntity <- OE expect_true(EML::eml_validate(doc)) - doc@dataset@dataTable[[1]] <- DT + doc$dataset$dataTable <- DT expect_true(EML::eml_validate(doc)) unlink(data_path) From 7d27f951bf2ea22ef37ac90ab74e341908f10a60 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 15:16:17 -0800 Subject: [PATCH 192/318] remove methods helpers --- R/eml.R | 62 --------------------------------------- tests/testthat/test_eml.R | 11 ------- 2 files changed, 73 deletions(-) diff --git a/R/eml.R b/R/eml.R index 3d3b566..2f3d99f 100644 --- a/R/eml.R +++ b/R/eml.R @@ -203,68 +203,6 @@ get_doc_id <- function(sysmeta) { } -#' Add a methods step -#' -#' Add a methods step to an EML document. -#' -#' @param doc (eml) The EML document to add the method step to. -#' @param title (character) The title of the method step. -#' @param description (character) The description of the method. -#' -#' @return (eml) The modified EML document. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' eml <- read_eml("~/Documents/metadata.xml") -#' eml <- add_methods_step(eml, "Field Sampling", "Samples were -#' collected using a niskin water sampler.") -#' } -add_methods_step <- function(doc, title, description) { - stopifnot(is(doc, "eml")) - stopifnot(is(doc@dataset, "dataset")) - stopifnot(is.character(title), - nchar(title) > 0) - stopifnot(is.character(description), - nchar(description) > 0) - - new_step <- new("methodStep", - description = new("description", - section = new("section", list(newXMLNode("title", title), - newXMLNode("para", description))))) - - doc@dataset@methods@methodStep[[length(doc@dataset@methods@methodStep) + 1]] <- new_step - - doc -} - - -#' Clear all methods -#' -#' Clear all methods from an EML document. -#' -#' @param doc (eml) The document to clear methods from. -#' -#' @return (eml) The modified EML document. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' eml <- read_eml("~/Documents/metadata.xml") -#' eml <- clear_methods(eml) -#' } -clear_methods <- function(doc) { - stopifnot(is(doc, "eml")) - - # Clear the methods out - doc@dataset@methods <- new("MethodsType") - - doc -} - - #' Create an EML party #' #' You will usually want to use the high-level functions such as diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 8efd1c5..884b172 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -2,17 +2,6 @@ context("EML") mn <- env_load()$mn -test_that("a methods step can be added to an EML document", { - library(XML) - library(EML) - - doc <- new("eml") - doc <- add_methods_step(doc, "title", "description") - - expect_equal(XML::xmlValue(doc@dataset@methods@methodStep[[1]]@description@section[[1]]@.Data[[1]]), "title") - expect_equal(XML::xmlValue(doc@dataset@methods@methodStep[[1]]@description@section[[1]]@.Data[[2]]), "description") -}) - test_that("multiple method steps can be added to an EML document", { library(XML) library(EML) From e3b36ed36e0e90e32d00d10b6a80e996de2badcc Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 15:34:48 -0800 Subject: [PATCH 193/318] update eml_individual_name --- R/eml.R | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/R/eml.R b/R/eml.R index 2f3d99f..2947cdb 100644 --- a/R/eml.R +++ b/R/eml.R @@ -442,22 +442,16 @@ eml_individual_name <- function(given_names=NULL, sur_name) { stopifnot(is.character(sur_name) && nchar(sur_name) > 0) # Create <individualName> - indiv_name <- new("individualName") + indiv_name <- eml$individualName() if (!is.null(given_names)) { stopifnot(all(sapply(given_names, is.character))) stopifnot(all(lengths(given_names) > 0)) - givens <- lapply(given_names, function(given_name) { - x <- new("givenName") - x@.Data <- given_name - x - }) - - indiv_name@givenName <- new("ListOfgivenName", givens) + indiv_name$givenName = given_names } - indiv_name@surName <- new("surName", .Data = sur_name) + indiv_name$surName <- sur_name indiv_name } From 32d4faea9e04ec23c3fdf4bf3038fd94fd10e79a Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 16:13:57 -0800 Subject: [PATCH 194/318] update eml_party --- R/eml.R | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/R/eml.R b/R/eml.R index 2947cdb..a10307a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -247,51 +247,52 @@ eml_party <- function(type="associatedParty", "You must specify at least one of sur_name, organization, or position to make a valid creator") } - party <- new(type) + if (type == "creator"){ + party <- eml$creator() + } + else if (type == "contact"){ + party <- eml$creator() + } + else if (type == "associatedParty"){ + party <- eml$creator() + } + else if (type == "metadataProvider"){ + party <- eml$creator() + } + else if (type == "personnel"){ + party <- eml$creator() + } + # Individual Name if (!is.null(sur_name)) { - party@individualName <- c(eml_individual_name(given_names, sur_name)) + party$individualName <- c(eml_individual_name(given_names, sur_name)) } # Organization Name if (!is.null(organization)) { - party@organizationName <- new('ListOforganizationName', lapply(organization, function(x) {new('organizationName', .Data = x)})) + party$organizationName <- organization } # Position if (!is.null(position)) { - party@positionName <- new('ListOfpositionName', lapply(position, function(x) {new('positionName', .Data = x)})) + party$positionName <- position } # Email if (!is.null(email)) { - party@electronicMailAddress <- new("ListOfelectronicMailAddress", lapply(email, function(x) { new("electronicMailAddress", .Data = x )})) + party$electronicMailAddress <- email } # Address if (!is.null(address)) { - # Upgade to a ListOfaddress if needed - if (is(address, "address")) { - address <- c(address) - } - - party@address <- address + # This crams the entire address into the delivery point...not ideal + party$address <- eml$address(address) } # Phone if (!is.null(phone)) { - # Upgrade to phone is needed - if (is.character(phone)) { - phone <- new("ListOfphone", lapply(phone, function(x) as(x, "phone"))) - } - - # Upgade to a ListOfphone if needed - if (is(phone, "phone")) { - phone <- c(phone) - } - - party@phone <- phone + party$phone <- phone } # userId @@ -301,7 +302,9 @@ eml_party <- function(type="associatedParty", warning(paste0("The provided `userId` of '", userId, "' does not look like an ORCID and the `userId` argument assumes the given `userId` is an ORCID. ORCIDs should be passed in like https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.")) } - party@userId <- c(new("userId", .Data = userId, directory = "https://orcid.org")) + party$userId <- eml$userId() + party$userId$userId <- userID + party$userId$directory = "https://orcid.org" } # Role @@ -314,9 +317,7 @@ eml_party <- function(type="associatedParty", # If type is personnel, role needs to be ListOfrole, otherwise just role if (type == "personnel") { - party@role <- as(lapply(role, as, Class = "role"), "ListOfrole") - } else { - party@role <- as(role, "role") + party$role <- role } } From 06e971b78524d3d995f39d7c5699366473938776 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 16:27:25 -0800 Subject: [PATCH 195/318] replace if statements thanks to some Bryce magic --- R/eml.R | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/R/eml.R b/R/eml.R index a10307a..99b3586 100644 --- a/R/eml.R +++ b/R/eml.R @@ -41,24 +41,7 @@ pid_to_eml_entity <- function(mn, systmeta <- getSystemMetadata(mn, pid) - if (entity_type == "otherEntity"){ - entity <- eml$otherEntity(physical = pid_to_eml_physical(mn, pid), ...) - } - else if (entity_type == "dataTable"){ - entity <- eml$dataTable(physical = pid_to_eml_physical(mn, pid), ...) - } - else if (entity_type == "spatialRaster"){ - entity <- eml$spatialRaster(physical = pid_to_eml_physical(mn, pid), ...) - } - else if (entity_type == "spatialVector"){ - entity <- eml$spatialVector(physical = pid_to_eml_physical(mn, pid), ...) - } - else if (entity_type == "storedProcedure"){ - entity <- eml$storedProcedure(physical = pid_to_eml_physical(mn, pid), ...) - } - else if (entity_type == "view"){ - entity <- eml$view(physical = pid_to_eml_physical(mn, pid), ...) - } + entity <- eml[[entity_type]](physical = pid_to_eml_physical(mn, pid), ...) # Set entity slots if (length(entity$id) == 0) { @@ -247,22 +230,7 @@ eml_party <- function(type="associatedParty", "You must specify at least one of sur_name, organization, or position to make a valid creator") } - if (type == "creator"){ - party <- eml$creator() - } - else if (type == "contact"){ - party <- eml$creator() - } - else if (type == "associatedParty"){ - party <- eml$creator() - } - else if (type == "metadataProvider"){ - party <- eml$creator() - } - else if (type == "personnel"){ - party <- eml$creator() - } - + party <- eml[[type]]() # Individual Name if (!is.null(sur_name)) { From ee50dd83e6f57df47fbdd7ffd18888a3743381b9 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 30 Nov 2018 16:27:39 -0800 Subject: [PATCH 196/318] remove methods tests, update personnel tests --- tests/testthat/test_eml.R | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 884b172..2ed6c64 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -2,52 +2,28 @@ context("EML") mn <- env_load()$mn -test_that("multiple method steps can be added to an EML document", { - library(XML) - library(EML) - - doc <- new("eml") - doc <- add_methods_step(doc, "title", "description") - doc <- add_methods_step(doc, "another", "method") - - expect_length(doc@dataset@methods@methodStep, 2) -}) - -test_that("methods can be cleared from an EML document", { - library(EML) - - doc <- new("eml") - doc <- add_methods_step(doc, "title", "description") - - expect_length(doc@dataset@methods@methodStep, 1) - - doc <- clear_methods(doc) - expect_length(doc@dataset@methods@methodStep, 0) -}) - test_that("a creator can be created", { creator <- eml_creator("test", "user") - expect_is(creator, "creator") - expect_equal(creator@individualName[[1]]@givenName[[1]]@.Data, "test") - expect_equal(creator@individualName[[1]]@surName@.Data, "user") + expect_equal(creator$individualName$givenName, "test") + expect_equal(creator$individualName$surName, "user") }) test_that("a contact can be created", { contact <- eml_contact("test", "user") expect_is(contact, "contact") - expect_equal(contact@individualName[[1]]@givenName[[1]]@.Data, "test") - expect_equal(contact@individualName[[1]]@surName@.Data, "user") + expect_equal(contact$individualName$givenName, "test") + expect_equal(contact$individualName$surName, "user") }) test_that("a personnel can be created", { personnel <- eml_personnel(given_names = "test", sur_name = "user", role = "principalInvestigator") expect_is(personnel, "personnel") - expect_equal(personnel@individualName[[1]]@givenName[[1]]@.Data, "test") - expect_equal(personnel@individualName[[1]]@surName@.Data, "user") - expect_equal(personnel@role[[1]]@.Data, "principalInvestigator") + expect_equal(personnel$individualName$givenName, "test") + expect_equal(personnel$individualName$surName, "user") + expect_equal(personnel$role, "principalInvestigator") }) test_that("a project can be created", { From fc9c84393609e6c37ba3fc39025fa8647cfa3986 Mon Sep 17 00:00:00 2001 From: Jesse Goldstein <jgoldstein@nceas.ucsb.edu> Date: Wed, 5 Dec 2018 11:57:25 -0800 Subject: [PATCH 197/318] Updated set_public_read_all_versions() to catch all versions of all object types. --- NAMESPACE | 1 + R/util.R | 9 +++++---- man/set_public_read_all_versions.Rd | 28 ++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 man/set_public_read_all_versions.Rd diff --git a/NAMESPACE b/NAMESPACE index 8e3c3b2..76e50d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -58,6 +58,7 @@ export(set_abstract) export(set_access) export(set_file_name) export(set_public_read) +export(set_public_read_all_versions) export(set_rights_and_access) export(set_rights_holder) export(show_indexing_status) diff --git a/R/util.R b/R/util.R index ac1e152..6526065 100644 --- a/R/util.R +++ b/R/util.R @@ -1082,13 +1082,14 @@ set_public_read_all_versions <- function(mn, resource_map_pid) { stopifnot(is(mn, 'MNode')) stopifnot(is_token_set(mn)) stopifnot(is.character(resource_map_pid)) - stopifnot(is_resource_map(mn, resource_map_pid)) + stopifnot(arcticdatautils:::is_resource_map(mn, resource_map_pid)) - versions <- get_all_versions(mn, resource_map_pid) - pids <- lapply(versions, get_package, node = mn) %>% + pids <- get_package(mn, resource_map_pid) %>% + unlist() + versions <- lapply(pids, get_all_versions, node = mn) %>% unlist() %>% unique() - set_public_read(mn, pids) + set_public_read(mn, versions) return(invisible()) } diff --git a/man/set_public_read_all_versions.Rd b/man/set_public_read_all_versions.Rd new file mode 100644 index 0000000..4f2e196 --- /dev/null +++ b/man/set_public_read_all_versions.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{set_public_read_all_versions} +\alias{set_public_read_all_versions} +\title{Set public READ access on all versions of PIDs in data package.} +\usage{ +set_public_read_all_versions(mn, resource_map_pid) +} +\arguments{ +\item{mn}{(MNode) The Member Node to query.} + +\item{resource_map_pid}{(character) The resource map identifier (PID).} +} +\description{ +Set public READ access on all versions of PIDs in data package. +} +\examples{ +\dontrun{ +cn_staging <- CNode('STAGING') +adc_test <- getMNode(cn_staging,'urn:node:mnTestARCTIC') +# Create a dummy package then create another version with 'publish_update()' +pkg <- create_dummy_package(adc_test) +remove_public_read(mn, unlist(pkg)) +pkg_v2 <- publish_update(adc_test, pkg$metadata, pkg$resource_map, pkg$data, public = FALSE) +# Set public read on all versions +set_public_read_all_versions(adc_test, pkg$resource_map) +} +} From 1ff16e12d848b29025186e8986ff5edc9732bdbc Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 4 Jan 2019 14:06:36 -0800 Subject: [PATCH 198/318] refactor `eml_project` and relevant tests --- R/eml.R | 50 +++++++++++++++------------------------ tests/testthat/test_eml.R | 38 ++++++++++++++--------------- 2 files changed, 37 insertions(+), 51 deletions(-) diff --git a/R/eml.R b/R/eml.R index 99b3586..539552e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -212,8 +212,8 @@ get_doc_id <- function(sysmeta) { #' \dontrun{ #' eml_party("creator", "Test", "User") #' eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") -#' eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), -#' c("Data Scientist", "Programmer")) +#' eml_party("creator", list("Dominic", "'Dom'"), "Mullen", list("NCEAS", "UCSB"), +#' list("Data Scientist", "Programmer")) #'} eml_party <- function(type="associatedParty", given_names = NULL, @@ -234,7 +234,7 @@ eml_party <- function(type="associatedParty", # Individual Name if (!is.null(sur_name)) { - party$individualName <- c(eml_individual_name(given_names, sur_name)) + party$individualName <- list(eml_individual_name(given_names, sur_name)) } # Organization Name @@ -271,7 +271,7 @@ eml_party <- function(type="associatedParty", } party$userId <- eml$userId() - party$userId$userId <- userID + party$userId$userId <- userId party$userId$directory = "https://orcid.org" } @@ -434,12 +434,12 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' fully fleshed out. Need to pass these objects in directly if you want to use #' them. #' -#' @param title (character) Title of the project (Required). May have multiple titles. +#' @param title (character) Title of the project (Required). May have multiple titles constructed using `list`. #' @param personnelList (list of personnel) Personnel involved with the project. -#' @param abstract (character) Project abstract. Can pass as a character vector +#' @param abstract (character) Project abstract. Can pass as a list #' for separate paragraphs. #' @param funding (character) Funding sources for the project such as grant and -#' contract numbers. Can pass as a character vector for separate paragraphs. +#' contract numbers. Can pass as a list for separate paragraphs. #' @param studyAreaDescription (studyAreaDescription) #' @param designDescription (designDescription) #' @param relatedProject (project) @@ -449,9 +449,9 @@ eml_individual_name <- function(given_names=NULL, sur_name) { #' @export #' #' @examples -#' proj <- eml_project(c("Some title", "A second title if needed"), -#' c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), -#' c("Abstract paragraph 1", "Abstract paragraph 2"), +#' proj <- eml_project(list("Some title", "A second title if needed"), +#' list(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), +#' list("Abstract paragraph 1", "Abstract paragraph 2"), #' "Funding Agency: Award Number 12345") eml_project <- function(title, personnelList, @@ -466,50 +466,38 @@ eml_project <- function(title, all(nchar(title)) > 0) stopifnot(length(personnelList) > 0) - # Project - project <- new("project") + project <- eml$project() # Title - titles <- lapply(title, function(x) { as(x, "title") }) - project@title <- as(titles, "ListOftitle") + project$title <- title - # Personnel - if (!all(sapply(personnelList, function(x) { is(x, "personnel") }))) { - stop(call. = FALSE, - "All personnel in the list must be of type 'personnel'") - } + project$personnel <- personnelList - project@personnel <- as(personnelList, "ListOfpersonnel") + doc$dataset$project <- project # Abstract if (!is.null(abstract)) { - abstract_paras <- lapply(abstract, function(x) { - as(list(xml2::xml_new_root("para", as.character(x))), "para") - }) - project@abstract@para <- as(abstract_paras, "ListOfpara") + project$abstract <- eml$abstract(para = abstract) } # Funding if (!is.null(funding)) { - funding_paras <- lapply(funding, function(x) { - as(list(xml2::xml_new_root("para", as.character(x))), "para") - }) - project@funding@para <- as(funding_paras, "ListOfpara") + project$funding <- eml$funding(para = funding) } # Study area description if (!is.null(studyAreaDescription)) { - project@studyAreaDescription <- studyAreaDescription + project$studyAreaDescription <- studyAreaDescription } # Design description if (!is.null(designDescription)) { - project@designDescription <- designDescription + project$designDescription <- designDescription } # Related Project if (!is.null(relatedProject)) { - project@relatedProject <- relatedProject + project$relatedProject <- relatedProject } project diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 2ed6c64..cc9800c 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -18,7 +18,7 @@ test_that("a contact can be created", { }) test_that("a personnel can be created", { - personnel <- eml_personnel(given_names = "test", sur_name = "user", role = "principalInvestigator") + personnel <- eml_personnel(given_names = "test", sur_name = "user", role = "principalInvestigator", userId = "https://orcid.org/WWWW-XXXX-YYYY-ZZZZ") expect_is(personnel, "personnel") expect_equal(personnel$individualName$givenName, "test") @@ -34,32 +34,30 @@ test_that("a project can be created", { "This is a test abstract", "I won an award, yay") - expect_is(project, "project") - expect_equal(project@title[[1]]@.Data, "some title") - expect_equal(project@personnel[[1]]@individualName[[1]]@givenName[[1]]@.Data, "A") - expect_equal(project@personnel[[1]]@individualName[[1]]@surName@.Data, "User") - expect_equal(project@personnel[[1]]@organizationName[[1]]@.Data, "NCEAS") - expect_equal(project@personnel[[1]]@role[[1]]@.Data, "originator") - expect_equal(xml2::xml_text(project@funding@para[[1]]@.Data[[1]]), "I won an award, yay") + expect_equal(project$title[[1]], "some title") + expect_equal(project$personnel[[1]]$individualName[[1]]$givenName[[1]], "A") + expect_equal(project$personnel[[1]]$individualName[[1]]$surName, "User") + expect_equal(project$personnel[[1]]$organizationName[[1]], "NCEAS") + expect_equal(project$personnel[[1]]$role[[1]], "originator") + expect_equal(project$funding$para[[1]], "I won an award, yay") }) test_that("a project can be created with multiple personnel, an abstract can be created with multiple paragraphs, awards with multiple awards", { test_personnel_1 <- eml_personnel(given_names = "A", sur_name = "User", organization = "NCEAS", role = "originator") - test_personnel_2 <- eml_personnel(given_names = "Testy", sur_name = "Mactesterson", organization = "A Test Org", role = c("user", "author")) + test_personnel_2 <- eml_personnel(given_names = "Testy", sur_name = "Mactesterson", organization = "A Test Org", role = list("user", "author")) project <- eml_project("some title", list(test_personnel_1, test_personnel_2), - c("This is a test abstract", "This is the second paragraph"), - c("I won an award, yay", "I won a second award, wow")) - - expect_is(project, "project") - expect_equal(project@title[[1]]@.Data, "some title") - expect_equal(project@personnel[[2]]@individualName[[1]]@givenName[[1]]@.Data, "Testy") - expect_equal(project@personnel[[2]]@individualName[[1]]@surName@.Data, "Mactesterson") - expect_equal(project@personnel[[2]]@organizationName[[1]]@.Data, "A Test Org") - expect_equal(project@personnel[[2]]@role[[2]]@.Data, "author") - expect_equal(xml2::xml_text(project@abstract@para[[2]]@.Data[[1]]), "This is the second paragraph") - expect_equal(xml2::xml_text(project@funding@para[[2]]@.Data[[1]]), "I won a second award, wow") + list("This is a test abstract", "This is the second paragraph"), + list("I won an award, yay", "I won a second award, wow")) + + expect_equal(project$title[[1]], "some title") + expect_equal(project$personnel[[2]]$individualName[[1]]$givenName[[1]], "Testy") + expect_equal(project$personnel[[2]]$individualName[[1]]$surName, "Mactesterson") + expect_equal(project$personnel[[2]]$organizationName[[1]], "A Test Org") + expect_equal(project$personnel[[2]]$role[[2]], "author") + expect_equal(project$abstract$para[[2]], "This is the second paragraph") + expect_equal(project$funding$para[[2]], "I won a second award, wow") }) test_that("a dataTable and otherEntity can be added from a pid", { From 1f8a5cd0aa0db7c0cb2ff74740dd95be81c9db60 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 4 Jan 2019 14:09:12 -0800 Subject: [PATCH 199/318] refactor two tests --- tests/testthat/test_eml.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index cc9800c..cebbed7 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -122,12 +122,12 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { expect_error(eml_otherEntity_to_dataTable(eml, "1")) # subscripts out of bounds - expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[2]])) + expect_error(eml_otherEntity_to_dataTable(eml, eml$dataset$otherEntity[[2]])) expect_error(eml_otherEntity_to_dataTable(eml, 2)) # Duplicate entityNames found - eml@dataset@otherEntity[[2]] <- eml@dataset@otherEntity[[1]] - expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]])) + eml$dataset$otherEntity[[2]] <- eml$dataset$otherEntity[[1]] + expect_error(eml_otherEntity_to_dataTable(eml, eml$dataset$otherEntity[[1]])) }) @@ -137,16 +137,16 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { } eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) - otherEntity <- eml@dataset@otherEntity[[1]] + otherEntity <- eml$dataset$otherEntity[[1]] - eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]) + eml <- eml_otherEntity_to_dataTable(eml, eml$dataset$otherEntity[[1]]) # test that otherEntity was removed - expect_length(eml@dataset@otherEntity, 0) + expect_length(eml$dataset$otherEntity, 0) # test that dataTable was added - expect_equal(otherEntity@entityName, eml@dataset@dataTable[[1]]@entityName) - expect_equivalent(otherEntity@physical, eml@dataset@dataTable[[1]]@physical) + expect_equal(otherEntity$entityName, eml$dataset$dataTable[[1]]$entityName) + expect_equivalent(otherEntity$physical, eml$dataset$dataTable[[1]]$physical) }) test_that("which_in_eml returns correct locations", { From 7cb3b4c7302ebfc488b0030fe698d67ad9bf7dd9 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 4 Jan 2019 14:51:06 -0800 Subject: [PATCH 200/318] refactor geocov and and address --- R/eml.R | 47 +++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/R/eml.R b/R/eml.R index 539552e..586e64a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -512,6 +512,8 @@ eml_project <- function(title, #' For a single point, the North and South bounding coordinates should be the same and #' the East and West bounding coordinates should be the same. #' +#' Note that EML::set_coverage() provides the same (and more) functionality +#' #' @param description (character) A textual description. #' @param north (numeric) North bounding coordinate. #' @param east (numeric) East bounding coordinate. @@ -520,16 +522,15 @@ eml_project <- function(title, #' #' @return (geographicCoverage) The new geographicCoverage section. #' -#' @export eml_geographic_coverage <- function(description, north, east, south, west) { - cov <- new("geographicCoverage") + cov <- eml$geographicCoverage() - cov@geographicDescription <- description + cov$geographicDescription <- description - cov@boundingCoordinates@northBoundingCoordinate <- new("northBoundingCoordinate", as.character(north)) - cov@boundingCoordinates@eastBoundingCoordinate <- new("eastBoundingCoordinate", as.character(east)) - cov@boundingCoordinates@southBoundingCoordinate <- new("southBoundingCoordinate", as.character(south)) - cov@boundingCoordinates@westBoundingCoordinate <- new("westBoundingCoordinate", as.character(west)) + cov$boundingCoordinates$northBoundingCoordinate <- as.character(north) + cov$boundingCoordinates$eastBoundingCoordinate <- as.character(east) + cov$boundingCoordinates$southBoundingCoordinate <- as.character(south) + cov$boundingCoordinates$westBoundingCoordinate <- as.character(west) cov } @@ -539,6 +540,8 @@ eml_geographic_coverage <- function(description, north, east, south, west) { #' #' A simple way to create an EML address element. #' +#' Note that EML::eml$address() provides the same functionality +#' #' @param delivery_points (character) One or more delivery points. #' @param city (character) City. #' @param administrative_area (character) Administrative area. @@ -546,7 +549,6 @@ eml_geographic_coverage <- function(description, north, east, south, west) { #' #' @return (address) An EML address object. #' -#' @export #' #' @examples #' NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") @@ -556,32 +558,13 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) is.character(administrative_area), (is.character(postal_code) || is.numeric(postal_code))) - address <- new("address") - - # Delivery point(s) - dps <- lapply(delivery_points, function(dp) { - x <- new("deliveryPoint") - x@.Data <- dp - x - }) - - # City - ct <- new("city") - ct@.Data <- city - - # Administrative area - aa <- new("administrativeArea") - aa@.Data <- administrative_area + address <- eml$address() - # Postal Code - pc <- new("postalCode") - pc@.Data <- as.character(postal_code) + address$deliveryPoint <- delivery_points + address$city <- city + address$administrativeArea <- administrative_area + address$postalCode <- as.character(postal_code) - # Put them all together - address@deliveryPoint <- new("ListOfdeliveryPoint", dps) - address@city <- ct - address@administrativeArea <- aa - address@postalCode <- pc address } From 86bb7a2c1a72f8db6e70516e43cc2a315d6c5b9d Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 4 Jan 2019 15:19:28 -0800 Subject: [PATCH 201/318] abstract and validate attributes functions --- R/eml.R | 57 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/R/eml.R b/R/eml.R index 586e64a..44dd3c0 100644 --- a/R/eml.R +++ b/R/eml.R @@ -587,20 +587,21 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) #' @examples #' # Create a new EML document #' library(EML) -#' doc <- new("eml") +#' doc <- eml() #' #' # Set an abstract with a single paragraph -#' set_abstract(doc, c("Test abstract...")) +#' set_abstract(doc, list("Test abstract...")) #' #' # Or one with multiple paragraphs -#' set_abstract(doc, c("First para...", "second para...")) +#' set_abstract(doc, list("First para...", "second para...")) set_abstract <- function(doc, text) { - stopifnot(is(doc, "eml")) + # need to rewrite this test + # stopifnot(is(doc, "eml")) if (length(text) == 1) { - doc@dataset@abstract <- eml_abstract(text) + doc$dataset$abstract <- eml_abstract(text) } else if (length(text) > 1) { - doc@dataset@abstract <- eml_abstract(text) + doc$dataset$abstract <- eml_abstract(text) } doc @@ -611,29 +612,26 @@ set_abstract <- function(doc, text) { #' #' Create an EML abstract. #' +#' Note that eml$abstract() provides the same functionality. +#' #' @param text (character) Paragraphs of text with one paragraph per element in the -#' character vector. +#' character vector, constructed using `list` #' #' @return (abstract) An EML abstract. #' -#' @export #' #' @examples #' # Set an abstract with a single paragraph #' eml_abstract("Test abstract...") #' #' # Or one with multiple paragraphs -#' eml_abstract(c("First para...", "second para...")) +#' eml_abstract(list("First para...", "second para...")) eml_abstract <- function(text) { stopifnot(is.character(text), length(text) > 0, all(nchar(text)) > 0) - if (length(text) == 1) { - abstract <- new("abstract", .Data = new("TextType", .Data = "hi")) - } else if (length(text) > 1) { - abstract <- new("abstract", para = new("ListOfpara", lapply(text, function(x) new("para", x)))) - } + abstract <- eml$abstract(para = text) abstract } @@ -661,13 +659,14 @@ eml_abstract <- function(text) { #' eml_validate_attributes(attributes) #' } eml_validate_attributes <- function(attributes) { - stopifnot(is(attributes, "attributeList")) + # need to rewrite this check + # stopifnot(is(attributes, "attributeList")) # Define an interal applyable function to validate each attribute eml_validate_attribute <- function(attribute) { - stopifnot(is(attribute, "attribute")) + # stopifnot(is(attribute, "attribute")) - doc@dataset@otherEntity[[1]]@attributeList@attribute[[1]] <- attribute + doc$dataset$otherEntity$attributeList$attribute[[1]] <- attribute # Validate! eml_validate(doc) @@ -676,20 +675,20 @@ eml_validate_attributes <- function(attributes) { # Create a minimum valid EML doc we'll re-use each time we validate a single # attribute - doc <- new("eml", packageId = "test", system = " test") - doc@dataset@title <- c(new("title", .Data = "test")) - doc@dataset@creator <- new("ListOfcreator", list(eml_creator("Test", "test"))) - doc@dataset@contact <- new("ListOfcontact", list(eml_contact("Test", "test"))) + # Create a dummy otherEntity with our attributeList - entity <- new("otherEntity", - entityName = "name", - entityType = "type") - entity@attributeList <- new("attributeList") - doc@dataset@otherEntity <- new("ListOfotherEntity", list(entity)) - results <- sapply(attributes@attribute, function(attribute) { - cat(paste0("Validating single attribute '", attribute@attributeName@.Data, "': ")) + doc <- list(packageId = "test", + system = "test", + dataset = eml$dataset( + title = "test", + creator = eml$creator(individualName = eml$individualName(givenName = "test", surName = "test")), + contact = eml$contact(individualName = eml$individualName(givenName = "test", surName = "test")), + otherEntity = eml$otherEntity(entityName = "name", entityType = "otherEntity"))) + + results <- sapply(attributes$attribute, function(attribute) { + cat(paste0("Validating single attribute '", attribute$attributeName, "': ")) result <- NULL result <- tryCatch({ @@ -708,7 +707,7 @@ eml_validate_attributes <- function(attributes) { } }) - names(results) <- sapply(attributes@attribute, function(x) x@attributeName) + names(results) <- sapply(attributes$attribute, function(x) x$attributeName) results } From 09591d0c56ae96477fed2255eeca6961ff2341e8 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 09:00:51 -0800 Subject: [PATCH 202/318] remove function this function is too specific and complex to be useful to most users --- R/eml.R | 142 -------------------------------------------------------- 1 file changed, 142 deletions(-) diff --git a/R/eml.R b/R/eml.R index 44dd3c0..a3d8982 100644 --- a/R/eml.R +++ b/R/eml.R @@ -712,148 +712,6 @@ eml_validate_attributes <- function(attributes) { results } - -#' Add new entity elements to an EML document from a table -#' -#' Add new entity elements to an EML document from a table. -#' -#' @param doc (eml) An EML document. -#' @param entities (data.frame) A data.frame with columns type, path, pid, and -#' format_id. -#' @param resolve_base (character) Optional. Specify a DataONE CN resolve base -#' URI which will be used for serializing download URLs into the EML. Most users -#' should not override the default value. -#' -#' @return (eml) The modified EML document. -#' -#' @export -#' -#' @examples -#' # Create entities from files on disk -#' \dontrun{ -#' types <- c("dataTable") -#' paths <- list.files(., full.names = TRUE) # Get full paths to some files -#' pids <- vapply(paths, function(x) { -#' paste0("urn:uuid:", uuid::UUIDgenerate()) -#' }, "") # Generate some UUID PIDs -#' Try to guess format IDs, you should check this afterwards -#' format_ids <- guess_format_id(paths) -#' -#' entity_df <- data.frame(type = types, -#' path = paths, -#' pid = pids, -#' format_id = format_ids, -#' stringsAsFactors = FALSE) -#' -#' doc <- new("eml") -#' doc <- eml_add_entities(doc, entity_df) -#'} -#' -#' # Read in a CSV containing the info about files on disk -#' \dontrun{ -#' entity_df <- read.csv("./my_entities.csv", stringsAsFactors = FALSE) -#' doc <- new("eml") -#' doc <- eml_add_entities(doc, entity_df) -#' } -eml_add_entities <- function(doc, - entities, - resolve_base="https://cn.dataone.org/cn/v2/resolve/") { - stopifnot(is(doc, "eml")) - - if (!is(entities, "data.frame")) { - stop("The argument 'entities' must be a 'data.frame'.") - } - - if (!identical(sort(names(entities)), c("format_id", "path", "pid", "type"))) { - stop("The columns in the data.frame you passed in for the 'entities' argument did not have the expected column names of type, path, pid, format_id and it must.", call. = FALSE) - } - - entity_types <- c("dataTable", "spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity") - - if (!all(entities$type %in% entity_types)) { - stop(call. = FALSE, paste0("The `type` column must only include values from: ", paste(entity_types, collapse = ", "), ".")) - } - - # Warn about existing entities - for (type in entity_types) { - if (type %in% entities$type && length(slot(doc@dataset, type)) > 0) { - warning(paste0("You are adding one or more ", type, " elements. This function only adds entities and does not remove/replace them.")) - } - } - - # Internal function to create a single entity - eml_entity <- function(type, path, pid, format_id) { - # Convert args to character vectors if needed - if (is.factor(type)) type <- as.character(type) - if (is.factor(path)) path <- as.character(path) - if (is.factor(pid)) pid <- as.character(pid) - if (is.factor(format_id)) format_id <- as.character(format_id) - - stopifnot(file.exists(path)) - stopifnot(is.character(path), nchar(path) > 0) - stopifnot(is.character(pid), nchar(pid) > 0) - stopifnot(is.character(format_id), nchar(format_id) > 0) - - file_name <- basename(path) - - entity <- new(type) - entity@id <- new("xml_attribute", pid) - entity@scope <- new("xml_attribute", "document") - - entity@entityName <- new("entityName", .Data = file_name) - - if (type == "otherEntity") { - entity@entityType <- "Other" - } - - # otherEntity/physical - physical <- new("physical") - physical@scope <- new("xml_attribute", "document") - physical@objectName <- new("objectName", file_name) - - physical@size <- new("size", format(file.size(path), scientific = FALSE)) - physical@size@unit <- new("xml_attribute", "bytes") - physical@authentication <- new("ListOfauthentication", list(new("authentication", digest::digest(path, algo = "sha1", file = TRUE)))) - physical@authentication[[1]]@method <- new("xml_attribute", "SHA-1") - - physical@dataFormat <- new("dataFormat") - physical@dataFormat@externallyDefinedFormat <- new("externallyDefinedFormat") - physical@dataFormat@externallyDefinedFormat@formatName <- format_id - - physical@distribution <- new("ListOfdistribution", list(new("distribution"))) - physical@distribution[[1]]@scope <- new("xml_attribute", "document") - physical@distribution[[1]]@online <- new("online") - physical@distribution[[1]]@online@url <- new("url", paste0(resolve_base, pid)) - - slot(physical@distribution[[1]]@online@url, "function") <- new("xml_attribute", "download") - - entity@physical <- new("ListOfphysical", list(physical)) - - entity - } - - # Create new entities - new_entities <- lapply(entity_types, function(type) { - lapply(which(entities$type == type), function(i) { - eml_entity(entities[i, "type"], - entities[i,"path"], - entities[i,"pid"], - entities[i,"format_id"]) - }) - }) - - names(new_entities) <- entity_types # Name the list so we can [[ by type - - # Merge new entities into existing - for (type in entity_types) { - slot(doc@dataset, type) <- new(paste0("ListOf", type), c(slot(doc@dataset, type), - new(paste0("ListOf", type), new_entities[[type]]))) - } - - doc -} - - #' Convert otherEntities to dataTables #' #' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an From 63f83c550fe917ebbd271f282bcbf46608bedc93 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 11:27:26 -0800 Subject: [PATCH 203/318] updates to more functions --- R/eml.R | 114 +++++++++++++++++++++++++------------------------------- 1 file changed, 50 insertions(+), 64 deletions(-) diff --git a/R/eml.R b/R/eml.R index a3d8982..be347f1 100644 --- a/R/eml.R +++ b/R/eml.R @@ -718,9 +718,9 @@ eml_validate_attributes <- function(attributes) { #' otherEntity object as currently constructed - it does not add a physical or add attributes. #' However, if these are already in their respective slots, they will be retained. #' -#' @param eml (S4) An EML S4 object. -#' @param otherEntity (S4 / integer) Either an EML otherEntity object or the index -#' of an otherEntity within a ListOfotherEntity. Integer input is recommended. +#' @param doc (list) An EML document. +#' @param otherEntity (integer/"ALL") Either "ALL", to indicate all otherEntities, or the +#' index of the otherEntities to be transformed. #' @param validate_eml (logical) Optional. Whether or not to validate the EML after #' completion. Setting this to `FALSE` reduces execution time by ~50 percent. #' @@ -732,54 +732,45 @@ eml_validate_attributes <- function(attributes) { #' #' @examples #' \dontrun{ -#' eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) +#' doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) #' #' # The following two calls are equivalent: -#' eml <- eml_otherEntity_to_dataTable(eml, eml@@dataset@@otherEntity[[1]]) -#' eml <- eml_otherEntity_to_dataTable(eml, 1) +#' doc <- eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]]) +#' doc <- eml_otherEntity_to_dataTable(doc, 1) #' #' # Integer input is recommended: -#' eml <- eml_otherEntity_to_dataTable(eml, 1) +#' doc <- eml_otherEntity_to_dataTable(doc, 1) #' } -eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) { - ## Argument checks - stopifnot(methods::is(eml, "eml")) - stopifnot(any(is.numeric(otherEntity), methods::is(otherEntity, "otherEntity"))) - stopifnot(is.logical(validate_eml)) - - ## Handle different inputs for 'otherEntity' - if (is.numeric(otherEntity)) { - index <- otherEntity - otherEntity <- eml@dataset@otherEntity[[index]] - } else { - index <- which_in_eml(eml@dataset@otherEntity, - "entityName", - otherEntity@entityName) - if (length(index) > 1) { - stop("Duplicate 'entityName' found in 'eml@dataset@otherEntity', please use a numeric index (1, 2, etc.) to specify which 'otherEntity' you would like to convert.") - } +eml_otherEntity_to_dataTable <- function(doc, otherEntity_index = "ALL", validate_eml = TRUE) { + stopifnot(methods::is(doc, "emld")) + stopifnot(is.logical(eml_validate(doc))) + stopifnot(is.numeric(otherEntity_index) | otherEntity_index == "ALL") + + if (otherEntity_index == "ALL"){ + index <- seq(1:length(doc$dataset$otherEntity)) } + else index = otherEntity_index - ## convert otherEntity to dataTable - dt <- utils::capture.output(otherEntity) %>% - stringr::str_trim() %>% - stringr::str_replace_all("otherEntity", "dataTable") %>% - paste(sep = "", collapse = "") %>% - EML::read_eml() + otherEntity <- doc$dataset$otherEntity[index] + + ## Set entity type to NULL for converted OEs + for (i in 1:length(index)){ + otherEntity[[i]]$entityType <- NULL + } ## Add dt to bottom of dt list - type <- "dataTable" - slot(eml@dataset, type) <- new(paste0("ListOf", type), c(slot(eml@dataset, type), - new(paste0("ListOf", type), list(dt)))) + dts <- doc$dataset$dataTable + + doc$dataset$dataTable <- c(dts, otherEntity) ## delete otherEntity from list - eml@dataset@otherEntity[[index]] <- NULL + doc$dataset$otherEntity[index] <- NULL ## return eml if (validate_eml == TRUE) { - eml_validate(eml) + eml_validate(doc) } - return(eml) + return(doc) } @@ -788,7 +779,7 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) #' This function returns indices within an EML list that contain an instance where #' `test == TRUE`. See examples for more information. #' -#' @param eml_list (S4/List) An EML list object. +#' @param doc (list) An EML object. #' @param element (character) Element to evaluate. #' @param test (function/character) A function to evaluate (see examples). If test is a character, #' will evaluate if \code{element == test} (see example 1). @@ -802,25 +793,24 @@ eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) #' @examples #' \dontrun{ #' # Question: Which creators have a surName "Smith"? -#' n <- which_in_eml(eml@@dataset@@creator, "surName", "Smith") -#' # Answer: eml@@dataset@@creator[n] +#' n <- which_in_eml(eml$dataset$creator, "surName", "Smith") +#' # Answer: eml$dataset$creator[n] #' #' # Question: Which dataTables have an entityName that begins with "2016" -#' n <- which_in_eml(eml@@dataset@@dataTable, "entityName", function(x) {grepl("^2016", x)}) -#' # Answer: eml@@dataset@@dataTable[n] +#' n <- which_in_eml(eml$dataset$dataTable, "entityName", function(x) {grepl("^2016", x)}) +#' # Answer: eml$dataset$dataTable[n] #' #' # Question: Which attributes in dataTable[[1]] have a numberType "natural"? -#' n <- which_in_eml(eml@@dataset@@dataTable[[1]]@@attributeList@@attribute, "numberType", "natural") -#' # Answer: eml@@dataset@@dataTable[[1]]@@attributeList@@attribute[n] +#' n <- which_in_eml(eml$dataset$dataTable[[1]]$attributeList$attribute, "numberType", "natural") +#' # Answer: eml$dataset$dataTable[[1]]$attributeList$attribute[n] #' #' #' # Question: Which dataTables have at least one attribute with a numberType "natural"? -#' n <- which_in_eml(eml@@dataset@@dataTable, "numberType", function(x) {"natural" %in% x}) -#' # Answer: eml@@dataset@@dataTable[n] +#' n <- which_in_eml(eml$dataset$dataTable, "numberType", function(x) {"natural" %in% x}) +#' # Answer: eml$dataset$dataTable[n] #' } -which_in_eml <- function(eml_list, element, test) { +which_in_eml <- function(doc, element, test) { - stopifnot(isS4(eml_list)) - stopifnot(methods::is(eml_list,"list")) + stopifnot(methods::is(doc, "emld")) stopifnot(is.character(element)) if (is.character(test)) { @@ -831,28 +821,24 @@ which_in_eml <- function(eml_list, element, test) { stopifnot(is.function(test)) } - # Find location - location <- unlist(lapply(seq_along(eml_list), function(i) { - elements_test <- unlist(EML::eml_get(eml_list[[i]], element)) + elements_test <- eml_get(eml_list, element) - if (is.null(elements_test)) { - out <- NULL + if (is.null(elements_test)) { + location <- NULL - } else { - result <- test(elements_test) + } else { + result <- test(elements_test) - if (length(result) > 1) { - stop("Test must only return one value.") + if (length(isTRUE(result)) > 1) { + stop("Test must only return one value.") - } else if (result == TRUE) { - out <- i + } else if (length(isTRUE(result)) == 1){ + location <- which(result == TRUE) - } else { - out <- NULL - } + } else { + location <- NULL } - return(out) - })) + } return(location) } From 027c0aa8e61f7086d9e194c6baf7d8351d4709e3 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 11:27:35 -0800 Subject: [PATCH 204/318] rewriting tests --- tests/testthat/test_eml.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index cebbed7..ad7b863 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -193,41 +193,40 @@ test_that("which_in_eml returns correct locations", { attributeList <- EML::set_attributes(attributes) - dataTable_1 <- new("dataTable", + dataTable_1 <- eml$dataTable( entityName = "2016_data.csv", entityDescription = "2016 data", attributeList = attributeList) dataTable_2 <- dataTable_1 - dataTable_3 <- new("dataTable", + dataTable_3 <- eml$dataTable( entityName = "2015_data.csv", entityDescription = "2016 data", attributeList = attributeList) - creator_1 <- new("creator", - individualName = new("individualName", + creator_1 <- eml$creator( + individualName = eml$individualName( surName = "LAST", givenName = "FIRST")) - creator_2 <- new("creator", - individualName = new("individualName", + creator_2 <- eml$creator( + individualName = eml$individualName( surName = "LAST", givenName = "FIRST_2")) creator_3 <- creator_2 title <- "Title" - dataset <- new("dataset", + dataset <- eml$dataset( title = title, - creator = c(creator_1, creator_2, creator_3), - dataTable = c(dataTable_1, dataTable_2, dataTable_3)) + creator = list(creator_1, creator_2, creator_3), + dataTable = list(dataTable_1, dataTable_2, dataTable_3)) - eml <- new("eml", - dataset = dataset) + doc <- list(dataset = dataset) - expect_equal(c(2,3), which_in_eml(eml@dataset@creator, "givenName", "FIRST_2")) - expect_error(which_in_eml(eml@dataset@dataTable, "attributeName", "length_3")) - expect_equal(c(1,3), which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "attributeName", function(x) {grepl("^length", x)})) + expect_equal(c(2,3), which_in_eml(doc$dataset$creator, "givenName", "FIRST_2")) + expect_error(which_in_eml(doc$dataset$dataTable, "attributeName", "length_3")) + expect_equal(c(1,3), which_in_eml(doc$dataset$dataTable[[1]]$attribute, "attributeName", function(x) {grepl("^length", x)})) }) test_that('eml_set_reference sets a reference', { From 1c1b759f753e3adaaf945bfb3a3e1bbf78569d18 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 12:45:19 -0800 Subject: [PATCH 205/318] publish_update will update EML with new pids --- R/editing.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/editing.R b/R/editing.R index be1647b..0764d7b 100644 --- a/R/editing.R +++ b/R/editing.R @@ -394,15 +394,15 @@ publish_update <- function(mn, if (is.null(metadata_path)) { # Get the metadata doc message("Getting metadata from the MN.") - eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) + doc <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) - } else if (class(metadata_path) == "eml") { + } else if (class(metadata_path)[1] == "emld") { # If an eml object is provided, use it directly after validating if (!eml_validate(metadata_path)) { stop("The EML object is not valid.") } - eml <- metadata_path + doc <- metadata_path } else { # Alternatively, read an edited metadata file from disk if provided @@ -411,7 +411,7 @@ publish_update <- function(mn, } message(paste0("Getting metadata from the path: ", metadata_path, ".")) - eml <- EML::read_eml(metadata_path) + doc <- EML::read_eml(metadata_path) } # get the metadata sysmeta from the node @@ -440,22 +440,22 @@ publish_update <- function(mn, # Update the metadata # Replace packageId - eml@packageId <- new("xml_attribute", metadata_updated_pid) + doc$packageId <- metadata_updated_pid # Replace system if needed - if (eml@system != "https://arcticdata.io") { - eml@system <- new("xml_attribute", "https://arcticdata.io") + if (doc$system != "https://search.dataone.org") { + doc$system <- "https://search.dataone.org" } # Replace access if needed - if (length(eml@access@allow) & (!is.null(metadata_path))) { - eml@access <- new("access") + if (length(doc$access$allow) & (!is.null(metadata_path))) { + doc$access <- eml$access() } # Write out the document to disk. We do this in part because # set_other_entities takes a path to the doc. eml_path <- tempfile() - EML::write_eml(eml, eml_path) + EML::write_eml(doc, eml_path) # Create System Metadata for the updated EML file metadata_updated_sysmeta <- new("SystemMetadata", From 0578ff6d51c5a9d87b5bc575edb6adbdb7b18e09 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 13:11:56 -0800 Subject: [PATCH 206/318] minor update to docs --- R/eml.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index be347f1..9e8ef66 100644 --- a/R/eml.R +++ b/R/eml.R @@ -7,7 +7,7 @@ #' @param pid (character) The PID of the object to create the sub-tree for. #' @param entityType (character) What kind of objects to create from the input. One of "dataTable", #' "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity". -#' @param ... (optional) Additional arguments to be passed to \code{new(entityType, ...)}. +#' @param ... (optional) Additional arguments to be passed to \code{eml$entityType())}. #' #' @return (list) The entity object. #' @@ -77,7 +77,7 @@ pid_to_eml_entity <- function(mn, #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pids (character) The PID of the object to create the sub-tree for. #' -#' @return (list) A list of otherEntity object(s). +#' @return (list) A list of physical object(s). #' #' @export #' From 314be5557154d4e14c52241be5d6ba3696813ad6 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 16:25:40 -0800 Subject: [PATCH 207/318] only change the system if none is given --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index 0764d7b..8556756 100644 --- a/R/editing.R +++ b/R/editing.R @@ -443,7 +443,7 @@ publish_update <- function(mn, doc$packageId <- metadata_updated_pid # Replace system if needed - if (doc$system != "https://search.dataone.org") { + if (is.null(doc$system)) { doc$system <- "https://search.dataone.org" } From 54c191d3a3b5511e3ae9a67a4cd8afcc2443b04f Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 16:26:31 -0800 Subject: [PATCH 208/318] fix argument names and remove unncessary constructor calls --- R/eml.R | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/R/eml.R b/R/eml.R index 9e8ef66..7cd0eaf 100644 --- a/R/eml.R +++ b/R/eml.R @@ -120,21 +120,21 @@ pid_to_eml_physical <- function(mn, pid) { sysmeta_to_eml_physical <- function(sysmeta) { stopifnot(is(sysmeta, "SystemMetadata")) - if (is.na(x@fileName)) { + if (is.na(sysmeta@fileName)) { ob_name <- "NA" } else { - ob_name <- x@fileName + ob_name <- sysmeta@fileName } phys <- set_physical(objectName = ob_name, - size = format(x@size, scientific = FALSE), + size = format(sysmeta@size, scientific = FALSE), sizeUnit = "bytes", - authentication = x@checksum, - authMethod = x@checksumAlgorithm, - url = paste0("https://cn.dataone.org/cn/v2/resolve/", x@identifier)) + authentication = sysmeta@checksum, + authMethod = sysmeta@checksumAlgorithm, + url = paste0("https://cn.dataone.org/cn/v2/resolve/", sysmeta@identifier)) - phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = x@formatId)) + phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = sysmeta@formatId)) phys } @@ -230,11 +230,10 @@ eml_party <- function(type="associatedParty", "You must specify at least one of sur_name, organization, or position to make a valid creator") } - party <- eml[[type]]() # Individual Name if (!is.null(sur_name)) { - party$individualName <- list(eml_individual_name(given_names, sur_name)) + party$individualName <- eml_individual_name(given_names, sur_name) } # Organization Name @@ -255,7 +254,7 @@ eml_party <- function(type="associatedParty", # Address if (!is.null(address)) { # This crams the entire address into the delivery point...not ideal - party$address <- eml$address(address) + party$address <- address } # Phone @@ -270,7 +269,6 @@ eml_party <- function(type="associatedParty", warning(paste0("The provided `userId` of '", userId, "' does not look like an ORCID and the `userId` argument assumes the given `userId` is an ORCID. ORCIDs should be passed in like https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.")) } - party$userId <- eml$userId() party$userId$userId <- userId party$userId$directory = "https://orcid.org" } @@ -587,7 +585,7 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) #' @examples #' # Create a new EML document #' library(EML) -#' doc <- eml() +#' doc <- list() #' #' # Set an abstract with a single paragraph #' set_abstract(doc, list("Test abstract...")) @@ -599,9 +597,9 @@ set_abstract <- function(doc, text) { # stopifnot(is(doc, "eml")) if (length(text) == 1) { - doc$dataset$abstract <- eml_abstract(text) + doc$dataset$abstract <- eml$abstract(text) } else if (length(text) > 1) { - doc$dataset$abstract <- eml_abstract(text) + doc$dataset$abstract <- eml$abstract(text) } doc @@ -719,8 +717,7 @@ eml_validate_attributes <- function(attributes) { #' However, if these are already in their respective slots, they will be retained. #' #' @param doc (list) An EML document. -#' @param otherEntity (integer/"ALL") Either "ALL", to indicate all otherEntities, or the -#' index of the otherEntities to be transformed. +#' @param otherEntity (integer/) The indicies of the otherEntities to be transformed. #' @param validate_eml (logical) Optional. Whether or not to validate the EML after #' completion. Setting this to `FALSE` reduces execution time by ~50 percent. #' @@ -741,15 +738,10 @@ eml_validate_attributes <- function(attributes) { #' # Integer input is recommended: #' doc <- eml_otherEntity_to_dataTable(doc, 1) #' } -eml_otherEntity_to_dataTable <- function(doc, otherEntity_index = "ALL", validate_eml = TRUE) { +eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { stopifnot(methods::is(doc, "emld")) stopifnot(is.logical(eml_validate(doc))) - stopifnot(is.numeric(otherEntity_index) | otherEntity_index == "ALL") - - if (otherEntity_index == "ALL"){ - index <- seq(1:length(doc$dataset$otherEntity)) - } - else index = otherEntity_index + stopifnot(is.numeric(index)) otherEntity <- doc$dataset$otherEntity[index] @@ -760,8 +752,14 @@ eml_otherEntity_to_dataTable <- function(doc, otherEntity_index = "ALL", validat ## Add dt to bottom of dt list dts <- doc$dataset$dataTable + if (length(dts > 0)){ + doc$dataset$dataTable <- list(dts, otherEntity) + } + else{ + doc$dataset$dataTable <- otherEntity + } + - doc$dataset$dataTable <- c(dts, otherEntity) ## delete otherEntity from list doc$dataset$otherEntity[index] <- NULL From b8fe9f739cde4da74137952c7c07c483cc6bee97 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 29 Jan 2019 16:27:29 -0800 Subject: [PATCH 209/318] documentation updates --- DESCRIPTION | 2 +- NAMESPACE | 6 ---- man/add_methods_step.Rd | 28 --------------- man/clear_methods.Rd | 23 ------------- man/eml_abstract.Rd | 7 ++-- man/eml_add_entities.Rd | 53 ----------------------------- man/eml_address.Rd | 3 ++ man/eml_geographic_coverage.Rd | 2 ++ man/eml_otherEntity_to_dataTable.Rd | 17 +++++---- man/eml_party.Rd | 4 +-- man/eml_project.Rd | 12 +++---- man/list_submissions.Rd | 36 +++++++++++++++++++- man/pid_to_eml_entity.Rd | 6 ++-- man/pid_to_eml_physical.Rd | 9 +++-- man/set_abstract.Rd | 6 ++-- man/sysmeta_to_eml_physical.Rd | 7 ++-- man/which_in_eml.Rd | 20 +++++------ tests/testthat/test_eml.R | 28 +++++++-------- 18 files changed, 98 insertions(+), 171 deletions(-) delete mode 100644 man/add_methods_step.Rd delete mode 100644 man/clear_methods.Rd delete mode 100644 man/eml_add_entities.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f64a852..35beec3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,6 +46,6 @@ Suggests: testthat, xslt, yaml -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 8e3c3b2..69ba3d3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,5 @@ # Generated by roxygen2: do not edit by hand -export(add_methods_step) -export(clear_methods) export(convert_iso_to_eml) export(create_dummy_attributes_dataframe) export(create_dummy_enumeratedDomain_dataframe) @@ -11,13 +9,9 @@ export(create_dummy_package) export(create_dummy_package_full) export(create_dummy_parent_package) export(create_resource_map) -export(eml_abstract) -export(eml_add_entities) -export(eml_address) export(eml_associated_party) export(eml_contact) export(eml_creator) -export(eml_geographic_coverage) export(eml_individual_name) export(eml_metadata_provider) export(eml_otherEntity_to_dataTable) diff --git a/man/add_methods_step.Rd b/man/add_methods_step.Rd deleted file mode 100644 index 3db34c1..0000000 --- a/man/add_methods_step.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{add_methods_step} -\alias{add_methods_step} -\title{Add a methods step} -\usage{ -add_methods_step(doc, title, description) -} -\arguments{ -\item{doc}{(eml) The EML document to add the method step to.} - -\item{title}{(character) The title of the method step.} - -\item{description}{(character) The description of the method.} -} -\value{ -(eml) The modified EML document. -} -\description{ -Add a methods step to an EML document. -} -\examples{ -\dontrun{ -eml <- read_eml("~/Documents/metadata.xml") -eml <- add_methods_step(eml, "Field Sampling", "Samples were -collected using a niskin water sampler.") -} -} diff --git a/man/clear_methods.Rd b/man/clear_methods.Rd deleted file mode 100644 index 2dc4b08..0000000 --- a/man/clear_methods.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{clear_methods} -\alias{clear_methods} -\title{Clear all methods} -\usage{ -clear_methods(doc) -} -\arguments{ -\item{doc}{(eml) The document to clear methods from.} -} -\value{ -(eml) The modified EML document. -} -\description{ -Clear all methods from an EML document. -} -\examples{ -\dontrun{ -eml <- read_eml("~/Documents/metadata.xml") -eml <- clear_methods(eml) -} -} diff --git a/man/eml_abstract.Rd b/man/eml_abstract.Rd index e77b71e..cc49f92 100644 --- a/man/eml_abstract.Rd +++ b/man/eml_abstract.Rd @@ -8,7 +8,7 @@ eml_abstract(text) } \arguments{ \item{text}{(character) Paragraphs of text with one paragraph per element in the -character vector.} +character vector, constructed using \code{list}} } \value{ (abstract) An EML abstract. @@ -16,10 +16,13 @@ character vector.} \description{ Create an EML abstract. } +\details{ +Note that eml$abstract() provides the same functionality. +} \examples{ # Set an abstract with a single paragraph eml_abstract("Test abstract...") # Or one with multiple paragraphs -eml_abstract(c("First para...", "second para...")) +eml_abstract(list("First para...", "second para...")) } diff --git a/man/eml_add_entities.Rd b/man/eml_add_entities.Rd deleted file mode 100644 index 227c182..0000000 --- a/man/eml_add_entities.Rd +++ /dev/null @@ -1,53 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{eml_add_entities} -\alias{eml_add_entities} -\title{Add new entity elements to an EML document from a table} -\usage{ -eml_add_entities(doc, entities, - resolve_base = "https://cn.dataone.org/cn/v2/resolve/") -} -\arguments{ -\item{doc}{(eml) An EML document.} - -\item{entities}{(data.frame) A data.frame with columns type, path, pid, and -format_id.} - -\item{resolve_base}{(character) Optional. Specify a DataONE CN resolve base -URI which will be used for serializing download URLs into the EML. Most users -should not override the default value.} -} -\value{ -(eml) The modified EML document. -} -\description{ -Add new entity elements to an EML document from a table. -} -\examples{ -# Create entities from files on disk -\dontrun{ - types <- c("dataTable") - paths <- list.files(., full.names = TRUE) # Get full paths to some files - pids <- vapply(paths, function(x) { - paste0("urn:uuid:", uuid::UUIDgenerate()) - }, "") # Generate some UUID PIDs -Try to guess format IDs, you should check this afterwards - format_ids <- guess_format_id(paths) - - entity_df <- data.frame(type = types, - path = paths, - pid = pids, - format_id = format_ids, - stringsAsFactors = FALSE) - - doc <- new("eml") - doc <- eml_add_entities(doc, entity_df) -} - -# Read in a CSV containing the info about files on disk -\dontrun{ - entity_df <- read.csv("./my_entities.csv", stringsAsFactors = FALSE) - doc <- new("eml") - doc <- eml_add_entities(doc, entity_df) -} -} diff --git a/man/eml_address.Rd b/man/eml_address.Rd index b10652a..0dbf1f5 100644 --- a/man/eml_address.Rd +++ b/man/eml_address.Rd @@ -21,6 +21,9 @@ eml_address(delivery_points, city, administrative_area, postal_code) \description{ A simple way to create an EML address element. } +\details{ +Note that EML::eml$address() provides the same functionality +} \examples{ NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") } diff --git a/man/eml_geographic_coverage.Rd b/man/eml_geographic_coverage.Rd index b44aafa..efe825f 100644 --- a/man/eml_geographic_coverage.Rd +++ b/man/eml_geographic_coverage.Rd @@ -27,4 +27,6 @@ A simple way to create an EML geographicCoverage section. For a bounding box, all coordinates should be unique. For a single point, the North and South bounding coordinates should be the same and the East and West bounding coordinates should be the same. + +Note that EML::set_coverage() provides the same (and more) functionality } diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd index 875886a..10cc7fa 100644 --- a/man/eml_otherEntity_to_dataTable.Rd +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -4,16 +4,15 @@ \alias{eml_otherEntity_to_dataTable} \title{Convert otherEntities to dataTables} \usage{ -eml_otherEntity_to_dataTable(eml, otherEntity, validate_eml = TRUE) +eml_otherEntity_to_dataTable(doc, index, validate_eml = TRUE) } \arguments{ -\item{eml}{(S4) An EML S4 object.} - -\item{otherEntity}{(S4 / integer) Either an EML otherEntity object or the index -of an otherEntity within a ListOfotherEntity. Integer input is recommended.} +\item{doc}{(list) An EML document.} \item{validate_eml}{(logical) Optional. Whether or not to validate the EML after completion. Setting this to \code{FALSE} reduces execution time by ~50 percent.} + +\item{otherEntity}{(integer/) The indicies of the otherEntities to be transformed.} } \description{ Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an @@ -22,14 +21,14 @@ However, if these are already in their respective slots, they will be retained. } \examples{ \dontrun{ -eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) +doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) # The following two calls are equivalent: -eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]) -eml <- eml_otherEntity_to_dataTable(eml, 1) +doc <- eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]]) +doc <- eml_otherEntity_to_dataTable(doc, 1) # Integer input is recommended: -eml <- eml_otherEntity_to_dataTable(eml, 1) +doc <- eml_otherEntity_to_dataTable(doc, 1) } } \author{ diff --git a/man/eml_party.Rd b/man/eml_party.Rd index bc8cef3..67a0af7 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -44,7 +44,7 @@ The \code{userId} argument assumes an ORCID so be sure to adjust for that. \dontrun{ eml_party("creator", "Test", "User") eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") -eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), - c("Data Scientist", "Programmer")) +eml_party("creator", list("Dominic", "'Dom'"), "Mullen", list("NCEAS", "UCSB"), + list("Data Scientist", "Programmer")) } } diff --git a/man/eml_project.Rd b/man/eml_project.Rd index 1a0831e..c8a7d38 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -9,15 +9,15 @@ eml_project(title, personnelList, abstract = NULL, funding = NULL, relatedProject = NULL) } \arguments{ -\item{title}{(character) Title of the project (Required). May have multiple titles.} +\item{title}{(character) Title of the project (Required). May have multiple titles constructed using \code{list}.} \item{personnelList}{(list of personnel) Personnel involved with the project.} -\item{abstract}{(character) Project abstract. Can pass as a character vector +\item{abstract}{(character) Project abstract. Can pass as a list for separate paragraphs.} \item{funding}{(character) Funding sources for the project such as grant and -contract numbers. Can pass as a character vector for separate paragraphs.} +contract numbers. Can pass as a list for separate paragraphs.} \item{studyAreaDescription}{(studyAreaDescription)} @@ -37,8 +37,8 @@ fully fleshed out. Need to pass these objects in directly if you want to use them. } \examples{ -proj <- eml_project(c("Some title", "A second title if needed"), - c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), - c("Abstract paragraph 1", "Abstract paragraph 2"), +proj <- eml_project(list("Some title", "A second title if needed"), + list(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), + list("Abstract paragraph 1", "Abstract paragraph 2"), "Funding Agency: Award Number 12345") } diff --git a/man/list_submissions.Rd b/man/list_submissions.Rd index 84145ae..1558a77 100644 --- a/man/list_submissions.Rd +++ b/man/list_submissions.Rd @@ -2,13 +2,28 @@ % Please edit documentation in R/helpers.R \name{list_submissions} \alias{list_submissions} -\title{List recent submissions to a DataOne Member Node} +\title{List recent submissions to a DataONE Member Node} \usage{ +list_submissions(mn, from = Sys.Date(), to = Sys.Date(), + formatType = "*", + whitelist = "https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org") + list_submissions(mn, from = Sys.Date(), to = Sys.Date(), formatType = "*", whitelist = "https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org") } \arguments{ +\item{mn}{(MNode) A DataONE Member Node.} + +\item{from}{(character) The date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}.} + +\item{to}{(character) The date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}.} + +\item{formatType}{(character) The format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *.} + +\item{whitelist}{(character) An xml list of admin orcid Identifiers. +Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org.} + \item{mn}{(MNode) A DataOne Member Node} \item{from}{(character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}} @@ -19,7 +34,13 @@ list_submissions(mn, from = Sys.Date(), to = Sys.Date(), \item{whitelist}{(character) An xml list of admin orcid Identifiers. Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} } +\value{ +(data.frame) +} \description{ +List recent submissions to a DataONE Member Node from all submitters not present +in the administrator whitelist: \url{https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} + List recent submissions to a DataOne Member Node from all submitters not present in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org } @@ -28,6 +49,17 @@ in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic- cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +# Return all submitted objects in the past month for the 'adc' node: +View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*')) + +# Return all submitted objects except for one user +View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*'), + whitelist = 'http://orcid.org/0000-0002-2561-5840') +} +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') + View(arcticdatautils::list_submissions(adc, '2018-10-01', '2018-10-07')) # Return all submitted objects in the past month for the 'adc' node: @@ -42,5 +74,7 @@ View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*'), } } \author{ +Dominic Mullen dmullen17@gmail.com + Dominic Mullen dmullen17@gmail.com } diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd index 252ec24..0eff471 100644 --- a/man/pid_to_eml_entity.Rd +++ b/man/pid_to_eml_entity.Rd @@ -4,17 +4,17 @@ \alias{pid_to_eml_entity} \title{Create EML entity from a DataONE PID} \usage{ -pid_to_eml_entity(mn, pid, entityType = "otherEntity", ...) +pid_to_eml_entity(mn, pid, entity_type = "otherEntity", ...) } \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} \item{pid}{(character) The PID of the object to create the sub-tree for.} +\item{...}{(optional) Additional arguments to be passed to \code{eml$entityType())}.} + \item{entityType}{(character) What kind of objects to create from the input. One of "dataTable", "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".} - -\item{...}{(optional) Additional arguments to be passed to \code{new(entityType, ...)}.} } \value{ (list) The entity object. diff --git a/man/pid_to_eml_physical.Rd b/man/pid_to_eml_physical.Rd index 5ab4946..1389c94 100644 --- a/man/pid_to_eml_physical.Rd +++ b/man/pid_to_eml_physical.Rd @@ -4,7 +4,7 @@ \alias{pid_to_eml_physical} \title{Create EML physical objects for the given set of PIDs} \usage{ -pid_to_eml_physical(mn, pids) +pid_to_eml_physical(mn, pid) } \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} @@ -12,7 +12,7 @@ pid_to_eml_physical(mn, pids) \item{pids}{(character) The PID of the object to create the sub-tree for.} } \value{ -(list) A list of otherEntity object(s). +(list) A list of physical object(s). } \description{ This is a wrapper around \code{\link[=sysmeta_to_eml_physical]{sysmeta_to_eml_physical()}} which handles the task of @@ -20,8 +20,7 @@ creating the EML physical. } \examples{ \dontrun{ -# Generate EML physical objects for all the data in a package -pkg <- get_package(mn, pid) -pid_to_eml_physical(mn, pkg$data) +# Generate EML physical sections for an object in a data package +pid_to_eml_physical(mn, pid) } } diff --git a/man/set_abstract.Rd b/man/set_abstract.Rd index cb30b5b..ea5569d 100644 --- a/man/set_abstract.Rd +++ b/man/set_abstract.Rd @@ -23,11 +23,11 @@ Set the abstract for an EML document. \examples{ # Create a new EML document library(EML) -doc <- new("eml") +doc <- list() # Set an abstract with a single paragraph -set_abstract(doc, c("Test abstract...")) +set_abstract(doc, list("Test abstract...")) # Or one with multiple paragraphs -set_abstract(doc, c("First para...", "second para...")) +set_abstract(doc, list("First para...", "second para...")) } diff --git a/man/sysmeta_to_eml_physical.Rd b/man/sysmeta_to_eml_physical.Rd index 61633da..ea6bd08 100644 --- a/man/sysmeta_to_eml_physical.Rd +++ b/man/sysmeta_to_eml_physical.Rd @@ -19,11 +19,8 @@ of the DataONE v2 resolve service for the PID. } \examples{ \dontrun{ -# Generate EML physical objects for all the data in a package -pkg <- get_package(mn, pid) -sm <- lapply(pkg$data, function(pid) { - getSystemMetadata(mn, pid) -}) +# Generate EML physical object from a system metadata object +sm <- getSystemMetadata(mn, pid) sysmeta_to_eml_physical(sm) } } diff --git a/man/which_in_eml.Rd b/man/which_in_eml.Rd index d1efe04..85ab3d2 100644 --- a/man/which_in_eml.Rd +++ b/man/which_in_eml.Rd @@ -4,10 +4,10 @@ \alias{which_in_eml} \title{Search through EMLs} \usage{ -which_in_eml(eml_list, element, test) +which_in_eml(doc, element, test) } \arguments{ -\item{eml_list}{(S4/List) An EML list object.} +\item{doc}{(list) An EML object.} \item{element}{(character) Element to evaluate.} @@ -21,20 +21,20 @@ This function returns indices within an EML list that contain an instance where \examples{ \dontrun{ # Question: Which creators have a surName "Smith"? -n <- which_in_eml(eml@dataset@creator, "surName", "Smith") -# Answer: eml@dataset@creator[n] +n <- which_in_eml(eml$dataset$creator, "surName", "Smith") +# Answer: eml$dataset$creator[n] # Question: Which dataTables have an entityName that begins with "2016" -n <- which_in_eml(eml@dataset@dataTable, "entityName", function(x) {grepl("^2016", x)}) -# Answer: eml@dataset@dataTable[n] +n <- which_in_eml(eml$dataset$dataTable, "entityName", function(x) {grepl("^2016", x)}) +# Answer: eml$dataset$dataTable[n] # Question: Which attributes in dataTable[[1]] have a numberType "natural"? -n <- which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "numberType", "natural") -# Answer: eml@dataset@dataTable[[1]]@attributeList@attribute[n] +n <- which_in_eml(eml$dataset$dataTable[[1]]$attributeList$attribute, "numberType", "natural") +# Answer: eml$dataset$dataTable[[1]]$attributeList$attribute[n] #' # Question: Which dataTables have at least one attribute with a numberType "natural"? -n <- which_in_eml(eml@dataset@dataTable, "numberType", function(x) {"natural" \%in\% x}) -# Answer: eml@dataset@dataTable[n] +n <- which_in_eml(eml$dataset$dataTable, "numberType", function(x) {"natural" \%in\% x}) +# Answer: eml$dataset$dataTable[n] } } \author{ diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index ad7b863..3f8a023 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -3,9 +3,9 @@ context("EML") mn <- env_load()$mn test_that("a creator can be created", { - creator <- eml_creator("test", "user") + creator <- eml_creator("tester", "user") - expect_equal(creator$individualName$givenName, "test") + expect_equal(creator$individualName$givenName, "tester") expect_equal(creator$individualName$surName, "user") }) @@ -115,19 +115,19 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { skip("No token set. Skipping test.") } - eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) # incorrect inputs expect_error(eml_otherEntity_to_dataTable("dummy input")) - expect_error(eml_otherEntity_to_dataTable(eml, "1")) + expect_error(eml_otherEntity_to_dataTable(doc, "1")) # subscripts out of bounds - expect_error(eml_otherEntity_to_dataTable(eml, eml$dataset$otherEntity[[2]])) - expect_error(eml_otherEntity_to_dataTable(eml, 2)) + expect_error(eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[2]])) + expect_error(eml_otherEntity_to_dataTable(doc, 2)) # Duplicate entityNames found - eml$dataset$otherEntity[[2]] <- eml$dataset$otherEntity[[1]] - expect_error(eml_otherEntity_to_dataTable(eml, eml$dataset$otherEntity[[1]])) + doc$dataset$otherEntity[[2]] <- doc$dataset$otherEntity[[1]] + expect_error(eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]])) }) @@ -136,17 +136,17 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { skip("No token set. Skipping test.") } - eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) - otherEntity <- eml$dataset$otherEntity[[1]] + doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + otherEntity <- doc$dataset$otherEntity[[1]] - eml <- eml_otherEntity_to_dataTable(eml, eml$dataset$otherEntity[[1]]) + doc <- eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]]) # test that otherEntity was removed - expect_length(eml$dataset$otherEntity, 0) + expect_length(doc$dataset$otherEntity, 0) # test that dataTable was added - expect_equal(otherEntity$entityName, eml$dataset$dataTable[[1]]$entityName) - expect_equivalent(otherEntity$physical, eml$dataset$dataTable[[1]]$physical) + expect_equal(otherEntity$entityName, doc$dataset$dataTable[[1]]$entityName) + expect_equivalent(otherEntity$physical, doc$dataset$dataTable[[1]]$physical) }) test_that("which_in_eml returns correct locations", { From 1e1f507720070bb0ee00ec69b4b826a56436f14a Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 30 Jan 2019 14:09:49 -0800 Subject: [PATCH 210/318] update references functions and remove unnecessary function --- R/eml.R | 77 +++++++++++++++++---------------------------------------- 1 file changed, 22 insertions(+), 55 deletions(-) diff --git a/R/eml.R b/R/eml.R index 7cd0eaf..453bf90 100644 --- a/R/eml.R +++ b/R/eml.R @@ -230,10 +230,11 @@ eml_party <- function(type="associatedParty", "You must specify at least one of sur_name, organization, or position to make a valid creator") } + party <- eml[[type]]() # Individual Name if (!is.null(sur_name)) { - party$individualName <- eml_individual_name(given_names, sur_name) + party$individualName <- list(givenName = given_names, surName = sur_name) } # Organization Name @@ -281,12 +282,11 @@ eml_party <- function(type="associatedParty", paste0("Setting a role is only valid on an associatedParty or personnel, not a ", type, ".")) } - # If type is personnel, role needs to be ListOfrole, otherwise just role - if (type == "personnel") { - party$role <- role - } + party$role <- role } + + party } @@ -391,39 +391,6 @@ eml_personnel <- function(role = NULL, ...) { eml_party("personnel", role = role, ...) } - -#' Create an EML individualName section -#' -#' Create an EML individualName section. -#' -#' @param given_names (character) One or more given names. -#' @param sur_name (character) A sur (last) name. -#' -#' @return (individualName) The new individualName section. -#' -#' @export -#' -#' @examples -#' eml_individual_name("some", "user") -eml_individual_name <- function(given_names=NULL, sur_name) { - stopifnot(is.character(sur_name) && nchar(sur_name) > 0) - - # Create <individualName> - indiv_name <- eml$individualName() - - if (!is.null(given_names)) { - stopifnot(all(sapply(given_names, is.character))) - stopifnot(all(lengths(given_names) > 0)) - - indiv_name$givenName = given_names - } - - indiv_name$surName <- sur_name - - indiv_name -} - - #' Create an EML project section #' #' Create an EML project section. @@ -878,12 +845,12 @@ which_in_eml <- function(doc, element, test) { #' eml@@dataset@@dataTable #' } eml_set_reference <- function(element_to_reference, element_to_replace) { - if (length(element_to_reference@id) == 0) { + if (length(element_to_reference$id) == 0) { stop('No id detected at element_to_reference@id. Please add an id in order to use references.') } - id <- element_to_reference@id[1] + id <- element_to_reference$id[1] class <- class(element_to_replace)[1] - element_to_replace <- new(class, reference = id) + element_to_replace <- list(references = id) return(element_to_replace) } @@ -909,19 +876,19 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' \dontrun{ #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') -#' eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) -#' atts <- EML::set_attributes(EML::get_attributes(eml@@dataset@@dataTable[[1]]@@attributeList)$attributes) +#' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' atts <- EML::set_attributes(EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) #' #' eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') #' } -eml_set_shared_attributes <- function(eml, attributeList = NULL, type = 'dataTable') { - stopifnot(methods::is(eml, 'eml')) - if (!is.null(attributeList)) { - stopifnot(methods::is(attributeList, 'attributeList')) - } - stopifnot(type %in% c('dataTable', 'otherEntity')) - - x <- slot(eml@dataset, type) +eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTable') { + stopifnot(methods::is(doc, 'emld')) + #if (!is.null(attributeList)) { + # stopifnot(methods::is(attributeList, 'attributeList')) + #} + #stopifnot(type %in% c('dataTable', 'otherEntity')) + + x <- doc$dataset[[type]] n <- length(x) if (n <= 1) { stop('1 or fewer entities') # add message @@ -931,12 +898,12 @@ eml_set_shared_attributes <- function(eml, attributeList = NULL, type = 'dataTab if (!is.null(attributeList)) { x[[1]]@attributeList <- attributeList } - x[[1]]@attributeList@id <- new('xml_attribute', uuid::UUIDgenerate(TRUE)) + x[[1]]$attributeList$id <- stringi::stri_rand_strings(1, length = 10) # generate random identifier # Apply references to all other elements for (i in 2:n) { - x[[i]]@attributeList <- eml_set_reference(x[[1]]@attributeList, x[[i]]@attributeList) + x[[i]]$attributeList <- eml_set_reference(x[[1]]$attributeList, x[[i]]$attributeList) } - slot(eml@dataset, type) <- x - return(eml) + doc$dataset[[type]] <- x + return(doc) } From ec2b2c0629eab6a301f7ddfe7c890519261bfeef Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 30 Jan 2019 15:54:51 -0800 Subject: [PATCH 211/318] documentation updates --- DESCRIPTION | 3 ++- NAMESPACE | 2 +- man/eml_abstract.Rd | 2 ++ man/eml_individual_name.Rd | 22 ---------------------- man/eml_set_reference.Rd | 16 ++++++++-------- man/eml_set_shared_attributes.Rd | 10 +++++----- 6 files changed, 18 insertions(+), 37 deletions(-) delete mode 100644 man/eml_individual_name.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 35beec3..d05d501 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Imports: dataone, datapack, digest, - EML, + EML (>= 1.99.0), httr, magrittr, methods, @@ -34,6 +34,7 @@ Imports: uuid, xml2, XML +Remotes: ropensci/EML Suggests: dplyr, humaniformat, diff --git a/NAMESPACE b/NAMESPACE index 69ba3d3..541c8a9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,10 +9,10 @@ export(create_dummy_package) export(create_dummy_package_full) export(create_dummy_parent_package) export(create_resource_map) +export(eml_address) export(eml_associated_party) export(eml_contact) export(eml_creator) -export(eml_individual_name) export(eml_metadata_provider) export(eml_otherEntity_to_dataTable) export(eml_party) diff --git a/man/eml_abstract.Rd b/man/eml_abstract.Rd index cc49f92..9d0b868 100644 --- a/man/eml_abstract.Rd +++ b/man/eml_abstract.Rd @@ -20,9 +20,11 @@ Create an EML abstract. Note that eml$abstract() provides the same functionality. } \examples{ +\dontrun{ # Set an abstract with a single paragraph eml_abstract("Test abstract...") # Or one with multiple paragraphs eml_abstract(list("First para...", "second para...")) } +} diff --git a/man/eml_individual_name.Rd b/man/eml_individual_name.Rd deleted file mode 100644 index 409bdac..0000000 --- a/man/eml_individual_name.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{eml_individual_name} -\alias{eml_individual_name} -\title{Create an EML individualName section} -\usage{ -eml_individual_name(given_names = NULL, sur_name) -} -\arguments{ -\item{given_names}{(character) One or more given names.} - -\item{sur_name}{(character) A sur (last) name.} -} -\value{ -(individualName) The new individualName section. -} -\description{ -Create an EML individualName section. -} -\examples{ -eml_individual_name("some", "user") -} diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd index 4082eaa..07615dc 100644 --- a/man/eml_set_reference.Rd +++ b/man/eml_set_reference.Rd @@ -19,24 +19,24 @@ using a reference to \code{element_to_reference}. \dontrun{ cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') -eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) # Set the first contact as a reference to the first creator -eml@dataset@contact[[1]] <- eml_set_reference(eml@dataset@creator[[1]], -eml@dataset@contact[[1]]) +doc$dataset$contact[[1]] <- eml_set_reference(doc$dataset$creator[[1]], +doc$dataset$contact[[1]]) # This is also useful when we want to set references to a subset of 'dataTable' or 'otherEntity' objects # Add a few more objects first to illustrate the use: -eml@dataset@dataTable[[3]] <- eml@dataset@dataTable[[1]] -eml@dataset@dataTable[[4]] <- eml@dataset@dataTable[[1]] +doc$dataset$dataTable[[3]] <- doc$dataset$dataTable[[1]] +doc$dataset$dataTable[[4]] <- doc$dataset$dataTable[[1]] # Add references to the second and third elements only (not the 4th): for (i in 2:3) { - eml@dataset@dataTable[[i]]@attributeList <- eml_set_reference(eml@dataset@dataTable[[1]]@attributeList, - eml@dataset@dataTable[[i]]@attributeList) + doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference(doc$dataset$dataTable[[1]]$attributeList, + doc$dataset$dataTable[[i]]$attributeList) } # If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. -eml@dataset@dataTable +doc$dataset$dataTable } } \author{ diff --git a/man/eml_set_shared_attributes.Rd b/man/eml_set_shared_attributes.Rd index 6b0265c..0350fd7 100644 --- a/man/eml_set_shared_attributes.Rd +++ b/man/eml_set_shared_attributes.Rd @@ -4,17 +4,17 @@ \alias{eml_set_shared_attributes} \title{Set shared attribute references} \usage{ -eml_set_shared_attributes(eml, attributeList = NULL, +eml_set_shared_attributes(doc, attributeList = NULL, type = "dataTable") } \arguments{ -\item{eml}{(eml) An EML object.} - \item{attributeList}{(attributeList) Optional. An EML attributeList object. If not provided then it will default to the attributeList of the first \code{type} element.} \item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList objects with references. Defaults to 'dataTable'.} + +\item{eml}{(eml) An EML object.} } \value{ (eml) The modified EML document. @@ -27,8 +27,8 @@ selected and creates references for all remaining objects of equivalent \code{ty \dontrun{ cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') -eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) -atts <- EML::set_attributes(EML::get_attributes(eml@dataset@dataTable[[1]]@attributeList)$attributes) +doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +atts <- EML::set_attributes(EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') } From 2b5b9c6a2817a205739df1ca1daf3afde8863fc3 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 30 Jan 2019 15:55:03 -0800 Subject: [PATCH 212/318] update tests --- tests/testthat/test_eml.R | 44 ++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 3f8a023..464c18d 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -12,7 +12,6 @@ test_that("a creator can be created", { test_that("a contact can be created", { contact <- eml_contact("test", "user") - expect_is(contact, "contact") expect_equal(contact$individualName$givenName, "test") expect_equal(contact$individualName$surName, "user") }) @@ -20,7 +19,6 @@ test_that("a contact can be created", { test_that("a personnel can be created", { personnel <- eml_personnel(given_names = "test", sur_name = "user", role = "principalInvestigator", userId = "https://orcid.org/WWWW-XXXX-YYYY-ZZZZ") - expect_is(personnel, "personnel") expect_equal(personnel$individualName$givenName, "test") expect_equal(personnel$individualName$surName, "user") expect_equal(personnel$role, "principalInvestigator") @@ -34,12 +32,12 @@ test_that("a project can be created", { "This is a test abstract", "I won an award, yay") - expect_equal(project$title[[1]], "some title") - expect_equal(project$personnel[[1]]$individualName[[1]]$givenName[[1]], "A") - expect_equal(project$personnel[[1]]$individualName[[1]]$surName, "User") - expect_equal(project$personnel[[1]]$organizationName[[1]], "NCEAS") - expect_equal(project$personnel[[1]]$role[[1]], "originator") - expect_equal(project$funding$para[[1]], "I won an award, yay") + expect_equal(project$title, "some title") + expect_equal(project$personnel[[1]]$individualName$givenName, "A") + expect_equal(project$personnel[[1]]$individualName$surName, "User") + expect_equal(project$personnel[[1]]$organizationName, "NCEAS") + expect_equal(project$personnel[[1]]$role, "originator") + expect_equal(project$funding$para, "I won an award, yay") }) test_that("a project can be created with multiple personnel, an abstract can be created with multiple paragraphs, awards with multiple awards", { @@ -51,10 +49,10 @@ test_that("a project can be created with multiple personnel, an abstract can be list("This is a test abstract", "This is the second paragraph"), list("I won an award, yay", "I won a second award, wow")) - expect_equal(project$title[[1]], "some title") - expect_equal(project$personnel[[2]]$individualName[[1]]$givenName[[1]], "Testy") - expect_equal(project$personnel[[2]]$individualName[[1]]$surName, "Mactesterson") - expect_equal(project$personnel[[2]]$organizationName[[1]], "A Test Org") + expect_equal(project$title, "some title") + expect_equal(project$personnel[[2]]$individualName$givenName, "Testy") + expect_equal(project$personnel[[2]]$individualName$surName, "Mactesterson") + expect_equal(project$personnel[[2]]$organizationName, "A Test Org") expect_equal(project$personnel[[2]]$role[[2]], "author") expect_equal(project$abstract$para[[2]], "This is the second paragraph") expect_equal(project$funding$para[[2]], "I won a second award, wow") @@ -233,13 +231,13 @@ test_that('eml_set_reference sets a reference', { eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") doc <- EML::read_eml(eml_path) - expect_error(eml_set_reference(doc@dataset@creator[[1]], doc@dataset@contact[[1]])) + expect_error(eml_set_reference(doc$dataset$creator, doc$dataset$contact)) # Add id to use references - doc@dataset@creator[[1]]@id <- new('xml_attribute', 'creator_id') - doc@dataset@contact[[1]] <- eml_set_reference(doc@dataset@creator[[1]], doc@dataset@contact[[1]]) + doc$dataset$creator$id <- 'creator_id' + doc$dataset$contact <- eml_set_reference(doc$dataset$creator, doc$dataset$contact) - expect_equal(doc@dataset@creator[[1]]@id[1], doc@dataset@contact[[1]]@references[1]) + expect_equal(doc$dataset$creator$id, doc$dataset$contact$references) expect_true(EML::eml_validate(doc)) }) @@ -253,16 +251,15 @@ test_that('eml_set_shared_attributes creates shared attribute references', { stringsAsFactors = FALSE) attributeList <- EML::set_attributes(attributes) - dataTable_1 <- new('dataTable', - entityName = '2016_data.csv', + dataTable_1 <- list(entityName = '2016_data.csv', entityDescription = '2016 data', attributeList = attributeList) dataTable_2 <- dataTable_1 - doc@dataset@dataTable <- c(dataTable_1, dataTable_2) + doc$dataset$dataTable <- list(dataTable_1, dataTable_2) doc <- eml_set_shared_attributes(doc) - expect_equal(doc@dataset@dataTable[[1]]@id[1], doc@dataset@dataTable[[2]]@references[1]) + expect_equal(doc$dataset$dataTable[[1]]$attributeList$id, doc$dataset$dataTable[[2]]$attributeList$references) expect_true(EML::eml_validate(doc)) }) @@ -270,8 +267,7 @@ test_that('eml_party creates multiple givenName, organizationName, and positionN creator <- eml_party('creator', c('John', 'and Jack'), 'Smith', c('NCEAS', 'UCSB'), c('Programmers', 'brothers')) - expect_is(creator, "creator") - expect_equal(unlist(EML::eml_get(creator, 'givenName')), c('John', 'and Jack')) - expect_equal(unlist(EML::eml_get(creator, 'organizationName')), c('NCEAS', 'UCSB')) - expect_equal(unlist(EML::eml_get(creator, 'positionName')), c('Programmers', 'brothers')) + expect_equal(EML::eml_get(creator, 'givenName'), EML::as_emld(list('John', 'and Jack'))) + expect_equal(EML::eml_get(creator, 'organizationName'), EML::as_emld(list('NCEAS', 'UCSB'))) + expect_equal(EML::eml_get(creator, 'positionName'), EML::as_emld(list('Programmers', 'brothers'))) }) From e3fd9ecce072146ca5a01bf9215a356e577bfa84 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 30 Jan 2019 15:55:23 -0800 Subject: [PATCH 213/318] make eml helper functions independent of the eml$...() constructor syntax --- R/eml.R | 51 +++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/R/eml.R b/R/eml.R index 453bf90..ba504f2 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1,6 +1,5 @@ # Helper functions for creating EML metadata - #' Create EML entity from a DataONE PID #' #' @param mn (MNode) Member Node where the PID is associated with an object. @@ -41,7 +40,7 @@ pid_to_eml_entity <- function(mn, systmeta <- getSystemMetadata(mn, pid) - entity <- eml[[entity_type]](physical = pid_to_eml_physical(mn, pid), ...) + entity <- list(physical = pid_to_eml_physical(mn, pid), ...) # Set entity slots if (length(entity$id) == 0) { @@ -134,7 +133,7 @@ sysmeta_to_eml_physical <- function(sysmeta) { authMethod = sysmeta@checksumAlgorithm, url = paste0("https://cn.dataone.org/cn/v2/resolve/", sysmeta@identifier)) - phys$dataFormat <- eml$dataFormat(externallyDefinedFormat = list(formatName = sysmeta@formatId)) + phys$dataFormat <- list(dataformat = list(externallyDefinedFormat = list(formatName = sysmeta@formatId))) phys } @@ -230,7 +229,7 @@ eml_party <- function(type="associatedParty", "You must specify at least one of sur_name, organization, or position to make a valid creator") } - party <- eml[[type]]() + party <- list() # Individual Name if (!is.null(sur_name)) { @@ -426,28 +425,22 @@ eml_project <- function(title, designDescription = NULL, relatedProject = NULL) { - stopifnot(is.character(title), - length(title) > 0, - all(nchar(title)) > 0) - stopifnot(length(personnelList) > 0) - project <- eml$project() + project <- list() # Title project$title <- title project$personnel <- personnelList - doc$dataset$project <- project - # Abstract if (!is.null(abstract)) { - project$abstract <- eml$abstract(para = abstract) + project$abstract <- list(para = abstract) } # Funding if (!is.null(funding)) { - project$funding <- eml$funding(para = funding) + project$funding <- list(para = funding) } # Study area description @@ -488,7 +481,7 @@ eml_project <- function(title, #' @return (geographicCoverage) The new geographicCoverage section. #' eml_geographic_coverage <- function(description, north, east, south, west) { - cov <- eml$geographicCoverage() + cov <- list() cov$geographicDescription <- description @@ -514,6 +507,7 @@ eml_geographic_coverage <- function(description, north, east, south, west) { #' #' @return (address) An EML address object. #' +#' @export #' #' @examples #' NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") @@ -523,7 +517,7 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) is.character(administrative_area), (is.character(postal_code) || is.numeric(postal_code))) - address <- eml$address() + address <- list() address$deliveryPoint <- delivery_points address$city <- city @@ -564,9 +558,9 @@ set_abstract <- function(doc, text) { # stopifnot(is(doc, "eml")) if (length(text) == 1) { - doc$dataset$abstract <- eml$abstract(text) + doc$dataset$abstract <- list(abstract = text) } else if (length(text) > 1) { - doc$dataset$abstract <- eml$abstract(text) + doc$dataset$abstract <- list(abstract = text) } doc @@ -586,17 +580,19 @@ set_abstract <- function(doc, text) { #' #' #' @examples +#' \dontrun{ #' # Set an abstract with a single paragraph #' eml_abstract("Test abstract...") #' #' # Or one with multiple paragraphs #' eml_abstract(list("First para...", "second para...")) +#' } eml_abstract <- function(text) { stopifnot(is.character(text), length(text) > 0, all(nchar(text)) > 0) - abstract <- eml$abstract(para = text) + abstract <- list(abstract = list(para = text)) abstract } @@ -825,31 +821,30 @@ which_in_eml <- function(doc, element, test) { #' \dontrun{ #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') -#' eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) #' #' # Set the first contact as a reference to the first creator -#' eml@@dataset@@contact[[1]] <- eml_set_reference(eml@@dataset@@creator[[1]], -#' eml@@dataset@@contact[[1]]) +#' doc$dataset$contact[[1]] <- eml_set_reference(doc$dataset$creator[[1]], +#' doc$dataset$contact[[1]]) #' #' # This is also useful when we want to set references to a subset of 'dataTable' #' or 'otherEntity' objects #' # Add a few more objects first to illustrate the use: -#' eml@@dataset@@dataTable[[3]] <- eml@@dataset@@dataTable[[1]] -#' eml@@dataset@@dataTable[[4]] <- eml@@dataset@@dataTable[[1]] +#' doc$dataset$dataTable[[3]] <- doc$dataset$dataTable[[1]] +#' doc$dataset$dataTable[[4]] <- doc$dataset$dataTable[[1]] #' # Add references to the second and third elements only (not the 4th): #' for (i in 2:3) { -#' eml@@dataset@@dataTable[[i]]@@attributeList <- eml_set_reference(eml@@dataset@@dataTable[[1]]@@attributeList, -#' eml@@dataset@@dataTable[[i]]@@attributeList) +#' doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference(doc$dataset$dataTable[[1]]$attributeList, +#' doc$dataset$dataTable[[i]]$attributeList) #' } #' # If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. -#' eml@@dataset@@dataTable +#' doc$dataset$dataTable #' } eml_set_reference <- function(element_to_reference, element_to_replace) { if (length(element_to_reference$id) == 0) { stop('No id detected at element_to_reference@id. Please add an id in order to use references.') } id <- element_to_reference$id[1] - class <- class(element_to_replace)[1] element_to_replace <- list(references = id) return(element_to_replace) } @@ -896,7 +891,7 @@ eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTab # If a new attributeList is provided set it if (!is.null(attributeList)) { - x[[1]]@attributeList <- attributeList + x[[1]]$attributeList <- attributeList } x[[1]]$attributeList$id <- stringi::stri_rand_strings(1, length = 10) # generate random identifier # Apply references to all other elements From e83f19193f64ddb6848877689d919fc78967506f Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 1 Feb 2019 15:05:16 -0800 Subject: [PATCH 214/318] fix bug adding the data format --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index bae022c..e683f1d 100644 --- a/R/eml.R +++ b/R/eml.R @@ -133,7 +133,7 @@ sysmeta_to_eml_physical <- function(sysmeta) { authMethod = sysmeta@checksumAlgorithm, url = paste0("https://cn.dataone.org/cn/v2/resolve/", sysmeta@identifier)) - phys$dataFormat <- list(dataformat = list(externallyDefinedFormat = list(formatName = sysmeta@formatId))) + phys$dataFormat <- list(externallyDefinedFormat = list(formatName = sysmeta@formatId)) phys } From f65b86d309a234888ec26ba2dcaff233c1e0cf87 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 1 Feb 2019 15:49:02 -0800 Subject: [PATCH 215/318] documentation updates --- R/eml.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index e683f1d..3652e15 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1,6 +1,6 @@ # Helper functions for creating EML metadata -#' Create EML entity from a DataONE PID +#' Create EML entity with physical section from a DataONE PID #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pid (character) The PID of the object to create the sub-tree for. @@ -199,7 +199,7 @@ get_doc_id <- function(sysmeta) { #' @param position (character) The party's position. #' @param email (character) The party's email address(es). #' @param phone (character) The party's phone number(s). -#' @param address (character) The party's address(es). +#' @param address (character) The party's address(es) as a valid EML address #' @param userId (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ. #' @param role (character) The party's role. #' @@ -211,8 +211,10 @@ get_doc_id <- function(sysmeta) { #' \dontrun{ #' eml_party("creator", "Test", "User") #' eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") -#' eml_party("creator", list("Dominic", "'Dom'"), "Mullen", list("NCEAS", "UCSB"), -#' list("Data Scientist", "Programmer")) +#' eml_party("creator", given_names = list("Dominic", "'Dom'"), +#' sur_name = "Mullen", list("NCEAS", "UCSB"), +#' position = list("Data Scientist", "Programmer"), +#' address = eml$address(deliveryPoint = "735 State St", city = "Santa Barbara", administrativeArea = "CA", postalCode = "85719")) #'} eml_party <- function(type="associatedParty", given_names = NULL, From d1148a17a1ddd067f45529f89d7da58d55a4b4a3 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Feb 2019 14:09:29 -0800 Subject: [PATCH 216/318] debugging eml OE to DT function, and added a helper function --- R/eml.R | 82 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/R/eml.R b/R/eml.R index 3652e15..671b7c7 100644 --- a/R/eml.R +++ b/R/eml.R @@ -682,7 +682,7 @@ eml_validate_attributes <- function(attributes) { #' However, if these are already in their respective slots, they will be retained. #' #' @param doc (list) An EML document. -#' @param otherEntity (integer/) The indicies of the otherEntities to be transformed. +#' @param otherEntity (integer) The indicies of the otherEntities to be transformed. #' @param validate_eml (logical) Optional. Whether or not to validate the EML after #' completion. Setting this to `FALSE` reduces execution time by ~50 percent. #' @@ -696,38 +696,51 @@ eml_validate_attributes <- function(attributes) { #' \dontrun{ #' doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) #' -#' # The following two calls are equivalent: -#' doc <- eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]]) -#' doc <- eml_otherEntity_to_dataTable(doc, 1) -#' -#' # Integer input is recommended: #' doc <- eml_otherEntity_to_dataTable(doc, 1) #' } eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { stopifnot(methods::is(doc, "emld")) stopifnot(is.logical(eml_validate(doc))) stopifnot(is.numeric(index)) + stopifnot(length(eml_get_simple(doc$dataset$otherEntity, "entityName")) >= length(index)) - otherEntity <- doc$dataset$otherEntity[index] + ## set OE entityTypes to NULL and select the ones we want to use - ## Set entity type to NULL for converted OEs - for (i in 1:length(index)){ - otherEntity[[i]]$entityType <- NULL + if (length(eml_get_simple(doc$dataset$otherEntity, "entityName")) == 1){ + # prepare OE to copy + otherEntity <- doc$dataset$otherEntity + otherEntity$entityType <- NULL + otherEntity <- list(otherEntity) + ## delete otherEntity from list + doc$dataset$otherEntity <- NULL } - ## Add dt to bottom of dt list - dts <- doc$dataset$dataTable - if (length(dts > 0)){ - doc$dataset$dataTable <- list(dts, otherEntity) + + else { + otherEntity <- doc$dataset$otherEntity[index] + + for (i in 1:length(index)){ + otherEntity[[i]]$entityType <- NULL + } + ## delete otherEntity from list + doc$dataset$otherEntity <- doc$dataset$otherEntity[-index] } - else{ - doc$dataset$dataTable <- otherEntity + + + dts <- doc$dataset$dataTable + + ## handle case where there is a single dataTable already in dataset + if (length(eml_get_simple(dts, "entityName")) == 1){ + dts <- list(dts) + doc$dataset$dataTable <- c(dts, otherEntity) } + ## otherwise can use c() as opposed to list() + else { + doc$dataset$dataTable <- c(dts, otherEntity) + } - ## delete otherEntity from list - doc$dataset$otherEntity[index] <- NULL ## return eml if (validate_eml == TRUE) { @@ -904,3 +917,36 @@ eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTab doc$dataset[[type]] <- x return(doc) } + +#' Get a simple list output from EML::eml_get() +#' +#' This function is a convenience wrapper around EML::eml_get() which +#' returns the output as a simple list as opposed to an object of type +#' `emld` by removing the attributes and context from the object. If an +#' element containing children is returned all of it's children will be +#' flattened into a named character vector. +#' +#' @param doc (list) An EML object or child/descendant object +#' @param element (character) Name of the element to be extracted. If +#' multiple occurrences are found, will extract all. +#' +#' @return out (vector) A list of values contained in element given +#' +#' @export +#' +#' @examples +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' +#' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' +#' datatable_names <- eml_get_simple(doc$dataset$dataTable, element = "entityName") +#' +#' +eml_get_simple <- function(doc, element){ + out <- eml_get(doc, element, from = "list") + out$`@context` <- NULL + attributes(out) <- NULL + out <- unlist(out) + return(out) +} From 92c7515e7e7107949f1a7c9fcf0a3a09f4b324c8 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Feb 2019 14:16:21 -0800 Subject: [PATCH 217/318] doc update --- R/eml.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 671b7c7..e2522e3 100644 --- a/R/eml.R +++ b/R/eml.R @@ -924,7 +924,8 @@ eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTab #' returns the output as a simple list as opposed to an object of type #' `emld` by removing the attributes and context from the object. If an #' element containing children is returned all of it's children will be -#' flattened into a named character vector. +#' flattened into a named character vector. This function is best used +#' to extract values from elements that have no children. #' #' @param doc (list) An EML object or child/descendant object #' @param element (character) Name of the element to be extracted. If From 78d628bedcc28107c4cd81a68f8e8691cea2de73 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Feb 2019 14:42:05 -0800 Subject: [PATCH 218/318] added extra validation to `eml_party` --- R/eml.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index e2522e3..099d5ad 100644 --- a/R/eml.R +++ b/R/eml.R @@ -4,7 +4,7 @@ #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pid (character) The PID of the object to create the sub-tree for. -#' @param entityType (character) What kind of objects to create from the input. One of "dataTable", +#' @param entityType (character) What kind of object to create from the input. One of "dataTable", #' "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity". #' @param ... (optional) Additional arguments to be passed to \code{eml$entityType())}. #' @@ -74,7 +74,7 @@ pid_to_eml_entity <- function(mn, #' creating the EML physical. #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PID of the object to create the sub-tree for. +#' @param pids (character) The PID of the object to create the physical for. #' #' @return (list) A physical object. #' @@ -83,7 +83,7 @@ pid_to_eml_entity <- function(mn, #' @examples #' \dontrun{ #' # Generate EML physical sections for an object in a data package -#' pid_to_eml_physical(mn, pid) +#' phys <- pid_to_eml_physical(mn, pid) #' } pid_to_eml_physical <- function(mn, pid) { stopifnot(is(mn, "MNode")) @@ -230,6 +230,14 @@ eml_party <- function(type="associatedParty", stop(call. = FALSE, "You must specify at least one of sur_name, organization, or position to make a valid creator") } + if (!is.null(address) & + !"deliveryPoint" %in% names(address) & + !"administrativeArea" %in% names(address) & + !"postalCode" %in% names(address) & + !"city" %in% names(address)) { + stop(call. = FALSE, + "An address was given but no deliveryPoint, administrativeArea, city, or postalCode child elements were specified.") + } party <- list() @@ -255,7 +263,6 @@ eml_party <- function(type="associatedParty", # Address if (!is.null(address)) { - # This crams the entire address into the delivery point...not ideal party$address <- address } From aa5beff498e1a952328f8eecae82e4791e7532df Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Feb 2019 16:11:34 -0800 Subject: [PATCH 219/318] added some more checks, and removed a few exports --- R/eml.R | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/R/eml.R b/R/eml.R index 099d5ad..5c7616b 100644 --- a/R/eml.R +++ b/R/eml.R @@ -435,6 +435,11 @@ eml_project <- function(title, relatedProject = NULL) { + if (is.null(eml_get_simple(personnelList, "role"))) { + stop(call. = FALSE, + "Each person in the personnelList must have a role.") + } + project <- list() # Title @@ -516,10 +521,6 @@ eml_geographic_coverage <- function(description, north, east, south, west) { #' #' @return (address) An EML address object. #' -#' @export -#' -#' @examples -#' NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") eml_address <- function(delivery_points, city, administrative_area, postal_code) { stopifnot(is.character(delivery_points), is.character(city), @@ -550,18 +551,6 @@ eml_address <- function(delivery_points, city, administrative_area, postal_code) #' #' @return (eml) The modified EML document. #' -#' @export -#' -#' @examples -#' # Create a new EML document -#' library(EML) -#' doc <- list() -#' -#' # Set an abstract with a single paragraph -#' set_abstract(doc, list("Test abstract...")) -#' -#' # Or one with multiple paragraphs -#' set_abstract(doc, list("First para...", "second para...")) set_abstract <- function(doc, text) { # need to rewrite this test # stopifnot(is(doc, "eml")) @@ -629,12 +618,15 @@ eml_abstract <- function(text) { #' eml_validate_attributes(attributes) #' } eml_validate_attributes <- function(attributes) { - # need to rewrite this check - # stopifnot(is(attributes, "attributeList")) + + if (class(attributes)[1] != "emld") { + stop(call. = FALSE, + "Attributes must be an emld object generated by EML::set_attributes()") + } # Define an interal applyable function to validate each attribute eml_validate_attribute <- function(attribute) { - # stopifnot(is(attribute, "attribute")) + stopifnot(!is.null(names(attribute))) doc$dataset$otherEntity$attributeList$attribute[[1]] <- attribute @@ -714,7 +706,7 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { ## set OE entityTypes to NULL and select the ones we want to use if (length(eml_get_simple(doc$dataset$otherEntity, "entityName")) == 1){ - # prepare OE to copy + ## prepare OE to copy otherEntity <- doc$dataset$otherEntity otherEntity$entityType <- NULL otherEntity <- list(otherEntity) @@ -742,7 +734,6 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { doc$dataset$dataTable <- c(dts, otherEntity) } - ## otherwise can use c() as opposed to list() else { doc$dataset$dataTable <- c(dts, otherEntity) } @@ -832,8 +823,8 @@ which_in_eml <- function(doc, element, test) { #' This function creates a new object with the same class as \code{element_to_replace} #' using a reference to \code{element_to_reference}. #' -#' @param element_to_reference (S4) An EML object to reference. -#' @param element_to_replace (S4) An EML object to replace with a reference. +#' @param element_to_reference (list) An EML element to reference. +#' @param element_to_replace (list) An EML element to replace with a reference. #' #' @author Dominic Mullen dmullen17@@gmail.com #' @@ -864,7 +855,7 @@ which_in_eml <- function(doc, element, test) { #' } eml_set_reference <- function(element_to_reference, element_to_replace) { if (length(element_to_reference$id) == 0) { - stop('No id detected at element_to_reference@id. Please add an id in order to use references.') + stop('No id detected at element_to_reference$id. Please add an id in order to use references.') } id <- element_to_reference$id[1] element_to_replace <- list(references = id) @@ -877,7 +868,7 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' This function sets shared attributes using the attributes of the first \code{type} #' selected and creates references for all remaining objects of equivalent \code{type}. #' -#' @param eml (eml) An EML object. +#' @param eml (emld) An EML object. #' @param attributeList (attributeList) Optional. An EML attributeList object. If not provided #' then it will default to the attributeList of the first \code{type} element. #' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList @@ -900,10 +891,7 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' } eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTable') { stopifnot(methods::is(doc, 'emld')) - #if (!is.null(attributeList)) { - # stopifnot(methods::is(attributeList, 'attributeList')) - #} - #stopifnot(type %in% c('dataTable', 'otherEntity')) + stopifnot(type %in% c('dataTable', 'otherEntity')) x <- doc$dataset[[type]] n <- length(x) From 48f51d51d231eef46972727f046016db8c5d51df Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Feb 2019 16:11:53 -0800 Subject: [PATCH 220/318] documentation updates --- NAMESPACE | 3 +-- man/eml_address.Rd | 3 --- man/eml_get_simple.Rd | 35 +++++++++++++++++++++++++++++ man/eml_otherEntity_to_dataTable.Rd | 7 +----- man/eml_party.Rd | 8 ++++--- man/eml_set_reference.Rd | 4 ++-- man/eml_set_shared_attributes.Rd | 2 +- man/pid_to_eml_entity.Rd | 6 ++--- man/pid_to_eml_physical.Rd | 4 ++-- man/set_abstract.Rd | 11 --------- 10 files changed, 50 insertions(+), 33 deletions(-) create mode 100644 man/eml_get_simple.Rd diff --git a/NAMESPACE b/NAMESPACE index d918d5a..959e39d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,10 +9,10 @@ export(create_dummy_package) export(create_dummy_package_full) export(create_dummy_parent_package) export(create_resource_map) -export(eml_address) export(eml_associated_party) export(eml_contact) export(eml_creator) +export(eml_get_simple) export(eml_metadata_provider) export(eml_otherEntity_to_dataTable) export(eml_party) @@ -48,7 +48,6 @@ export(pid_to_eml_physical) export(publish_object) export(publish_update) export(remove_public_read) -export(set_abstract) export(set_access) export(set_file_name) export(set_public_read) diff --git a/man/eml_address.Rd b/man/eml_address.Rd index 0dbf1f5..5ef855a 100644 --- a/man/eml_address.Rd +++ b/man/eml_address.Rd @@ -24,6 +24,3 @@ A simple way to create an EML address element. \details{ Note that EML::eml$address() provides the same functionality } -\examples{ -NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101") -} diff --git a/man/eml_get_simple.Rd b/man/eml_get_simple.Rd new file mode 100644 index 0000000..1ebe1f8 --- /dev/null +++ b/man/eml_get_simple.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_get_simple} +\alias{eml_get_simple} +\title{Get a simple list output from EML::eml_get()} +\usage{ +eml_get_simple(doc, element) +} +\arguments{ +\item{doc}{(list) An EML object or child/descendant object} + +\item{element}{(character) Name of the element to be extracted. If +multiple occurrences are found, will extract all.} +} +\value{ +out (vector) A list of values contained in element given +} +\description{ +This function is a convenience wrapper around EML::eml_get() which +returns the output as a simple list as opposed to an object of type +\code{emld} by removing the attributes and context from the object. If an +element containing children is returned all of it's children will be +flattened into a named character vector. This function is best used +to extract values from elements that have no children. +} +\examples{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') + +doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) + +datatable_names <- eml_get_simple(doc$dataset$dataTable, element = "entityName") + + +} diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd index 10cc7fa..901ecca 100644 --- a/man/eml_otherEntity_to_dataTable.Rd +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -12,7 +12,7 @@ eml_otherEntity_to_dataTable(doc, index, validate_eml = TRUE) \item{validate_eml}{(logical) Optional. Whether or not to validate the EML after completion. Setting this to \code{FALSE} reduces execution time by ~50 percent.} -\item{otherEntity}{(integer/) The indicies of the otherEntities to be transformed.} +\item{otherEntity}{(integer) The indicies of the otherEntities to be transformed.} } \description{ Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an @@ -23,11 +23,6 @@ However, if these are already in their respective slots, they will be retained. \dontrun{ doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) -# The following two calls are equivalent: -doc <- eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]]) -doc <- eml_otherEntity_to_dataTable(doc, 1) - -# Integer input is recommended: doc <- eml_otherEntity_to_dataTable(doc, 1) } } diff --git a/man/eml_party.Rd b/man/eml_party.Rd index 67a0af7..e0f40ec 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -24,7 +24,7 @@ eml_party(type = "associatedParty", given_names = NULL, \item{phone}{(character) The party's phone number(s).} -\item{address}{(character) The party's address(es).} +\item{address}{(character) The party's address(es) as a valid EML address} \item{userId}{(character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.} @@ -44,7 +44,9 @@ The \code{userId} argument assumes an ORCID so be sure to adjust for that. \dontrun{ eml_party("creator", "Test", "User") eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") -eml_party("creator", list("Dominic", "'Dom'"), "Mullen", list("NCEAS", "UCSB"), - list("Data Scientist", "Programmer")) +eml_party("creator", given_names = list("Dominic", "'Dom'"), + sur_name = "Mullen", list("NCEAS", "UCSB"), + position = list("Data Scientist", "Programmer"), + address = eml$address(deliveryPoint = "735 State St", city = "Santa Barbara", administrativeArea = "CA", postalCode = "85719")) } } diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd index 07615dc..894e1ab 100644 --- a/man/eml_set_reference.Rd +++ b/man/eml_set_reference.Rd @@ -7,9 +7,9 @@ eml_set_reference(element_to_reference, element_to_replace) } \arguments{ -\item{element_to_reference}{(S4) An EML object to reference.} +\item{element_to_reference}{(list) An EML element to reference.} -\item{element_to_replace}{(S4) An EML object to replace with a reference.} +\item{element_to_replace}{(list) An EML element to replace with a reference.} } \description{ This function creates a new object with the same class as \code{element_to_replace} diff --git a/man/eml_set_shared_attributes.Rd b/man/eml_set_shared_attributes.Rd index 0350fd7..4ed9f6a 100644 --- a/man/eml_set_shared_attributes.Rd +++ b/man/eml_set_shared_attributes.Rd @@ -14,7 +14,7 @@ then it will default to the attributeList of the first \code{type} element.} \item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList objects with references. Defaults to 'dataTable'.} -\item{eml}{(eml) An EML object.} +\item{eml}{(emld) An EML object.} } \value{ (eml) The modified EML document. diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd index 0eff471..d9574d1 100644 --- a/man/pid_to_eml_entity.Rd +++ b/man/pid_to_eml_entity.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eml.R \name{pid_to_eml_entity} \alias{pid_to_eml_entity} -\title{Create EML entity from a DataONE PID} +\title{Create EML entity with physical section from a DataONE PID} \usage{ pid_to_eml_entity(mn, pid, entity_type = "otherEntity", ...) } @@ -13,14 +13,14 @@ pid_to_eml_entity(mn, pid, entity_type = "otherEntity", ...) \item{...}{(optional) Additional arguments to be passed to \code{eml$entityType())}.} -\item{entityType}{(character) What kind of objects to create from the input. One of "dataTable", +\item{entityType}{(character) What kind of object to create from the input. One of "dataTable", "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".} } \value{ (list) The entity object. } \description{ -Create EML entity from a DataONE PID +Create EML entity with physical section from a DataONE PID } \examples{ \dontrun{ diff --git a/man/pid_to_eml_physical.Rd b/man/pid_to_eml_physical.Rd index f9f4035..b2c9930 100644 --- a/man/pid_to_eml_physical.Rd +++ b/man/pid_to_eml_physical.Rd @@ -9,7 +9,7 @@ pid_to_eml_physical(mn, pid) \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} -\item{pids}{(character) The PID of the object to create the sub-tree for.} +\item{pids}{(character) The PID of the object to create the physical for.} } \value{ (list) A physical object. @@ -21,6 +21,6 @@ creating the EML physical. \examples{ \dontrun{ # Generate EML physical sections for an object in a data package -pid_to_eml_physical(mn, pid) +phys <- pid_to_eml_physical(mn, pid) } } diff --git a/man/set_abstract.Rd b/man/set_abstract.Rd index ea5569d..044bec3 100644 --- a/man/set_abstract.Rd +++ b/man/set_abstract.Rd @@ -20,14 +20,3 @@ used for each element.} \description{ Set the abstract for an EML document. } -\examples{ -# Create a new EML document -library(EML) -doc <- list() - -# Set an abstract with a single paragraph -set_abstract(doc, list("Test abstract...")) - -# Or one with multiple paragraphs -set_abstract(doc, list("First para...", "second para...")) -} From cb4b0e95be7de15c845c76e2107dfaeec36cea1f Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 7 Feb 2019 10:44:29 -0800 Subject: [PATCH 221/318] last bug to fix in this function (I hope) we have to handle 3 cases, where there are no existing datables, where there is 1, and where there is more than 1 --- R/eml.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/R/eml.R b/R/eml.R index 5c7616b..be5676e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -728,18 +728,24 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { dts <- doc$dataset$dataTable - ## handle case where there is a single dataTable already in dataset - if (length(eml_get_simple(dts, "entityName")) == 1){ - dts <- list(dts) - doc$dataset$dataTable <- c(dts, otherEntity) - } + ## handle various datatable length cases + if (length(dts) == 0){ + doc$dataset$dataTable <- otherEntity + } + else{ + if (length(eml_get_simple(dts, "entityName")) == 1){ + dts <- list(dts) + doc$dataset$dataTable <- c(dts, otherEntity) + } - else { - doc$dataset$dataTable <- c(dts, otherEntity) + else { + doc$dataset$dataTable <- c(dts, otherEntity) + } } + ## return eml if (validate_eml == TRUE) { eml_validate(doc) From 80aad01bff729cd15742417ead37fc6f4e0a375c Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Feb 2019 09:29:18 -0800 Subject: [PATCH 222/318] fix unnecessary ::: --- R/access.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/access.R b/R/access.R index 989a1fc..6187e53 100644 --- a/R/access.R +++ b/R/access.R @@ -522,7 +522,7 @@ is_public_read <- function(mn, pids, use.names = TRUE) { } } - sysmeta <- datapack:::SystemMetadata(XML::xmlRoot(suppressMessages(XML::xmlParse((httr::content(response, as = "text")))))) + sysmeta <- datapack::SystemMetadata(XML::xmlRoot(suppressMessages(XML::xmlParse((httr::content(response, as = "text")))))) return(datapack::hasAccessRule(sysmeta, "public", "read")) }) } From dd315ec03152d34faf9629c05807060714b99e4d Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Feb 2019 09:29:31 -0800 Subject: [PATCH 223/318] update documentation and add function --- R/eml.R | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/R/eml.R b/R/eml.R index be5676e..feda656 100644 --- a/R/eml.R +++ b/R/eml.R @@ -74,7 +74,7 @@ pid_to_eml_entity <- function(mn, #' creating the EML physical. #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PID of the object to create the physical for. +#' @param pid (character) The PID of the object to create the physical for. #' #' @return (list) A physical object. #' @@ -214,7 +214,10 @@ get_doc_id <- function(sysmeta) { #' eml_party("creator", given_names = list("Dominic", "'Dom'"), #' sur_name = "Mullen", list("NCEAS", "UCSB"), #' position = list("Data Scientist", "Programmer"), -#' address = eml$address(deliveryPoint = "735 State St", city = "Santa Barbara", administrativeArea = "CA", postalCode = "85719")) +#' address = eml$address(deliveryPoint = "735 State St", +#' city = "Santa Barbara", +#' administrativeArea = "CA", +#' postalCode = "85719")) #'} eml_party <- function(type="associatedParty", given_names = NULL, @@ -407,7 +410,8 @@ eml_personnel <- function(role = NULL, ...) { #' fully fleshed out. Need to pass these objects in directly if you want to use #' them. #' -#' @param title (character) Title of the project (Required). May have multiple titles constructed using `list`. +#' @param title (character) Title of the project (Required). May have multiple titles +#' constructed using `list`. #' @param personnelList (list of personnel) Personnel involved with the project. #' @param abstract (character) Project abstract. Can pass as a list #' for separate paragraphs. @@ -853,10 +857,13 @@ which_in_eml <- function(doc, element, test) { #' doc$dataset$dataTable[[4]] <- doc$dataset$dataTable[[1]] #' # Add references to the second and third elements only (not the 4th): #' for (i in 2:3) { -#' doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference(doc$dataset$dataTable[[1]]$attributeList, +#' doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference( +#' doc$dataset$dataTable[[1]]$attributeList, #' doc$dataset$dataTable[[i]]$attributeList) #' } -#' # If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. +#' # If we print the entire 'dataTable' list we see elements 2 and 3 have +#' references while 4 does not. +#' #' doc$dataset$dataTable #' } eml_set_reference <- function(element_to_reference, element_to_replace) { @@ -877,8 +884,8 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' @param eml (emld) An EML object. #' @param attributeList (attributeList) Optional. An EML attributeList object. If not provided #' then it will default to the attributeList of the first \code{type} element. -#' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList -#' objects with references. Defaults to 'dataTable'. +#' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' +#' attributeList objects with references. Defaults to 'dataTable'. #' #' @return (eml) The modified EML document. #' @@ -891,7 +898,8 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') #' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) -#' atts <- EML::set_attributes(EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) +#' atts <- EML::set_attributes( +#' EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) #' #' eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') #' } @@ -952,3 +960,43 @@ eml_get_simple <- function(doc, element){ out <- unlist(out) return(out) } + +#' Reorder a named list of objects according to the order in the metadata +#' +#' This function takes a named list of data objects, such as what is +#' returned from `get_package`, and reorders them according to the order +#' they are given in the EML document. +#' +#' @param pid_list (list) A named list of data pids +#' @param doc (list) an `emld` document +#' +#' @return ordered_pids (list) A list of reordered pids +#' +#' @export +#' +#' @examples +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +# +#' ids <- get_package(adc, 'resource_map_doi:10.18739/A2S17SS1M', file_names = TRUE) +#' doc <- EML::read_eml(dataone::getObject(adc, ids$metadata)) +#' +#' # return all entity types +#' ordered_pids <- reorder_pids(ids$data, doc) +#' +reorder_pids <- function(pid_list, doc){ + stopifnot(!is.null(names(pid_list))) + + entity_names <- eml_get_simple(doc, "entityName") + + if (is.null(entity_names)){ + stop("No entity names were found.") + } + + if (length(entity_names) != length(pid_list)){ + stop("Number of entities in EML and resource map do not match") + } + + ordered_pids <- pid_list[order(match(names(pid_list), entity_names))] + return(ordered_pids) +} From 538b70859fc3202741d628bde16db1abee1875b0 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Feb 2019 09:29:43 -0800 Subject: [PATCH 224/318] add tests for new function --- tests/testthat/test_eml.R | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 464c18d..b1fa94f 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -271,3 +271,36 @@ test_that('eml_party creates multiple givenName, organizationName, and positionN expect_equal(EML::eml_get(creator, 'organizationName'), EML::as_emld(list('NCEAS', 'UCSB'))) expect_equal(EML::eml_get(creator, 'positionName'), EML::as_emld(list('Programmers', 'brothers'))) }) + +test_that('reorder_pids orders pids correctly', { + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + oe1 <- list(entityName = "object one", entityType = "other") + oe2 <- list(entityName = "object two", entityType = "other") + doc <- list(packageId = "an id", system = "a system", + dataset = list( + title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me, + otherEntity = list(oe1, oe2))) + + pid_list <- list("object two" = "some identifier2", "object one" = "some identifier1") + + ordered_pids <- reorder_pids(pid_list, doc) + entity_names <- eml_get_simple(doc, "entityName") + expect_equal(names(ordered_pids), entity_names) +}) + +test_that('reorder_pids fails gracefully', { + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + oe1 <- list(entityName = "object one", entityType = "other") + doc <- list(packageId = "an id", system = "a system", + dataset = list( + title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me, + otherEntity = list(oe1))) + + pid_list <- list("object two" = "some identifier2", "object one" = "some identifier1") + + expect_error(reorder_pids(pid_list, doc)) +}) From a08d5e0d27e42d60b73f076ccb2b22e81d701044 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Feb 2019 09:33:16 -0800 Subject: [PATCH 225/318] remove call to global eml variable --- R/editing.R | 2 +- R/eml.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/editing.R b/R/editing.R index 91624ff..9b6c0cb 100644 --- a/R/editing.R +++ b/R/editing.R @@ -449,7 +449,7 @@ publish_update <- function(mn, # Replace access if needed if (length(doc$access$allow) & (!is.null(metadata_path))) { - doc$access <- eml$access() + doc$access <- list() } # Write out the document to disk. We do this in part because diff --git a/R/eml.R b/R/eml.R index feda656..47ce5d3 100644 --- a/R/eml.R +++ b/R/eml.R @@ -647,11 +647,11 @@ eml_validate_attributes <- function(attributes) { doc <- list(packageId = "test", system = "test", - dataset = eml$dataset( + dataset = list( title = "test", - creator = eml$creator(individualName = eml$individualName(givenName = "test", surName = "test")), - contact = eml$contact(individualName = eml$individualName(givenName = "test", surName = "test")), - otherEntity = eml$otherEntity(entityName = "name", entityType = "otherEntity"))) + creator = list(individualName = eml$individualName(givenName = "test", surName = "test")), + contact = list(individualName = eml$individualName(givenName = "test", surName = "test")), + otherEntity = list(entityName = "name", entityType = "otherEntity"))) results <- sapply(attributes$attribute, function(attribute) { cat(paste0("Validating single attribute '", attribute$attributeName, "': ")) From b86ad8f78e7b4639492753952644e3b756cc2b32 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Feb 2019 09:47:45 -0800 Subject: [PATCH 226/318] update documentation after running checks --- NAMESPACE | 1 + R/eml.R | 10 +++++----- man/eml_otherEntity_to_dataTable.Rd | 4 ++-- man/eml_party.Rd | 5 ++++- man/eml_project.Rd | 3 ++- man/eml_set_reference.Rd | 7 +++++-- man/eml_set_shared_attributes.Rd | 13 ++++++------ man/pid_to_eml_entity.Rd | 8 ++++---- man/pid_to_eml_physical.Rd | 2 +- man/reorder_pids.Rd | 31 +++++++++++++++++++++++++++++ 10 files changed, 62 insertions(+), 22 deletions(-) create mode 100644 man/reorder_pids.Rd diff --git a/NAMESPACE b/NAMESPACE index 959e39d..545c0b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(pid_to_eml_physical) export(publish_object) export(publish_update) export(remove_public_read) +export(reorder_pids) export(set_access) export(set_file_name) export(set_public_read) diff --git a/R/eml.R b/R/eml.R index 47ce5d3..d5cc571 100644 --- a/R/eml.R +++ b/R/eml.R @@ -4,7 +4,7 @@ #' #' @param mn (MNode) Member Node where the PID is associated with an object. #' @param pid (character) The PID of the object to create the sub-tree for. -#' @param entityType (character) What kind of object to create from the input. One of "dataTable", +#' @param entity_type (character) What kind of object to create from the input. One of "dataTable", #' "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity". #' @param ... (optional) Additional arguments to be passed to \code{eml$entityType())}. #' @@ -17,7 +17,7 @@ #' # Generate EML otherEntity #' pid_to_eml_entity(mn, #' pid, -#' entityType = "otherEntity", +#' entity_type = "otherEntity", #' entityName = "Entity Name", #' entityDescription = "Description about entity") #' } @@ -685,7 +685,7 @@ eml_validate_attributes <- function(attributes) { #' However, if these are already in their respective slots, they will be retained. #' #' @param doc (list) An EML document. -#' @param otherEntity (integer) The indicies of the otherEntities to be transformed. +#' @param index (integer) The indicies of the otherEntities to be transformed. #' @param validate_eml (logical) Optional. Whether or not to validate the EML after #' completion. Setting this to `FALSE` reduces execution time by ~50 percent. #' @@ -881,13 +881,13 @@ eml_set_reference <- function(element_to_reference, element_to_replace) { #' This function sets shared attributes using the attributes of the first \code{type} #' selected and creates references for all remaining objects of equivalent \code{type}. #' -#' @param eml (emld) An EML object. +#' @param doc (emld) An EML object. #' @param attributeList (attributeList) Optional. An EML attributeList object. If not provided #' then it will default to the attributeList of the first \code{type} element. #' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' #' attributeList objects with references. Defaults to 'dataTable'. #' -#' @return (eml) The modified EML document. +#' @return (doc) The modified EML document. #' #' @author Dominic Mullen dmullen17@@gmail.com #' diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd index 901ecca..2b363eb 100644 --- a/man/eml_otherEntity_to_dataTable.Rd +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -9,10 +9,10 @@ eml_otherEntity_to_dataTable(doc, index, validate_eml = TRUE) \arguments{ \item{doc}{(list) An EML document.} +\item{index}{(integer) The indicies of the otherEntities to be transformed.} + \item{validate_eml}{(logical) Optional. Whether or not to validate the EML after completion. Setting this to \code{FALSE} reduces execution time by ~50 percent.} - -\item{otherEntity}{(integer) The indicies of the otherEntities to be transformed.} } \description{ Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an diff --git a/man/eml_party.Rd b/man/eml_party.Rd index e0f40ec..43eac76 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -47,6 +47,9 @@ eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-038 eml_party("creator", given_names = list("Dominic", "'Dom'"), sur_name = "Mullen", list("NCEAS", "UCSB"), position = list("Data Scientist", "Programmer"), - address = eml$address(deliveryPoint = "735 State St", city = "Santa Barbara", administrativeArea = "CA", postalCode = "85719")) + address = eml$address(deliveryPoint = "735 State St", + city = "Santa Barbara", + administrativeArea = "CA", + postalCode = "85719")) } } diff --git a/man/eml_project.Rd b/man/eml_project.Rd index c8a7d38..9775fda 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -9,7 +9,8 @@ eml_project(title, personnelList, abstract = NULL, funding = NULL, relatedProject = NULL) } \arguments{ -\item{title}{(character) Title of the project (Required). May have multiple titles constructed using \code{list}.} +\item{title}{(character) Title of the project (Required). May have multiple titles +constructed using \code{list}.} \item{personnelList}{(list of personnel) Personnel involved with the project.} diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd index 894e1ab..b753c96 100644 --- a/man/eml_set_reference.Rd +++ b/man/eml_set_reference.Rd @@ -32,10 +32,13 @@ doc$dataset$dataTable[[3]] <- doc$dataset$dataTable[[1]] doc$dataset$dataTable[[4]] <- doc$dataset$dataTable[[1]] # Add references to the second and third elements only (not the 4th): for (i in 2:3) { - doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference(doc$dataset$dataTable[[1]]$attributeList, + doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference( + doc$dataset$dataTable[[1]]$attributeList, doc$dataset$dataTable[[i]]$attributeList) } -# If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not. +# If we print the entire 'dataTable' list we see elements 2 and 3 have +references while 4 does not. + doc$dataset$dataTable } } diff --git a/man/eml_set_shared_attributes.Rd b/man/eml_set_shared_attributes.Rd index 4ed9f6a..5c9bf8d 100644 --- a/man/eml_set_shared_attributes.Rd +++ b/man/eml_set_shared_attributes.Rd @@ -8,16 +8,16 @@ eml_set_shared_attributes(doc, attributeList = NULL, type = "dataTable") } \arguments{ +\item{doc}{(emld) An EML object.} + \item{attributeList}{(attributeList) Optional. An EML attributeList object. If not provided then it will default to the attributeList of the first \code{type} element.} -\item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList -objects with references. Defaults to 'dataTable'.} - -\item{eml}{(emld) An EML object.} +\item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' +attributeList objects with references. Defaults to 'dataTable'.} } \value{ -(eml) The modified EML document. +(doc) The modified EML document. } \description{ This function sets shared attributes using the attributes of the first \code{type} @@ -28,7 +28,8 @@ selected and creates references for all remaining objects of equivalent \code{ty cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) -atts <- EML::set_attributes(EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) +atts <- EML::set_attributes( + EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') } diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd index d9574d1..cf1d575 100644 --- a/man/pid_to_eml_entity.Rd +++ b/man/pid_to_eml_entity.Rd @@ -11,10 +11,10 @@ pid_to_eml_entity(mn, pid, entity_type = "otherEntity", ...) \item{pid}{(character) The PID of the object to create the sub-tree for.} -\item{...}{(optional) Additional arguments to be passed to \code{eml$entityType())}.} - -\item{entityType}{(character) What kind of object to create from the input. One of "dataTable", +\item{entity_type}{(character) What kind of object to create from the input. One of "dataTable", "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".} + +\item{...}{(optional) Additional arguments to be passed to \code{eml$entityType())}.} } \value{ (list) The entity object. @@ -27,7 +27,7 @@ Create EML entity with physical section from a DataONE PID # Generate EML otherEntity pid_to_eml_entity(mn, pid, - entityType = "otherEntity", + entity_type = "otherEntity", entityName = "Entity Name", entityDescription = "Description about entity") } diff --git a/man/pid_to_eml_physical.Rd b/man/pid_to_eml_physical.Rd index b2c9930..623024e 100644 --- a/man/pid_to_eml_physical.Rd +++ b/man/pid_to_eml_physical.Rd @@ -9,7 +9,7 @@ pid_to_eml_physical(mn, pid) \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} -\item{pids}{(character) The PID of the object to create the physical for.} +\item{pid}{(character) The PID of the object to create the physical for.} } \value{ (list) A physical object. diff --git a/man/reorder_pids.Rd b/man/reorder_pids.Rd new file mode 100644 index 0000000..e3007a4 --- /dev/null +++ b/man/reorder_pids.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{reorder_pids} +\alias{reorder_pids} +\title{Reorder a named list of objects according to the order in the metadata} +\usage{ +reorder_pids(pid_list, doc) +} +\arguments{ +\item{pid_list}{(list) A named list of data pids} + +\item{doc}{(list) an \code{emld} document} +} +\value{ +ordered_pids (list) A list of reordered pids +} +\description{ +This function takes a named list of data objects, such as what is +returned from \code{get_package}, and reorders them according to the order +they are given in the EML document. +} +\examples{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +ids <- get_package(adc, 'resource_map_doi:10.18739/A2S17SS1M', file_names = TRUE) +doc <- EML::read_eml(dataone::getObject(adc, ids$metadata)) + +# return all entity types +ordered_pids <- reorder_pids(ids$data, doc) + +} From f38c0a65db2f8d74d8ec94701aac735c1131af32 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Feb 2019 11:36:55 -0800 Subject: [PATCH 227/318] remove call to eml$ --- R/eml.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index d5cc571..a303678 100644 --- a/R/eml.R +++ b/R/eml.R @@ -649,8 +649,8 @@ eml_validate_attributes <- function(attributes) { system = "test", dataset = list( title = "test", - creator = list(individualName = eml$individualName(givenName = "test", surName = "test")), - contact = list(individualName = eml$individualName(givenName = "test", surName = "test")), + creator = list(individualName = list(givenName = "test", surName = "test")), + contact = list(individualName = list(givenName = "test", surName = "test")), otherEntity = list(entityName = "name", entityType = "otherEntity"))) results <- sapply(attributes$attribute, function(attribute) { From 3de9efb3a5918627dc97fb170ad918aba22dcc59 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Mar 2019 09:45:58 -0800 Subject: [PATCH 228/318] update publish_object --- R/editing.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/editing.R b/R/editing.R index 9b6c0cb..e6a30c1 100644 --- a/R/editing.R +++ b/R/editing.R @@ -191,10 +191,10 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = # Add packageId to metadata if the object is an xml file if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml", format_id)) { - eml <- EML::read_eml(path) - eml@packageId <- new("xml_attribute", new_pid) + doc <- EML::read_eml(path) + doc$packageId <- new_pid path <- tempfile() - EML::write_eml(eml, path) + EML::write_eml(doc, path) # File changed - update checksum sysmeta@checksum <- digest::digest(path, algo = "sha1", serialize = FALSE, file = TRUE) } From 5a9b10b90e1bb64d74022b42ec79bb27cc48c9fe Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Mar 2019 16:26:11 -0700 Subject: [PATCH 229/318] adding `read_zip_shapefile` helper --- DESCRIPTION | 1 + NAMESPACE | 1 + R/helpers.R | 39 +++++++++++++++++++++++++++++++++++++++ man/read_zip_shapefile.Rd | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+) create mode 100644 man/read_zip_shapefile.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 35beec3..258625e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Imports: methods, stringr, stringi, + sf, tools, uuid, xml2, diff --git a/NAMESPACE b/NAMESPACE index 76e50d4..263984a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ export(pid_to_eml_entity) export(pid_to_eml_physical) export(publish_object) export(publish_update) +export(read_zip_shapefile) export(remove_public_read) export(set_abstract) export(set_access) diff --git a/R/helpers.R b/R/helpers.R index 28e5cf4..93b173e 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -540,3 +540,42 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType return(results) } + +#' Read a shapefile from a pid +#' +#' Read a shapefile from a pid that points to the zipped directory of the shapefile and associated files +#' on a given member node. +#' +#' @param mn (MNode) A DataOne Member Node +#' @param pid (character) An object identifier +#' +#' @return shapefile (sf) The shapefile as an `sf` object +#' +#' @export +#' +#' @author Jeanette Clark jclark@@nceas.ucsb.edu +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' pid <- "urn:uuid:294a365f-c0d1-4cc3-a508-2e16260aa70c" +#' +#' shapefile <- read_zip_shapefile(adc, pid) +#' } +read_zip_shapefile <- function(mn, pid){ + + stopifnot(methods::is(mn, 'MNode')) + + temp <- tempfile() + writeBin(dataone::getObject(mn, pid), temp) + t <- unzip(temp, exdir = tempfile()) + + if (length(grep("shp", tools::file_ext(t))) != 1){ + stop("Zipped directory must contain one and only one .shp file") + } + + shapefile <- sf::st_read(t[grep("shp", tools::file_ext(t))], quiet = T, stringsAsFactors = F) + unlink(temp) + return(shapefile) +} diff --git a/man/read_zip_shapefile.Rd b/man/read_zip_shapefile.Rd new file mode 100644 index 0000000..9dc85f0 --- /dev/null +++ b/man/read_zip_shapefile.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{read_zip_shapefile} +\alias{read_zip_shapefile} +\title{Read a shapefile from a pid} +\usage{ +read_zip_shapefile(mn, pid) +} +\arguments{ +\item{mn}{(MNode) A DataOne Member Node} + +\item{pid}{(character) An object identifier} +} +\value{ +shapefile (sf) The shapefile as an \code{sf} object +} +\description{ +Read a shapefile from a pid that points to the zipped directory of the shapefile and associated files +on a given member node. +} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +pid <- "urn:uuid:294a365f-c0d1-4cc3-a508-2e16260aa70c" + +shapefile <- read_zip_shapefile(adc, pid) +} +} +\author{ +Jeanette Clark jclark@nceas.ucsb.edu +} From 09c987ffe41294229b0c3e9f3990fc77487774f7 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Mar 2019 16:58:30 -0700 Subject: [PATCH 230/318] updates to (hopefully) fix travis, and incorporating Dominic's suggestions --- .travis.yml | 2 +- DESCRIPTION | 2 +- R/helpers.R | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index aee4c81..4f0ca95 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ r: - oldrel - release - devel -r_packages: ncdf4 +r_packages: ncdf4, sf sudo: false cache: packages addons: diff --git a/DESCRIPTION b/DESCRIPTION index 258625e..ac47cba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,6 @@ Imports: methods, stringr, stringi, - sf, tools, uuid, xml2, @@ -44,6 +43,7 @@ Suggests: RCurl, purrr, rmarkdown, + sf, testthat, xslt, yaml diff --git a/R/helpers.R b/R/helpers.R index 93b173e..932406f 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -566,16 +566,23 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType read_zip_shapefile <- function(mn, pid){ stopifnot(methods::is(mn, 'MNode')) + stopifnot(is.character(pid)) + + if (!requireNamespace("sf")) { + stop(call. = FALSE, + "The package 'sf' must be installed to run this function. ", + "Please install it and try again.") + } temp <- tempfile() writeBin(dataone::getObject(mn, pid), temp) - t <- unzip(temp, exdir = tempfile()) + zip_contents <- unzip(temp, exdir = tempfile()) - if (length(grep("shp", tools::file_ext(t))) != 1){ + if (length(grep("shp", tools::file_ext(zip_contents))) != 1){ stop("Zipped directory must contain one and only one .shp file") } - shapefile <- sf::st_read(t[grep("shp", tools::file_ext(t))], quiet = T, stringsAsFactors = F) + shapefile <- sf::st_read(zip_contents[grep("shp", tools::file_ext(zip_contents))], quiet = T, stringsAsFactors = F) unlink(temp) return(shapefile) } From 70ab49076a2987041942b9edfeff4154959135a1 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Mar 2019 17:04:14 -0700 Subject: [PATCH 231/318] fixing syntax issue --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4f0ca95..0827a24 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,9 @@ r: - oldrel - release - devel -r_packages: ncdf4, sf +r_packages: + - ncdf4 + - sf sudo: false cache: packages addons: From af87325c0c11e2693c261fe3417e3f178cdd7f86 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Mar 2019 17:11:24 -0700 Subject: [PATCH 232/318] another attempt at fixing travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 0827a24..1b08bf8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,3 +17,4 @@ addons: - librdf0-dev - libnetcdf-dev - netcdf-bin + - libudunits2-dev From 8227b607a2d11254c08aebbaa97151b88ab40429 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 5 Mar 2019 17:13:41 -0700 Subject: [PATCH 233/318] adding more libraries --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1b08bf8..3228a92 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,4 +17,7 @@ addons: - librdf0-dev - libnetcdf-dev - netcdf-bin - - libudunits2-dev + - libudunits2-dev # for udunits2 + - libgeos-dev # for sf + - libproj-dev # for sf + - libgdal-dev # for sf From ac47f50f9d04c20c43ce3f69b7b45e00c4caf205 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 6 Mar 2019 10:42:13 -0800 Subject: [PATCH 234/318] fixed bug in `which_in_eml` --- R/eml.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index a303678..26307d2 100644 --- a/R/eml.R +++ b/R/eml.R @@ -794,7 +794,7 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { #' } which_in_eml <- function(doc, element, test) { - stopifnot(methods::is(doc, "emld")) + stopifnot(methods::is(doc, "list")) stopifnot(is.character(element)) if (is.character(test)) { @@ -805,7 +805,7 @@ which_in_eml <- function(doc, element, test) { stopifnot(is.function(test)) } - elements_test <- eml_get(eml_list, element) + elements_test <- eml_get(doc, element) if (is.null(elements_test)) { location <- NULL From 001d33f1639632486241f5b4097c9da53fd6b0f5 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Thu, 21 Mar 2019 12:59:41 -0700 Subject: [PATCH 235/318] removing unnecessary rawToChar --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index e6a30c1..5e32f06 100644 --- a/R/editing.R +++ b/R/editing.R @@ -394,7 +394,7 @@ publish_update <- function(mn, if (is.null(metadata_path)) { # Get the metadata doc message("Getting metadata from the MN.") - doc <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) + doc <- EML::read_eml(dataone::getObject(mn, metadata_pid)) } else if (class(metadata_path)[1] == "emld") { # If an eml object is provided, use it directly after validating From b75f2d9b1aae4fceebb6475d02859e72b00f0b93 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Mon, 15 Apr 2019 11:48:16 -0700 Subject: [PATCH 236/318] bug in read_zip_shapefile where it can call "unzip" from the zip library instead of utils --- R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/helpers.R b/R/helpers.R index 932406f..16b0aa8 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -576,7 +576,7 @@ read_zip_shapefile <- function(mn, pid){ temp <- tempfile() writeBin(dataone::getObject(mn, pid), temp) - zip_contents <- unzip(temp, exdir = tempfile()) + zip_contents <- utils::unzip(temp, exdir = tempfile()) if (length(grep("shp", tools::file_ext(zip_contents))) != 1){ stop("Zipped directory must contain one and only one .shp file") From e1a0906377f459c9785b5cb00d6613eadbf723e6 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Fri, 24 May 2019 16:40:36 -0700 Subject: [PATCH 237/318] updated publish_object to reformat sysmeta fileNames --- R/editing.R | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index c501617..914b963 100644 --- a/R/editing.R +++ b/R/editing.R @@ -107,7 +107,7 @@ publish_object <- function(mn, if (public == TRUE) { sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") } - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) dataone::createObject(mn, pid = pid, @@ -979,3 +979,36 @@ update_package_object <- function(mn, return(pkg_new) } + +#' Helper for publish_object. Reformat the filName in system metadata. +#' +#' Reformat the fileName field in an object's system metadata to follow Arctic Data Center +#' system metdata naming conventions. Publish_object calls this function to rename +#' the fileName field in system metadata. +#' +#' @param path (character) full file path +#' @param sysmeta (S4) A system metadata object +#' +reformat_file_name <- function(path, sysmeta) { + base_name <- basename(path) + if (sysmeta@formatId == 'http://www.openarchives.org/ore/terms') { + ext <- '.rdf.xml' + } else if (grepl('eml://ecoinformatics\\.org/eml*', sysmeta@formatId)) { + ext <- '.xml' + # remove extension then truncate to 50 characters + base_name <- stringr::str_sub(base_name, end = -5) %>% + stringr::str_sub(1, 50) + # re-trim if we're in the middle of a word and add extension back on + index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] + base_name <- stringr::str_sub(base_name, 1, index -1) %>% + paste0(ext) + } else { + ext <- paste0('.', tools::file_ext(base_name)) + } + + file_name <- stringr::str_replace_all(base_name, '[^[:alnum:]]', '_') %>% + stringr::str_sub(end = -(nchar(ext) + 1)) %>% + paste0(ext) + + return(file_name) +} From 2999f25982ef3ddba51b665e8d1dd76993177224 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 28 May 2019 09:58:38 -0700 Subject: [PATCH 238/318] add v8 and jq packages with apt-get utility --- .travis.yml | 2 ++ R/editing.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3228a92..7275d32 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,3 +21,5 @@ addons: - libgeos-dev # for sf - libproj-dev # for sf - libgdal-dev # for sf + - libv8-dev + - libjq-dev diff --git a/R/editing.R b/R/editing.R index 914b963..4d0f571 100644 --- a/R/editing.R +++ b/R/editing.R @@ -184,7 +184,7 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = sysmeta@checksumAlgorithm <- "SHA1" slot(sysmeta, "obsoletes", check = FALSE) <- NA slot(sysmeta, "obsoletedBy", check = FALSE) <- NA - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) # Set the replication policy back to default sysmeta <- clear_replication_policy(sysmeta) From b24729dd9c236e6c78efe246cca01943187cad1b Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 28 May 2019 10:30:14 -0700 Subject: [PATCH 239/318] install jq package before v8 to fix travis CI --- .travis.yml | 2 +- man/reformat_file_name.Rd | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 man/reformat_file_name.Rd diff --git a/.travis.yml b/.travis.yml index 7275d32..db39904 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,5 +21,5 @@ addons: - libgeos-dev # for sf - libproj-dev # for sf - libgdal-dev # for sf - - libv8-dev - libjq-dev + - libv8-dev diff --git a/man/reformat_file_name.Rd b/man/reformat_file_name.Rd new file mode 100644 index 0000000..f2898ac --- /dev/null +++ b/man/reformat_file_name.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{reformat_file_name} +\alias{reformat_file_name} +\title{Helper for publish_object. Reformat the filName in system metadata.} +\usage{ +reformat_file_name(path, sysmeta) +} +\arguments{ +\item{path}{(character) full file path} + +\item{sysmeta}{(S4) A system metadata object} +} +\description{ +Reformat the fileName field in an object's system metadata to follow Arctic Data Center +system metdata naming conventions. Publish_object calls this function to rename +the fileName field in system metadata. +} From e99b775e3598c50cefbdb1912045a15c7ee5baaf Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 28 May 2019 11:47:36 -0700 Subject: [PATCH 240/318] downgrade EML to avoid eml2 breaking changes --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ac47cba..25e4786 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Imports: dataone, datapack, digest, - EML, + EML (== 1.0.1), httr, magrittr, methods, From c7dd1da50ad04ff6f6f6e9a9d5cba5607f46f5d9 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 28 May 2019 12:21:56 -0700 Subject: [PATCH 241/318] added remote EML installation --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 25e4786..1ebce8c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,6 +47,8 @@ Suggests: testthat, xslt, yaml +Remotes: + ropensci/EML@1b3d0a2bd63e01a3af99c77aa6a0784c3da3ea06 RoxygenNote: 6.1.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr From deb4ee99f8295fd3eabef2f85a902db2d768d8fb Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 28 May 2019 12:48:32 -0700 Subject: [PATCH 242/318] downgrade EML again to fix travis --- DESCRIPTION | 2 +- R/editing.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ebce8c..0287e32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,7 +48,7 @@ Suggests: xslt, yaml Remotes: - ropensci/EML@1b3d0a2bd63e01a3af99c77aa6a0784c3da3ea06 + ropensci/EML@7a1140796373b0b7216fdc756210f07e318cc1b8 RoxygenNote: 6.1.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/R/editing.R b/R/editing.R index 4d0f571..94ad578 100644 --- a/R/editing.R +++ b/R/editing.R @@ -996,7 +996,7 @@ reformat_file_name <- function(path, sysmeta) { } else if (grepl('eml://ecoinformatics\\.org/eml*', sysmeta@formatId)) { ext <- '.xml' # remove extension then truncate to 50 characters - base_name <- stringr::str_sub(base_name, end = -5) %>% + base_name <- tools::file_path_sans_ext(base_name) %>% stringr::str_sub(1, 50) # re-trim if we're in the middle of a word and add extension back on index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] From 912e7ca41b288aa84ab59bc970787082561b98d9 Mon Sep 17 00:00:00 2001 From: Jeanette <jclark@nceas.ucsb.edu> Date: Wed, 19 Jun 2019 16:17:24 -0700 Subject: [PATCH 243/318] issue #131: carry forward prov in certain scenarios this needs much more testing --- R/editing.R | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index c501617..5d5b0b3 100644 --- a/R/editing.R +++ b/R/editing.R @@ -731,15 +731,43 @@ update_resource_map <- function(mn, me <- get_token_subject() set_rights_holder(mn, resource_map_pid, me) + # Get the old resource map so we can extract any statements we need out of it + # such as PROV statements + old_resource_map_path <- tempfile() + writeLines(rawToChar(dataone::getObject(mn, resource_map_pid)), old_resource_map_path) + statements <- parse_resource_map(old_resource_map_path) + statements <- filter_packaging_statements(statements) + if (is.data.frame(other_statements)) { + statements <- rbind(statements, + other_statements) + } + + prov_pids <- gsub("https://cn-stage-2.test.dataone.org/cn/v[0-9]/resolve/|https://cn.dataone.org/cn/v[0-9]/resolve/", "", c(statements$subject, statements$object)) %>% + gsub("%3A", ":", .) + prov_pids <- prov_pids[-(grep("^http", prov_pids))] %>% # might need to catch other things besides URLs + unique(.) + # Create the replacement resource map if (is.null(identifier)) { identifier <- paste0("resource_map_", new_uuid()) } - new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + if (any(prov_pids %in% data_pids == FALSE)){ + warning("Old provenance contains data pids not in new resource map. Provenance information will be removed") + + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, data_pids = data_pids, child_pids = child_pids, resource_map_pid = identifier) + } + else if (all(prov_pids %in% data_pids == TRUE)) { + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + data_pids = data_pids, + child_pids = child_pids, + other_statements = statements, + resource_map_pid = identifier) + } + stopifnot(file.exists(new_rm_path)) rm(sysmeta) From 7efa12723944c0adaa8600697250f009515426fa Mon Sep 17 00:00:00 2001 From: Jeanette <jclark@nceas.ucsb.edu> Date: Wed, 19 Jun 2019 16:18:57 -0700 Subject: [PATCH 244/318] merge in upstream commits --- .travis.yml | 10 ++++++++- DESCRIPTION | 5 ++++- NAMESPACE | 1 + R/editing.R | 37 +++++++++++++++++++++++++++++-- R/helpers.R | 46 +++++++++++++++++++++++++++++++++++++++ man/read_zip_shapefile.Rd | 32 +++++++++++++++++++++++++++ man/reformat_file_name.Rd | 18 +++++++++++++++ 7 files changed, 145 insertions(+), 4 deletions(-) create mode 100644 man/read_zip_shapefile.Rd create mode 100644 man/reformat_file_name.Rd diff --git a/.travis.yml b/.travis.yml index aee4c81..db39904 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,9 @@ r: - oldrel - release - devel -r_packages: ncdf4 +r_packages: + - ncdf4 + - sf sudo: false cache: packages addons: @@ -15,3 +17,9 @@ addons: - librdf0-dev - libnetcdf-dev - netcdf-bin + - libudunits2-dev # for udunits2 + - libgeos-dev # for sf + - libproj-dev # for sf + - libgdal-dev # for sf + - libjq-dev + - libv8-dev diff --git a/DESCRIPTION b/DESCRIPTION index 35beec3..0287e32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Imports: dataone, datapack, digest, - EML, + EML (== 1.0.1), httr, magrittr, methods, @@ -43,9 +43,12 @@ Suggests: RCurl, purrr, rmarkdown, + sf, testthat, xslt, yaml +Remotes: + ropensci/EML@7a1140796373b0b7216fdc756210f07e318cc1b8 RoxygenNote: 6.1.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 76e50d4..263984a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ export(pid_to_eml_entity) export(pid_to_eml_physical) export(publish_object) export(publish_update) +export(read_zip_shapefile) export(remove_public_read) export(set_abstract) export(set_access) diff --git a/R/editing.R b/R/editing.R index 5d5b0b3..701e40f 100644 --- a/R/editing.R +++ b/R/editing.R @@ -107,7 +107,7 @@ publish_object <- function(mn, if (public == TRUE) { sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") } - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) dataone::createObject(mn, pid = pid, @@ -184,7 +184,7 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = sysmeta@checksumAlgorithm <- "SHA1" slot(sysmeta, "obsoletes", check = FALSE) <- NA slot(sysmeta, "obsoletedBy", check = FALSE) <- NA - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) # Set the replication policy back to default sysmeta <- clear_replication_policy(sysmeta) @@ -1007,3 +1007,36 @@ update_package_object <- function(mn, return(pkg_new) } + +#' Helper for publish_object. Reformat the filName in system metadata. +#' +#' Reformat the fileName field in an object's system metadata to follow Arctic Data Center +#' system metdata naming conventions. Publish_object calls this function to rename +#' the fileName field in system metadata. +#' +#' @param path (character) full file path +#' @param sysmeta (S4) A system metadata object +#' +reformat_file_name <- function(path, sysmeta) { + base_name <- basename(path) + if (sysmeta@formatId == 'http://www.openarchives.org/ore/terms') { + ext <- '.rdf.xml' + } else if (grepl('eml://ecoinformatics\\.org/eml*', sysmeta@formatId)) { + ext <- '.xml' + # remove extension then truncate to 50 characters + base_name <- tools::file_path_sans_ext(base_name) %>% + stringr::str_sub(1, 50) + # re-trim if we're in the middle of a word and add extension back on + index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] + base_name <- stringr::str_sub(base_name, 1, index -1) %>% + paste0(ext) + } else { + ext <- paste0('.', tools::file_ext(base_name)) + } + + file_name <- stringr::str_replace_all(base_name, '[^[:alnum:]]', '_') %>% + stringr::str_sub(end = -(nchar(ext) + 1)) %>% + paste0(ext) + + return(file_name) +} diff --git a/R/helpers.R b/R/helpers.R index 28e5cf4..16b0aa8 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -540,3 +540,49 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType return(results) } + +#' Read a shapefile from a pid +#' +#' Read a shapefile from a pid that points to the zipped directory of the shapefile and associated files +#' on a given member node. +#' +#' @param mn (MNode) A DataOne Member Node +#' @param pid (character) An object identifier +#' +#' @return shapefile (sf) The shapefile as an `sf` object +#' +#' @export +#' +#' @author Jeanette Clark jclark@@nceas.ucsb.edu +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' pid <- "urn:uuid:294a365f-c0d1-4cc3-a508-2e16260aa70c" +#' +#' shapefile <- read_zip_shapefile(adc, pid) +#' } +read_zip_shapefile <- function(mn, pid){ + + stopifnot(methods::is(mn, 'MNode')) + stopifnot(is.character(pid)) + + if (!requireNamespace("sf")) { + stop(call. = FALSE, + "The package 'sf' must be installed to run this function. ", + "Please install it and try again.") + } + + temp <- tempfile() + writeBin(dataone::getObject(mn, pid), temp) + zip_contents <- utils::unzip(temp, exdir = tempfile()) + + if (length(grep("shp", tools::file_ext(zip_contents))) != 1){ + stop("Zipped directory must contain one and only one .shp file") + } + + shapefile <- sf::st_read(zip_contents[grep("shp", tools::file_ext(zip_contents))], quiet = T, stringsAsFactors = F) + unlink(temp) + return(shapefile) +} diff --git a/man/read_zip_shapefile.Rd b/man/read_zip_shapefile.Rd new file mode 100644 index 0000000..9dc85f0 --- /dev/null +++ b/man/read_zip_shapefile.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{read_zip_shapefile} +\alias{read_zip_shapefile} +\title{Read a shapefile from a pid} +\usage{ +read_zip_shapefile(mn, pid) +} +\arguments{ +\item{mn}{(MNode) A DataOne Member Node} + +\item{pid}{(character) An object identifier} +} +\value{ +shapefile (sf) The shapefile as an \code{sf} object +} +\description{ +Read a shapefile from a pid that points to the zipped directory of the shapefile and associated files +on a given member node. +} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +pid <- "urn:uuid:294a365f-c0d1-4cc3-a508-2e16260aa70c" + +shapefile <- read_zip_shapefile(adc, pid) +} +} +\author{ +Jeanette Clark jclark@nceas.ucsb.edu +} diff --git a/man/reformat_file_name.Rd b/man/reformat_file_name.Rd new file mode 100644 index 0000000..f2898ac --- /dev/null +++ b/man/reformat_file_name.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{reformat_file_name} +\alias{reformat_file_name} +\title{Helper for publish_object. Reformat the filName in system metadata.} +\usage{ +reformat_file_name(path, sysmeta) +} +\arguments{ +\item{path}{(character) full file path} + +\item{sysmeta}{(S4) A system metadata object} +} +\description{ +Reformat the fileName field in an object's system metadata to follow Arctic Data Center +system metdata naming conventions. Publish_object calls this function to rename +the fileName field in system metadata. +} From 23ffc4805d1d03fcaf12df04a448e40725f08125 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Thu, 27 Jun 2019 13:54:26 -0700 Subject: [PATCH 245/318] added remove_access sysmeta function --- NAMESPACE | 1 + R/access.R | 94 ++++++++++++++++++++++++++++++++++++ man/remove_access.Rd | 37 ++++++++++++++ tests/testthat/test_access.R | 2 + 4 files changed, 134 insertions(+) create mode 100644 man/remove_access.Rd diff --git a/NAMESPACE b/NAMESPACE index 263984a..662aa89 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,6 +54,7 @@ export(pid_to_eml_physical) export(publish_object) export(publish_update) export(read_zip_shapefile) +export(remove_access) export(remove_public_read) export(set_abstract) export(set_access) diff --git a/R/access.R b/R/access.R index 989a1fc..4ab2183 100644 --- a/R/access.R +++ b/R/access.R @@ -241,6 +241,100 @@ set_access <- function(mn, pids, subjects, permissions = c("read", "write", "cha result } +#' Remove a subject from an object's access policy +#' +#' Remove the given subjects from the access policy for the given objects on the given Member Node. +#' For each type of permission, this function checks if the permission is already set +#' and only updates the System Metadata when a change is needed. +#' +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set permissions for. +#' @param subjects (character) The identifiers of the subjects to set permissions for, typically an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to +#' read, write, and changePermission. +#' +#' @return (logical) Whether an update was needed. +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' remove_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) +#'} +remove_access <- function(mn, pids, subjects, permissions = c("read", "write", "changePermission")) { + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subjects), + all(nchar(subjects)) > 0)) { + stop("Argument 'subjects' must be character class with non-zero number of characters.") + } + + if (any(grepl("^https:\\/\\/orcid\\.org", subjects))) { + stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } + + + result <- c() + + for (pid in pids) { + changed <- FALSE + + sysmeta <- tryCatch({ + dataone::getSystemMetadata(mn, pid) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) + e + }) + + if (!inherits(sysmeta, "SystemMetadata")) { + stop("Failed to get System Metadata.") + } + + for (subject in subjects) { + for (permission in permissions) { + if (datapack::hasAccessRule(sysmeta, subject, permission)) { + sysmeta <- datapack::removeAccessRule(sysmeta, subject, permission) + changed <- TRUE + } + } + } + + if (changed) { + result[pid] <- TRUE + message(paste0("Updating System Metadata for ", pid, ".")) + dataone::updateSystemMetadata(mn, pid, sysmeta) + } else { + message(paste0("No changes needed for ", pid, ". Not updating.")) + result[pid] <- FALSE + } + } + + # Name the result vector + names(result) <- pids + + result +} + #' Set rights holder with access policy for an object #' diff --git a/man/remove_access.Rd b/man/remove_access.Rd new file mode 100644 index 0000000..221aad1 --- /dev/null +++ b/man/remove_access.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/access.R +\name{remove_access} +\alias{remove_access} +\title{Remove a subject from an object's access policy} +\usage{ +remove_access(mn, pids, subjects, permissions = c("read", "write", + "changePermission")) +} +\arguments{ +\item{mn}{(MNode) The Member Node.} + +\item{pids}{(character) The PIDs of the objects to set permissions for.} + +\item{subjects}{(character) The identifiers of the subjects to set permissions for, typically an ORCID or DN.} + +\item{permissions}{(character) Optional. The permissions to set. Defaults to +read, write, and changePermission.} +} +\value{ +(logical) Whether an update was needed. +} +\description{ +Remove the given subjects from the access policy for the given objects on the given Member Node. +For each type of permission, this function checks if the permission is already set +and only updates the System Metadata when a change is needed. +} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +remove_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", + permissions = c("read", "write", "changePermission")) +} +} diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index 5430a6d..6fae112 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -71,6 +71,8 @@ test_that("access functions stop if system metadata is not found", { expect_error(set_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + expect_error(remove_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + expect_error(set_public_read(mn, "test")) expect_error(remove_public_read(mn, "test")) From 0ef8c365c145f9fe45b130a06836c7a3c6a26e12 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Fri, 28 Jun 2019 11:57:47 -0700 Subject: [PATCH 246/318] added reformat_file_name to publish_update --- R/editing.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/editing.R b/R/editing.R index 94ad578..31b1e52 100644 --- a/R/editing.R +++ b/R/editing.R @@ -496,6 +496,9 @@ publish_update <- function(mn, metadata_updated_sysmeta <- datapack::removeAccessRule(metadata_updated_sysmeta, "public", "read") } + # Update fileName to follow ADC naming conventions + metadata_updated_sysmeta@fileName <- reformat_file_name(eml@dataset@title[[1]]@.Data, metadata_updated_sysmeta) + set_rights_holder(mn, metadata_pid, me) dataone::updateObject(mn, @@ -1007,6 +1010,7 @@ reformat_file_name <- function(path, sysmeta) { } file_name <- stringr::str_replace_all(base_name, '[^[:alnum:]]', '_') %>% + stringr::str_replace_all('_[_]*', '_') %>% # replaces consecutive underscores with one stringr::str_sub(end = -(nchar(ext) + 1)) %>% paste0(ext) From 0040f8be31b7d505dc826fad7f240f1a3c499938 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 12:02:08 -0700 Subject: [PATCH 247/318] update description --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index d05d501..2d8ddb7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,6 +44,7 @@ Suggests: RCurl, purrr, rmarkdown, + sf, testthat, xslt, yaml From 2517f4fe2d36ca8e36ab092c487e667437a466d2 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 12:05:02 -0700 Subject: [PATCH 248/318] update Dom's addition to be compatible with eml2 --- R/editing.R | 51 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index 5e32f06..09d24f1 100644 --- a/R/editing.R +++ b/R/editing.R @@ -107,7 +107,7 @@ publish_object <- function(mn, if (public == TRUE) { sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") } - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) dataone::createObject(mn, pid = pid, @@ -184,7 +184,7 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = sysmeta@checksumAlgorithm <- "SHA1" slot(sysmeta, "obsoletes", check = FALSE) <- NA slot(sysmeta, "obsoletedBy", check = FALSE) <- NA - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) # Set the replication policy back to default sysmeta <- clear_replication_policy(sysmeta) @@ -452,10 +452,6 @@ publish_update <- function(mn, doc$access <- list() } - # Write out the document to disk. We do this in part because - # set_other_entities takes a path to the doc. - eml_path <- tempfile() - EML::write_eml(doc, eml_path) # Create System Metadata for the updated EML file metadata_updated_sysmeta <- new("SystemMetadata", @@ -496,6 +492,15 @@ publish_update <- function(mn, metadata_updated_sysmeta <- datapack::removeAccessRule(metadata_updated_sysmeta, "public", "read") } + # Update fileName to follow ADC naming conventions + metadata_updated_sysmeta@fileName <- reformat_file_name(doc$dataset$title, metadata_updated_sysmeta) + + + # Write out the document to disk. We do this in part because + # set_other_entities takes a path to the doc. + eml_path <- tempfile() + EML::write_eml(doc, eml_path) + set_rights_holder(mn, metadata_pid, me) dataone::updateObject(mn, @@ -979,3 +984,37 @@ update_package_object <- function(mn, return(pkg_new) } + +#' Helper for publish_object. Reformat the filName in system metadata. +#' +#' Reformat the fileName field in an object's system metadata to follow Arctic Data Center +#' system metdata naming conventions. Publish_object calls this function to rename +#' the fileName field in system metadata. +#' +#' @param path (character) full file path +#' @param sysmeta (S4) A system metadata object +#' +reformat_file_name <- function(path, sysmeta) { + base_name <- basename(path) + if (sysmeta@formatId == 'http://www.openarchives.org/ore/terms') { + ext <- '.rdf.xml' + } else if (grepl('eml://ecoinformatics\\.org/eml*', sysmeta@formatId)) { + ext <- '.xml' + # remove extension then truncate to 50 characters + base_name <- tools::file_path_sans_ext(base_name) %>% + stringr::str_sub(1, 50) + # re-trim if we're in the middle of a word and add extension back on + index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] + base_name <- stringr::str_sub(base_name, 1, index -1) %>% + paste0(ext) + } else { + ext <- paste0('.', tools::file_ext(base_name)) + } + + file_name <- stringr::str_replace_all(base_name, '[^[:alnum:]]', '_') %>% + stringr::str_replace_all('_[_]*', '_') %>% # replaces consecutive underscores with one + stringr::str_sub(end = -(nchar(ext) + 1)) %>% + paste0(ext) + + return(file_name) +} From 2124b063f06f49e9ff5b90ab05e23a1cc3f3850f Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 12:10:19 -0700 Subject: [PATCH 249/318] replace ::: with :: --- R/access.R | 4 ++-- R/util.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/access.R b/R/access.R index 53fb1c1..1fc6ad8 100644 --- a/R/access.R +++ b/R/access.R @@ -605,10 +605,10 @@ is_public_read <- function(mn, pids, use.names = TRUE) { vapply(pids, USE.NAMES = use.names, FUN.VALUE = logical(1), FUN = function(pid) { url <- paste(mn@endpoint, "meta", utils::URLencode(pid, reserved = TRUE), sep = "/") - response <- dataone:::auth_get(url, node = mn) + response <- dataone::auth_get(url, node = mn) if (response$status_code != "200") { - error_desc <- dataone:::getErrorDescription(response) + error_desc <- dataone::getErrorDescription(response) if (grepl("READ not allowed", error_desc, ignore.case = TRUE)) { return(FALSE) } else { diff --git a/R/util.R b/R/util.R index 6526065..13d8ee2 100644 --- a/R/util.R +++ b/R/util.R @@ -1082,7 +1082,7 @@ set_public_read_all_versions <- function(mn, resource_map_pid) { stopifnot(is(mn, 'MNode')) stopifnot(is_token_set(mn)) stopifnot(is.character(resource_map_pid)) - stopifnot(arcticdatautils:::is_resource_map(mn, resource_map_pid)) + stopifnot(arcticdatautils::is_resource_map(mn, resource_map_pid)) pids <- get_package(mn, resource_map_pid) %>% unlist() From ab0405335f86940280e373665d95851948c0019d Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 12:50:39 -0700 Subject: [PATCH 250/318] Revert "replace ::: with ::" This reverts commit 2124b063f06f49e9ff5b90ab05e23a1cc3f3850f. --- R/access.R | 4 ++-- R/util.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/access.R b/R/access.R index 1fc6ad8..53fb1c1 100644 --- a/R/access.R +++ b/R/access.R @@ -605,10 +605,10 @@ is_public_read <- function(mn, pids, use.names = TRUE) { vapply(pids, USE.NAMES = use.names, FUN.VALUE = logical(1), FUN = function(pid) { url <- paste(mn@endpoint, "meta", utils::URLencode(pid, reserved = TRUE), sep = "/") - response <- dataone::auth_get(url, node = mn) + response <- dataone:::auth_get(url, node = mn) if (response$status_code != "200") { - error_desc <- dataone::getErrorDescription(response) + error_desc <- dataone:::getErrorDescription(response) if (grepl("READ not allowed", error_desc, ignore.case = TRUE)) { return(FALSE) } else { diff --git a/R/util.R b/R/util.R index 13d8ee2..6526065 100644 --- a/R/util.R +++ b/R/util.R @@ -1082,7 +1082,7 @@ set_public_read_all_versions <- function(mn, resource_map_pid) { stopifnot(is(mn, 'MNode')) stopifnot(is_token_set(mn)) stopifnot(is.character(resource_map_pid)) - stopifnot(arcticdatautils::is_resource_map(mn, resource_map_pid)) + stopifnot(arcticdatautils:::is_resource_map(mn, resource_map_pid)) pids <- get_package(mn, resource_map_pid) %>% unlist() From 571c21d89a3c6e8857654d4ffaec098c72063f60 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 12:58:38 -0700 Subject: [PATCH 251/318] put this back in the correct place --- R/editing.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/editing.R b/R/editing.R index 09d24f1..82f735d 100644 --- a/R/editing.R +++ b/R/editing.R @@ -452,6 +452,10 @@ publish_update <- function(mn, doc$access <- list() } + # Write out the document to disk. We do this in part because + # set_other_entities takes a path to the doc. + eml_path <- tempfile() + EML::write_eml(doc, eml_path) # Create System Metadata for the updated EML file metadata_updated_sysmeta <- new("SystemMetadata", @@ -495,12 +499,6 @@ publish_update <- function(mn, # Update fileName to follow ADC naming conventions metadata_updated_sysmeta@fileName <- reformat_file_name(doc$dataset$title, metadata_updated_sysmeta) - - # Write out the document to disk. We do this in part because - # set_other_entities takes a path to the doc. - eml_path <- tempfile() - EML::write_eml(doc, eml_path) - set_rights_holder(mn, metadata_pid, me) dataone::updateObject(mn, From da683e5501687f2d9772357d0688d1c3fc3617ec Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 14:21:51 -0700 Subject: [PATCH 252/318] update to 2.0 on CRAN --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2d8ddb7..f38a203 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Imports: dataone, datapack, digest, - EML (>= 1.99.0), + EML (>= 2.0), httr, magrittr, methods, @@ -34,7 +34,6 @@ Imports: uuid, xml2, XML -Remotes: ropensci/EML Suggests: dplyr, humaniformat, From b06e3b591bf5465fb6a8edd5d23f1708c0cc3d00 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 14:23:14 -0700 Subject: [PATCH 253/318] update a dummy package function, fix a minor bug in pid_to_eml_entity --- R/eml.R | 7 +++++-- R/helpers.R | 14 +++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/R/eml.R b/R/eml.R index 26307d2..f55d694 100644 --- a/R/eml.R +++ b/R/eml.R @@ -63,6 +63,9 @@ pid_to_eml_entity <- function(mn, if (entity_type == "otherEntity" && length(entity$entity_type) == 0) { entity$entityType <- "Other" } + else if (entity_type != "otherEntity"){ + entity$entityType <- NULL + } return(entity) } @@ -705,7 +708,7 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { stopifnot(methods::is(doc, "emld")) stopifnot(is.logical(eml_validate(doc))) stopifnot(is.numeric(index)) - stopifnot(length(eml_get_simple(doc$dataset$otherEntity, "entityName")) >= length(index)) + stopifnot(length(eml_get_simple(doc$dataset$otherEntity, "entityName")) >= index) ## set OE entityTypes to NULL and select the ones we want to use @@ -823,7 +826,7 @@ which_in_eml <- function(doc, element, test) { location <- NULL } } - + names(location) <- NULL return(location) } diff --git a/R/helpers.R b/R/helpers.R index 16b0aa8..81574fe 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -368,10 +368,10 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { # Import EML eml_path_original <- file.path(system.file(package = "arcticdatautils"), "example-eml-full.xml") - eml <- EML::read_eml(eml_path_original) + doc <- EML::read_eml(eml_path_original) # Add objects to EML - eml@dataset@title[[1]]@.Data <- title + doc$dataset$title <- title attr <- data.frame( attributeName = c("Date", "Location", "Salinity", "Temperature"), @@ -394,14 +394,14 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { dT1 <- pid_to_eml_entity(mn, pid = pid_csv1, entityType = "dataTable") - dT1@attributeList <- attributeList + dT1$attributeList <- attributeList dT2 <- pid_to_eml_entity(mn, pid = pid_csv2, entityType = "dataTable") - dT2@attributeList <- attributeList + dT2$attributeList <- attributeList - eml@dataset@dataTable <- c(dT1, dT2) + doc$dataset$dataTable <- list(dT1, dT2) oE1 <- pid_to_eml_entity(mn, pid = pid_jpg1, @@ -411,10 +411,10 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { pid = pid_R1, entityType = "otherEntity") - eml@dataset@otherEntity <- c(oE1, oE2) + doc$dataset$otherEntity <- list(oE1, oE2) eml_path <- tempfile(fileext = ".xml") - EML::write_eml(eml, eml_path) + EML::write_eml(doc, eml_path) pid_eml <- publish_object(mn, path = eml_path, From 6cf6c26c6bd4159cb862cd442fec31319c2b74a9 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 14:24:43 -0700 Subject: [PATCH 254/318] update tests --- tests/testthat/test_editing.R | 20 +++++++++---------- tests/testthat/test_eml.R | 37 +++++++++++++++++------------------ 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index bcda829..debd453 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -224,17 +224,17 @@ test_that("publish_update removes the deprecated eml@access element", { eml_path <- tempfile(fileext = ".xml") writeBin(dataone::getObject(mn, pids$metadata), eml_path) - eml <- EML::read_eml(eml_path) + doc <- EML::read_eml(eml_path) # Populate dummy access element - eml@access@allow <- c(new("allow", .Data = "hello")) - write_eml(eml, eml_path) + doc$access <- list(allow = "hello") + write_eml(doc, eml_path) new_pids <- publish_update(mn, pids$metadata, pids$resource_map, metadata_path = eml_path) updated_eml_path <- tempfile(fileext = ".xml") writeBin(dataone::getObject(mn, new_pids$metadata), updated_eml_path) new_eml <- EML::read_eml(updated_eml_path) - expect_equal(0, length(new_eml@access@allow)) + expect_equal(0, length(new_eml$access$allow)) }) test_that("publishing an object with a valid format ID succeeds", { @@ -452,20 +452,20 @@ test_that("update_package_object updates EML", { attributeList1 <- EML::set_attributes(attributes1) phys <- pid_to_eml_physical(mn, pkg$data[1]) - dummy_data_table <- new("dataTable", + dummy_data_table <- list(dataTable = list( entityName = "Dummy Data Table", entityDescription = "Dummy Description", physical = phys, - attributeList = attributeList1) + attributeList = attributeList1)) - eml <- EML::read_eml(rawToChar(getObject(mn, pkg$metadata))) - eml@dataset@dataTable <- c(dummy_data_table) + doc <- EML::read_eml(rawToChar(getObject(mn, pkg$metadata))) + doc$dataset$dataTable <- dummy_data_table otherEnts <- pid_to_eml_entity(mn, pkg$data[2:3], entityType = "otherEntity") - eml@dataset@otherEntity <- new("ListOfotherEntity", otherEnts) + doc$dataset$otherEntity <- otherEnts eml_path <- tempfile(fileext = ".xml") - EML::write_eml(eml, eml_path) + EML::write_eml(doc, eml_path) pkg <- publish_update(mn, metadata_pid = pkg$metadata, diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index b1fa94f..42ab63d 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -91,7 +91,7 @@ test_that("a dataTable and otherEntity can be added from a pid", { # Create a dataTable DT <- pid_to_eml_entity(mn, pid2, - entityType = "dataTable", + entity_type = "dataTable", entityName = dummy_entityName, entityDescription = dummy_entityDescription, attributeList = dummy_attributeList) @@ -120,24 +120,23 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { expect_error(eml_otherEntity_to_dataTable(doc, "1")) # subscripts out of bounds - expect_error(eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[2]])) expect_error(eml_otherEntity_to_dataTable(doc, 2)) - # Duplicate entityNames found + # Duplicate entityNames found **function does not currently catch this** doc$dataset$otherEntity[[2]] <- doc$dataset$otherEntity[[1]] - expect_error(eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]])) + expect_error(eml_otherEntity_to_dataTable(doc, 1)) }) -test_that("eml_otherEntity_to_dataTable fails gracefully", { +test_that("eml_otherEntity_to_dataTable works", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") } doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) - otherEntity <- doc$dataset$otherEntity[[1]] + otherEntity <- doc$dataset$otherEntity - doc <- eml_otherEntity_to_dataTable(doc, doc$dataset$otherEntity[[1]]) + doc <- eml_otherEntity_to_dataTable(doc, 1) # test that otherEntity was removed expect_length(doc$dataset$otherEntity, 0) @@ -191,39 +190,39 @@ test_that("which_in_eml returns correct locations", { attributeList <- EML::set_attributes(attributes) - dataTable_1 <- eml$dataTable( + dataTable_1 <- list( entityName = "2016_data.csv", entityDescription = "2016 data", attributeList = attributeList) dataTable_2 <- dataTable_1 - dataTable_3 <- eml$dataTable( + dataTable_3 <- list( entityName = "2015_data.csv", entityDescription = "2016 data", attributeList = attributeList) - creator_1 <- eml$creator( - individualName = eml$individualName( + creator_1 <- list( + individualName = list(individualName = list( surName = "LAST", - givenName = "FIRST")) - creator_2 <- eml$creator( - individualName = eml$individualName( + givenName = "FIRST"))) + creator_2 <- list( + individualName = list(individualName = list( surName = "LAST", - givenName = "FIRST_2")) + givenName = "FIRST_2"))) creator_3 <- creator_2 title <- "Title" - dataset <- eml$dataset( + dataset <- list(dataset = list( title = title, creator = list(creator_1, creator_2, creator_3), - dataTable = list(dataTable_1, dataTable_2, dataTable_3)) + dataTable = list(dataTable_1, dataTable_2, dataTable_3))) - doc <- list(dataset = dataset) + doc <- dataset expect_equal(c(2,3), which_in_eml(doc$dataset$creator, "givenName", "FIRST_2")) - expect_error(which_in_eml(doc$dataset$dataTable, "attributeName", "length_3")) + expect_error(which_in_eml(doc$dataset$dataTable, "attributeName", "length_3")) # not sure why this should fail? expect_equal(c(1,3), which_in_eml(doc$dataset$dataTable[[1]]$attribute, "attributeName", function(x) {grepl("^length", x)})) }) From 21f3aa377a8502d2d4ad8fade89fde885bdd3842 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 15 Jul 2019 14:32:54 -0700 Subject: [PATCH 255/318] remove nested list --- tests/testthat/test_editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index debd453..f95d0d5 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -452,7 +452,7 @@ test_that("update_package_object updates EML", { attributeList1 <- EML::set_attributes(attributes1) phys <- pid_to_eml_physical(mn, pkg$data[1]) - dummy_data_table <- list(dataTable = list( + dummy_data_table <- list( entityName = "Dummy Data Table", entityDescription = "Dummy Description", physical = phys, From e57016a1e8b6b72644bce577449b030903eddbc4 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Mon, 15 Jul 2019 16:13:40 -0700 Subject: [PATCH 256/318] updated bugs from unit tests --- R/eml.R | 5 ++++- man/eml_get_simple.Rd | 3 ++- man/reorder_pids.Rd | 2 ++ tests/testthat/test_access.R | 2 +- tests/testthat/test_editing.R | 2 +- tests/testthat/test_sysmeta.R | 2 +- 6 files changed, 11 insertions(+), 5 deletions(-) diff --git a/R/eml.R b/R/eml.R index f55d694..6d56b2d 100644 --- a/R/eml.R +++ b/R/eml.R @@ -948,13 +948,14 @@ eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTab #' @export #' #' @examples +#' \dontrun{ #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') #' #' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) #' #' datatable_names <- eml_get_simple(doc$dataset$dataTable, element = "entityName") -#' +#'} #' eml_get_simple <- function(doc, element){ out <- eml_get(doc, element, from = "list") @@ -978,6 +979,7 @@ eml_get_simple <- function(doc, element){ #' @export #' #' @examples +#' \dontrun{ #' cn <- dataone::CNode('PROD') #' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') # @@ -986,6 +988,7 @@ eml_get_simple <- function(doc, element){ #' #' # return all entity types #' ordered_pids <- reorder_pids(ids$data, doc) +#'} #' reorder_pids <- function(pid_list, doc){ stopifnot(!is.null(names(pid_list))) diff --git a/man/eml_get_simple.Rd b/man/eml_get_simple.Rd index 1ebe1f8..967effd 100644 --- a/man/eml_get_simple.Rd +++ b/man/eml_get_simple.Rd @@ -24,12 +24,13 @@ flattened into a named character vector. This function is best used to extract values from elements that have no children. } \examples{ +\dontrun{ cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) datatable_names <- eml_get_simple(doc$dataset$dataTable, element = "entityName") - +} } diff --git a/man/reorder_pids.Rd b/man/reorder_pids.Rd index e3007a4..313e6f0 100644 --- a/man/reorder_pids.Rd +++ b/man/reorder_pids.Rd @@ -20,6 +20,7 @@ returned from \code{get_package}, and reorders them according to the order they are given in the EML document. } \examples{ +\dontrun{ cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn,'urn:node:ARCTIC') ids <- get_package(adc, 'resource_map_doi:10.18739/A2S17SS1M', file_names = TRUE) @@ -27,5 +28,6 @@ doc <- EML::read_eml(dataone::getObject(adc, ids$metadata)) # return all entity types ordered_pids <- reorder_pids(ids$data, doc) +} } diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index 6fae112..5579c6f 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -1,6 +1,6 @@ context("Access rules") -mn <- env_load()$mn +mn <- tryCatch(env_load()$mn, error = function(e) env_load()$mn) test_that("get_package works for a simple package", { if (!is_token_set(mn)) { diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index f95d0d5..320e4f6 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -456,7 +456,7 @@ test_that("update_package_object updates EML", { entityName = "Dummy Data Table", entityDescription = "Dummy Description", physical = phys, - attributeList = attributeList1)) + attributeList = attributeList1) doc <- EML::read_eml(rawToChar(getObject(mn, pkg$metadata))) doc$dataset$dataTable <- dummy_data_table diff --git a/tests/testthat/test_sysmeta.R b/tests/testthat/test_sysmeta.R index fc1cac9..ed29476 100644 --- a/tests/testthat/test_sysmeta.R +++ b/tests/testthat/test_sysmeta.R @@ -24,7 +24,7 @@ test_that("the replication policy gets defaulted correctly", { }) test_that("all system metadata is retrieved", { - cn_staging <- CNode("STAGING") + cn_staging <- tryCatch(CNode("STAGING"), error = function(e) CNode("STAGING")) adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") rm_pid <- "resource_map_urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610" From bbe9f1320e750b95bbcb6fbdbc8a67935f58eb88 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 12:03:08 -0700 Subject: [PATCH 257/318] add check to ensure entityNames are unique and update test --- R/eml.R | 4 ++++ tests/testthat/test_eml.R | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index 6d56b2d..0a91670 100644 --- a/R/eml.R +++ b/R/eml.R @@ -709,6 +709,10 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { stopifnot(is.logical(eml_validate(doc))) stopifnot(is.numeric(index)) stopifnot(length(eml_get_simple(doc$dataset$otherEntity, "entityName")) >= index) + if (any(duplicated(eml_get_simple(doc$dataset, "entityName"))) == T){ + stop(call. = FALSE, + "entityNames must be unique") + } ## set OE entityTypes to NULL and select the ones we want to use diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 42ab63d..31ed456 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -122,8 +122,8 @@ test_that("eml_otherEntity_to_dataTable fails gracefully", { # subscripts out of bounds expect_error(eml_otherEntity_to_dataTable(doc, 2)) - # Duplicate entityNames found **function does not currently catch this** - doc$dataset$otherEntity[[2]] <- doc$dataset$otherEntity[[1]] + # Duplicate entityNames found + doc$dataset$otherEntity <- list(doc$dataset$otherEntity, doc$dataset$otherEntity) expect_error(eml_otherEntity_to_dataTable(doc, 1)) }) From 07261901873f059b829236cf8c034f2a4d40abc7 Mon Sep 17 00:00:00 2001 From: Dominic Mullen <dmullen17@gmail.com> Date: Tue, 16 Jul 2019 13:03:51 -0700 Subject: [PATCH 258/318] Fixed a bug in reformat_file_name that Jeanette found if a file name has no spaces --- R/editing.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/editing.R b/R/editing.R index 31b1e52..4c6df17 100644 --- a/R/editing.R +++ b/R/editing.R @@ -1003,6 +1003,8 @@ reformat_file_name <- function(path, sysmeta) { stringr::str_sub(1, 50) # re-trim if we're in the middle of a word and add extension back on index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] + # Set index to the end of the string if there are no spaces. Add + 1 because str_sub subtracts one to remove the white space. + if (is.na(index)) index <- nchar(base_name) + 1 base_name <- stringr::str_sub(base_name, 1, index -1) %>% paste0(ext) } else { From ba525394245d0f14d488115df613b98426aac02c Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 14:19:34 -0700 Subject: [PATCH 259/318] updating `update_package_objet` and related functions and tests --- R/editing.R | 87 ++++++++++++++++++++++------------- R/helpers.R | 12 +++-- tests/testthat/test_editing.R | 27 ++++++----- 3 files changed, 79 insertions(+), 47 deletions(-) diff --git a/R/editing.R b/R/editing.R index 82f735d..db0b862 100644 --- a/R/editing.R +++ b/R/editing.R @@ -838,7 +838,7 @@ set_file_name <- function(mn, pid, name) { #' of a data object once it has been updated. #' This is a helper function for [update_package_object()]. #' -#' @param eml (eml) An EML class object. +#' @param doc (emld) An EML object. #' @param mn (MNode) The Member Node of the data package. #' @param data_pid (character) The identifier of the data object to be updated. #' @param new_data_pid (character) The new identifier of the updated data object. @@ -846,42 +846,72 @@ set_file_name <- function(mn, pid, name) { #' @importFrom stringr str_detect #' #' @noRd -update_physical <- function(eml, mn, data_pid, new_data_pid) { - stopifnot(is(eml, "eml")) +update_physical <- function(doc, mn, data_pid, new_data_pid) { + stopifnot(is(doc, "emld")) stopifnot(is(mn, "MNode")) stopifnot(is.character(data_pid), nchar(data_pid) > 0) stopifnot(is.character(new_data_pid), nchar(new_data_pid) > 0) - all_url <- unlist(EML::eml_get(eml, "url")) + all_url <- eml_get(doc, "url") %>% + grep("^http", ., value = T) %>% + unname() + if (sum(stringr::str_detect(all_url, data_pid)) == 0) { stop("The obsoleted data PID does not match any physical sections, so the EML will not be updated.") } - dataTable_url <- unlist(EML::eml_get(eml@dataset@dataTable, "url")) + if (length(doc$dataset$dataTable) != 0){ + dataTable_url <- eml_get(doc$dataset$dataTable, "url") %>% + grep("^http", ., value = T) %>% + unname() - if (any(stringr::str_detect(dataTable_url, data_pid))) { - position <- which(stringr::str_detect(dataTable_url, data_pid)) - new_phys <- pid_to_eml_physical(mn, new_data_pid) - eml@dataset@dataTable[[position]]@physical@.Data <- new_phys + if (any(stringr::str_detect(dataTable_url, data_pid))) { + position <- which(stringr::str_detect(dataTable_url, data_pid)) + new_phys <- pid_to_eml_physical(mn, new_data_pid) + if(all(is.null(names(doc$dataset$dataTable)))){ + doc$dataset$dataTable[[position]]$physical <- new_phys + } + else if (all(is.null(names(doc$dataset$dataTable))) == F & position == 1){ + doc$dataset$dataTable$physical <- new_phys + } + } } - otherEntity_url <- unlist(EML::eml_get(eml@dataset@otherEntity, "url")) + if (length(doc$dataset$otherEntity) != 0){ + otherEntity_url <- eml_get(doc$dataset$otherEntity, "url") %>% + grep("^http", ., value = T) %>% + unname() - if (any(stringr::str_detect(otherEntity_url, data_pid))) { - position <- which(stringr::str_detect(otherEntity_url, data_pid)) - new_phys <- pid_to_eml_physical(mn, new_data_pid) - eml@dataset@otherEntity[[position]]@physical@.Data <- new_phys + if (any(stringr::str_detect(otherEntity_url, data_pid))) { + position <- which(stringr::str_detect(otherEntity_url, data_pid)) + new_phys <- pid_to_eml_physical(mn, new_data_pid) + if(all(is.null(names(doc$dataset$otherEntity)))){ + doc$dataset$otherEntity[[position]]$physical <- new_phys + } + else if (all(is.null(names(doc$dataset$otherEntity))) == F & position == 1){ + doc$dataset$otherEntity$physical <- new_phys + } + } } - spatialVector_url <- unlist(EML::eml_get(eml@dataset@spatialVector, "url")) + if (length(doc$dataset$spatialVector) != 0){ + spatialVector_url <- eml_get(doc$dataset$spatialVector, "url") %>% + grep("^http", ., value = T) %>% + unname() - if (any(stringr::str_detect(spatialVector_url, data_pid))) { - position <- which(stringr::str_detect(spatialVector_url, data_pid)) - new_phys <- pid_to_eml_physical(mn, new_data_pid) - eml@dataset@spatialVector[[position]]@physical@.Data <- new_phys + if (any(stringr::str_detect(spatialVector_url, data_pid))) { + position <- which(stringr::str_detect(spatialVector_url, data_pid)) + new_phys <- pid_to_eml_physical(mn, new_data_pid) + if(all(is.null(names(doc$dataset$spatialVector)))){ + doc$dataset$spatialVector[[position]]$physical <- new_phys + } + else if (all(is.null(names(doc$dataset$spatialVector))) == F & position == 1){ + doc$dataset$spatialVector$physical <- new_phys + } + } } - invisible(eml) + return(doc) } @@ -943,7 +973,7 @@ update_package_object <- function(mn, stopifnot(is.logical(public)) pkg <- get_package(mn, resource_map_pid) - eml <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) + doc <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) new_data_pid <- update_object(mn, pid = data_pid, @@ -953,18 +983,13 @@ update_package_object <- function(mn, other_data_pids <- pkg$data[which(pkg$data != data_pid)] # wrapped in which for better NA handling new_data_pids <- c(other_data_pids, new_data_pid) - eml_new <- tryCatch(update_physical(eml = eml, + doc_new <- update_physical(doc = doc, mn = mn, data_pid = data_pid, - new_data_pid = new_data_pid), - error = function(e) { - message("The obsoleted data PID does not match any physical sections, so the EML will not be updated.", - "\nCheck if the correct resource map PID was given.") - return(eml) - }) - - eml_path <- tempfile(fileext = ".xml") - EML::write_eml(eml_new, eml_path) + new_data_pid = new_data_pid) + + eml_path <- tempfile() + EML::write_eml(doc_new, eml_path) pkg_new <- publish_update(mn, metadata_pid = pkg$metadata, diff --git a/R/helpers.R b/R/helpers.R index 81574fe..26dc587 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -364,6 +364,8 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { path = "dummy1.R", format_id = "application/R") + unlink(c("dummy1.csv", "dummy2.csv", "dummy1.jpg", "dummy1.R")) + data_pids <- c(pid_csv1, pid_csv2, pid_jpg1, pid_R1) # Import EML @@ -393,23 +395,23 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { dT1 <- pid_to_eml_entity(mn, pid = pid_csv1, - entityType = "dataTable") + entity_type = "dataTable") dT1$attributeList <- attributeList dT2 <- pid_to_eml_entity(mn, pid = pid_csv2, - entityType = "dataTable") + entity_type = "dataTable") dT2$attributeList <- attributeList doc$dataset$dataTable <- list(dT1, dT2) oE1 <- pid_to_eml_entity(mn, pid = pid_jpg1, - entityType = "otherEntity") + entity_type = "otherEntity") oE2 <- pid_to_eml_entity(mn, pid = pid_R1, - entityType = "otherEntity") + entity_type = "otherEntity") doc$dataset$otherEntity <- list(oE1, oE2) @@ -425,7 +427,7 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { metadata_pid = pid_eml, data_pids = data_pids) - file.remove(c("dummy1.csv", "dummy2.csv", "dummy1.jpg", "dummy1.R"), eml_path) + file.remove(eml_path) return(list(resource_map = resource_map_pid, metadata = pid_eml, diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 320e4f6..1a1c11e 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -316,7 +316,7 @@ test_that("update_physical works", { path = "dummy_object.csv", format_id = "text/csv") - file.remove("dummy_object.csv") + unlink("dummy_object.csv") pkg_new <- publish_update(mn, resource_map_pid = pkg$resource_map, @@ -330,8 +330,12 @@ test_that("update_physical works", { data_pid = pkg$data[2], new_data_pid = new_data_pid) - url_original <- unlist(EML::eml_get(eml_original, "url")) - url_new <- unlist(EML::eml_get(eml_new, "url")) + t <- tempfile() + write_eml(eml_new, t) + eml_new <- read_eml(t) + + url_original <- eml_get(eml_original, "url") %>% grep("^http", ., value = T) %>% unname() + url_new <- eml_get(eml_new, "url") %>% grep("^http", ., value = T) %>% unname() expect_equal(sum(stringr::str_detect(url_original, pkg$data[1])), 1) expect_equal(sum(stringr::str_detect(url_original, pkg$data[2])), 1) @@ -384,8 +388,8 @@ test_that("update_package_object changes specified data object and rest of packa eml_new <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg_new$metadata))) - url_original <- unlist(EML::eml_get(eml_original, "url")) - url_new <- unlist(EML::eml_get(eml_new, "url")) + url_original <- eml_get(eml_original, "url") %>% grep("^http", ., value = T) %>% unname() + url_new <- eml_get(eml_new, "url") %>% grep("^http", ., value = T) %>% unname() expect_true(url_original[2] != url_new[2]) expect_equal(url_original[1], url_new[1]) @@ -426,7 +430,7 @@ test_that("update_package_object errors if wrong input", { data_pid = file_path, new_data_path = "something", rm_pid = 1)) - file.remove(file_path) + unlink(file_path) }) test_that("update_package_object updates EML", { @@ -452,8 +456,7 @@ test_that("update_package_object updates EML", { attributeList1 <- EML::set_attributes(attributes1) phys <- pid_to_eml_physical(mn, pkg$data[1]) - dummy_data_table <- list( - entityName = "Dummy Data Table", + dummy_data_table <- list(entityName = "Dummy Data Table", entityDescription = "Dummy Description", physical = phys, attributeList = attributeList1) @@ -461,7 +464,8 @@ test_that("update_package_object updates EML", { doc <- EML::read_eml(rawToChar(getObject(mn, pkg$metadata))) doc$dataset$dataTable <- dummy_data_table - otherEnts <- pid_to_eml_entity(mn, pkg$data[2:3], entityType = "otherEntity") + otherEnts <- list(pid_to_eml_entity(mn, pkg$data[2], entityType = "otherEntity"), + pid_to_eml_entity(mn, pkg$data[3], entityType = "otherEntity")) doc$dataset$otherEntity <- otherEnts eml_path <- tempfile(fileext = ".xml") @@ -489,11 +493,12 @@ test_that("update_package_object updates EML", { public = TRUE, use_doi = FALSE) - url_initial <- unlist(EML::eml_get(eml, "url")) + doc <- read_eml(getObject(mn, pkg$metadata)) + url_initial <- eml_get(doc, "url") %>% grep("^http", ., value = T) %>% unname() expect_equal(sum(stringr::str_count(url_initial, data_pid)), 1) eml_new <- EML::read_eml(rawToChar(getObject(mn, pkg_new$metadata))) - url_final <- unlist(EML::eml_get(eml_new, "url")) + url_final <- eml_get(eml_new, "url") %>% grep("^http", ., value = T) %>% unname() expect_equal(sum(stringr::str_count(url_final, data_pid)), 0) pid_matches <- lapply(seq_along(pkg_new$data), From abd56e2068569a4e2c36821eff255c135c13c67d Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 14:35:59 -0700 Subject: [PATCH 260/318] minor fix to keep `set_physical` from erroring on an empty file --- tests/testthat/test_editing.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 1a1c11e..e5e06bc 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -357,7 +357,9 @@ test_that("update_package_object changes specified data object and rest of packa pkg <- create_dummy_package_full(mn, title = "Check update_package_object") new_data_path <- "test_file.csv" - file.create(new_data_path) + dummy_data <- data.frame(col1 = 1:26, col2 = letters) + new_data_path <- tempfile(fileext = ".csv") + write.csv(dummy_data, new_data_path, row.names = FALSE) data_pid <- pkg$data[2] From c5383ab03daa730baef8bcb8ed1bbfe7549ec875 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 14:41:31 -0700 Subject: [PATCH 261/318] merge in changes to master --- R/editing.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/editing.R b/R/editing.R index db0b862..eedcf09 100644 --- a/R/editing.R +++ b/R/editing.R @@ -1028,6 +1028,8 @@ reformat_file_name <- function(path, sysmeta) { stringr::str_sub(1, 50) # re-trim if we're in the middle of a word and add extension back on index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] + # Set index to the end of the string if there are no spaces. Add + 1 because str_sub subtracts one to remove the white space. + if (is.na(index)) index <- nchar(base_name) + 1 base_name <- stringr::str_sub(base_name, 1, index -1) %>% paste0(ext) } else { From a41c5befa2612824889d442fce2ef0172d4c7992 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 16:17:55 -0700 Subject: [PATCH 262/318] fix ORCID helper and add an email one --- R/helpers.R | 61 +++++++++++++++++++++++++---------- tests/testthat/test_access.R | 5 +-- tests/testthat/test_helpers.R | 13 +------- 3 files changed, 48 insertions(+), 31 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index 26dc587..5da0e4d 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -434,10 +434,9 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { data = data_pids)) } - #' Retrieve a name from an ORCID URL #' -#' Retrieve first and last name from an ORCID URL by scraping the page. +#' Retrieve first and last name from an ORCID URL. #' #' @param orcid_url (character) A valid ORCID URL address. #' @@ -449,19 +448,42 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { #' \dontrun{ #' pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') #' } + get_orcid_name <- function(orcid_url) { - req <- httr::GET(orcid_url) + req <- httr::GET(paste0(orcid_url, "/person.json")) if (req$status_code != 200) { stop('Failed to read in ', orcid_url) } + json <- httr::content(req) + return(json$displayName) +} - name <- httr::content(req, "text") %>% - stringr::str_extract("<title>.*<") %>% - stringr::str_split(" ") %>% - unlist() %>% - stringr::str_remove("<title>") +#' Retrieve an email address from an ORCID URL +#' +#' Retrieve public email addresses from an ORCID URL. +#' +#' @param orcid_url (character) A valid ORCID URL address. +#' +#' @return (character) Public e-mail addresses. +#' +#' @noRd +#' +#' @examples +#' \dontrun{ +#' pi_email <- get_orcid_email('https://orcid.org/0000-0002-2561-5840') +#' } - return(paste(name[1], name[2])) +get_orcid_email <- function(orcid_url) { + req <- httr::GET(paste0(orcid_url, "/person.json")) + if (req$status_code != 200) { + stop('Failed to read in ', orcid_url) + } + json <- httr::content(req) + email_list <- eml_get_simple(json$publicGroupedEmails, "email") %>% paste0(., collapse = ";") + if (is.null(email_list)){ + email_list <- NA + } + return(email_list) } @@ -474,7 +496,7 @@ get_orcid_name <- function(orcid_url) { #' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param formatType (character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. -#' @param whitelist (character) An xml list of admin orcid Identifiers. Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +#' @param whitelist (logical) Whether to filter out ADC admins, as listed at: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org #' #' @export #' @@ -498,10 +520,10 @@ get_orcid_name <- function(orcid_url) { #' #' } list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*', - whitelist = 'https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org') { - if (!requireNamespace('lubridate', 'purrr', 'RCurl')) { + use_whitelist = T) { + if (!requireNamespace('lubridate', "purrr", 'RCurl')) { stop(call. = FALSE, - 'The packages "lubridate", "purrr", and "RCurl" must be installed to run this function. ', + 'The packages "lubridate", "purrr, and "RCurl" must be installed to run this function. ', 'Please install them and try again.') } stopifnot(methods::is(mn, 'MNode')) @@ -517,8 +539,11 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType if (!(formatType %in% c('RESOURCE', 'METADATA', 'DATA', '*'))) { stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') } + if (as.Date(from) > as.Date(to)){ + stop('"from" date must be after "to" date') + } - req <- httr::GET(whitelist) + req <- httr::GET('https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org') if(req$status_code != 200) { warning('Failed to read in', whitelist, '. Results will include admin submissions / edits.') } @@ -530,12 +555,14 @@ list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", rows = 10000), as = "data.frame") - - # Filter out rows where the submitter is in the whitelist - results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] + if (use_whitelist == T){ + # Filter out rows where the submitter is in the whitelist + results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] + } # Return full names based on orcid Id results$submitter_name <- purrr::map(results$submitter, get_orcid_name) %>% unlist() + results$submitter_email <- purrr::map(results$submitter, get_orcid_email) %>% unlist() # Arrange by dateUploaded results <- dplyr::arrange(results, dateUploaded) diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index 5579c6f..61d5e91 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -9,11 +9,12 @@ test_that("get_package works for a simple package", { pkg <- create_dummy_package(mn) Sys.sleep(1) - get_pkg <- get_package(mn, pkg$metadata) + get_pkg <- get_package(mn, pkg$resource_map) expect_true(pkg$metadata == get_pkg$metadata) expect_true(pkg$resource_map == get_pkg$resource_map) expect_true(pkg$data == get_pkg$data) + expect_warning(get_package(mn, pkg$metadata)) }) test_that("get_package works for a package with a child package", { @@ -60,7 +61,7 @@ test_that("get_package works the same when given a metadata pid as it does when } child_pkg <- create_dummy_package(mn) - a <- get_package(mn, child_pkg$metadata) + a <- suppressWarnings(get_package(mn, child_pkg$metadata)) b <- get_package(mn, child_pkg$resource_map) expect_equal(a, b) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index ce2be06..89ea076 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -33,16 +33,5 @@ test_that("list_submissions returns correct output", { } out <- list_submissions(adc, '2018-10-01', '2018-10-03') - expect_equal(out$submitter_name[1], 'Baptiste Vandecrux') -}) - -test_that('list_submissions returns correct output', { - cn <- dataone::CNode('PROD') - adc <- dataone::getMNode(cn,'urn:node:ARCTIC') - if (!is_token_set(adc)) { - skip("No token set. Skipping test.") - } - - out <- list_submissions(adc, '2018-10-01', '2018-10-03') - expect_equal(out$submitter_name[1], 'Baptiste Vandecrux') + expect_equal(out$submitter_name[1], 'Rachel Obbard') }) From f2a50b78a6ae7ffc58326340c69206b2bacbb014 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 16:23:14 -0700 Subject: [PATCH 263/318] doc updates --- R/helpers.R | 2 +- man/list_submissions.Rd | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index 5da0e4d..f456dee 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -496,7 +496,7 @@ get_orcid_email <- function(orcid_url) { #' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} #' @param formatType (character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. -#' @param whitelist (logical) Whether to filter out ADC admins, as listed at: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +#' @param use_whitelist (logical) Whether to filter out ADC admins, as listed at: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org #' #' @export #' diff --git a/man/list_submissions.Rd b/man/list_submissions.Rd index 84145ae..9b48388 100644 --- a/man/list_submissions.Rd +++ b/man/list_submissions.Rd @@ -5,8 +5,7 @@ \title{List recent submissions to a DataOne Member Node} \usage{ list_submissions(mn, from = Sys.Date(), to = Sys.Date(), - formatType = "*", - whitelist = "https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org") + formatType = "*", use_whitelist = T) } \arguments{ \item{mn}{(MNode) A DataOne Member Node} @@ -17,7 +16,7 @@ list_submissions(mn, from = Sys.Date(), to = Sys.Date(), \item{formatType}{(character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *.} -\item{whitelist}{(character) An xml list of admin orcid Identifiers. Defaults to https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} +\item{use_whitelist}{(logical) Whether to filter out ADC admins, as listed at: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} } \description{ List recent submissions to a DataOne Member Node from all submitters not present From 27b55992b672eb95810a7bf39fb6bb050ec0addd Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 16 Jul 2019 16:25:12 -0700 Subject: [PATCH 264/318] update docs --- NAMESPACE | 1 + R/helpers.R | 2 +- man/get_orcid_email.Rd | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 man/get_orcid_email.Rd diff --git a/NAMESPACE b/NAMESPACE index 175ddad..104eee0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ export(get_all_sysmeta) export(get_all_versions) export(get_mn_base_url) export(get_ncdf4_attributes) +export(get_orcid_email) export(get_package) export(get_token) export(guess_format_id) diff --git a/R/helpers.R b/R/helpers.R index f456dee..8a1516e 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -465,8 +465,8 @@ get_orcid_name <- function(orcid_url) { #' @param orcid_url (character) A valid ORCID URL address. #' #' @return (character) Public e-mail addresses. +#' @export #' -#' @noRd #' #' @examples #' \dontrun{ diff --git a/man/get_orcid_email.Rd b/man/get_orcid_email.Rd new file mode 100644 index 0000000..8cf7ae9 --- /dev/null +++ b/man/get_orcid_email.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{get_orcid_email} +\alias{get_orcid_email} +\title{Retrieve an email address from an ORCID URL} +\usage{ +get_orcid_email(orcid_url) +} +\arguments{ +\item{orcid_url}{(character) A valid ORCID URL address.} +} +\value{ +(character) Public e-mail addresses. +} +\description{ +Retrieve public email addresses from an ORCID URL. +} +\examples{ +\dontrun{ +pi_email <- get_orcid_email('https://orcid.org/0000-0002-2561-5840') +} +} From 8729a090c4939152ef7371f58578fedaf8122181 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 17 Jul 2019 08:49:30 -0700 Subject: [PATCH 265/318] add a catch in case no ORCID displayName is present, export function --- NAMESPACE | 1 + R/helpers.R | 10 ++++++++-- man/get_orcid_name.Rd | 22 ++++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 man/get_orcid_name.Rd diff --git a/NAMESPACE b/NAMESPACE index 104eee0..0025af0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(get_all_versions) export(get_mn_base_url) export(get_ncdf4_attributes) export(get_orcid_email) +export(get_orcid_name) export(get_package) export(get_token) export(guess_format_id) diff --git a/R/helpers.R b/R/helpers.R index 8a1516e..8b739cb 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -442,7 +442,7 @@ create_dummy_package_full <- function(mn, title = "A Dummy Package") { #' #' @return (character) First and last name. #' -#' @noRd +#' @export #' #' @examples #' \dontrun{ @@ -455,7 +455,13 @@ get_orcid_name <- function(orcid_url) { stop('Failed to read in ', orcid_url) } json <- httr::content(req) - return(json$displayName) + + display_name <- json$displayName + + if (is.null(display_name)){ + display_name <- NA + } + return(display_name) } #' Retrieve an email address from an ORCID URL diff --git a/man/get_orcid_name.Rd b/man/get_orcid_name.Rd new file mode 100644 index 0000000..bfe8832 --- /dev/null +++ b/man/get_orcid_name.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{get_orcid_name} +\alias{get_orcid_name} +\title{Retrieve a name from an ORCID URL} +\usage{ +get_orcid_name(orcid_url) +} +\arguments{ +\item{orcid_url}{(character) A valid ORCID URL address.} +} +\value{ +(character) First and last name. +} +\description{ +Retrieve first and last name from an ORCID URL. +} +\examples{ +\dontrun{ +pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') +} +} From a645422d55def9cc319e1fbbb05ff53976585df3 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Mon, 29 Jul 2019 09:26:57 -0700 Subject: [PATCH 266/318] just changing the public argument --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index eedcf09..cbede3d 100644 --- a/R/editing.R +++ b/R/editing.R @@ -700,7 +700,7 @@ update_resource_map <- function(mn, child_pids = NULL, other_statements = NULL, identifier = NULL, - public = FALSE, + public = TRUE, check_first = TRUE) { # Check arguments From bcf463e74156b6a1fbb4c4fe55602245e48b7fc4 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 30 Jul 2019 13:00:05 -0700 Subject: [PATCH 267/318] update version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f38a203..4365c5e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: arcticdatautils Title: Utilities for the Arctic Data Center -Version: 0.6.4 +Version: 0.7.0 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), From 7b85edfc00511248a8ac81d8ccb075f5bdbb0820 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Tue, 6 Aug 2019 14:12:41 -0700 Subject: [PATCH 268/318] update docs Merge branch 'master' into access_arguments --- man/update_resource_map.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index e47a9da..61426a4 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -6,7 +6,7 @@ \usage{ update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL, child_pids = NULL, other_statements = NULL, identifier = NULL, - public = FALSE, check_first = TRUE) + public = TRUE, check_first = TRUE) } \arguments{ \item{mn}{(MNode) The Member Node.} From 5a1a7987b8ad6d91aac7be52fee6af9c01d22418 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Wed, 11 Sep 2019 14:09:41 -0700 Subject: [PATCH 269/318] should be able to pass a generic list here --- R/eml.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index 0a91670..fe05c79 100644 --- a/R/eml.R +++ b/R/eml.R @@ -626,10 +626,6 @@ eml_abstract <- function(text) { #' } eml_validate_attributes <- function(attributes) { - if (class(attributes)[1] != "emld") { - stop(call. = FALSE, - "Attributes must be an emld object generated by EML::set_attributes()") - } # Define an interal applyable function to validate each attribute eml_validate_attribute <- function(attribute) { From 5abcf8527d6b6b5f2c18d7f0b667e81a63bc6bc9 Mon Sep 17 00:00:00 2001 From: Jeanette <jclark@nceas.ucsb.edu> Date: Tue, 17 Sep 2019 16:13:38 -0700 Subject: [PATCH 270/318] fix the argument as opposed to thworing an error --- R/access.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/access.R b/R/access.R index 53fb1c1..767455d 100644 --- a/R/access.R +++ b/R/access.R @@ -90,7 +90,7 @@ set_rights_holder <- function(mn, pids, subject) { } if (grepl("^https:\\/\\/orcid\\.org", subject)) { - stop("Argument 'subject' cannot contain 'https:', use 'http:' instead.") + subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) } From ddf37a737836a59fee0aec977f6289a5beaad86e Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 20 Sep 2019 09:59:46 -0700 Subject: [PATCH 271/318] update other related functions with the same change --- R/access.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/R/access.R b/R/access.R index 767455d..54d1526 100644 --- a/R/access.R +++ b/R/access.R @@ -91,6 +91,8 @@ set_rights_holder <- function(mn, pids, subject) { if (grepl("^https:\\/\\/orcid\\.org", subject)) { subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) + warning = function(w) { + message("Subject contains https, transforming to http") } @@ -187,9 +189,11 @@ set_access <- function(mn, pids, subjects, permissions = c("read", "write", "cha stop("Argument 'subjects' must be character class with non-zero number of characters.") } - if (any(grepl("^https:\\/\\/orcid\\.org", subjects))) { - stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") - } + if (grepl("^https:\\/\\/orcid\\.org", subject)) { + subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) + warning = function(w) { + message("Subject contains https, transforming to http") + } if (!all(permissions %in% c("read", "write", "changePermission"))) { stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") @@ -377,8 +381,10 @@ set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "wr } if (grepl("^https:\\/\\/orcid\\.org", subject)) { - stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") - } + subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) + warning = function(w) { + message("Subject contains https, transforming to http") + } if (!all(permissions %in% c("read", "write", "changePermission"))) { stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") From ef791f658542d205e033bfa32f1fc89533a03fe9 Mon Sep 17 00:00:00 2001 From: Jeanette Clark <jclark@nceas.ucsb.edu> Date: Fri, 20 Sep 2019 10:21:02 -0700 Subject: [PATCH 272/318] fixing incorrect syntax --- R/access.R | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/R/access.R b/R/access.R index 54d1526..241d00c 100644 --- a/R/access.R +++ b/R/access.R @@ -91,8 +91,7 @@ set_rights_holder <- function(mn, pids, subject) { if (grepl("^https:\\/\\/orcid\\.org", subject)) { subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) - warning = function(w) { - message("Subject contains https, transforming to http") + message("Subject contains https, transforming to http") } @@ -189,11 +188,10 @@ set_access <- function(mn, pids, subjects, permissions = c("read", "write", "cha stop("Argument 'subjects' must be character class with non-zero number of characters.") } - if (grepl("^https:\\/\\/orcid\\.org", subject)) { - subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) - warning = function(w) { - message("Subject contains https, transforming to http") - } + if (grepl("^https:\\/\\/orcid\\.org", subjects)) { + subjects <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subjects) + message("Subject contains https, transforming to http") + } if (!all(permissions %in% c("read", "write", "changePermission"))) { stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") @@ -285,8 +283,9 @@ remove_access <- function(mn, pids, subjects, permissions = c("read", "write", " stop("Argument 'subjects' must be character class with non-zero number of characters.") } - if (any(grepl("^https:\\/\\/orcid\\.org", subjects))) { - stop("Argument 'subjects' cannot contain 'https:', use 'http:' instead.") + if (grepl("^https:\\/\\/orcid\\.org", subjects)) { + subjects <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subjects) + message("Subject contains https, transforming to http") } if (!all(permissions %in% c("read", "write", "changePermission"))) { @@ -382,9 +381,8 @@ set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "wr if (grepl("^https:\\/\\/orcid\\.org", subject)) { subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) - warning = function(w) { - message("Subject contains https, transforming to http") - } + message("Subject contains https, transforming to http") + } if (!all(permissions %in% c("read", "write", "changePermission"))) { stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") From 84030bb18ac148f864a3a059a3d1ead5c87bfe4d Mon Sep 17 00:00:00 2001 From: Jeanette <jclark@nceas.ucsb.edu> Date: Wed, 25 Sep 2019 16:35:19 -0700 Subject: [PATCH 273/318] update relevant publish functions to be compatible with EML2.2 --- R/editing.R | 4 ++-- R/formats.R | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R/editing.R b/R/editing.R index cbede3d..6f5bd88 100644 --- a/R/editing.R +++ b/R/editing.R @@ -190,7 +190,7 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = sysmeta <- clear_replication_policy(sysmeta) # Add packageId to metadata if the object is an xml file - if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml", format_id)) { + if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml|^https://eml.ecoinformatics.org/eml-2.2.0", format_id)) { doc <- EML::read_eml(path) doc$packageId <- new_pid path <- tempfile() @@ -460,7 +460,7 @@ publish_update <- function(mn, # Create System Metadata for the updated EML file metadata_updated_sysmeta <- new("SystemMetadata", identifier = metadata_updated_pid, - formatId = "eml://ecoinformatics.org/eml-2.1.1", + formatId = metadata_sysmeta@formatId, #keep formatId the same as previous version size = file.size(eml_path), checksum = digest::digest(eml_path, algo = "sha1", serialize = FALSE, file = TRUE), checksumAlgorithm = "SHA1", diff --git a/R/formats.R b/R/formats.R index 3fa01f2..3f97c29 100644 --- a/R/formats.R +++ b/R/formats.R @@ -249,6 +249,7 @@ format_iso <- function() { #' Generate the EML 2.1.1 format ID #' #' Returns the EML 2.1.1 format ID. +#' @param version The version of EML ('2.1.1' or '2.2.0') #' #' @return (character) The format ID for EML 2.1.1. #' @@ -261,6 +262,12 @@ format_iso <- function() { #' env <- env_load() #' publish_object(env$mn, "path_to_some_EML_file", format_eml()) #' } -format_eml <- function() { - "eml://ecoinformatics.org/eml-2.1.1" +format_eml <- function(version) { + if (version %in% c("2.1","2.1.1", "1", 1)){ + "eml://ecoinformatics.org/eml-2.1.1" + } + else if (version %in% c("2.2","2.2.0", "2", 2)){ + "https://eml.ecoinformatics.org/eml-2.2.0" + } + else print("Please specify a recognized version name, either '2.1.1' or '2.2.0'") } From b8065a1bbcf49d205a52b7ed8cb6912990217ed1 Mon Sep 17 00:00:00 2001 From: Jeanette <jclark@nceas.ucsb.edu> Date: Wed, 25 Sep 2019 16:36:12 -0700 Subject: [PATCH 274/318] doc updates --- man/format_eml.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/format_eml.Rd b/man/format_eml.Rd index da87e4d..b8491f5 100644 --- a/man/format_eml.Rd +++ b/man/format_eml.Rd @@ -4,7 +4,10 @@ \alias{format_eml} \title{Generate the EML 2.1.1 format ID} \usage{ -format_eml() +format_eml(version) +} +\arguments{ +\item{version}{The version of EML ('2.1.1' or '2.2.0')} } \value{ (character) The format ID for EML 2.1.1. From 1208216e20a58364d031d3920a04a97c6cce7068 Mon Sep 17 00:00:00 2001 From: Bryce Mecum <petridish@gmail.com> Date: Thu, 26 Sep 2019 14:52:08 -0800 Subject: [PATCH 275/318] Make publish_update use the metadata object's formatId Instead of hardcoding the formatID of the new object to EML 2.1.1, we instead just use the same formatID as what was set on the object we're obsoleting. We also add a format_id argument to allow an override of the behavior. Also adds an example EML 220 doc and a format ID helper --- NAMESPACE | 1 + R/editing.R | 19 +++++++- R/formats.R | 19 ++++++++ inst/example-eml-220.xml | 86 +++++++++++++++++++++++++++++++++++ man/format_eml_220.Rd | 22 +++++++++ man/publish_update.Rd | 6 ++- tests/testthat/test_editing.R | 17 +++++++ 7 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 inst/example-eml-220.xml create mode 100644 man/format_eml_220.Rd diff --git a/NAMESPACE b/NAMESPACE index 0025af0..11093c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(eml_validate_attributes) export(env_get) export(find_newest_object) export(format_eml) +export(format_eml_220) export(format_iso) export(generate_resource_map) export(get_all_sysmeta) diff --git a/R/editing.R b/R/editing.R index 6f5bd88..b0edbea 100644 --- a/R/editing.R +++ b/R/editing.R @@ -253,6 +253,8 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = #' access policies are not affected. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. #' Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. +#' @param format_id (character) Optional. When omitted, the updated object will have the same formatId as `metadata_pid`. If set, will attempt +#' to use the value instead. #' #' @return (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. #' @@ -289,7 +291,8 @@ publish_update <- function(mn, parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE, - check_first = TRUE) { + check_first = TRUE, + format_id = NULL) { # Don't allow setting a dataset to private when it uses a DOI if (use_doi && !public) { @@ -328,6 +331,10 @@ publish_update <- function(mn, stopifnot(all(is.character(parent_child_pids))) } + if (!is.null(format_id)) { + stopifnot(is.character(format_id) && nchar(format_id) > 0) + } + # Check to see if the obsoleted package is in the list of parent_child_pids # If it is notify the user and remove it from the list if (resource_map_pid %in% parent_child_pids) { @@ -458,9 +465,17 @@ publish_update <- function(mn, EML::write_eml(doc, eml_path) # Create System Metadata for the updated EML file + # First, figure out what formatId we should use on the new object + if (!is.null(format_id)) { + message("Overridding format ID on new metadata object of: ", format_id, " instead of ", metadata_sysmeta@formatId, ".") + metadata_updated_format_id <- format_id + } else { + metadata_updated_format_id <- metadata_sysmeta@formatId + } + metadata_updated_sysmeta <- new("SystemMetadata", identifier = metadata_updated_pid, - formatId = metadata_sysmeta@formatId, #keep formatId the same as previous version + formatId = metadata_updated_format_id, size = file.size(eml_path), checksum = digest::digest(eml_path, algo = "sha1", serialize = FALSE, file = TRUE), checksumAlgorithm = "SHA1", diff --git a/R/formats.R b/R/formats.R index 3f97c29..e2f1972 100644 --- a/R/formats.R +++ b/R/formats.R @@ -271,3 +271,22 @@ format_eml <- function(version) { } else print("Please specify a recognized version name, either '2.1.1' or '2.2.0'") } + +#' Generate the EML 2.2.0 format ID +#' +#' Returns the EML 2.2.0 format ID. +#' +#' @return (character) The format ID for EML 2.2.0 +#' +#' @export +#' +#' @examples +#' format_eml() +#' \dontrun{ +#' # Upload a local EML 2.2.0 file: +#' env <- env_load() +#' publish_object(env$mn, "path_to_some_EML_file", format_eml_220()) +#' } +format_eml_220 <- function() { + "https://eml.ecoinformatics.org/eml-2.2.0" +} diff --git a/inst/example-eml-220.xml b/inst/example-eml-220.xml new file mode 100644 index 0000000..5964b54 --- /dev/null +++ b/inst/example-eml-220.xml @@ -0,0 +1,86 @@ +<eml:eml xmlns:eml="https://eml.ecoinformatics.org/eml-2.2.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:stmml="http://www.xml-cml.org/schema/stmml_1.1" packageId="urn:x-wmo:md:org.aoncadis.www::ee386c24-66ac-11e3-9147-00c0f03d5b7c" system="knb" scope="system" xsi:schemaLocation="https://eml.ecoinformatics.org/eml-2.2.0 eml.xsd"> + <dataset> + <alternateIdentifier>some-alternate-identifier-string</alternateIdentifier> + <title>arcticdata R package test + + + Test + User + + + + testuser + principtalInvestigator + + 2013-12-16 + eng + + Just an abstract. + + + SOME_KEY_WORD + + + This work is licensed under the Creative Commons Attribution 4.0 International License.To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. + + + + No geographic description provided. + + -135 + -134 + 59 + 57 + + + + + + 2011-06-15 + + + + 2013-05-31 + + + + + + + testuser + + + A Test Project Title + + + Bryce + Mecum + + principalInvestigator + + An abstract. + + NSF Award XXXXXX + + + + NA + + NA + 27 + 5179254207aed0f39ded1add3d9bab3ea0e10b084c2c194ccf0a033b8f5e7789 + + + application/octet-stream + + + + + ecogrid://knb/urn:uuid:89bec5d0-26db-48ac-ae54-e1b4c999c456 + + + + Other + + + diff --git a/man/format_eml_220.Rd b/man/format_eml_220.Rd new file mode 100644 index 0000000..8d2de49 --- /dev/null +++ b/man/format_eml_220.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/formats.R +\name{format_eml_220} +\alias{format_eml_220} +\title{Generate the EML 2.2.0 format ID} +\usage{ +format_eml_220() +} +\value{ +(character) The format ID for EML 2.2.0 +} +\description{ +Returns the EML 2.2.0 format ID. +} +\examples{ +format_eml() +\dontrun{ +# Upload a local EML 2.2.0 file: +env <- env_load() +publish_object(env$mn, "path_to_some_EML_file", format_eml_220()) +} +} diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 49fbb69..4c0b832 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -8,7 +8,8 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, child_pids = NULL, metadata_path = NULL, identifier = NULL, use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL, parent_data_pids = NULL, - parent_child_pids = NULL, public = TRUE, check_first = TRUE) + parent_child_pids = NULL, public = TRUE, check_first = TRUE, + format_id = NULL) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} @@ -46,6 +47,9 @@ access policies are not affected.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when \code{data_pids} has many elements.} + +\item{format_id}{(character) Optional. When omitted, the updated object will have the same formatId as \code{metadata_pid}. If set, will attempt +to use the value instead.} } \value{ (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index e5e06bc..3878c5c 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -512,3 +512,20 @@ test_that("update_package_object updates EML", { expect_equal(sum(unlist(pid_matches)), length(url_final)) }) + +test_that("publish_update can replace an EML 2.1.1 record with a 2.2.0 record", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + meta <- publish_object(mn, + path = file.path( + system.file(package = "arcticdatautils"), + "example-eml.xml"), + format_id = format_eml()) + ore <- create_resource_map(mn, meta) + pkg <- publish_update(mn, meta, ore, format_id = format_eml_220()) + sm <- getSystemMetadata(mn, pkg$metadata) + + expect_equal(sm@formatId, format_eml_220()) +}) From d6b7f82e52a8a4038e773010410a36ad7de1ce57 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 26 Sep 2019 14:52:31 -0800 Subject: [PATCH 276/318] Relax EML format ID test in reformat_file_name to cover EML 2.2.0 --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index b0edbea..7717fe8 100644 --- a/R/editing.R +++ b/R/editing.R @@ -1036,7 +1036,7 @@ reformat_file_name <- function(path, sysmeta) { base_name <- basename(path) if (sysmeta@formatId == 'http://www.openarchives.org/ore/terms') { ext <- '.rdf.xml' - } else if (grepl('eml://ecoinformatics\\.org/eml*', sysmeta@formatId)) { + } else if (grepl('ecoinformatics\\.org/eml*', sysmeta@formatId)) { ext <- '.xml' # remove extension then truncate to 50 characters base_name <- tools::file_path_sans_ext(base_name) %>% From 0bf5fccac6aedb9ce120120c5586c798bd7a3318 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 27 Sep 2019 14:55:01 -0700 Subject: [PATCH 277/318] make `format_eml` and `format_eml_220` one function this function now takes an argument, with no default. --- NAMESPACE | 1 - R/formats.R | 23 ++--------------------- man/format_eml.Rd | 4 ++-- man/format_eml_220.Rd | 22 ---------------------- tests/testthat/test_editing.R | 8 ++++---- tests/testthat/test_formats.R | 2 +- 6 files changed, 9 insertions(+), 51 deletions(-) delete mode 100644 man/format_eml_220.Rd diff --git a/NAMESPACE b/NAMESPACE index 11093c0..0025af0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,6 @@ export(eml_validate_attributes) export(env_get) export(find_newest_object) export(format_eml) -export(format_eml_220) export(format_iso) export(generate_resource_map) export(get_all_sysmeta) diff --git a/R/formats.R b/R/formats.R index e2f1972..263f86a 100644 --- a/R/formats.R +++ b/R/formats.R @@ -256,11 +256,11 @@ format_iso <- function() { #' @export #' #' @examples -#' format_eml() +#' format_eml("2.1.1") #' \dontrun{ #' # Upload a local EML 2.1.1 file: #' env <- env_load() -#' publish_object(env$mn, "path_to_some_EML_file", format_eml()) +#' publish_object(env$mn, "path_to_some_EML_file", format_eml("2.1")) #' } format_eml <- function(version) { if (version %in% c("2.1","2.1.1", "1", 1)){ @@ -271,22 +271,3 @@ format_eml <- function(version) { } else print("Please specify a recognized version name, either '2.1.1' or '2.2.0'") } - -#' Generate the EML 2.2.0 format ID -#' -#' Returns the EML 2.2.0 format ID. -#' -#' @return (character) The format ID for EML 2.2.0 -#' -#' @export -#' -#' @examples -#' format_eml() -#' \dontrun{ -#' # Upload a local EML 2.2.0 file: -#' env <- env_load() -#' publish_object(env$mn, "path_to_some_EML_file", format_eml_220()) -#' } -format_eml_220 <- function() { - "https://eml.ecoinformatics.org/eml-2.2.0" -} diff --git a/man/format_eml.Rd b/man/format_eml.Rd index b8491f5..968fef5 100644 --- a/man/format_eml.Rd +++ b/man/format_eml.Rd @@ -16,10 +16,10 @@ format_eml(version) Returns the EML 2.1.1 format ID. } \examples{ -format_eml() +format_eml("2.1.1") \dontrun{ # Upload a local EML 2.1.1 file: env <- env_load() -publish_object(env$mn, "path_to_some_EML_file", format_eml()) +publish_object(env$mn, "path_to_some_EML_file", format_eml("2.1")) } } diff --git a/man/format_eml_220.Rd b/man/format_eml_220.Rd deleted file mode 100644 index 8d2de49..0000000 --- a/man/format_eml_220.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/formats.R -\name{format_eml_220} -\alias{format_eml_220} -\title{Generate the EML 2.2.0 format ID} -\usage{ -format_eml_220() -} -\value{ -(character) The format ID for EML 2.2.0 -} -\description{ -Returns the EML 2.2.0 format ID. -} -\examples{ -format_eml() -\dontrun{ -# Upload a local EML 2.2.0 file: -env <- env_load() -publish_object(env$mn, "path_to_some_EML_file", format_eml_220()) -} -} diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 3878c5c..c85a1fb 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -207,7 +207,7 @@ test_that("update_object updates the packageId for EML object updates", { eml_path <- tempfile(fileext = ".xml") writeBin(dataone::getObject(mn, eml_pid), eml_path) - new_pid <- update_object(mn, eml_pid, eml_path, format_id = format_eml()) + new_pid <- update_object(mn, eml_pid, eml_path, format_id = format_eml("2.1")) updated_eml_path <- tempfile(fileext = ".xml") writeBin(dataone::getObject(mn, new_pid), updated_eml_path) @@ -522,10 +522,10 @@ test_that("publish_update can replace an EML 2.1.1 record with a 2.2.0 record", path = file.path( system.file(package = "arcticdatautils"), "example-eml.xml"), - format_id = format_eml()) + format_id = format_eml("2.1")) ore <- create_resource_map(mn, meta) - pkg <- publish_update(mn, meta, ore, format_id = format_eml_220()) + pkg <- publish_update(mn, meta, ore, format_id = format_eml("2.2")) sm <- getSystemMetadata(mn, pkg$metadata) - expect_equal(sm@formatId, format_eml_220()) + expect_equal(sm@formatId, format_eml("2.2")) }) diff --git a/tests/testthat/test_formats.R b/tests/testthat/test_formats.R index 5988571..3cbb06c 100644 --- a/tests/testthat/test_formats.R +++ b/tests/testthat/test_formats.R @@ -6,7 +6,7 @@ test_that("valid formats are valid and invalid ones are not", { }) test_that("a format can be returned", { - fmt <- format_eml() + fmt <- format_eml("2.2") expect_is(fmt, "character") expect_gt(length(fmt), 0) }) From d325a9c387123f9f177f527d3ae37fa8e8ee05eb Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Tue, 8 Oct 2019 11:44:49 -0700 Subject: [PATCH 278/318] Author update --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 4365c5e..7076b25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,6 +10,7 @@ Authors@R: c( person("Emily", "O'Dean", email = "eodean10@gmail.com", role = "ctb"), person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb") + person("Rachel", "Sun", email = "rachelsun@ucsb.edu", role = "ctb") ) Description: A set of utilities for working with the Arctic Data Center (https://arcticdata.io). From 6de27b0f0303da5ecb76c4b7e6d8eeed19946261 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Tue, 8 Oct 2019 11:54:22 -0700 Subject: [PATCH 279/318] Added recover_failed_submission function --- R/helpers.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/R/helpers.R b/R/helpers.R index 8b739cb..c40f1a2 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -621,3 +621,22 @@ read_zip_shapefile <- function(mn, pid){ unlink(temp) return(shapefile) } + + + +recover_failed_submission <- function(node, pid, path){ + stopifnot(is(node, "MNode")) + stopifnot(is.character(pid), nchar(pid) > 0, arcticdatautils::object_exists(mn, pid)) + + convert_to_text <- dataone::getObject(node, pid) %>% + rawToChar() + remove_error_tag <- paste0(convert_to_text, collapse = "") %>% + stringr::str_remove("EML draft.*`") %>% + stringr::str_remove(".*`") %>% + stringr::str_remove_all(" ") %>% + stringr::str_trim() + + doc <- EML::read_eml(remove_error_tag) + emld::eml_validate(doc) + EML::write_eml(doc, path) +} From 6868a273074fb07e4ba54a765c81f7a14d34d2fa Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Tue, 8 Oct 2019 12:04:03 -0700 Subject: [PATCH 280/318] updated typo --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7076b25..6d23b6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Authors@R: c( person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email = "eodean10@gmail.com", role = "ctb"), person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), - person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb") + person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb"), person("Rachel", "Sun", email = "rachelsun@ucsb.edu", role = "ctb") ) Description: A set of utilities for working with the Arctic Data Center From 6e9821301ffccd86c7707e0b3a9a554a0a492ece Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Thu, 10 Oct 2019 11:59:47 -0700 Subject: [PATCH 281/318] added documentation for recover_failed_submission function --- R/helpers.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/R/helpers.R b/R/helpers.R index c40f1a2..0496c28 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -623,6 +623,31 @@ read_zip_shapefile <- function(mn, pid){ } +#' Recovers failed submissions +#' +#' Recovers failed submissions and write the new, valid EML to a given path +#' +#' @param node (MNode) The Member Node to publish the object to. +#' @param pid The PID of the EML metadata document to be recovered. +#' @param path path to write XML +#' +#' @return recovers and write the valid EML to the indicated path +#' +#' @export +#' +#' @author Rachel Sun rachelsun@@ucsb.edu +#' +#' @examples +#' \dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:c40b93d9-a15a-47d0-9a5f-06e7056c93c1" +#' path <- system.file("extdata", "example.xml", package = "emld") +#' recover_failed_submission <- function(mn, pid, path) +#'} + + recover_failed_submission <- function(node, pid, path){ stopifnot(is(node, "MNode")) From bb3bc374b342e8bfd63bf3a270cd89d51bc73098 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Fri, 11 Oct 2019 11:41:37 -0700 Subject: [PATCH 282/318] Updated NAMESPACE, helpers.R and recover_failed_submission documentation --- NAMESPACE | 1 + R/helpers.R | 14 ++++++------- man/recover_failed_submission.Rd | 35 ++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 man/recover_failed_submission.Rd diff --git a/NAMESPACE b/NAMESPACE index 0025af0..6c90175 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ export(pid_to_eml_physical) export(publish_object) export(publish_update) export(read_zip_shapefile) +export(recover_failed_submission) export(remove_access) export(remove_public_read) export(reorder_pids) diff --git a/R/helpers.R b/R/helpers.R index 0496c28..03edaad 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -629,22 +629,23 @@ read_zip_shapefile <- function(mn, pid){ #' #' @param node (MNode) The Member Node to publish the object to. #' @param pid The PID of the EML metadata document to be recovered. -#' @param path path to write XML +#' @param path path to write XML. #' #' @return recovers and write the valid EML to the indicated path #' #' @export #' -#' @author Rachel Sun rachelsun@@ucsb.edu +#' @author Rachel Sun rachelsun@ucsb.edu #' #' @examples #' \dontrun{ #' # Set environment -#' cn <- CNode("STAGING2") -#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' cn <- dataone::CNode("STAGING2") +#' mn <- dataone::getMNode(cn,"urn:node:mnTestKNB") #' pid <- "urn:uuid:c40b93d9-a15a-47d0-9a5f-06e7056c93c1" -#' path <- system.file("extdata", "example.xml", package = "emld") -#' recover_failed_submission <- function(mn, pid, path) +#' path <- tempfile("file", fileext = ".xml") +#' recover_failed_submission(mn, pid, path) +#' eml <- EML::read_eml(path) #'} @@ -656,7 +657,6 @@ recover_failed_submission <- function(node, pid, path){ convert_to_text <- dataone::getObject(node, pid) %>% rawToChar() remove_error_tag <- paste0(convert_to_text, collapse = "") %>% - stringr::str_remove("EML draft.*`") %>% stringr::str_remove(".*`") %>% stringr::str_remove_all(" ") %>% stringr::str_trim() diff --git a/man/recover_failed_submission.Rd b/man/recover_failed_submission.Rd new file mode 100644 index 0000000..fd9ffab --- /dev/null +++ b/man/recover_failed_submission.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{recover_failed_submission} +\alias{recover_failed_submission} +\title{Recovers failed submissions} +\usage{ +recover_failed_submission(node, pid, path) +} +\arguments{ +\item{node}{(MNode) The Member Node to publish the object to.} + +\item{pid}{The PID of the EML metadata document to be recovered.} + +\item{path}{path to write XML.} +} +\value{ +recovers and write the valid EML to the indicated path +} +\description{ +Recovers failed submissions and write the new, valid EML to a given path +} +\examples{ +\dontrun{ +# Set environment +cn <- dataone::CNode("STAGING2") +mn <- dataone::getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:c40b93d9-a15a-47d0-9a5f-06e7056c93c1" +path <- tempfile("file", fileext = ".xml") +recover_failed_submission(mn, pid, path) +eml <- EML::read_eml(path) +} +} +\author{ +Rachel Sun rachelsun@ucsb.edu +} From db62aaf4d48b0d2f8626e8b7050e67c3898d6400 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Tue, 15 Oct 2019 12:00:41 -0700 Subject: [PATCH 283/318] helpers.R: fixed error messages recover_failed_submission.Rd: editted documentation test_helpers.R: added unit testing for recover_failed_submission --- R/helpers.R | 5 +++-- man/recover_failed_submission.Rd | 2 +- tests/testthat/test_helpers.R | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index 03edaad..d2a1c71 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -642,7 +642,7 @@ read_zip_shapefile <- function(mn, pid){ #' # Set environment #' cn <- dataone::CNode("STAGING2") #' mn <- dataone::getMNode(cn,"urn:node:mnTestKNB") -#' pid <- "urn:uuid:c40b93d9-a15a-47d0-9a5f-06e7056c93c1" +#' pid <- "urn:uuid:b1a234f0-eed5-4f58-b8d5-6334ce07c010" #' path <- tempfile("file", fileext = ".xml") #' recover_failed_submission(mn, pid, path) #' eml <- EML::read_eml(path) @@ -652,12 +652,13 @@ read_zip_shapefile <- function(mn, pid){ recover_failed_submission <- function(node, pid, path){ stopifnot(is(node, "MNode")) - stopifnot(is.character(pid), nchar(pid) > 0, arcticdatautils::object_exists(mn, pid)) + stopifnot(is.character(pid), nchar(pid) > 0, arcticdatautils::object_exists(node, pid)) convert_to_text <- dataone::getObject(node, pid) %>% rawToChar() remove_error_tag <- paste0(convert_to_text, collapse = "") %>% stringr::str_remove(".*`") %>% + stringr::str_remove("EML draft.*`") %>% stringr::str_remove_all(" ") %>% stringr::str_trim() diff --git a/man/recover_failed_submission.Rd b/man/recover_failed_submission.Rd index fd9ffab..d438ab3 100644 --- a/man/recover_failed_submission.Rd +++ b/man/recover_failed_submission.Rd @@ -24,7 +24,7 @@ Recovers failed submissions and write the new, valid EML to a given path # Set environment cn <- dataone::CNode("STAGING2") mn <- dataone::getMNode(cn,"urn:node:mnTestKNB") -pid <- "urn:uuid:c40b93d9-a15a-47d0-9a5f-06e7056c93c1" +pid <- "urn:uuid:b1a234f0-eed5-4f58-b8d5-6334ce07c010" path <- tempfile("file", fileext = ".xml") recover_failed_submission(mn, pid, path) eml <- EML::read_eml(path) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 89ea076..5d336ab 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -35,3 +35,35 @@ test_that("list_submissions returns correct output", { out <- list_submissions(adc, '2018-10-01', '2018-10-03') expect_equal(out$submitter_name[1], 'Rachel Obbard') }) + +test_that("write a valid EML to the given path", { + cn <- dataone::CNode('PROD') + adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') + if (!is_token_set(adc)) { + skip("No token set. Skipping test.") + } + + create_dummy_package() + +} + +test_that("recover a failed submission", { + cn <- dataone::CNode('PROD') + adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') + if (!is_token_set(adc)) { + skip("No token set. Skipping test.") + } + + pids <- query(adc, list(q="fileName:(*eml_draft* AND -*Mullen*)", + fl = "id", + rows="50")) + + path <- tempfile(fileext = ".xml") + + recover_failed_submission(adc, pids[[1]]$id[1], path) + + doc <- EML::read_eml(path) + expect_true(EML::eml_validate(doc)) + + } +) From 5bcbeeea712e18e77cae601451cde38f41bd4792 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Thu, 17 Oct 2019 11:01:00 -0700 Subject: [PATCH 284/318] updated test_helpers.R --- tests/testthat/test_helpers.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 5d336ab..4bd7ea5 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -44,8 +44,7 @@ test_that("write a valid EML to the given path", { } create_dummy_package() - -} +}) test_that("recover a failed submission", { cn <- dataone::CNode('PROD') @@ -53,8 +52,7 @@ test_that("recover a failed submission", { if (!is_token_set(adc)) { skip("No token set. Skipping test.") } - - pids <- query(adc, list(q="fileName:(*eml_draft* AND -*Mullen*)", + pids <- dataone::query(adc, list(q="fileName:(*eml_draft* AND -*Mullen*)", fl = "id", rows="50")) @@ -65,5 +63,4 @@ test_that("recover a failed submission", { doc <- EML::read_eml(path) expect_true(EML::eml_validate(doc)) - } -) +}) From aa37be934ef13c4f3ce2e3bf20d7011e3359e7ff Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 18 Oct 2019 12:18:01 -0700 Subject: [PATCH 285/318] future proof for additional EML versions --- R/editing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index 7717fe8..ee33e98 100644 --- a/R/editing.R +++ b/R/editing.R @@ -190,7 +190,7 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = sysmeta <- clear_replication_policy(sysmeta) # Add packageId to metadata if the object is an xml file - if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml|^https://eml.ecoinformatics.org/eml-2.2.0", format_id)) { + if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml|^https://eml.ecoinformatics.org", format_id)) { doc <- EML::read_eml(path) doc$packageId <- new_pid path <- tempfile() From a7e6be78692391d64179757b37f33d38116e0cd1 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 18 Oct 2019 12:37:23 -0700 Subject: [PATCH 286/318] fix malformed DESCRIPTION file --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7076b25..6d23b6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Authors@R: c( person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email = "eodean10@gmail.com", role = "ctb"), person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), - person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb") + person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb"), person("Rachel", "Sun", email = "rachelsun@ucsb.edu", role = "ctb") ) Description: A set of utilities for working with the Arctic Data Center From 60bac03feecc9e23b9d891fc5aadc2781dfccef6 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Fri, 18 Oct 2019 14:47:32 -0700 Subject: [PATCH 287/318] changing node to a testing node --- tests/testthat/test_helpers.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 4bd7ea5..1c2503f 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -47,18 +47,18 @@ test_that("write a valid EML to the given path", { }) test_that("recover a failed submission", { - cn <- dataone::CNode('PROD') - adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') - if (!is_token_set(adc)) { + cn_staging <- dataone::CNode('STAGING') + adc_test <- dataone::getMNode(cn_staging,'urn:node:mnTestARCTIC') + if (!is_token_set(adc_test)) { skip("No token set. Skipping test.") } - pids <- dataone::query(adc, list(q="fileName:(*eml_draft* AND -*Mullen*)", + pids <- dataone::query(adc_test, list(q="fileName:(*eml_draft* AND -*Mullen*)", fl = "id", rows="50")) path <- tempfile(fileext = ".xml") - recover_failed_submission(adc, pids[[1]]$id[1], path) + recover_failed_submission(adc_test, pids[[1]]$id[1], path) doc <- EML::read_eml(path) expect_true(EML::eml_validate(doc)) From 9b702da50d635be27656df44d851a65cbe11b170 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Mon, 21 Oct 2019 10:57:45 -0700 Subject: [PATCH 288/318] typo fixes --- DESCRIPTION | 2 +- R/packaging.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7076b25..6d23b6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Authors@R: c( person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), person("Emily", "O'Dean", email = "eodean10@gmail.com", role = "ctb"), person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), - person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb") + person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb"), person("Rachel", "Sun", email = "rachelsun@ucsb.edu", role = "ctb") ) Description: A set of utilities for working with the Arctic Data Center diff --git a/R/packaging.R b/R/packaging.R index e6554ac..5a32a36 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -63,7 +63,7 @@ insert_file <- function(inventory, file, env=NULL) { inventory_file[1,"created"] <- create_object(inventory_file[1,], sysmeta, env$base_path, - mn) + env$mn) } if (inventory_file[1,"created"] == FALSE) { From 0e7d3d4ce88fbcf813aee14d4f56396e22dbc9e0 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Mon, 21 Oct 2019 12:01:30 -0700 Subject: [PATCH 289/318] add emld to imports add emld to imports --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 6d23b6f..5bd35b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: datapack, digest, EML (>= 2.0), + emld, httr, magrittr, methods, From 222f7b218b681b88ffe51efcd8a69ae92d779aa8 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Wed, 23 Oct 2019 10:32:54 -0700 Subject: [PATCH 290/318] updated test_helpers.R and helpers.R. Removed emld from DESCRIPTION --- DESCRIPTION | 1 - R/helpers.R | 2 +- tests/testthat/test_helpers.R | 22 +++++----------------- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5bd35b1..6d23b6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,6 @@ Imports: datapack, digest, EML (>= 2.0), - emld, httr, magrittr, methods, diff --git a/R/helpers.R b/R/helpers.R index d2a1c71..dd5e782 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -663,6 +663,6 @@ recover_failed_submission <- function(node, pid, path){ stringr::str_trim() doc <- EML::read_eml(remove_error_tag) - emld::eml_validate(doc) + EML::eml_validate(doc) EML::write_eml(doc, path) } diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index 1c2503f..a280ecb 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -36,31 +36,19 @@ test_that("list_submissions returns correct output", { expect_equal(out$submitter_name[1], 'Rachel Obbard') }) -test_that("write a valid EML to the given path", { +test_that("recover a failed submission", { + #test runs without a token + cn <- dataone::CNode('PROD') adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') - if (!is_token_set(adc)) { - skip("No token set. Skipping test.") - } - - create_dummy_package() -}) - -test_that("recover a failed submission", { - cn_staging <- dataone::CNode('STAGING') - adc_test <- dataone::getMNode(cn_staging,'urn:node:mnTestARCTIC') - if (!is_token_set(adc_test)) { - skip("No token set. Skipping test.") - } - pids <- dataone::query(adc_test, list(q="fileName:(*eml_draft* AND -*Mullen*)", + pids <- dataone::query(adc, list(q="fileName:(*eml_draft* AND -*Mullen*)", fl = "id", rows="50")) path <- tempfile(fileext = ".xml") - recover_failed_submission(adc_test, pids[[1]]$id[1], path) + recover_failed_submission(adc, pids[[1]]$id[1], path) doc <- EML::read_eml(path) expect_true(EML::eml_validate(doc)) - }) From bae7d7a91b580e41af1db0fbf06f773a8a999e87 Mon Sep 17 00:00:00 2001 From: Rachel Sun Date: Thu, 24 Oct 2019 10:18:31 -0700 Subject: [PATCH 291/318] Changed the grammar a little (Jeanette's suggestion) --- tests/testthat/test_helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index a280ecb..929872c 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -36,7 +36,7 @@ test_that("list_submissions returns correct output", { expect_equal(out$submitter_name[1], 'Rachel Obbard') }) -test_that("recover a failed submission", { +test_that("A failed submission can be recovered", { #test runs without a token cn <- dataone::CNode('PROD') From 9f61964e0ac98b741e3e781809fa15abb8f60cfa Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:31:48 -0800 Subject: [PATCH 292/318] remove some message output --- R/access.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/access.R b/R/access.R index 241d00c..ef98c0f 100644 --- a/R/access.R +++ b/R/access.R @@ -118,11 +118,9 @@ set_rights_holder <- function(mn, pids, subject) { # Change rightsHolder (if needed) if (sysmeta@rightsHolder == subject) { - message(paste0("rightsHolder field is already set to ", subject, ". System Metadata not updated.")) result[i] <- TRUE } else { # Update System Metadata - message(paste0("Updating rightsHolder for PID ", pid, " from ", sysmeta@rightsHolder, " to ", subject, ".")) sysmeta@rightsHolder <- subject From 737b55c74f00160cb0bf58ca2db815ffa49c4ae3 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:32:13 -0800 Subject: [PATCH 293/318] allow prov to be carried forward and add recover prov function --- R/editing.R | 71 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/R/editing.R b/R/editing.R index 6972324..a2a028b 100644 --- a/R/editing.R +++ b/R/editing.R @@ -292,7 +292,8 @@ publish_update <- function(mn, parent_child_pids = NULL, public = TRUE, check_first = TRUE, - format_id = NULL) { + format_id = NULL, + keep_prov = FALSE) { # Don't allow setting a dataset to private when it uses a DOI if (use_doi && !public) { @@ -424,8 +425,6 @@ publish_update <- function(mn, # get the metadata sysmeta from the node metadata_sysmeta <- dataone::getSystemMetadata(mn, metadata_pid) - message("Downloaded EML and sysmeta...") - # Generate PIDs for our updated objects if (is.null(identifier)) { if (use_doi) { @@ -543,11 +542,11 @@ publish_update <- function(mn, child_pids = child_pids, identifier = resmap_updated_pid, public = public, - check_first = check_first) + check_first = check_first, + keep_prov = keep_prov) set_rights_holder(mn, response[["resource_map"]], metadata_sysmeta@rightsHolder) - message("Updated resource map") # Update the parent resource map to add the new package ####################################################### @@ -556,8 +555,6 @@ publish_update <- function(mn, stop("Missing required parameters to update parent package.") } - message("Updating parent resource map...") - # Check to see if the just-updated package is in the list of # parent_child_pids, notify the user, and add it to the list if (!(resmap_updated_pid %in% parent_child_pids)) { @@ -571,7 +568,8 @@ publish_update <- function(mn, data_pids = parent_data_pids, child_pids = parent_child_pids, public = public, - check_first = check_first) + check_first = check_first, + keep_prov = keep_prov) set_rights_holder(mn, response[["parent_resource_map"]], metadata_sysmeta@rightsHolder) } @@ -716,7 +714,8 @@ update_resource_map <- function(mn, other_statements = NULL, identifier = NULL, public = TRUE, - check_first = TRUE) { + check_first = TRUE, + keep_prov = FALSE) { # Check arguments stopifnot(is(mn, "MNode")) @@ -760,7 +759,9 @@ update_resource_map <- function(mn, other_statements) } - prov_pids <- gsub("https://cn-stage-2.test.dataone.org/cn/v[0-9]/resolve/|https://cn.dataone.org/cn/v[0-9]/resolve/", "", c(statements$subject, statements$object)) %>% + prov_pids <- gsub("https://cn-stage-2.test.dataone.org/cn/v[0-9]/resolve/|https://cn.dataone.org/cn/v[0-9]/resolve/|https://cn-stage.test.dataone.org/cn/v[0-9]/resolve/", + "", + c(statements$subject, statements$object)) %>% gsub("%3A", ":", .) prov_pids <- prov_pids[-(grep("^http", prov_pids))] %>% # might need to catch other things besides URLs unique(.) @@ -770,27 +771,46 @@ update_resource_map <- function(mn, identifier <- paste0("resource_map_", new_uuid()) } - if (any(prov_pids %in% data_pids == FALSE)){ - warning("Old provenance contains data pids not in new resource map. Provenance information will be removed") + if (keep_prov == FALSE){ + if (is.null(prov_pids)){ + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + data_pids = data_pids, + child_pids = child_pids, + resource_map_pid = identifier) + } + else if (any(prov_pids %in% data_pids == FALSE)){ + warning("Old provenance contains data pids not in new resource map. Provenance information will be removed. \n + You can get old provenance statements back using: \n + old_prov <- get_prov(mn, rm_pid) \n + rm_new <- update_resource_map(mn, rm_pid, metadata_pid, data_pids, other_statements = old_prov, keep_prov = T)") new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, data_pids = data_pids, child_pids = child_pids, resource_map_pid = identifier) + } + else if (all(prov_pids %in% data_pids) == TRUE) { + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + data_pids = data_pids, + child_pids = child_pids, + other_statements = statements, + resource_map_pid = identifier) + } } - else if (all(prov_pids %in% data_pids == TRUE)) { + else if (keep_prov == TRUE) { + if (any(prov_pids %in% data_pids == FALSE)){ + warning("Old provenance contains data pids not in new resource map. Provenance information is retained since keep_prov is set to TRUE") + } new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, data_pids = data_pids, child_pids = child_pids, other_statements = statements, resource_map_pid = identifier) } - stopifnot(file.exists(new_rm_path)) rm(sysmeta) - message(paste0("Getting updated copy of System Metadata for ", resource_map_pid)) sysmeta <- dataone::getSystemMetadata(mn, resource_map_pid) stopifnot(is(sysmeta, "SystemMetadata")) @@ -815,7 +835,6 @@ update_resource_map <- function(mn, } # Update it - message(paste0("Updating resource map...")) resmap_update_response <- dataone::updateObject(mn, pid = resource_map_pid, newpid = identifier, @@ -1086,3 +1105,23 @@ reformat_file_name <- function(path, sysmeta) { return(file_name) } + +#' Get a data.frame of prov statements from a resource map pid. +#' +#' This is a function that is useful if you need to recover lost prov statements. It returns +#' a data.frame of statements that can be passed to `update_resource_map` in the `other_statements` +#' argument. +#' +#' @param mn (mn) A memeber node instance +#' @param rm_pid (character) A resource map identifier +#' @return a data.frame of prov statments +#' @export +recover_prov <- function(mn, rm_pid){ + old_resource_map_path <- tempfile() + writeLines(rawToChar(dataone::getObject(mn, rm_pid)), old_resource_map_path) + statements <- parse_resource_map(old_resource_map_path) + statements <- filter_packaging_statements(statements) + unlink(old_resource_map_path) + return(statements) +} + From f4237d3c6af5d88e31444c37b2cad3e0c8d650e9 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:32:43 -0800 Subject: [PATCH 294/318] make dummy metadata creation more accurate and add a dummy prov helper --- R/helpers.R | 54 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index dd5e782..5c64d73 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -32,8 +32,24 @@ create_dummy_metadata <- function(mn, data_pids = NULL) { # Copy the original EML file to a temporary place original_file <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") + doc <- read_eml(original_file) + + if (is.null(data_pids)){ + doc$dataset$otherEntity <- NULL + } + else if (!is.null(data_pids)){ + oe <- list() + for (i in 1:length(data_pids)){ + oe[[i]] <- list(entityName = "dummy_object", + entityDescription = data_pids[i], + entityType = "text/plain") + } + + doc$dataset$otherEntity <- oe + } + metadata_file <- tempfile() - file.copy(original_file, metadata_file) + write_eml(doc, metadata_file) sysmeta <- new("SystemMetadata", id = pid, @@ -647,9 +663,6 @@ read_zip_shapefile <- function(mn, pid){ #' recover_failed_submission(mn, pid, path) #' eml <- EML::read_eml(path) #'} - - - recover_failed_submission <- function(node, pid, path){ stopifnot(is(node, "MNode")) stopifnot(is.character(pid), nchar(pid) > 0, arcticdatautils::object_exists(node, pid)) @@ -666,3 +679,36 @@ recover_failed_submission <- function(node, pid, path){ EML::eml_validate(doc) EML::write_eml(doc, path) } + +#' Add prov to a dummy package +#' +#' Adds provenance information to a dummy package for testing +#' +#' @param mn member node (the ADC test node) +#' @param rm_pid resource map identifier +#' +add_dummy_prov <- function(mn, rm_pid){ + if (mn@env == "prod") { + stop("Cannot create dummy prov on a production node.") + } + if (mn@identifier == "urn:node:mnTestARCTIC"){ + d1c <- dataone::D1Client("STAGING", "urn:node:mnTestARCTIC") + } + else if (mn@identifier != "urn:node:mnTestARCTIC"){ + stop("Use the ADC test node") + } + + pkg <- getDataPackage(d1c, id=rm_pid, lazyLoad=TRUE, limit="0MB", quiet=T) + objs <- selectMember(pkg, name="sysmeta@fileName", value='dummy_object') + if (length(objs) < 2){ + stop(.call = FALSE, + "Dummy package must have at least 2 data objects") + } + sourceObjId <- selectMember(pkg, name="sysmeta@fileName", value='dummy_object')[[1]] + outputObjId <- selectMember(pkg, name="sysmeta@fileName", value='dummy_object')[[2]] + + pkg <- describeWorkflow(pkg, sources=sourceObjId, derivations=outputObjId) + + resmapId_new <- uploadDataPackage(d1c, pkg, public = TRUE, quiet = FALSE) +} + From 3720283b10e4608df1f77da1ebd5ca9824b9ada4 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:32:50 -0800 Subject: [PATCH 295/318] remove more messages --- R/packaging.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/packaging.R b/R/packaging.R index 5a32a36..a13011c 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -458,8 +458,6 @@ generate_resource_map <- function(metadata_pid, resource_map <- new("ResourceMap", id = resource_map_pid) - message("Generating resource map with pids ", paste0(head(unlist(c(metadata_pid, data_pids, child_pids)), n = 10), collapse = ", "), ".") - resource_map <- datapack::createFromTriples(resource_map, relations = relationships, identifiers = unlist(c(metadata_pid, data_pids, child_pids)), From bcffcdc6ec6cb90ff461899219d3233b659eaa2b Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:32:59 -0800 Subject: [PATCH 296/318] add tests --- tests/testthat/test_access.R | 2 +- tests/testthat/test_editing.R | 59 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index 61d5e91..a30ad06 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -86,7 +86,7 @@ test_that("is_public_read returns true for public packages and false for private skip("No token set. Skipping test.") } - pkg <- create_dummy_package(mn) + pkg <- create_dummy_package(mn, size = 3) public_response <- is_public_read(mn, pkg$resource_map) remove_public_read(mn, pkg$resource_map) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index c85a1fb..ef01c85 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -529,3 +529,62 @@ test_that("publish_update can replace an EML 2.1.1 record with a 2.2.0 record", expect_equal(sm@formatId, format_eml("2.2")) }) + +test_that("PROV is carried forward if data pids don't change", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + # Make a test package and add prov + package <- create_dummy_package(mn, size = 3) + package_prov <- suppressMessages(add_dummy_prov(mn, package$resource_map)) + + # Publish an update on it + update <- publish_update(mn, + package$metadata, + package_prov, + package$data, + check_first = FALSE) + + t <- recover_prov(mn, update$resource_map) + + prov_pids <- gsub("https://cn-stage-2.test.dataone.org/cn/v[0-9]/resolve/|https://cn.dataone.org/cn/v[0-9]/resolve/|https://cn-stage.test.dataone.org/cn/v[0-9]/resolve/", "", c(t$subject, t$object)) %>% + gsub("%3A", ":", .) + prov_pids <- prov_pids[-(grep("^http", prov_pids))] %>% + unique(.) + + expect_equal(sort(prov_pids), sort(update$data)) + +}) + +test_that("PROV is handled with appropriate warnings", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + # Make a test package and add prov + package <- create_dummy_package(mn, size = 3) + package_prov <- suppressMessages(add_dummy_prov(mn, package$resource_map)) + data_new <- create_dummy_object(mn) + # Publish an update on it + expect_warning(update <- publish_update(mn, + package$metadata, + package_prov, + data_new, + keep_prov = TRUE, + check_first = FALSE), "Provenance information is retained") + + # Make a test package and add prov + package <- create_dummy_package(mn, size = 3) + package_prov <- suppressMessages(add_dummy_prov(mn, package$resource_map)) + data_new <- create_dummy_object(mn) + + # Publish an update on it + expect_warning(update <- publish_update(mn, + package$metadata, + package_prov, + data_new, + keep_prov = FALSE, + check_first = FALSE), "Provenance information will be removed") + +}) From 3582fdb04080e37df12a229fe18aafcfe90c3171 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:34:49 -0800 Subject: [PATCH 297/318] remove update_package_object and relatives I don't think I'd recommend the use of this anymore, even though it was my idea --- R/editing.R | 175 ---------------------------- tests/testthat/test_editing.R | 211 ---------------------------------- 2 files changed, 386 deletions(-) diff --git a/R/editing.R b/R/editing.R index a2a028b..c16c217 100644 --- a/R/editing.R +++ b/R/editing.R @@ -894,181 +894,6 @@ set_file_name <- function(mn, pid, name) { } -#' Update physical of an updated data object -#' -#' This function updates the EML with the new physical -#' of a data object once it has been updated. -#' This is a helper function for [update_package_object()]. -#' -#' @param doc (emld) An EML object. -#' @param mn (MNode) The Member Node of the data package. -#' @param data_pid (character) The identifier of the data object to be updated. -#' @param new_data_pid (character) The new identifier of the updated data object. -#' -#' @importFrom stringr str_detect -#' -#' @noRd -update_physical <- function(doc, mn, data_pid, new_data_pid) { - stopifnot(is(doc, "emld")) - stopifnot(is(mn, "MNode")) - stopifnot(is.character(data_pid), nchar(data_pid) > 0) - stopifnot(is.character(new_data_pid), nchar(new_data_pid) > 0) - - all_url <- eml_get(doc, "url") %>% - grep("^http", ., value = T) %>% - unname() - - if (sum(stringr::str_detect(all_url, data_pid)) == 0) { - stop("The obsoleted data PID does not match any physical sections, so the EML will not be updated.") - } - - if (length(doc$dataset$dataTable) != 0){ - dataTable_url <- eml_get(doc$dataset$dataTable, "url") %>% - grep("^http", ., value = T) %>% - unname() - - if (any(stringr::str_detect(dataTable_url, data_pid))) { - position <- which(stringr::str_detect(dataTable_url, data_pid)) - new_phys <- pid_to_eml_physical(mn, new_data_pid) - if(all(is.null(names(doc$dataset$dataTable)))){ - doc$dataset$dataTable[[position]]$physical <- new_phys - } - else if (all(is.null(names(doc$dataset$dataTable))) == F & position == 1){ - doc$dataset$dataTable$physical <- new_phys - } - } - } - - if (length(doc$dataset$otherEntity) != 0){ - otherEntity_url <- eml_get(doc$dataset$otherEntity, "url") %>% - grep("^http", ., value = T) %>% - unname() - - if (any(stringr::str_detect(otherEntity_url, data_pid))) { - position <- which(stringr::str_detect(otherEntity_url, data_pid)) - new_phys <- pid_to_eml_physical(mn, new_data_pid) - if(all(is.null(names(doc$dataset$otherEntity)))){ - doc$dataset$otherEntity[[position]]$physical <- new_phys - } - else if (all(is.null(names(doc$dataset$otherEntity))) == F & position == 1){ - doc$dataset$otherEntity$physical <- new_phys - } - } - } - - if (length(doc$dataset$spatialVector) != 0){ - spatialVector_url <- eml_get(doc$dataset$spatialVector, "url") %>% - grep("^http", ., value = T) %>% - unname() - - if (any(stringr::str_detect(spatialVector_url, data_pid))) { - position <- which(stringr::str_detect(spatialVector_url, data_pid)) - new_phys <- pid_to_eml_physical(mn, new_data_pid) - if(all(is.null(names(doc$dataset$spatialVector)))){ - doc$dataset$spatialVector[[position]]$physical <- new_phys - } - else if (all(is.null(names(doc$dataset$spatialVector))) == F & position == 1){ - doc$dataset$spatialVector$physical <- new_phys - } - } - } - - return(doc) -} - - -#' Update a data object and associated resource map and metadata -#' -#' This function updates a data object and then automatically -#' updates the package resource map with the new data PID. If an object -#' already has a `dataTable`, `otherEntity`, or `spatialVector` -#' with a working physical section, the EML will be updated with the new physical. -#' It is a convenience wrapper around [update_object()] and [publish_update()]. -#' -#' @param mn (MNode) The Member Node of the data package. -#' @param data_pid (character) PID for data object to update. -#' @param new_data_path (character) Path to new data object. -#' @param resource_map_pid (character) PID for resource map to update. -#' @param format_id (character) Optional. The format ID to set for the object. -#' When not set, [guess_format_id()] will be used -#' to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. -#' @param public (logical) Optional. Make the update public. If `FALSE`, -#' will set the metadata and resource map to private (but not the data objects). -#' This applies to the new metadata PID and its resource map and data object. -#' Access policies are not affected. -#' @param use_doi (logical) Optional. If `TRUE`, a new DOI will be minted. -#' @param ... Other arguments to pass into [publish_update()]. -#' -#' @return (character) Named character vector of PIDs in the data package, including PIDs -#' for the metadata, resource map, and data objects. -#' -#' @import dataone -#' @import EML -#' -#' @export -#' -#' @seealso [update_object()] [publish_update()] -#' -#' @examples -#' \dontrun{ -#' cnTest <- dataone::CNode("STAGING") -#' mnTest <- dataone::getMNode(cnTest,"urn:node:mnTestARCTIC") -#' -#' pkg <- create_dummy_package_full(mnTest, title = "My package") -#' -#' file.create("new_file.csv") -#' update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv") -#' file.remove("new_file.csv") -#' } -update_package_object <- function(mn, - data_pid, - new_data_path, - resource_map_pid, - format_id = NULL, - public = TRUE, - use_doi = FALSE, - ...) { - stopifnot(is(mn, "MNode")) - stopifnot(is.character(data_pid), nchar(data_pid) > 0) - stopifnot(is.character(new_data_path), nchar(new_data_path) > 0, file.exists(new_data_path)) - stopifnot(is.character(resource_map_pid), nchar(resource_map_pid) > 0) - stopifnot(is.logical(public)) - - pkg <- get_package(mn, resource_map_pid) - doc <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) - - new_data_pid <- update_object(mn, - pid = data_pid, - path = new_data_path, - format_id = format_id) - - other_data_pids <- pkg$data[which(pkg$data != data_pid)] # wrapped in which for better NA handling - new_data_pids <- c(other_data_pids, new_data_pid) - - doc_new <- update_physical(doc = doc, - mn = mn, - data_pid = data_pid, - new_data_pid = new_data_pid) - - eml_path <- tempfile() - EML::write_eml(doc_new, eml_path) - - pkg_new <- publish_update(mn, - metadata_pid = pkg$metadata, - resource_map_pid = pkg$resource_map, - metadata_path = eml_path, - data_pids = new_data_pids, - child_pids = pkg$child_packages, - public = public, - use_doi = use_doi, - ...) - - file.remove(eml_path) - - cat("\nThe new data pid is:", new_data_pid) - - return(pkg_new) -} #' Helper for publish_object. Reformat the filName in system metadata. #' diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index ef01c85..c2ab42b 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -302,217 +302,6 @@ test_that("publish_update errors if the non-current resource map or metadata pid }) -test_that("update_physical works", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } - - pkg <- create_dummy_package_full(mn, title = "Update physical check") - - file.create("dummy_object.csv") - - new_data_pid <- update_object(mn, - pid = pkg$data[2], - path = "dummy_object.csv", - format_id = "text/csv") - - unlink("dummy_object.csv") - - pkg_new <- publish_update(mn, - resource_map_pid = pkg$resource_map, - metadata_pid = pkg$metadata, - data_pids = c(pkg$data[-2], new_data_pid)) - - eml_original <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) - - eml_new <- update_physical(eml_original, - mn, - data_pid = pkg$data[2], - new_data_pid = new_data_pid) - - t <- tempfile() - write_eml(eml_new, t) - eml_new <- read_eml(t) - - url_original <- eml_get(eml_original, "url") %>% grep("^http", ., value = T) %>% unname() - url_new <- eml_get(eml_new, "url") %>% grep("^http", ., value = T) %>% unname() - - expect_equal(sum(stringr::str_detect(url_original, pkg$data[1])), 1) - expect_equal(sum(stringr::str_detect(url_original, pkg$data[2])), 1) - expect_equal(sum(stringr::str_detect(url_original, pkg$data[3])), 1) - expect_equal(sum(stringr::str_detect(url_original, pkg$data[4])), 1) - - expect_equal(sum(stringr::str_detect(url_new, new_data_pid)), 1) - expect_equal(sum(stringr::str_detect(url_new, pkg$data[1])), 1) - expect_equal(sum(stringr::str_detect(url_new, pkg$data[2])), 0) - expect_equal(sum(stringr::str_detect(url_new, pkg$data[3])), 1) - expect_equal(sum(stringr::str_detect(url_new, pkg$data[4])), 1) -}) - -test_that("update_package_object changes specified data object and rest of package is intact", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } - - pkg <- create_dummy_package_full(mn, title = "Check update_package_object") - - new_data_path <- "test_file.csv" - dummy_data <- data.frame(col1 = 1:26, col2 = letters) - new_data_path <- tempfile(fileext = ".csv") - write.csv(dummy_data, new_data_path, row.names = FALSE) - - data_pid <- pkg$data[2] - - pkg_new <- update_package_object(mn, - data_pid = data_pid, - new_data_path = new_data_path, - resource_map_pid = pkg$resource_map, - format_id = "text/csv") - - file.remove(new_data_path) - - # test: other objects are retained - expect_equal(all(pkg$data[-2] %in% pkg_new$data), TRUE) - - # test: metadata changes - expect_false(pkg$metadata == pkg_new$metadata) - - # test: new data PID is a version of old data PID - versions <- get_all_versions(mn, data_pid) - latest_version <- versions[length(versions)] - - new_data_pid <- pkg_new$data[!pkg_new$data %in% pkg$data] - - expect_equal(latest_version, new_data_pid) - - # test: EML is updated - eml_original <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg$metadata))) - - eml_new <- EML::read_eml(rawToChar(dataone::getObject(mn, pkg_new$metadata))) - - url_original <- eml_get(eml_original, "url") %>% grep("^http", ., value = T) %>% unname() - url_new <- eml_get(eml_new, "url") %>% grep("^http", ., value = T) %>% unname() - - expect_true(url_original[2] != url_new[2]) - expect_equal(url_original[1], url_new[1]) - expect_equal(url_original[3], url_new[3]) - expect_equal(url_original[4], url_new[4]) - - expect_true(stringr::str_detect(url_new[2], new_data_pid)) -}) - -test_that("update_package_object errors if wrong input", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } - - file_path <- tempfile(fileext = ".csv") - - expect_error(update_package_object(LETTERS, - data_pid = file_path, - new_data_path = "something", - rm_pid = "something")) - - expect_error(update_package_object(mn, - data_pid = c(1, 2), - new_data_path = "something", - rm_pid = "something")) - - expect_error(update_package_object(mn, - data_pid = "something", - new_data_path = "something", - rm_pid = "something")) - - expect_error(update_package_object(mn, - data_pid = "something", - new_data_path = TRUE, - rm_pid = "something")) - - expect_error(update_package_object(mn, - data_pid = file_path, - new_data_path = "something", - rm_pid = 1)) - unlink(file_path) -}) - -test_that("update_package_object updates EML", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } - - pkg <- create_dummy_package(mn, size = 4) - - attributes1 <- data.frame( - attributeName = c("col1", "col2"), - attributeDefinition = c("Numbers", "Letters in the alphabet"), - measurementScale = c("ratio", "nominal"), - domain = c("numericDomain", "textDomain"), - formatString = c(NA, NA), - definition = c(NA, "ABCDEFG..."), - unit = c("dimensionless", NA), - numberType = c("integer", NA), - missingValueCode = c(NA, NA), - missingValueCodeExplanation = c(NA, NA), - stringsAsFactors = FALSE) - - attributeList1 <- EML::set_attributes(attributes1) - phys <- pid_to_eml_physical(mn, pkg$data[1]) - - dummy_data_table <- list(entityName = "Dummy Data Table", - entityDescription = "Dummy Description", - physical = phys, - attributeList = attributeList1) - - doc <- EML::read_eml(rawToChar(getObject(mn, pkg$metadata))) - doc$dataset$dataTable <- dummy_data_table - - otherEnts <- list(pid_to_eml_entity(mn, pkg$data[2], entityType = "otherEntity"), - pid_to_eml_entity(mn, pkg$data[3], entityType = "otherEntity")) - doc$dataset$otherEntity <- otherEnts - - eml_path <- tempfile(fileext = ".xml") - EML::write_eml(doc, eml_path) - - pkg <- publish_update(mn, - metadata_pid = pkg$metadata, - resource_map_pid = pkg$resource_map, - data_pids = pkg$data, - metadata_path = eml_path, - public = TRUE, - use_doi = FALSE) - - dummy_data <- data.frame(col1 = 1:26, col2 = letters) - new_data_path <- tempfile(fileext = ".csv") - write.csv(dummy_data, new_data_path, row.names = FALSE) - - data_pid <- pkg$data[1] - - pkg_new <- update_package_object(mn, - data_pid, - new_data_path, - pkg$resource_map, - format_id = "text/csv", - public = TRUE, - use_doi = FALSE) - - doc <- read_eml(getObject(mn, pkg$metadata)) - url_initial <- eml_get(doc, "url") %>% grep("^http", ., value = T) %>% unname() - expect_equal(sum(stringr::str_count(url_initial, data_pid)), 1) - - eml_new <- EML::read_eml(rawToChar(getObject(mn, pkg_new$metadata))) - url_final <- eml_get(eml_new, "url") %>% grep("^http", ., value = T) %>% unname() - expect_equal(sum(stringr::str_count(url_final, data_pid)), 0) - - pid_matches <- lapply(seq_along(pkg_new$data), - function(i) {stringr::str_count(url_final, pkg_new$data[i])}) - - # confirm that URLs have a matching PID - # if new PID corresponds to a dataset that had a dataTable/otherEntity - # and has not been updated, expect_equal will error - expect_equal(sum(unlist(pid_matches)), - length(url_final)) -}) - test_that("publish_update can replace an EML 2.1.1 record with a 2.2.0 record", { if (!is_token_set(mn)) { skip("No token set. Skipping test.") From 11553d6c3b31bcface47d821ceb0b44a50fb6810 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:43:03 -0800 Subject: [PATCH 298/318] update namespace and Rd files --- NAMESPACE | 3 +- man/add_dummy_prov.Rd | 16 ++++++++++ man/publish_update.Rd | 2 +- man/recover_prov.Rd | 21 +++++++++++++ man/update_package_object.Rd | 57 ------------------------------------ man/update_resource_map.Rd | 2 +- 6 files changed, 40 insertions(+), 61 deletions(-) create mode 100644 man/add_dummy_prov.Rd create mode 100644 man/recover_prov.Rd delete mode 100644 man/update_package_object.Rd diff --git a/NAMESPACE b/NAMESPACE index 6c90175..45e70f6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ export(publish_object) export(publish_update) export(read_zip_shapefile) export(recover_failed_submission) +export(recover_prov) export(remove_access) export(remove_public_read) export(reorder_pids) @@ -63,7 +64,6 @@ export(set_rights_holder) export(show_indexing_status) export(sysmeta_to_eml_physical) export(update_object) -export(update_package_object) export(update_resource_map) export(view_profile) export(which_in_eml) @@ -79,7 +79,6 @@ importFrom(methods,is) importFrom(methods,new) importFrom(methods,slot) importFrom(stats,na.omit) -importFrom(stringr,str_detect) importFrom(utils,URLencode) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/man/add_dummy_prov.Rd b/man/add_dummy_prov.Rd new file mode 100644 index 0000000..d1d39c8 --- /dev/null +++ b/man/add_dummy_prov.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{add_dummy_prov} +\alias{add_dummy_prov} +\title{Add prov to a dummy package} +\usage{ +add_dummy_prov(mn, rm_pid) +} +\arguments{ +\item{mn}{member node (the ADC test node)} + +\item{rm_pid}{resource map identifier} +} +\description{ +Adds provenance information to a dummy package for testing +} diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 4c0b832..22f1148 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -9,7 +9,7 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL, parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE, check_first = TRUE, - format_id = NULL) + format_id = NULL, keep_prov = FALSE) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} diff --git a/man/recover_prov.Rd b/man/recover_prov.Rd new file mode 100644 index 0000000..a7722fd --- /dev/null +++ b/man/recover_prov.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{recover_prov} +\alias{recover_prov} +\title{Get a data.frame of prov statements from a resource map pid.} +\usage{ +recover_prov(mn, rm_pid) +} +\arguments{ +\item{mn}{(mn) A memeber node instance} + +\item{rm_pid}{(character) A resource map identifier} +} +\value{ +a data.frame of prov statments +} +\description{ +This is a function that is useful if you need to recover lost prov statements. It returns +a data.frame of statements that can be passed to \code{update_resource_map} in the \code{other_statements} +argument. +} diff --git a/man/update_package_object.Rd b/man/update_package_object.Rd deleted file mode 100644 index 10dda82..0000000 --- a/man/update_package_object.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/editing.R -\name{update_package_object} -\alias{update_package_object} -\title{Update a data object and associated resource map and metadata} -\usage{ -update_package_object(mn, data_pid, new_data_path, resource_map_pid, - format_id = NULL, public = TRUE, use_doi = FALSE, ...) -} -\arguments{ -\item{mn}{(MNode) The Member Node of the data package.} - -\item{data_pid}{(character) PID for data object to update.} - -\item{new_data_path}{(character) Path to new data object.} - -\item{resource_map_pid}{(character) PID for resource map to update.} - -\item{format_id}{(character) Optional. The format ID to set for the object. -When not set, \code{\link[=guess_format_id]{guess_format_id()}} will be used -to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} - -\item{public}{(logical) Optional. Make the update public. If \code{FALSE}, -will set the metadata and resource map to private (but not the data objects). -This applies to the new metadata PID and its resource map and data object. -Access policies are not affected.} - -\item{use_doi}{(logical) Optional. If \code{TRUE}, a new DOI will be minted.} - -\item{...}{Other arguments to pass into \code{\link[=publish_update]{publish_update()}}.} -} -\value{ -(character) Named character vector of PIDs in the data package, including PIDs -for the metadata, resource map, and data objects. -} -\description{ -This function updates a data object and then automatically -updates the package resource map with the new data PID. If an object -already has a \code{dataTable}, \code{otherEntity}, or \code{spatialVector} -with a working physical section, the EML will be updated with the new physical. -It is a convenience wrapper around \code{\link[=update_object]{update_object()}} and \code{\link[=publish_update]{publish_update()}}. -} -\examples{ -\dontrun{ -cnTest <- dataone::CNode("STAGING") -mnTest <- dataone::getMNode(cnTest,"urn:node:mnTestARCTIC") - -pkg <- create_dummy_package_full(mnTest, title = "My package") - -file.create("new_file.csv") -update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv") -file.remove("new_file.csv") -} -} -\seealso{ -\code{\link[=update_object]{update_object()}} \code{\link[=publish_update]{publish_update()}} -} diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index 61426a4..61c2a25 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -6,7 +6,7 @@ \usage{ update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL, child_pids = NULL, other_statements = NULL, identifier = NULL, - public = TRUE, check_first = TRUE) + public = TRUE, check_first = TRUE, keep_prov = FALSE) } \arguments{ \item{mn}{(MNode) The Member Node.} From 11aa4c320196f70f1e3be2389fea0f29fe94f90a Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 14:49:02 -0800 Subject: [PATCH 299/318] update warning message with correct function name --- R/editing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/editing.R b/R/editing.R index c16c217..b66a9ca 100644 --- a/R/editing.R +++ b/R/editing.R @@ -780,8 +780,8 @@ update_resource_map <- function(mn, } else if (any(prov_pids %in% data_pids == FALSE)){ warning("Old provenance contains data pids not in new resource map. Provenance information will be removed. \n - You can get old provenance statements back using: \n - old_prov <- get_prov(mn, rm_pid) \n + You can get old provenance statements back using: + old_prov <- recover_prov(mn, rm_pid) rm_new <- update_resource_map(mn, rm_pid, metadata_pid, data_pids, other_statements = old_prov, keep_prov = T)") new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, From fd927f26850ea6bc1029766740bdadcd0fb1e4bc Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 14 Nov 2019 16:16:57 -0800 Subject: [PATCH 300/318] start of the NSF function still some failure cases that aren't working --- R/eml.R | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/R/eml.R b/R/eml.R index fe05c79..590e706 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1006,3 +1006,94 @@ reorder_pids <- function(pid_list, doc){ ordered_pids <- pid_list[order(match(names(pid_list), entity_names))] return(ordered_pids) } + +#' Create an EML project section from a list of NSF award numbers +#' +#' This function takes a list of NSF award numbers and uses it to +#' query the NSF API to get the award title, PIs, and coPIs. The +#' return value is an EML project section. The function supports 1 +#' or more award numbers +#' +#' @param awards (list) A list of NSF award numbers as characters +#' +#' @return project (emld) An EML project section +#' +#' @export +#' +#' @examples +#' +#' proj <- eml_nsf_to_proejct(awards = c("1203146", "1203473", "1603116")) +#' +#' +eml_nsf_to_project <- function(awards){ + award_nums <- awards + + result_o <- lapply(award_nums, function(x){ + url <- paste0("https://api.nsf.gov/services/v1/awards.json?id=", x ,"&printFields=coPDPI,pdPIName,title") + + t <- fromJSON(url) + }) + + result <- list() + for (i in 1:length(result_o)){ + if ("serviceNotification" %in% names(result_o[[i]]$response)) { + warning(paste(result_o[[i]]$response$serviceNotification$notificationType, "for award", award_nums[i], "\n this award will not be included in the project section.")) + result[[i]] <- NULL + award_nums[i] <- NA + } + else if (length(result_o[[i]]$response$award) == 0){ + warning(paste("Empty result for award", award_nums[i])) + result[[i]] <- NULL + } + else {result[[i]] <- result_o[[i]]} + } + + + award_nums <- subset(award_nums, !is.na(award_nums)) + + if (length(award_nums) > 0){ + co_pis <- lapply(result, function(x){ + stringi::stri_split_fixed(unlist(x$response$award$coPDPI), pattern = " ", simplify = T) %>% + as.data.frame(stringsAsFactors = F) %>% + unite("firstName", V1, V2, sep = " ") %>% + mutate(firstName = trimws(firstName, which = "both")) %>% + rename(lastName = V3) %>% + select(firstName, lastName) + }) + + co_pis <- do.call("rbind", co_pis) %>% + mutate(role = "coPrincipalInvestigator") + + pis <- lapply(result, function(x){ + n <- stringi::stri_split_fixed(unlist(x$response$award$pdPIName), pattern = " ", simplify = T) %>% + as.data.frame(stringsAsFactors = F) %>% + unite("firstName", V1, V2, sep = " ") %>% + mutate(firstName = trimws(firstName, which = "both")) %>% + rename(lastName = V3) %>% + select(firstName, lastName) + }) + + pis <- do.call("rbind", pis) %>% + mutate(role = "principalInvestigator") + + people <- bind_rows(co_pis, pis) + p_list <- list() + + for (i in 1:nrow(people)){ + p_list[[i]] <- eml_personnel(given_names = people$firstName[i], + sur_name = people$lastName[i], + role = people$role[i]) + } + + titles <- lapply(result, function(x){ + unlist(x$response$award$title) + }) + + proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) + } + else if (length(award_nums) == 0){ + stop(call. = F, + "No valid award numbers were found.") + } + +} From 3788836f3f282c4552c0b47f7ebe28f4b2c1bf3d Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 08:42:12 -0800 Subject: [PATCH 301/318] update to now cover more failure cases --- R/eml.R | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/R/eml.R b/R/eml.R index 590e706..16d11d0 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1021,35 +1021,48 @@ reorder_pids <- function(pid_list, doc){ #' @export #' #' @examples +#' awards <- c("1203146", "1203473", "1603116") #' -#' proj <- eml_nsf_to_proejct(awards = c("1203146", "1203473", "1603116")) +#' proj <- eml_nsf_to_project(awards) #' +#' me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) +#' +#' doc <- list(packageId = "id", system = "system", +#' dataset = list(title = "A Mimimal Valid EML Dataset", +#' creator = me, +#' contact = me)) +#' +#' doc$dataset$project <- proj +#' +#' eml_validate(doc) #' eml_nsf_to_project <- function(awards){ + + stopifnot(is.character(awards)) + award_nums <- awards - result_o <- lapply(award_nums, function(x){ + result <- lapply(award_nums, function(x){ url <- paste0("https://api.nsf.gov/services/v1/awards.json?id=", x ,"&printFields=coPDPI,pdPIName,title") t <- fromJSON(url) - }) - result <- list() - for (i in 1:length(result_o)){ - if ("serviceNotification" %in% names(result_o[[i]]$response)) { - warning(paste(result_o[[i]]$response$serviceNotification$notificationType, "for award", award_nums[i], "\n this award will not be included in the project section.")) - result[[i]] <- NULL - award_nums[i] <- NA + if ("serviceNotification" %in% names(t$response)) { + warning(paste(t$response$serviceNotification$notificationType, "for award", x , "\n this award will not be included in the project section.")) + t <- NULL } - else if (length(result_o[[i]]$response$award) == 0){ - warning(paste("Empty result for award", award_nums[i])) - result[[i]] <- NULL + else if (length(t$response$award) == 0){ + warning(paste("Empty result for award", x, "\n this award will not be included in the project section.")) + t <- NULL } - else {result[[i]] <- result_o[[i]]} - } + else t + }) + - award_nums <- subset(award_nums, !is.na(award_nums)) + i <- lapply(result, function(x) {!is.null(x)}) + result <- result[unlist(i)] + award_nums <- award_nums[unlist(i)] if (length(award_nums) > 0){ co_pis <- lapply(result, function(x){ From 069399699f02cc65f1f90998f3d81d4e7a9a049f Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 08:42:21 -0800 Subject: [PATCH 302/318] add tests for nsf to project function --- tests/testthat/test_eml.R | 60 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 31ed456..db7e5e4 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -303,3 +303,63 @@ test_that('reorder_pids fails gracefully', { expect_error(reorder_pids(pid_list, doc)) }) + +test_that('eml_nsf_to_project generates a valid project section', { + + # for a single award + awards <- "1203146" + proj <- eml_nsf_to_project(awards) + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) + + # for multiple awards + awards <- c("1203146", "1203473", "1603116") + + proj <- eml_nsf_to_project(awards) + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) + +}) + +test_that('eml_nsf_to_project handles bad funding numbers gracefully', { + + awards <- c("abcdef", "1203473", "12345") + + expect_warning(proj <- eml_nsf_to_project(awards), "this award will not be included in the project section") + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) +}) + +test_that('eml_nsf_to_project fails gracefully', { + + awards <- c("abcdef", "12345") + + expect_error(proj <- eml_nsf_to_project(awards), "No valid award numbers were found") +}) From 8a01bd67b48d4aec8dee70ee3a91a05b19348bf6 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 09:14:08 -0800 Subject: [PATCH 303/318] make namespacing more explicit to make the R checks happier --- R/eml.R | 29 +++++++++++++++-------------- tests/testthat/test_eml.R | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/R/eml.R b/R/eml.R index 16d11d0..6d024ca 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1019,6 +1019,7 @@ reorder_pids <- function(pid_list, doc){ #' @return project (emld) An EML project section #' #' @export +#' @importFrom rlang .data #' #' @examples #' awards <- c("1203146", "1203473", "1603116") @@ -1034,7 +1035,7 @@ reorder_pids <- function(pid_list, doc){ #' #' doc$dataset$project <- proj #' -#' eml_validate(doc) +#' EML::eml_validate(doc) #' eml_nsf_to_project <- function(awards){ @@ -1045,7 +1046,7 @@ eml_nsf_to_project <- function(awards){ result <- lapply(award_nums, function(x){ url <- paste0("https://api.nsf.gov/services/v1/awards.json?id=", x ,"&printFields=coPDPI,pdPIName,title") - t <- fromJSON(url) + t <- jsonlite::fromJSON(url) if ("serviceNotification" %in% names(t$response)) { warning(paste(t$response$serviceNotification$notificationType, "for award", x , "\n this award will not be included in the project section.")) @@ -1068,30 +1069,30 @@ eml_nsf_to_project <- function(awards){ co_pis <- lapply(result, function(x){ stringi::stri_split_fixed(unlist(x$response$award$coPDPI), pattern = " ", simplify = T) %>% as.data.frame(stringsAsFactors = F) %>% - unite("firstName", V1, V2, sep = " ") %>% - mutate(firstName = trimws(firstName, which = "both")) %>% - rename(lastName = V3) %>% - select(firstName, lastName) + tidyr::unite("firstName", .data$V1, .data$V2, sep = " ") %>% + dplyr::mutate(firstName = trimws(.data$firstName, which = "both")) %>% + dplyr::rename(lastName = .data$V3) %>% + dplyr::select(.data$firstName, .data$lastName) }) co_pis <- do.call("rbind", co_pis) %>% - mutate(role = "coPrincipalInvestigator") + dplyr::mutate(role = "coPrincipalInvestigator") pis <- lapply(result, function(x){ n <- stringi::stri_split_fixed(unlist(x$response$award$pdPIName), pattern = " ", simplify = T) %>% as.data.frame(stringsAsFactors = F) %>% - unite("firstName", V1, V2, sep = " ") %>% - mutate(firstName = trimws(firstName, which = "both")) %>% - rename(lastName = V3) %>% - select(firstName, lastName) + tidyr::unite("firstName", .data$V1, .data$V2, sep = " ") %>% + dplyr::mutate(firstName = trimws(.data$firstName, which = "both")) %>% + dplyr::rename(lastName = .data$V3) %>% + dplyr::select(.data$firstName, .data$lastName) }) pis <- do.call("rbind", pis) %>% - mutate(role = "principalInvestigator") + dplyr::mutate(role = "principalInvestigator") - people <- bind_rows(co_pis, pis) - p_list <- list() + people <- dplyr::bind_rows(co_pis, pis) + p_list <- list() for (i in 1:nrow(people)){ p_list[[i]] <- eml_personnel(given_names = people$firstName[i], sur_name = people$lastName[i], diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index db7e5e4..8d38cfc 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -360,6 +360,6 @@ test_that('eml_nsf_to_project handles bad funding numbers gracefully', { test_that('eml_nsf_to_project fails gracefully', { awards <- c("abcdef", "12345") + expect_error(suppressWarnings(proj <- eml_nsf_to_project(awards)), "No valid award numbers were found") - expect_error(proj <- eml_nsf_to_project(awards), "No valid award numbers were found") }) From b46cf0685d3488dfb868f1b2baff54de45f602a7 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 09:14:26 -0800 Subject: [PATCH 304/318] update DESCRIPTION, ns, and Rd files --- DESCRIPTION | 5 ++++- NAMESPACE | 2 ++ man/eml_nsf_to_project.Rd | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 man/eml_nsf_to_project.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6d23b6f..718561a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,19 +24,22 @@ Depends: Imports: dataone, datapack, + dplyr, digest, EML (>= 2.0), httr, + jsonlite, magrittr, methods, + rlang, stringr, stringi, + tidyr, tools, uuid, xml2, XML Suggests: - dplyr, humaniformat, knitr, lubridate, diff --git a/NAMESPACE b/NAMESPACE index 0025af0..db1f841 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(eml_contact) export(eml_creator) export(eml_get_simple) export(eml_metadata_provider) +export(eml_nsf_to_project) export(eml_otherEntity_to_dataTable) export(eml_party) export(eml_personnel) @@ -77,6 +78,7 @@ importFrom(methods,as) importFrom(methods,is) importFrom(methods,new) importFrom(methods,slot) +importFrom(rlang,.data) importFrom(stats,na.omit) importFrom(stringr,str_detect) importFrom(utils,URLencode) diff --git a/man/eml_nsf_to_project.Rd b/man/eml_nsf_to_project.Rd new file mode 100644 index 0000000..7e9a12c --- /dev/null +++ b/man/eml_nsf_to_project.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_nsf_to_project} +\alias{eml_nsf_to_project} +\title{Create an EML project section from a list of NSF award numbers} +\usage{ +eml_nsf_to_project(awards) +} +\arguments{ +\item{awards}{(list) A list of NSF award numbers as characters} +} +\value{ +project (emld) An EML project section +} +\description{ +This function takes a list of NSF award numbers and uses it to +query the NSF API to get the award title, PIs, and coPIs. The +return value is an EML project section. The function supports 1 +or more award numbers +} +\examples{ +awards <- c("1203146", "1203473", "1603116") + +proj <- eml_nsf_to_project(awards) + +me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + +doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + +doc$dataset$project <- proj + +EML::eml_validate(doc) + +} From d8bed9c9383608f1bb1dedbf5a47d2944d20a95d Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 12:33:21 -0800 Subject: [PATCH 305/318] prepend "NSF" to award numbers for funding section --- R/eml.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/eml.R b/R/eml.R index 6d024ca..14b0c93 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1064,6 +1064,7 @@ eml_nsf_to_project <- function(awards){ i <- lapply(result, function(x) {!is.null(x)}) result <- result[unlist(i)] award_nums <- award_nums[unlist(i)] + award_nums <- paste("NSF", award_nums) if (length(award_nums) > 0){ co_pis <- lapply(result, function(x){ From d8cc7edbe0409cce5256065b947cd868fe4ef080 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 13:44:23 -0800 Subject: [PATCH 306/318] make the warning output nicer and start clearing a path for EML2.2 support --- R/eml.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/eml.R b/R/eml.R index 14b0c93..b2737f2 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1037,7 +1037,7 @@ reorder_pids <- function(pid_list, doc){ #' #' EML::eml_validate(doc) #' -eml_nsf_to_project <- function(awards){ +eml_nsf_to_project <- function(awards, eml_version = "2.1"){ stopifnot(is.character(awards)) @@ -1049,22 +1049,20 @@ eml_nsf_to_project <- function(awards){ t <- jsonlite::fromJSON(url) if ("serviceNotification" %in% names(t$response)) { - warning(paste(t$response$serviceNotification$notificationType, "for award", x , "\n this award will not be included in the project section.")) + warning(paste(t$response$serviceNotification$notificationType, "for award", x , "\n this award will not be included in the project section."), call. = FALSE) t <- NULL } else if (length(t$response$award) == 0){ - warning(paste("Empty result for award", x, "\n this award will not be included in the project section.")) + warning(paste("Empty result for award", x, "\n this award will not be included in the project section."), call. = FALSE) t <- NULL } else t }) - i <- lapply(result, function(x) {!is.null(x)}) result <- result[unlist(i)] award_nums <- award_nums[unlist(i)] - award_nums <- paste("NSF", award_nums) if (length(award_nums) > 0){ co_pis <- lapply(result, function(x){ @@ -1104,7 +1102,11 @@ eml_nsf_to_project <- function(awards){ unlist(x$response$award$title) }) - proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) + if (eml_version %in% c("2.1", "2.1.1")){ + award_nums <- paste("NSF", award_nums) + proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) + } + } else if (length(award_nums) == 0){ stop(call. = F, From c2f3117c4a0867f83006bf35d42e7fe3511b56c2 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 14:04:31 -0800 Subject: [PATCH 307/318] add support for EML 2.2 --- R/eml.R | 18 ++++++++++++++++-- tests/testthat/test_eml.R | 23 +++++++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/R/eml.R b/R/eml.R index b2737f2..bf8330e 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1015,7 +1015,7 @@ reorder_pids <- function(pid_list, doc){ #' or more award numbers #' #' @param awards (list) A list of NSF award numbers as characters -#' +#' @param eml_version (char) EML version to use (2.1.1 or 2.2.0) #' @return project (emld) An EML project section #' #' @export @@ -1024,7 +1024,7 @@ reorder_pids <- function(pid_list, doc){ #' @examples #' awards <- c("1203146", "1203473", "1603116") #' -#' proj <- eml_nsf_to_project(awards) +#' proj <- eml_nsf_to_project(awards, eml_version = "2.1.1") #' #' me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) #' @@ -1040,6 +1040,7 @@ reorder_pids <- function(pid_list, doc){ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ stopifnot(is.character(awards)) + stopifnot(eml_version %in% c("2.1", "2.1.0", "2.2", "2.2.0")) award_nums <- awards @@ -1106,6 +1107,19 @@ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ award_nums <- paste("NSF", award_nums) proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) } + else if (eml_version %in% c("2.2", "2.2.0")){ + awards <- list() + + for (i in 1:length(award_nums)){ + awards[[i]] <- list(title = titles[i], + funderName = "National Science Foundation", + funderIdentifier = "https://doi.org/10.13039/00000001", + awardNumber = award_nums[i], + awardUrl = paste0("https://www.nsf.gov/awardsearch/showAward?AWD_ID=", award_nums[i])) + } + + proj <- list(title = titles, personnel = p_list, award = awards) + } } else if (length(award_nums) == 0){ diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 8d38cfc..89415ee 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -306,7 +306,7 @@ test_that('reorder_pids fails gracefully', { test_that('eml_nsf_to_project generates a valid project section', { - # for a single award + # for a single award, EML 2.1.1 awards <- "1203146" proj <- eml_nsf_to_project(awards) @@ -321,7 +321,7 @@ test_that('eml_nsf_to_project generates a valid project section', { expect_true(eml_validate(doc)) - # for multiple awards + # for multiple awards, EML 2.1.1 awards <- c("1203146", "1203473", "1603116") proj <- eml_nsf_to_project(awards) @@ -337,6 +337,23 @@ test_that('eml_nsf_to_project generates a valid project section', { expect_true(eml_validate(doc)) + # for multiple awards, EML 2.2.0 + awards <- c("1203146", "1203473", "1603116") + + emld::eml_version("eml-2.2.0") + proj <- eml_nsf_to_project(awards, eml_version = "2.2") + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) + }) test_that('eml_nsf_to_project handles bad funding numbers gracefully', { @@ -363,3 +380,5 @@ test_that('eml_nsf_to_project fails gracefully', { expect_error(suppressWarnings(proj <- eml_nsf_to_project(awards)), "No valid award numbers were found") }) + + From 7090a129a6f80bdaaf02f4f796836519e31f29fe Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 14:04:49 -0800 Subject: [PATCH 308/318] update description and Rd --- DESCRIPTION | 1 + man/eml_nsf_to_project.Rd | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 718561a..057a534 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,6 +40,7 @@ Imports: xml2, XML Suggests: + emld, humaniformat, knitr, lubridate, diff --git a/man/eml_nsf_to_project.Rd b/man/eml_nsf_to_project.Rd index 7e9a12c..9cd3cc3 100644 --- a/man/eml_nsf_to_project.Rd +++ b/man/eml_nsf_to_project.Rd @@ -4,10 +4,12 @@ \alias{eml_nsf_to_project} \title{Create an EML project section from a list of NSF award numbers} \usage{ -eml_nsf_to_project(awards) +eml_nsf_to_project(awards, eml_version = "2.1") } \arguments{ \item{awards}{(list) A list of NSF award numbers as characters} + +\item{eml_version}{(char) EML version to use (2.1.1 or 2.2.0)} } \value{ project (emld) An EML project section From adc565971702c541109fc0d7900592809a307094 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Fri, 15 Nov 2019 14:48:26 -0800 Subject: [PATCH 309/318] add an eml_version argument check and make the spliting of first and last names less likely to discard information --- DESCRIPTION | 1 - R/eml.R | 117 +++++++++++++++++++------------------- man/eml_nsf_to_project.Rd | 2 +- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 057a534..f71ae5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,6 @@ Imports: rlang, stringr, stringi, - tidyr, tools, uuid, xml2, diff --git a/R/eml.R b/R/eml.R index bf8330e..cb4ce56 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1040,7 +1040,7 @@ reorder_pids <- function(pid_list, doc){ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ stopifnot(is.character(awards)) - stopifnot(eml_version %in% c("2.1", "2.1.0", "2.2", "2.2.0")) + stopifnot(eml_version %in% c("2.1", "2.1.1", "2.2", "2.2.0")) award_nums <- awards @@ -1060,71 +1060,72 @@ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ else t }) - i <- lapply(result, function(x) {!is.null(x)}) result <- result[unlist(i)] award_nums <- award_nums[unlist(i)] - if (length(award_nums) > 0){ - co_pis <- lapply(result, function(x){ - stringi::stri_split_fixed(unlist(x$response$award$coPDPI), pattern = " ", simplify = T) %>% - as.data.frame(stringsAsFactors = F) %>% - tidyr::unite("firstName", .data$V1, .data$V2, sep = " ") %>% - dplyr::mutate(firstName = trimws(.data$firstName, which = "both")) %>% - dplyr::rename(lastName = .data$V3) %>% - dplyr::select(.data$firstName, .data$lastName) - }) - - co_pis <- do.call("rbind", co_pis) %>% - dplyr::mutate(role = "coPrincipalInvestigator") - - pis <- lapply(result, function(x){ - n <- stringi::stri_split_fixed(unlist(x$response$award$pdPIName), pattern = " ", simplify = T) %>% - as.data.frame(stringsAsFactors = F) %>% - tidyr::unite("firstName", .data$V1, .data$V2, sep = " ") %>% - dplyr::mutate(firstName = trimws(.data$firstName, which = "both")) %>% - dplyr::rename(lastName = .data$V3) %>% - dplyr::select(.data$firstName, .data$lastName) - }) - - pis <- do.call("rbind", pis) %>% - dplyr::mutate(role = "principalInvestigator") - - people <- dplyr::bind_rows(co_pis, pis) - - p_list <- list() - for (i in 1:nrow(people)){ - p_list[[i]] <- eml_personnel(given_names = people$firstName[i], - sur_name = people$lastName[i], - role = people$role[i]) - } + if (length(award_nums) == 0){ + stop(call. = F, + "No valid award numbers were found.") + } + + # create function to extract first name and middle initial (if present) + # as firstName, and whatever else exists as lastName + extract_name <- function(x){ + lapply(x, function(x) { + data.frame( + firstName = trimws(stringr::str_extract(x, "[A-Za-z]{2,}\\s[A-Z]?")), + lastName = trimws(gsub("[A-Za-z]{2,}\\s[A-Z]?", "", x)), + stringsAsFactors = F)}) + } - titles <- lapply(result, function(x){ - unlist(x$response$award$title) - }) + co_pis <- lapply(result, function(x){ + extract_name(x$response$award$coPDPI) + }) - if (eml_version %in% c("2.1", "2.1.1")){ - award_nums <- paste("NSF", award_nums) - proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) - } - else if (eml_version %in% c("2.2", "2.2.0")){ - awards <- list() - - for (i in 1:length(award_nums)){ - awards[[i]] <- list(title = titles[i], - funderName = "National Science Foundation", - funderIdentifier = "https://doi.org/10.13039/00000001", - awardNumber = award_nums[i], - awardUrl = paste0("https://www.nsf.gov/awardsearch/showAward?AWD_ID=", award_nums[i])) - } - - proj <- list(title = titles, personnel = p_list, award = awards) - } + co_pis <- unlist(co_pis, recursive = F) + co_pis <- do.call("rbind", co_pis) %>% + dplyr::mutate(role = "coPrincipalInvestigator") + + pis <- lapply(result, function(x){ + extract_name(x$response$award$pdPIName) + }) + pis <- unlist(pis, recursive = F) + pis <- do.call("rbind", pis) %>% + dplyr::mutate(role = "principalInvestigator") + + people <- dplyr::bind_rows(co_pis, pis) + + p_list <- list() + for (i in 1:nrow(people)){ + p_list[[i]] <- eml_personnel(given_names = people$firstName[i], + sur_name = people$lastName[i], + role = people$role[i]) } - else if (length(award_nums) == 0){ - stop(call. = F, - "No valid award numbers were found.") + + titles <- lapply(result, function(x){ + unlist(x$response$award$title) + }) + + if (eml_version %in% c("2.1", "2.1.1")){ + award_nums <- paste("NSF", award_nums) + proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) + } + else if (eml_version %in% c("2.2", "2.2.0")){ + awards <- list() + + for (i in 1:length(award_nums)){ + awards[[i]] <- list(title = titles[i], + funderName = "National Science Foundation", + funderIdentifier = "https://doi.org/10.13039/00000001", + awardNumber = award_nums[i], + awardUrl = paste0("https://www.nsf.gov/awardsearch/showAward?AWD_ID=", award_nums[i])) + } + + proj <- list(title = titles, personnel = p_list, award = awards) } + + } diff --git a/man/eml_nsf_to_project.Rd b/man/eml_nsf_to_project.Rd index 9cd3cc3..eb40f3a 100644 --- a/man/eml_nsf_to_project.Rd +++ b/man/eml_nsf_to_project.Rd @@ -23,7 +23,7 @@ or more award numbers \examples{ awards <- c("1203146", "1203473", "1603116") -proj <- eml_nsf_to_project(awards) +proj <- eml_nsf_to_project(awards, eml_version = "2.1.1") me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) From 5e4ecd56ad26ac48629678e542edacc98ea4d51f Mon Sep 17 00:00:00 2001 From: Jeanette Date: Wed, 20 Nov 2019 11:05:19 -0800 Subject: [PATCH 310/318] remove a dependency we don't need anymore --- DESCRIPTION | 1 - R/eml.R | 1 - 2 files changed, 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f71ae5a..5057935 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,6 @@ Imports: jsonlite, magrittr, methods, - rlang, stringr, stringi, tools, diff --git a/R/eml.R b/R/eml.R index cb4ce56..ec8d4c8 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1019,7 +1019,6 @@ reorder_pids <- function(pid_list, doc){ #' @return project (emld) An EML project section #' #' @export -#' @importFrom rlang .data #' #' @examples #' awards <- c("1203146", "1203473", "1603116") From a32850f1c21f907ce4049ca13d09bead2ca84364 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Wed, 20 Nov 2019 11:17:58 -0800 Subject: [PATCH 311/318] update namespace file removing rlang dep --- NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index db1f841..f0386ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -78,7 +78,6 @@ importFrom(methods,as) importFrom(methods,is) importFrom(methods,new) importFrom(methods,slot) -importFrom(rlang,.data) importFrom(stats,na.omit) importFrom(stringr,str_detect) importFrom(utils,URLencode) From 65ec9b5bf0c070366a7bb614cfebd5eebf2e78e8 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Wed, 20 Nov 2019 12:52:50 -0800 Subject: [PATCH 312/318] incorporate some of Dom's suggestions --- R/eml.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/R/eml.R b/R/eml.R index ec8d4c8..8667687 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1068,16 +1068,6 @@ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ "No valid award numbers were found.") } - # create function to extract first name and middle initial (if present) - # as firstName, and whatever else exists as lastName - extract_name <- function(x){ - lapply(x, function(x) { - data.frame( - firstName = trimws(stringr::str_extract(x, "[A-Za-z]{2,}\\s[A-Z]?")), - lastName = trimws(gsub("[A-Za-z]{2,}\\s[A-Z]?", "", x)), - stringsAsFactors = F)}) - } - co_pis <- lapply(result, function(x){ extract_name(x$response$award$coPDPI) }) @@ -1123,8 +1113,18 @@ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ } proj <- list(title = titles, personnel = p_list, award = awards) + return(proj) } +} - - +# Extract first and last name from NSF API results +# +# The NSF API jams the first name, last name, and middle initial if it exists into a single string. +# This simple helper uses some regex to split the names up. +extract_name <- function(x){ + lapply(x, function(x) { + data.frame( + firstName = trimws(stringr::str_extract(x, "[A-Za-z]{2,}\\s[A-Z]?")), + lastName = trimws(gsub("[A-Za-z]{2,}\\s[A-Z]?", "", x)), + stringsAsFactors = F)}) } From 4568c5239d44a895e38c1a350760d21833e8392c Mon Sep 17 00:00:00 2001 From: Jeanette Date: Thu, 21 Nov 2019 10:26:03 -0800 Subject: [PATCH 313/318] fix bug if co-pis is empty --- R/eml.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/eml.R b/R/eml.R index 8667687..2ebc44a 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1073,8 +1073,10 @@ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ }) co_pis <- unlist(co_pis, recursive = F) - co_pis <- do.call("rbind", co_pis) %>% - dplyr::mutate(role = "coPrincipalInvestigator") + co_pis <- do.call("rbind", co_pis) + if (!is.null(co_pis)){ + co_pis$role <- "coPrincipalInvestigator" + } pis <- lapply(result, function(x){ extract_name(x$response$award$pdPIName) From 4cec0702df025ffe67d546e07d3770752d39d722 Mon Sep 17 00:00:00 2001 From: Jeanette Date: Tue, 26 Nov 2019 10:23:50 -0800 Subject: [PATCH 314/318] documenting new keep_prov argument --- R/editing.R | 2 ++ man/publish_update.Rd | 3 ++- man/update_resource_map.Rd | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/editing.R b/R/editing.R index b66a9ca..2fd0e29 100644 --- a/R/editing.R +++ b/R/editing.R @@ -255,6 +255,7 @@ update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = #' Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. #' @param format_id (character) Optional. When omitted, the updated object will have the same formatId as `metadata_pid`. If set, will attempt #' to use the value instead. +#' @param keep_prov (logical) Option to force publish_update to keep prov #' #' @return (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. #' @@ -689,6 +690,7 @@ create_resource_map <- function(mn, #' @param resource_map_pid (character) The PID of the resource map to be updated. #' @param other_statements (data.frame) Extra statements to add to the resource map. #' @param identifier (character) Manually specify the identifier for the new metadata object. +#' @param keep_prov (character) Option to force prov to be forwarded into new resource map #' #' @return (character) The PID of the updated resource map. #' diff --git a/man/publish_update.Rd b/man/publish_update.Rd index 22f1148..fe1d1d2 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -49,7 +49,8 @@ access policies are not affected.} Checks that objects exist and are of the right format type. This speeds up the function, especially when \code{data_pids} has many elements.} \item{format_id}{(character) Optional. When omitted, the updated object will have the same formatId as \code{metadata_pid}. If set, will attempt -to use the value instead.} +to use the value instead. +@param keep_prov (logical) Option to force publish_update to keep prov} } \value{ (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index 61c2a25..e65bc95 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -29,6 +29,8 @@ nested under the package.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. This speeds up the function, especially when \code{data_pids} has many elements.} + +\item{keep_prov}{(character) Option to force prov to be forwarded into new resource map} } \value{ (character) The PID of the updated resource map. From d9a86137decde08a5028c4ff8b91b03031049d16 Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 11 Dec 2019 14:20:05 -0800 Subject: [PATCH 315/318] return eml_validate check in eml_otherEntity_to_dataTable --- R/eml.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 2ebc44a..e8d806d 100644 --- a/R/eml.R +++ b/R/eml.R @@ -755,7 +755,10 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { ## return eml if (validate_eml == TRUE) { - eml_validate(doc) + valid_eml <- eml_validate(doc) + if (!valid_eml) { + return(attributes(valid_eml)) + } } return(doc) } From deb06b9439b3b2b6ec8fee003bf79b7c549e923b Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Wed, 11 Dec 2019 16:37:15 -0800 Subject: [PATCH 316/318] updated eml_otherEntity_to_dataTable to handle otherEnt lists of length 1 --- R/eml.R | 21 +++++++++------------ R/helpers.R | 1 + tests/testthat/test_eml.R | 5 +++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/R/eml.R b/R/eml.R index e8d806d..9afd6be 100644 --- a/R/eml.R +++ b/R/eml.R @@ -712,17 +712,17 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { ## set OE entityTypes to NULL and select the ones we want to use - if (length(eml_get_simple(doc$dataset$otherEntity, "entityName")) == 1){ + if (length(eml_get_simple(doc$dataset$otherEntity, "entityName")) == 1) { ## prepare OE to copy otherEntity <- doc$dataset$otherEntity + ## If there are copies of entities from the editor then otherEnt can incorrectly be a list + if (is.null(names(otherEntity))) { + otherEntity <- otherEntity[[1]] + } otherEntity$entityType <- NULL - otherEntity <- list(otherEntity) ## delete otherEntity from list doc$dataset$otherEntity <- NULL - } - - - else { + } else { otherEntity <- doc$dataset$otherEntity[index] for (i in 1:length(index)){ @@ -738,8 +738,7 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { ## handle various datatable length cases if (length(dts) == 0){ doc$dataset$dataTable <- otherEntity - } - else{ + } else{ if (length(eml_get_simple(dts, "entityName")) == 1){ dts <- list(dts) doc$dataset$dataTable <- c(dts, otherEntity) @@ -750,16 +749,14 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { } } - - - ## return eml if (validate_eml == TRUE) { valid_eml <- eml_validate(doc) if (!valid_eml) { - return(attributes(valid_eml)) + stop(attributes(valid_eml)) } } + return(doc) } diff --git a/R/helpers.R b/R/helpers.R index dd5e782..2ab4c77 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -273,6 +273,7 @@ create_dummy_parent_package <- function(mn, children) { #' } create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) { names <- vapply(seq_len(numberAttributes), function(x) { paste0("Attribute ", x)}, "") + domains <- rep("textDomain", numberAttributes) if(!is.null(factors)) { domains <- c(rep("textDomain", numberAttributes - length(factors)), diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 89415ee..064f257 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -134,6 +134,7 @@ test_that("eml_otherEntity_to_dataTable works", { } doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + doc$dataset$otherEntity$attributeList <- EML::set_attributes(create_dummy_attributes_dataframe(1)) otherEntity <- doc$dataset$otherEntity doc <- eml_otherEntity_to_dataTable(doc, 1) @@ -142,8 +143,8 @@ test_that("eml_otherEntity_to_dataTable works", { expect_length(doc$dataset$otherEntity, 0) # test that dataTable was added - expect_equal(otherEntity$entityName, doc$dataset$dataTable[[1]]$entityName) - expect_equivalent(otherEntity$physical, doc$dataset$dataTable[[1]]$physical) + expect_equal(otherEntity$entityName, doc$dataset$dataTable$entityName) + expect_equivalent(otherEntity$physical, doc$dataset$dataTable$physical) }) test_that("which_in_eml returns correct locations", { From c045895bfd614531ed13646fc5bf69107848b16c Mon Sep 17 00:00:00 2001 From: Dominic Mullen Date: Tue, 14 Jan 2020 14:01:18 -0800 Subject: [PATCH 317/318] Update R/eml.R Co-Authored-By: Jeanette Clark --- R/eml.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 9afd6be..01fdcc7 100644 --- a/R/eml.R +++ b/R/eml.R @@ -715,7 +715,7 @@ eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { if (length(eml_get_simple(doc$dataset$otherEntity, "entityName")) == 1) { ## prepare OE to copy otherEntity <- doc$dataset$otherEntity - ## If there are copies of entities from the editor then otherEnt can incorrectly be a list + ## Handle case where otherEntity is in a list of length 1 (boxed) if (is.null(names(otherEntity))) { otherEntity <- otherEntity[[1]] } From 3282e33f89ac269a367855d2b286a19ebe1ad056 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Tue, 14 Jan 2020 14:31:52 -0800 Subject: [PATCH 318/318] remove duplicate personnel often the same personnel are listed on multiple grants, this prevents them from being listed multiple times in the EML project section --- R/eml.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/eml.R b/R/eml.R index 01fdcc7..2e746e0 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1086,7 +1086,8 @@ eml_nsf_to_project <- function(awards, eml_version = "2.1"){ pis <- do.call("rbind", pis) %>% dplyr::mutate(role = "principalInvestigator") - people <- dplyr::bind_rows(co_pis, pis) + people <- dplyr::bind_rows(co_pis, pis) %>% + dplyr::distinct() p_list <- list() for (i in 1:nrow(people)){