Skip to content

Commit

Permalink
Remove handling of mixed atomic/data.table columns
Browse files Browse the repository at this point in the history
  • Loading branch information
William Dearden committed Mar 13, 2018
1 parent ada870c commit f9b4516
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 34 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ importFrom(parallel,makeForkCluster)
importFrom(parallel,makePSOCKcluster)
importFrom(parallel,stopCluster)
importFrom(purrr,map2)
importFrom(purrr,map_if)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
importFrom(purrr,simplify)
importFrom(purrr,transpose)
importFrom(stringr,str_detect)
Expand Down
40 changes: 17 additions & 23 deletions R/elasticsearch_parsers.R
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ chomp_aggs <- function(aggs_json = NULL) {
#' This is a side-effect-free function: it returns a new data.table and the
#' input data.table is unmodified.
#' @importFrom data.table copy as.data.table rbindlist setnames
#' @importFrom purrr map_if map_lgl map_int
#' @export
#' @param chomped_df a data.table
#' @param col_to_unpack a character vector of length one: the column name to
Expand Down Expand Up @@ -390,60 +391,53 @@ unpack_nested_data <- function(chomped_df, col_to_unpack) {
log_fatal(msg)
}

outDT <- data.table::copy(chomped_df)
listDT <- outDT[[col_to_unpack]]
listDT <- chomped_df[[col_to_unpack]]

# Check for empty column
if (all(lengths(listDT) == 0)) {
if (all(purrr::map_int(listDT, NROW) == 0)) {
msg <- "The column given to unpack_nested_data had no data in it."
log_fatal(msg)
}

listDT[lengths(listDT) == 0] <- NA

is_df <- purrr::map_lgl(listDT, is.data.frame)
is_list <- purrr::map_lgl(listDT, is.list)
is_atomic <- purrr::map_lgl(listDT, is.atomic)
is_na <- is.na(listDT)

# Bind packed column into one data.table
if (all(is_atomic)) {
newDT <- data.table::as.data.table(unlist(listDT))
} else if (all(is_df | is_atomic)) {
# If the packed column contains a mixture of data tables, we need to
# to convert the atomic vectors to data.tables

# Find column name to use for NA vectors
} else if (all(is_df | is_list | is_na)) {
# Find name to use for NA columns
first_df <- min(which(is_df))
col_name <- names(listDT[[first_df]])[1]

# Convert non data.frame rows to data.table and assign name to rows
# with no name
prep_row <- function(x) {
if (is.atomic(x)) {
x <- data.table::as.data.table(x)
if (is.na(x)) names(x) <- col_name
else names(x) <- col_to_unpack
}

.prep_na_row <- function(x, col_name) {
x <- data.table::as.data.table(x)
names(x) <- col_name
x
}
newDT <- purrr::map(listDT, prep_row)


# If the packed column contains data.tables, we use rbindlist
newDT <- purrr::map_if(listDT, is_na, .prep_na_row, col_name = col_name)
newDT <- data.table::rbindlist(newDT, fill = TRUE)
} else {
msg <- paste0("Each row in column ", col_to_unpack, " must be a data frame or a vector.")
futile.logger::flog.fatal(msg)
stop(msg)
log_fatal(msg)
}

# Create the unpacked data.table by replicating the originally unpacked
# columns by the number of rows in each entry in the original unpacked column
# We don't use newDT because it doesn't have the original row lengths
times_to_replicate <- pmax(purrr::map_int(listDT, NROW), 1)
# Replicate the rows of the data.table by entries of times_to_replicate but drop col_to_unpack
replicatedDT <- chomped_df[rep(1:nrow(chomped_df), times_to_replicate)]
replicatedDT[, col_to_unpack] <- NULL

# Then bind the replicated columns with the unpacked column
outDT <- data.table::data.table(newDT, replicatedDT)

if ("V1" %in% names(outDT)) {
data.table::setnames(outDT, "V1", col_to_unpack)
}
Expand Down
11 changes: 0 additions & 11 deletions tests/testthat/test-elasticsearch_parsers.R
Original file line number Diff line number Diff line change
Expand Up @@ -934,17 +934,6 @@ futile.logger::flog.threshold(0)
expect_equal(unpack_nested_data(DT2, col_to_unpack = "y"), unpackedDT)
})

test_that("unpack_nested_data should handle mixed atomic/data frame column", {
DT <- data.table::data.table(x = 1:2, y = list(1, data.table(w = 5:6, z = 7:8)))
unpackedDT <- data.table::data.table(
y = c(1, NA, NA)
, w = c(NA, 5, 6)
, z = c(NA, 7, 8)
, x = c(1, 2, 2)
)
expect_equal(unpack_nested_data(DT, col_to_unpack = "y"), unpackedDT)
})

#---- 5. .ConvertToSec

# .ConvertToSec should work for seconds
Expand Down

0 comments on commit f9b4516

Please sign in to comment.