From f954a805538fa7145457da03f3ef438728f39de2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Jun 2022 23:51:54 +0200 Subject: [PATCH 001/141] Update Rd files --- DESCRIPTION | 2 +- man/euCodedFileHeader.Rd | 10 ++++++++-- man/readEuCodedFile.Rd | 11 ++++++++--- man/readEuCodedFiles.Rd | 3 +-- man/valuesToCsv.Rd | 10 ++++++++-- man/writeEuCodedFile.Rd | 9 +++++++-- 6 files changed, 33 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c4082cc..1719359 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,4 +23,4 @@ Suggests: Remotes: github::kwb-r/kwb.utils Encoding: UTF-8 -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.2 diff --git a/man/euCodedFileHeader.Rd b/man/euCodedFileHeader.Rd index bcf555b..3c68b83 100644 --- a/man/euCodedFileHeader.Rd +++ b/man/euCodedFileHeader.Rd @@ -4,8 +4,14 @@ \alias{euCodedFileHeader} \title{Generate List With EU Header Information} \usage{ -euCodedFileHeader(separator = ";", decimal = ".", quote = "\\"", - encoding = "ISO-8859-1", language = "en", year = 2010) +euCodedFileHeader( + separator = ";", + decimal = ".", + quote = "\\"", + encoding = "ISO-8859-1", + language = "en", + year = 2010 +) } \arguments{ \item{separator}{default: ";"} diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 1255169..0c7411b 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -4,9 +4,14 @@ \alias{readEuCodedFile} \title{Read CCTV Inspection Data in EN13508-2 Format} \usage{ -readEuCodedFile(input.file, encoding = "latin1", - read.inspections = TRUE, simple.algorithm = TRUE, warn = TRUE, - dbg = TRUE) +readEuCodedFile( + input.file, + encoding = "latin1", + read.inspections = TRUE, + simple.algorithm = TRUE, + warn = TRUE, + dbg = TRUE +) } \arguments{ \item{input.file}{full path to text file containing CCTV inspection results diff --git a/man/readEuCodedFiles.Rd b/man/readEuCodedFiles.Rd index 803992d..31c71d0 100644 --- a/man/readEuCodedFiles.Rd +++ b/man/readEuCodedFiles.Rd @@ -4,8 +4,7 @@ \alias{readEuCodedFiles} \title{Read Multiple CCTV Inspection Files} \usage{ -readEuCodedFiles(input.files, dbg = TRUE, append.file.names = TRUE, - ...) +readEuCodedFiles(input.files, dbg = TRUE, append.file.names = TRUE, ...) } \arguments{ \item{input.files}{vector of character paths to input files} diff --git a/man/valuesToCsv.Rd b/man/valuesToCsv.Rd index 34e0ab7..6160966 100644 --- a/man/valuesToCsv.Rd +++ b/man/valuesToCsv.Rd @@ -4,8 +4,14 @@ \alias{valuesToCsv} \title{Values to CSV} \usage{ -valuesToCsv(x, dec = ".", sep = ",", na = "", qchar = "\\"", - qmethod = c("double", "escape")[1]) +valuesToCsv( + x, + dec = ".", + sep = ",", + na = "", + qchar = "\\"", + qmethod = c("double", "escape")[1] +) } \arguments{ \item{x}{vector of values representing a row in a CSV file} diff --git a/man/writeEuCodedFile.Rd b/man/writeEuCodedFile.Rd index 9f53288..642a019 100644 --- a/man/writeEuCodedFile.Rd +++ b/man/writeEuCodedFile.Rd @@ -4,8 +4,13 @@ \alias{writeEuCodedFile} \title{Write Inspection Data to File in EU Format} \usage{ -writeEuCodedFile(inspection.data, output.file = NULL, version = 3, - dbg = TRUE, ...) +writeEuCodedFile( + inspection.data, + output.file = NULL, + version = 3, + dbg = TRUE, + ... +) } \arguments{ \item{inspection.data}{inspection data as retrieved by e.g. From a9b3062d7ab3b6422808c31ace68302249005167 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Jun 2022 23:52:50 +0200 Subject: [PATCH 002/141] Use non-deprecated function --- R/import.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/import.R b/R/import.R index 7a3b986..1fc746e 100644 --- a/R/import.R +++ b/R/import.R @@ -42,7 +42,6 @@ readEuCodedFiles <- function( filename <- basename(input.file) if (append.file.names) { - inspectionData$inspections$file <- filename } @@ -62,7 +61,7 @@ readEuCodedFiles <- function( } # Create valid list element names - elements <- kwb.utils::hsSubstSpecChars(basename(input.files)) + elements <- kwb.utils::substSpecialChars(basename(input.files)) # Prepend an "x" to element names that start with a digit starts_with_digit <- grepl("^[0-9_]", elements) From 9ec7ecadbcb5f609ce88deb3637a97999fec28ae Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 1 Jul 2022 10:04:40 +0200 Subject: [PATCH 003/141] Cosmetics, let if () set variable to NULL --- R/import.R | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/R/import.R b/R/import.R index 1fc746e..9a373e2 100644 --- a/R/import.R +++ b/R/import.R @@ -117,7 +117,9 @@ readEuCodedFile <- function( removeEmptyLines(eu_lines) ) - header.info <- kwb.utils::catAndRun(dbg = dbg, "Extracting file header", { + header.info <- kwb.utils::catAndRun( + dbg = dbg, + "Extracting file header", { getHeaderInfoFromHeaderLines( header.lines = getHeaderLinesFromEuCodedLines(eu_lines), warn = warn @@ -128,12 +130,9 @@ readEuCodedFile <- function( if (read.inspections) { - inspections <- NULL - - if (simple.algorithm) { - - inspections <- getInspectionsFromEuLines(eu_lines, header.info) - } + inspections <- if (simple.algorithm) { + getInspectionsFromEuLines(eu_lines, header.info) + } # else NULL # If the inspections could not be read with the simple algorithm (due to # changing header rows) or if the user requests it, try it again with @@ -154,7 +153,7 @@ readEuCodedFile <- function( warning( "I (yet) cannot read the inspection data (#B-blocks). ", "So I just returned the number of inspections instead of a ", - "data frame with all informatin on the inspection!" + "data frame with all information on the inspection!" ) inspections <- length(grep("^#B01", eu_lines)) From 75ba01759ed52f81c9de7d61619a99f1fd917052 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 1 Jul 2022 10:10:34 +0200 Subject: [PATCH 004/141] Add new version of getInspectionHeaderInfo() using kwb.utils::extractSubstring() --- R/importInspections.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/R/importInspections.R b/R/importInspections.R index 9b249c3..dc5fcc1 100644 --- a/R/importInspections.R +++ b/R/importInspections.R @@ -54,6 +54,37 @@ getInspectionHeaderInfo <- function(eu_lines) stats::setNames(header_rows, unique_headers) } +getInspectionHeaderInfo2 <- function(eu_lines) +{ + # Get list of matching sub expressions + matches <- kwb.utils::extractSubstring("^#B(\\d\\d)=(.*)$", eu_lines, c( + number = 1L, fields = 2L + )) + + matches$row <- seq_along(eu_lines) + + # Indices of header lines + header_indices <- which(!kwb.utils::isNaOrEmpty(matches$fields)) + + # Number of header (#B01 = 1, #B02 = 2) + header_numbers <- as.integer(matches$number[header_indices]) + + # Only the header (right of equal sign) + header_lines <- matches$fields[header_indices] + + unique_headers <- unique(header_lines) + + # For each different type of header, determine the line numbers in which it + # occurs + header_rows <- lapply(unique_headers, function(header) { + indices <- which(header_lines == header) + stopifnot(kwb.utils::allAreEqual(header_numbers[indices])) + list(line = header_numbers[indices[1L]], rows = header_indices[indices]) + }) + + stats::setNames(header_rows, unique_headers) +} + # extractInspectionBlocks ------------------------------------------------------ extractInspectionBlocks <- function( From a1087daef1e54bc8c7d2e63d1fc41a0f61920722 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 1 Jul 2022 10:50:53 +0200 Subject: [PATCH 005/141] Extract readObservationsFromCsvText() --- R/importObservations.R | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/R/importObservations.R b/R/importObservations.R index e7bb87b..c2c0da9 100644 --- a/R/importObservations.R +++ b/R/importObservations.R @@ -127,10 +127,12 @@ get_observations <- function(caption_line, c_body, header_info) # to NA, otherwise let read.table skip the unknown columns colClasses <- if (all(is_null)) NA else unname(colClasses) - observations <- utils::read.table( - text = paste(c_body, collapse = "\n"), sep = arguments$sep, - dec = arguments$dec, quote = arguments$quote, comment.char = "", - blank.lines.skip = FALSE, stringsAsFactors = FALSE, colClasses = colClasses + observations <- readObservationsFromCsvText( + text = paste(c_body, collapse = "\n"), + sep = arguments$sep, + dec = arguments$dec, + quote = arguments$quote, + colClasses = colClasses ) # Set the column names to the captions @@ -139,6 +141,22 @@ get_observations <- function(caption_line, c_body, header_info) observations } +# readObservationsFromCsvText -------------------------------------------------- +readObservationsFromCsvText <- function(text, sep, dec, quote, colClasses, ...) +{ + utils::read.table( + text = text, + sep = sep, + dec = dec, + quote = quote, + comment.char = "", + blank.lines.skip = FALSE, + stringsAsFactors = FALSE, + colClasses = colClasses, + ... + ) +} + # getInspectionNumbers --------------------------------------------------------- getInspectionNumbers <- function(indices.C, indices.B01, indices.B, indices.Z) From 9702b3f7da80213bdff6d75ff96cd2a25e14101d Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 1 Jul 2022 10:52:00 +0200 Subject: [PATCH 006/141] Use new function to get read inspection headers --- R/importInspections.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/importInspections.R b/R/importInspections.R index dc5fcc1..ce70e18 100644 --- a/R/importInspections.R +++ b/R/importInspections.R @@ -1,10 +1,12 @@ # getInspectionsFromEuLines.new ------------------------------------------------ -getInspectionsFromEuLines.new <- function(eu_lines, header.info, dbg = TRUE) +getInspectionsFromEuLines.new <- function( + eu_lines, header.info, dbg = TRUE, getInfo = getInspectionHeaderInfo2 +) { x <- mergeInspectionBlocks(extractInspectionBlocks( eu_lines = eu_lines, - headerInfos = getInspectionHeaderInfo(eu_lines), + headerInfos = getInfo(eu_lines), sep = get_elements(header.info, "separator"), dec = get_elements(header.info, "decimal"), quoteCharacter = get_elements(header.info, "quote"), From 9bcf146dd0793572748ff3f86646534554afd849 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 1 Jul 2022 10:53:08 +0200 Subject: [PATCH 007/141] Use new function to read observation data if the original function getObservationsFromEuLines() fails --- R/extractObservationData.R | 81 +++++++++++++++++++++++++++++++++ R/getHeaderInfo.R | 48 +++++++++++++++++++ R/import.R | 10 +++- man/extractObservationBlocks.Rd | 23 ++++++++++ man/extractObservationData.Rd | 23 ++++++++++ 5 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 R/extractObservationData.R create mode 100644 R/getHeaderInfo.R create mode 100644 man/extractObservationBlocks.Rd create mode 100644 man/extractObservationData.Rd diff --git a/R/extractObservationData.R b/R/extractObservationData.R new file mode 100644 index 0000000..c3ed799 --- /dev/null +++ b/R/extractObservationData.R @@ -0,0 +1,81 @@ +#' Extract Observations from EN13508.2-coded file +#' +#' @param euLines text lines read from EN13508.2-coded file +#' @param headerInfo data frame with information about header lines +#' @param header.info list as returned by +#' \code{kwb.en13508.2:::getHeaderInfoFromHeaderLines} +#' @return data frame with columns \code{A}, \code{B}, \code{C}, ... as defined +#' in EN13508.2 and a column \code{inspno} referring to the inspection number. +extractObservationData <- function(euLines, headerInfo, header.info) +{ + kwb.utils::checkForMissingColumns(headerInfo, c("uniqueKey", "type", "value")) + + uniqueKeys <- unique(headerInfo[["uniqueKey"]][headerInfo[["type"]] == "C"]) + + dataBlocks <- lapply(uniqueKeys, function(uniqueKey) { + + blocks <- extractObservationBlocks(euLines, headerInfo, uniqueKey) + + rowsWithKey <- which(headerInfo[["uniqueKey"]] == uniqueKey) + + captionLine <- headerInfo[["value"]][rowsWithKey][1L] + + text <- c(captionLine, do.call(c, blocks)) + + blockLengths <- lengths(blocks) + + stopifnot(length(text) == sum(blockLengths) + 1L) + + #result <- read.table(text = text, sep = ";", header = TRUE) + + result <- readObservationsFromCsvText( + text = text, + sep = header.info$separator, + dec = header.info$decimal, + quote = header.info$quote, + colClasses = NA, + header = TRUE + ) + + result$inspno <- rep(headerInfo$inspno[rowsWithKey], blockLengths) + + result + }) + + inspectionData <- kwb.utils::safeRowBindAll(dataBlocks) + + inspectionData <- inspectionData[, order(names(inspectionData))] + + inspectionData <- kwb.utils::orderBy(inspectionData, c("inspno", "I")) + + kwb.utils::moveColumnsToFront(inspectionData, "inspno") +} + +#' Extract Lines Between #C-Header and #Z End Tag +#' +#' @param euLines text lines read from EN13508.2-coded file +#' @param headerInfo data frame with information about header lines +#' @param uniqueKey identifier of C-header row, as given in +#' \code{headerInfo$uniqueKey} +#' @return list of vectors of character representing the "body" lines +#' below the #C-headers of type specified in \code{uniqueKey} +extractObservationBlocks <- function(euLines, headerInfo, uniqueKey) +{ + kwb.utils::checkForMissingColumns(headerInfo, c("uniqueKey", "type")) + + keyMatches <- headerInfo[["uniqueKey"]] == uniqueKey + x <- headerInfo[keyMatches | headerInfo[["type"]] == "Z", ] + + changes <- kwb.utils::findChanges(x$type) + + if (changes$value[1L] == "Z") { + changes <- changes[-1L, ] + } + + mapply( + from = x$row[changes$starts_at[changes$value == "C"]] + 1L, + to = x$row[changes$starts_at[changes$value == "Z"]] - 1L, + FUN = function(from, to) euLines[from:to], + SIMPLIFY = FALSE + ) +} diff --git a/R/getHeaderInfo.R b/R/getHeaderInfo.R new file mode 100644 index 0000000..7cf722f --- /dev/null +++ b/R/getHeaderInfo.R @@ -0,0 +1,48 @@ +# getHeaderInfo ---------------------------------------------------------------- +getHeaderInfo <- function(euLines) +{ + pattern <- paste0("^#", c("A", "B", "C", "Z"), collapse = "|") + + headerIndices <- grep(pattern, euLines) + + headerLines <- euLines[headerIndices] + + keyValue = strsplit(headerLines, "=") + + keys <- sapply(keyValue, "[", 1L) + + values <- character(length(keys)) + hasValue <- lengths(keyValue) > 1L + values[hasValue] <- sapply(keyValue[hasValue], "[", 2L) + + headerInfo <- kwb.utils::noFactorDataFrame( + row = headerIndices, + type = substr(headerLines, 2L, 2L), + key = keys, + uniqueKey = "", + value = values + ) + + setUniqueKey <- function(data, type) { + isType <- data$type == type + uniqueValues <- unique(data$value[isType]) + key <- paste0(tolower(type), match(data$value[isType], uniqueValues)) + data$uniqueKey[isType] <- key + data + } + + headerInfo <- setUniqueKey(headerInfo, "B") + headerInfo <- setUniqueKey(headerInfo, "C") + + # Set inspection number in column "inspno" + changes <- kwb.utils::findChanges(headerInfo$type) + bStarts <- changes$starts_at[changes$value == "B"] + + inspectionNumbers <- rep(NA_integer_, nrow(headerInfo)) + inspectionNumbers[bStarts] <- seq_along(bStarts) + inspectionNumbers <- kwb.utils::naToLastNonNa(inspectionNumbers) + + headerInfo[["inspno"]] <- inspectionNumbers + + headerInfo +} diff --git a/R/import.R b/R/import.R index 9a373e2..daa91c4 100644 --- a/R/import.R +++ b/R/import.R @@ -163,7 +163,15 @@ readEuCodedFile <- function( kwb.utils::.logstart(dbg, "Extracting observation records") - observations <- getObservationsFromEuLines(eu_lines, header.info, dbg = dbg) + observations <- try( + getObservationsFromEuLines(eu_lines, header.info, dbg = dbg) + ) + + if (inherits(observations, "try-error")) { + headerInfo <- getHeaderInfo(eu_lines) + #View(headerInfo) + observations <- extractObservationData(eu_lines, headerInfo, header.info) + } kwb.utils::catIf( dbg, sprintf("%d observations extracted. ", nrow(observations)) diff --git a/man/extractObservationBlocks.Rd b/man/extractObservationBlocks.Rd new file mode 100644 index 0000000..b8f6148 --- /dev/null +++ b/man/extractObservationBlocks.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extractObservationData.R +\name{extractObservationBlocks} +\alias{extractObservationBlocks} +\title{Extract Lines Between #C-Header and #Z End Tag} +\usage{ +extractObservationBlocks(euLines, headerInfo, uniqueKey) +} +\arguments{ +\item{euLines}{text lines read from EN13508.2-coded file} + +\item{headerInfo}{data frame with information about header lines} + +\item{uniqueKey}{identifier of C-header row, as given in +\code{headerInfo$uniqueKey}} +} +\value{ +list of vectors of character representing the "body" lines + below the #C-headers of type specified in \code{uniqueKey} +} +\description{ +Extract Lines Between #C-Header and #Z End Tag +} diff --git a/man/extractObservationData.Rd b/man/extractObservationData.Rd new file mode 100644 index 0000000..410b85f --- /dev/null +++ b/man/extractObservationData.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extractObservationData.R +\name{extractObservationData} +\alias{extractObservationData} +\title{Extract Observations from EN13508.2-coded file} +\usage{ +extractObservationData(euLines, headerInfo, header.info) +} +\arguments{ +\item{euLines}{text lines read from EN13508.2-coded file} + +\item{headerInfo}{data frame with information about header lines} + +\item{header.info}{list as returned by +\code{kwb.en13508.2:::getHeaderInfoFromHeaderLines}} +} +\value{ +data frame with columns \code{A}, \code{B}, \code{C}, ... as defined + in EN13508.2 and a column \code{inspno} referring to the inspection number. +} +\description{ +Extract Observations from EN13508.2-coded file +} From fec0fbc3fc4382ea535572b7dfc131a49381e50b Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 21:50:38 +0200 Subject: [PATCH 008/141] Use kwb.utils::isTryError() --- R/import.R | 9 +++++---- R/importInspections.R | 6 +++--- R/utils.R | 0 3 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 R/utils.R diff --git a/R/import.R b/R/import.R index daa91c4..542fa5e 100644 --- a/R/import.R +++ b/R/import.R @@ -36,7 +36,7 @@ readEuCodedFiles <- function( inspectionData <- try(readEuCodedFile(input.file, dbg = dbg, ...)) # Skip the following if an error occurred - if (! inherits(inspectionData, "try-error")) { + if (! kwb.utils::isTryError(inspectionData)) { # Append inspection data to result list filename <- basename(input.file) @@ -96,7 +96,7 @@ readEuCodedFiles <- function( #' #' @return list with elements \code{header.info}, \code{inspections}, #' \code{observations} -#' +#' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok #' @export #' readEuCodedFile <- function( @@ -164,10 +164,11 @@ readEuCodedFile <- function( kwb.utils::.logstart(dbg, "Extracting observation records") observations <- try( - getObservationsFromEuLines(eu_lines, header.info, dbg = dbg) + getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), + silent = TRUE ) - if (inherits(observations, "try-error")) { + if (kwb.utils::isTryError(observations)) { headerInfo <- getHeaderInfo(eu_lines) #View(headerInfo) observations <- extractObservationData(eu_lines, headerInfo, header.info) diff --git a/R/importInspections.R b/R/importInspections.R index ce70e18..530d48f 100644 --- a/R/importInspections.R +++ b/R/importInspections.R @@ -88,7 +88,7 @@ getInspectionHeaderInfo2 <- function(eu_lines) } # extractInspectionBlocks ------------------------------------------------------ - +#' @importFrom kwb.utils isTryError extractInspectionBlocks <- function( eu_lines, headerInfos, sep, dec, quoteCharacter, dbg = TRUE ) @@ -109,8 +109,8 @@ extractInspectionBlocks <- function( textblock, sep, dec, quoteCharacter, captionLine = unique_headers[i], rowNumbers = row_numbers, dbg = dbg )) - - if (! inherits(try_result, "try-error")) { + + if (! kwb.utils::isTryError(try_result)) { line_number <- headerInfos[[i]]$line diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..e69de29 From 320ce55cc7d51498e25850d21caf91f83400746f Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 21:53:00 +0200 Subject: [PATCH 009/141] Add arg "dbg" to getInspectionsFromEuLines() --- R/import.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/import.R b/R/import.R index 542fa5e..7e8cabd 100644 --- a/R/import.R +++ b/R/import.R @@ -18,7 +18,7 @@ #' letter "x" if they start with a digit or with underscore. If files could #' not be read correctly, their indices are returnded in attribute #' \code{which_failed}. -#' +#' @importFrom kwb.utils catIf isTryError stringList substSpecialChars #' @export #' readEuCodedFiles <- function( @@ -131,7 +131,7 @@ readEuCodedFile <- function( if (read.inspections) { inspections <- if (simple.algorithm) { - getInspectionsFromEuLines(eu_lines, header.info) + getInspectionsFromEuLines(eu_lines, header.info, dbg = dbg > 1L) } # else NULL # If the inspections could not be read with the simple algorithm (due to @@ -278,7 +278,7 @@ findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) # getInspectionsFromEuLines ---------------------------------------------------- -getInspectionsFromEuLines <- function(eu_lines, header.info) +getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) { inspections.complete <- NULL @@ -310,10 +310,12 @@ getInspectionsFromEuLines <- function(eu_lines, header.info) } else { - message( - "The #B-header lines differ within the file -> I will change the ", - "algorithm..." - ) + if (dbg) { + message( + "The #B-header lines differ within the file -> I will change the ", + "algorithm..." + ) + } aborted <- TRUE } From 73df1d543ea25095251d1073d5141117bd769ae1 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 21:54:24 +0200 Subject: [PATCH 010/141] Check and adapt arg "colClasses" if given --- R/importObservations.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/R/importObservations.R b/R/importObservations.R index c2c0da9..fb3bbf7 100644 --- a/R/importObservations.R +++ b/R/importObservations.R @@ -142,8 +142,24 @@ get_observations <- function(caption_line, c_body, header_info) } # readObservationsFromCsvText -------------------------------------------------- + readObservationsFromCsvText <- function(text, sep, dec, quote, colClasses, ...) { + # If colClasses is specified, reduce it to the columns that actually occur + if (! identical(colClasses, NA)) { + + # Get the column names from the first line + colNames <- strsplit(text[1L], sep)[[1L]] + + # Check that the column names are unique + stopifnot(anyDuplicated(colNames) == 0L) + + # Check that we know the column class for each column name + stopifnot(all(colNames %in% names(colClasses))) + + colClasses <- colClasses[colNames] + } + utils::read.table( text = text, sep = sep, From 32ed19bf65758a077649f07eb0857bab924bb07b Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 21:56:29 +0200 Subject: [PATCH 011/141] Set "colClasses" according to expected types --- R/extractObservationData.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index c3ed799..16de936 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -12,6 +12,10 @@ extractObservationData <- function(euLines, headerInfo, header.info) uniqueKeys <- unique(headerInfo[["uniqueKey"]][headerInfo[["type"]] == "C"]) + colClasses <- sapply( + inspectionDataFieldCodes(), kwb.utils::selectElements, "class" + ) + dataBlocks <- lapply(uniqueKeys, function(uniqueKey) { blocks <- extractObservationBlocks(euLines, headerInfo, uniqueKey) @@ -33,7 +37,7 @@ extractObservationData <- function(euLines, headerInfo, header.info) sep = header.info$separator, dec = header.info$decimal, quote = header.info$quote, - colClasses = NA, + colClasses = colClasses, header = TRUE ) From 2ef0fad245a6a8d3b158f54e2516b7169b65968d Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 21:56:53 +0200 Subject: [PATCH 012/141] Remove empty records from table of observations --- R/extractObservationData.R | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index 16de936..4952fc1 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -43,7 +43,7 @@ extractObservationData <- function(euLines, headerInfo, header.info) result$inspno <- rep(headerInfo$inspno[rowsWithKey], blockLengths) - result + removeEmptyRecords(result) }) inspectionData <- kwb.utils::safeRowBindAll(dataBlocks) @@ -83,3 +83,28 @@ extractObservationBlocks <- function(euLines, headerInfo, uniqueKey) SIMPLIFY = FALSE ) } + +# removeEmptyRecords ----------------------------------------------------------- +removeEmptyRecords <- function(data) +{ + textValues <- as.matrix(kwb.utils::removeColumns(data, "inspno")) + + mode(textValues) <- "character" + + nCharacters <- kwb.utils::defaultIfNA(nchar(textValues), 0L) + + isEmpty <- rowSums(nCharacters) == 0L + + if (any(isEmpty)) { + message(sprintf( + paste( + "Removing %d empty records from observations table (inspection ", + "number(s): %s)" + ), + sum(isEmpty), + paste(unique(data[["inspno"]][isEmpty]), collapse = ", ") + )) + } + + data[!isEmpty, ] +} From 53ed267771bdeeb53823785ae685b213826e0b00 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 21:57:18 +0200 Subject: [PATCH 013/141] Update NAMESPACE and Roxygen version --- DESCRIPTION | 2 +- NAMESPACE | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1719359..6991432 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,4 +23,4 @@ Suggests: Remotes: github::kwb-r/kwb.utils Encoding: UTF-8 -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 diff --git a/NAMESPACE b/NAMESPACE index 4e811bb..a4a104c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,3 +10,10 @@ export(readEuCodedFile) export(readEuCodedFiles) export(writeEuCodedFile) export(writeEuCodedFiles) +importFrom(kwb.utils,.logok) +importFrom(kwb.utils,.logstart) +importFrom(kwb.utils,catAndRun) +importFrom(kwb.utils,catIf) +importFrom(kwb.utils,isTryError) +importFrom(kwb.utils,stringList) +importFrom(kwb.utils,substSpecialChars) From 5f69a146d148bdc85a794e8a8f65129c95b9b2ef Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Jul 2022 22:51:01 +0200 Subject: [PATCH 014/141] Add last "to" position if there is not last #Z --- R/extractObservationData.R | 14 ++++++++++++-- R/importObservations.R | 5 +---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index 4952fc1..71987e8 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -18,6 +18,7 @@ extractObservationData <- function(euLines, headerInfo, header.info) dataBlocks <- lapply(uniqueKeys, function(uniqueKey) { + #uniqueKey <- uniqueKeys[1L] blocks <- extractObservationBlocks(euLines, headerInfo, uniqueKey) rowsWithKey <- which(headerInfo[["uniqueKey"]] == uniqueKey) @@ -75,10 +76,19 @@ extractObservationBlocks <- function(euLines, headerInfo, uniqueKey) if (changes$value[1L] == "Z") { changes <- changes[-1L, ] } + + from <- x$row[changes$starts_at[changes$value == "C"]] + 1L + to <- x$row[changes$starts_at[changes$value == "Z"]] - 1L + + # Add a last "to" value if the last EU-line is not "#Z" + if (length(to) != length(from)) { + stopifnot(length(to) == length(from) - 1L) + to <- c(to, length(euLines)) + } mapply( - from = x$row[changes$starts_at[changes$value == "C"]] + 1L, - to = x$row[changes$starts_at[changes$value == "Z"]] - 1L, + from = from, + to = to, FUN = function(from, to) euLines[from:to], SIMPLIFY = FALSE ) diff --git a/R/importObservations.R b/R/importObservations.R index fb3bbf7..c398f51 100644 --- a/R/importObservations.R +++ b/R/importObservations.R @@ -13,11 +13,8 @@ getObservationsFromEuLines <- function( # If the file does not end with #Z add "number of lines + 1" to the vector of # #Z-indices last_z_index <- if (kwb.utils::isNullOrEmpty(indices$Z)) { - - -1 - + -1L } else { - kwb.utils::lastElement(indices$Z) } From 10cb2385648ca098c0d21d69c9dc2564ac6d7f1a Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 1 Aug 2022 18:32:14 +0200 Subject: [PATCH 015/141] Add getLineDamageInfo() from kwb.rerau also add related helper functions and tests --- NAMESPACE | 4 + R/getLineDamageInfo.R | 81 +++++++++++++++++++ R/utils.R | 13 +++ man/getLineDamageInfo.Rd | 24 ++++++ .../test-function-aggregateAndFilter.R | 16 ++++ .../test-function-asNumericIfRequired.R | 16 ++++ .../test-function-getLineDamageInfo.R | 16 ++++ ...st-function-stopOnInvalidLineDamageCodes.R | 16 ++++ 8 files changed, 186 insertions(+) create mode 100644 R/getLineDamageInfo.R create mode 100644 man/getLineDamageInfo.Rd create mode 100644 tests/testthat/test-function-aggregateAndFilter.R create mode 100644 tests/testthat/test-function-asNumericIfRequired.R create mode 100644 tests/testthat/test-function-getLineDamageInfo.R create mode 100644 tests/testthat/test-function-stopOnInvalidLineDamageCodes.R diff --git a/NAMESPACE b/NAMESPACE index a4a104c..a74a994 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(euCodedFileHeader) export(getCodes) +export(getLineDamageInfo) export(mergeInspectionData) export(numberOfInspections) export(plotObservations) @@ -15,5 +16,8 @@ importFrom(kwb.utils,.logstart) importFrom(kwb.utils,catAndRun) importFrom(kwb.utils,catIf) importFrom(kwb.utils,isTryError) +importFrom(kwb.utils,selectColumns) +importFrom(kwb.utils,setColumns) +importFrom(kwb.utils,stopFormatted) importFrom(kwb.utils,stringList) importFrom(kwb.utils,substSpecialChars) diff --git a/R/getLineDamageInfo.R b/R/getLineDamageInfo.R new file mode 100644 index 0000000..10813de --- /dev/null +++ b/R/getLineDamageInfo.R @@ -0,0 +1,81 @@ +# getLineDamageInfo ------------------------------------------------------------ + +#' Get Information on Line Damages +#' +#' @param observations data frame with observations. Required columns: \code{I} +#' (= horizontal or vertical position), \code{J} (= code for line damage), +#' \code{inspno} (number of inspection to which the observation belongs) +#' @param dbg if \code{TRUE}, debug messages are shown +#' @return data frame with columns \code{ino} (inspection number), \code{ldno} +#' (line damage number), \code{beg.at}, \code{end.at}, \code{beg.x} (position +#' of line damage begin), \code{end.x} (position of line damage end), +#' \code{length} (length of line damage) +#' @importFrom kwb.utils selectColumns setColumns +#' @export +getLineDamageInfo <- function(observations, dbg = TRUE) +{ + getcol <- kwb.utils::selectColumns + + if (! "J" %in% names(observations)) { + message("No column 'J' (line damages) found in table of observations.") + return(NULL) + } + + I <- asNumericIfRequired(getcol(observations, "I"), dbg = dbg) + + J <- getcol(observations, "J") + + # Check if the values in J match the expected pattern + stopOnInvalidLineDamageCodes(J) + + # Split line damage identifier in J into "A" or "B" (ld) and number (ldno) + x <- kwb.utils::noFactorDataFrame( + ino = getcol(observations, "inspno"), + ld = substr(J, 1, 1), + ldno = substr(J, 2, nchar(J)) + ) + + info <- merge( + x = aggregateAndFilter(x, FUN = min, name = "beg.at"), + y = aggregateAndFilter(x, FUN = max, name = "end.at") + ) + + # Order by inspection number and line damage number + info <- info[do.call(order, info[, c("ino", "ldno")]), ] + + kwb.utils::setColumns( + info, + beg.x = I[getcol(info, "beg.at")], + end.x = I[getcol(info, "end.at")], + length = I[getcol(info, "end.at")] - I[getcol(info, "beg.at")], + dbg = FALSE + ) +} + +# stopOnInvalidLineDamageCodes ------------------------------------------------- + +#' @importFrom kwb.utils stopFormatted stringList +stopOnInvalidLineDamageCodes <- function(J, pattern = "^$|^[AB]\\d+$") +{ + unique_values <- unique(J) + + is_invalid <- ! grepl(pattern, unique_values) + + if (any(is_invalid)) { + + kwb.utils::stopFormatted( + "There are line damage codes in column 'J' that do not match '%s':\n%s", + pattern, kwb.utils::stringList(unique_values[is_invalid]) + ) + } +} + +# aggregateAndFilter ----------------------------------------------------------- +aggregateAndFilter <- function(x, FUN, name) +{ + result <- stats::aggregate(seq_len(nrow(x)), by = x[, c("ino", "ldno")], FUN) + + names(result)[ncol(result)] <- name + + result[result$ldno != "", ] +} diff --git a/R/utils.R b/R/utils.R index e69de29..7e9189a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -0,0 +1,13 @@ +# asNumericIfRequired ---------------------------------------------------------- +asNumericIfRequired <- function(x, name = deparse(substitute(x)), dbg = TRUE) +{ + if (! is.numeric(x)) { + + kwb.utils::catAndRun( + dbg = dbg, sprintf("Converting '%s' to numeric", name), + x <- as.numeric(x) + ) + } + + x +} diff --git a/man/getLineDamageInfo.Rd b/man/getLineDamageInfo.Rd new file mode 100644 index 0000000..218c6f0 --- /dev/null +++ b/man/getLineDamageInfo.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getLineDamageInfo.R +\name{getLineDamageInfo} +\alias{getLineDamageInfo} +\title{Get Information on Line Damages} +\usage{ +getLineDamageInfo(observations, dbg = TRUE) +} +\arguments{ +\item{observations}{data frame with observations. Required columns: \code{I} +(= horizontal or vertical position), \code{J} (= code for line damage), +\code{inspno} (number of inspection to which the observation belongs)} + +\item{dbg}{if \code{TRUE}, debug messages are shown} +} +\value{ +data frame with columns \code{ino} (inspection number), \code{ldno} + (line damage number), \code{beg.at}, \code{end.at}, \code{beg.x} (position + of line damage begin), \code{end.x} (position of line damage end), + \code{length} (length of line damage) +} +\description{ +Get Information on Line Damages +} diff --git a/tests/testthat/test-function-aggregateAndFilter.R b/tests/testthat/test-function-aggregateAndFilter.R new file mode 100644 index 0000000..fbd78d6 --- /dev/null +++ b/tests/testthat/test-function-aggregateAndFilter.R @@ -0,0 +1,16 @@ +# +# This test file has been generated by kwb.test::create_test_files() +# launched by user hsonne on 2022-07-27 07:01:23. +# Your are strongly encouraged to modify the dummy functions +# so that real cases are tested. You should then delete this comment. +# + +test_that("aggregateAndFilter() works", { + + expect_error( + kwb.rerau:::aggregateAndFilter() + # argument "x" is missing, with no default + ) + +}) + diff --git a/tests/testthat/test-function-asNumericIfRequired.R b/tests/testthat/test-function-asNumericIfRequired.R new file mode 100644 index 0000000..fbf04ec --- /dev/null +++ b/tests/testthat/test-function-asNumericIfRequired.R @@ -0,0 +1,16 @@ +# +# This test file has been generated by kwb.test::create_test_files() +# launched by user hsonne on 2022-07-27 07:01:25. +# Your are strongly encouraged to modify the dummy functions +# so that real cases are tested. You should then delete this comment. +# + +test_that("asNumericIfRequired() works", { + + expect_error( + kwb.rerau:::asNumericIfRequired() + # argument "x" is missing, with no default + ) + +}) + diff --git a/tests/testthat/test-function-getLineDamageInfo.R b/tests/testthat/test-function-getLineDamageInfo.R new file mode 100644 index 0000000..f18f385 --- /dev/null +++ b/tests/testthat/test-function-getLineDamageInfo.R @@ -0,0 +1,16 @@ +# +# This test file has been generated by kwb.test::create_test_files() +# launched by user hsonne on 2022-07-27 07:01:23. +# Your are strongly encouraged to modify the dummy functions +# so that real cases are tested. You should then delete this comment. +# + +test_that("getLineDamageInfo() works", { + + expect_error( + kwb.rerau:::getLineDamageInfo() + # argument "observations" is missing, with no default + ) + +}) + diff --git a/tests/testthat/test-function-stopOnInvalidLineDamageCodes.R b/tests/testthat/test-function-stopOnInvalidLineDamageCodes.R new file mode 100644 index 0000000..b497f13 --- /dev/null +++ b/tests/testthat/test-function-stopOnInvalidLineDamageCodes.R @@ -0,0 +1,16 @@ +# +# This test file has been generated by kwb.test::create_test_files() +# launched by user hsonne on 2022-07-27 07:01:23. +# Your are strongly encouraged to modify the dummy functions +# so that real cases are tested. You should then delete this comment. +# + +test_that("stopOnInvalidLineDamageCodes() works", { + + expect_error( + kwb.rerau:::stopOnInvalidLineDamageCodes() + # argument "J" is missing, with no default + ) + +}) + From a19a84fa7346a09fe8e1a4214f17543da76e0e73 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 1 Aug 2022 18:46:45 +0200 Subject: [PATCH 016/141] Use kwb.utils::orderBy(), renumbering rows --- NAMESPACE | 1 + R/getLineDamageInfo.R | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a74a994..98bbc30 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ importFrom(kwb.utils,.logstart) importFrom(kwb.utils,catAndRun) importFrom(kwb.utils,catIf) importFrom(kwb.utils,isTryError) +importFrom(kwb.utils,orderBy) importFrom(kwb.utils,selectColumns) importFrom(kwb.utils,setColumns) importFrom(kwb.utils,stopFormatted) diff --git a/R/getLineDamageInfo.R b/R/getLineDamageInfo.R index 10813de..12382db 100644 --- a/R/getLineDamageInfo.R +++ b/R/getLineDamageInfo.R @@ -10,7 +10,7 @@ #' (line damage number), \code{beg.at}, \code{end.at}, \code{beg.x} (position #' of line damage begin), \code{end.x} (position of line damage end), #' \code{length} (length of line damage) -#' @importFrom kwb.utils selectColumns setColumns +#' @importFrom kwb.utils orderBy selectColumns setColumns #' @export getLineDamageInfo <- function(observations, dbg = TRUE) { @@ -41,7 +41,7 @@ getLineDamageInfo <- function(observations, dbg = TRUE) ) # Order by inspection number and line damage number - info <- info[do.call(order, info[, c("ino", "ldno")]), ] + info <- kwb.utils::orderBy(info, c("ino", "ldno")) kwb.utils::setColumns( info, From 538b5e13754b090c6bad7225a8b6e99a1a014e4c Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 03:33:04 +0200 Subject: [PATCH 017/141] Rename file according to contained function --- R/{merge.R => readAndMergeEuCodedFiles.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{merge.R => readAndMergeEuCodedFiles.R} (100%) diff --git a/R/merge.R b/R/readAndMergeEuCodedFiles.R similarity index 100% rename from R/merge.R rename to R/readAndMergeEuCodedFiles.R From c5582c6b0f0c1c1ffe2dd6f7db64e8795afe7b0b Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 03:34:31 +0200 Subject: [PATCH 018/141] Reindent to improve readability --- R/readAndMergeEuCodedFiles.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 778e16a..ec29a28 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -15,9 +15,14 @@ readAndMergeEuCodedFiles <- function(input.files, dbg = FALSE, ...) { # by setting simple.algorithm = FALSE we get unique column names, e.g. "ADE" # and "ADE.1" - mergeInspectionData(readEuCodedFiles( - input.files = input.files, simple.algorithm = FALSE, dbg = dbg, ... - )) + mergeInspectionData( + readEuCodedFiles( + input.files = input.files, + simple.algorithm = FALSE, + dbg = dbg, + ... + ) + ) } # mergeInspectionData ---------------------------------------------------------- From bb531bbd7ff180fb6259342b691b4634f60acfbe Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 04:00:16 +0200 Subject: [PATCH 019/141] Update Rd files (consider file renaming) --- man/mergeInspectionData.Rd | 2 +- man/readAndMergeEuCodedFiles.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/mergeInspectionData.Rd b/man/mergeInspectionData.Rd index 1e3cf9e..0e49b55 100644 --- a/man/mergeInspectionData.Rd +++ b/man/mergeInspectionData.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge.R +% Please edit documentation in R/readAndMergeEuCodedFiles.R \name{mergeInspectionData} \alias{mergeInspectionData} \title{Merge Inspection Data} diff --git a/man/readAndMergeEuCodedFiles.Rd b/man/readAndMergeEuCodedFiles.Rd index 5c8461b..9f17ca3 100644 --- a/man/readAndMergeEuCodedFiles.Rd +++ b/man/readAndMergeEuCodedFiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge.R +% Please edit documentation in R/readAndMergeEuCodedFiles.R \name{readAndMergeEuCodedFiles} \alias{readAndMergeEuCodedFiles} \title{Read and Merge Files in EN13508.2-Format} From 9e0abcaf838fe861dac5bc85321a22ec3ef19c48 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 04:04:12 +0200 Subject: [PATCH 020/141] Clean readAndMergeEuCodedFiles() - Use "L" to indicate integer constant - Do not use intermediate varialbes "header_info", "inspections_all", "observations_all" - Use lapply() instead of for-loop - Use variable name "obs" to shorten lines --- R/readAndMergeEuCodedFiles.R | 43 ++++++++++++++---------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index ec29a28..e9bdf29 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -41,46 +41,35 @@ readAndMergeEuCodedFiles <- function(input.files, dbg = FALSE, ...) #' mergeInspectionData <- function(x) { - if (length(x) == 1) { - - return (x[[1]]) + if (length(x) == 1L) { + return (x[[1L]]) } # Check if there are differences in the file headers warnOnDifferingHeaders(x) - # In any case, use the first header - header_info <- get_elements(x[[1]], "header.info") - - # Join the inspections - inspections_all <- kwb.utils::safeRowBindOfListElements(x, "inspections") - - # Join the observations - observations_all <- NULL - # Prepare vector of offsets to be added to the inspection number (= row number # in list element "inspections") offsets <- cumsum(numberOfInspections(x)) - for (i in seq_along(x)) { + # Add offsets to observation table's column "inspno" + observations <- lapply(seq_along(x), function(i) { - observations <- get_elements(x[[i]], "observations") - - # Add inspection number offset (maximum value so far) to column "inspno" - if (i > 1) { - - inspno <- get_columns(observations, "inspno") - - observations$inspno <- inspno + offsets[i - 1] + obs <- get_elements(x[[i]], "observations") + + # Add inspection number offset to column "inspno" if this is not the very + # first data frame of observations + if (i > 1L) { + obs[["inspno"]] <- get_columns(obs, "inspno") + offsets[i - 1L] } - observations_all <- kwb.utils::safeRowBind(observations_all, observations) - } - + obs + }) + list( - header.info = header_info, - inspections = inspections_all, - observations = observations_all + header.info = get_elements(x[[1L]], "header.info"), + inspections = kwb.utils::safeRowBindOfListElements(x, "inspections"), + observations = kwb.utils::safeRowBindAll(observations) ) } From b22b2c4aed3c8d49e55199781eb119632a85e10e Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 04:21:12 +0200 Subject: [PATCH 021/141] Add simple test for mergeInspectionData() --- .../test-function-mergeInspectionData.R | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/tests/testthat/test-function-mergeInspectionData.R b/tests/testthat/test-function-mergeInspectionData.R index fa79793..b14d967 100644 --- a/tests/testthat/test-function-mergeInspectionData.R +++ b/tests/testthat/test-function-mergeInspectionData.R @@ -1,29 +1,33 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("mergeInspectionData() works", { - kwb.en13508.2:::mergeInspectionData(x = 1) - kwb.en13508.2:::mergeInspectionData(x = "a") - kwb.en13508.2:::mergeInspectionData(x = TRUE) - kwb.en13508.2:::mergeInspectionData(x = FALSE) - kwb.en13508.2:::mergeInspectionData(x = as.POSIXct("2018-06-03 23:50:00")) - expect_error( - kwb.en13508.2:::mergeInspectionData(x = 1:2) - # x is not a list but: -# int 1 + f <- kwb.en13508.2:::mergeInspectionData + + f(x = 1) + + expect_error(f(x = 1:2)) + + x <- list( + list( + header.info = "", + inspections = data.frame(file = "a", id = 1:2), + observations = data.frame(inspno = c(1L, 1L, 2L, 2L, 2L)) + ), + list( + header.info = "", + inspections = data.frame(file = "b", id = 1:3), + observations = data.frame(inspno = c(1L, 2L, 3L)) + ) ) - expect_error( - kwb.en13508.2:::mergeInspectionData(x = c("a", "b")) - # x is not a list but: -# chr "a" + + result <- f(x) + + expect_identical( + result$inspections, + rbind(x[[1L]]$inspections, x[[2L]]$inspections) ) - expect_error( - kwb.en13508.2:::mergeInspectionData(x = list(key = c("a", "b"), value = 1:2)) - # x is not a list but: -# chr [1:2] "a" "b" + + expect_identical( + result$observations$inspno, + c(1L, 1L, 2L, 2L, 2L, 3L, 4L, 5L) ) - }) - From 133416deaf2974e4f12c9c2b38a26da1e108ac94 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 04:34:33 +0200 Subject: [PATCH 022/141] Return early in asNumericIfRequired() --- R/utils.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/utils.R b/R/utils.R index 7e9189a..2d445e6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,13 +1,13 @@ # asNumericIfRequired ---------------------------------------------------------- asNumericIfRequired <- function(x, name = deparse(substitute(x)), dbg = TRUE) { - if (! is.numeric(x)) { - - kwb.utils::catAndRun( - dbg = dbg, sprintf("Converting '%s' to numeric", name), - x <- as.numeric(x) - ) - } - - x + if (is.numeric(x)) { + return(x) + } + + kwb.utils::catAndRun( + messageText = sprintf("Converting '%s' to numeric", name), + expr = as.numeric(x), + dbg = dbg + ) } From 523e95a04debdc59a0771e1d9097af11e7717b64 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 04:57:01 +0200 Subject: [PATCH 023/141] Improve output of warnOnDifferingHeaders() by using "drop = FALSE" and add simple test --- R/readAndMergeEuCodedFiles.R | 18 +++--- .../test-function-warnOnDifferingHeaders.R | 55 +++++-------------- 2 files changed, 23 insertions(+), 50 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index e9bdf29..41598e5 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -74,12 +74,10 @@ mergeInspectionData <- function(x) } # warnOnDifferingHeaders ------------------------------------------------------- - warnOnDifferingHeaders <- function(x) { # list to data frame header_infos <- do.call(rbind, lapply(x, function(x) { - as.data.frame(get_elements(x, "header.info")) })) @@ -87,16 +85,16 @@ warnOnDifferingHeaders <- function(x) equal_in_column <- sapply(header_infos, kwb.utils::allAreEqual) if (! all(equal_in_column)) { + + text <- paste(collapse = "\n", utils::capture.output(print( + unique(header_infos[, ! equal_in_column, drop = FALSE]) + ))) - text <- paste0( + warning( "The file headers are differing in the folowing fields:\n\n", - paste( - utils::capture.output(print(unique(header_infos[, ! equal_in_column]))), - collapse = "\n" - ), - "\n\nI will use the first header." + text, + "\n\nI will use the first header.", + call. = FALSE ) - - warning(text) } } diff --git a/tests/testthat/test-function-warnOnDifferingHeaders.R b/tests/testthat/test-function-warnOnDifferingHeaders.R index 95db398..6a3749e 100644 --- a/tests/testthat/test-function-warnOnDifferingHeaders.R +++ b/tests/testthat/test-function-warnOnDifferingHeaders.R @@ -1,44 +1,19 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("warnOnDifferingHeaders() works", { - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = 1) - # x is not a list but: -# num 1 - ) - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = 1:2) - # x is not a list but: -# int 1 - ) - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = "a") - # x is not a list but: -# chr "a" - ) - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = TRUE) - # x is not a list but: -# logi TRUE - ) - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = FALSE) - # x is not a list but: -# logi FALSE - ) - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = as.POSIXct("2018-06-03 23:50:00")) - # x is not a list but: -# POSIXct[1:1], format: "2018-06-03 23:50:00" - ) - expect_error( - kwb.en13508.2:::warnOnDifferingHeaders(x = list(key = c("a", "b"), value = 1:2)) - # x is not a list but: -# chr [1:2] "a" "b" - ) + f <- kwb.en13508.2:::warnOnDifferingHeaders + + expect_error(f(x = 1)) -}) + expect_error(f(list(list(), list()))) + expect_warning(f(list( + list(header.info = list(a = 1)), + list(header.info = list(a = 2)) + ))) + + expect_silent(f(list( + list(header.info = list(a = 1)), + list(header.info = list(a = 1)) + ))) + +}) From 3c1dba436b83ebfcfdde1706985a6deed23c6723 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 12:02:24 +0200 Subject: [PATCH 024/141] Allow "table" to be a vector of table names Stop with error message if table name does not exist --- R/main.R | 22 ++++++++++++++-------- man/getCodes.Rd | 5 +++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/R/main.R b/R/main.R index d108df8..81cd192 100644 --- a/R/main.R +++ b/R/main.R @@ -10,8 +10,9 @@ get_elements <- kwb.utils::selectElements #' #' Get a data frame containing EU codes and their meaning in different languages #' -#' @param table set to a name that is related to a specific table in the EU -#' norm. To get the possible table names, try \code{unique(getCodes()$Table)} +#' @param table name or vector of names of tables in the EU norm for which to +#' get field information. Use \code{unique(getCodes()$Table)} to get the +#' possible table names. #' @param fields set to a vector of field (column) names to restrict the columns #' returned #' @@ -25,18 +26,23 @@ getCodes <- function(table = NULL, fields = NULL) # Check if all codes are unique stopifnot(! any(duplicated(get_columns(codes, "Code")))) - + if (! is.null(table)) { - codes <- codes[get_columns(codes, "Table") == table, ] + subtables <- split(codes, get_columns(codes, "Table")) + + codes <- kwb.utils::selectElements(subtables, table) + + if (length(table) > 1L) { + codes <- kwb.utils::safeRowBindAll(codes) + } } - if (! is.null(fields)) { - - codes <- get_columns(codes, fields) + if (is.null(fields)) { + return(codes) } - codes + get_columns(codes, fields) } # numberOfInspections ---------------------------------------------------------- diff --git a/man/getCodes.Rd b/man/getCodes.Rd index 0f6a48e..c71eef9 100644 --- a/man/getCodes.Rd +++ b/man/getCodes.Rd @@ -7,8 +7,9 @@ getCodes(table = NULL, fields = NULL) } \arguments{ -\item{table}{set to a name that is related to a specific table in the EU -norm. To get the possible table names, try \code{unique(getCodes()$Table)}} +\item{table}{name or vector of names of tables in the EU norm for which to +get field information. Use \code{unique(getCodes()$Table)} to get the +possible table names.} \item{fields}{set to a vector of field (column) names to restrict the columns returned} From fab53b35215faf163968ff56835a8b880ef8c201 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 12:22:31 +0200 Subject: [PATCH 025/141] Simplify get_code_meanings() and let getCodes() renumber the rows --- R/main.R | 3 +++ R/plot.R | 17 +++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/main.R b/R/main.R index 81cd192..c0d5267 100644 --- a/R/main.R +++ b/R/main.R @@ -37,6 +37,9 @@ getCodes <- function(table = NULL, fields = NULL) codes <- kwb.utils::safeRowBindAll(codes) } } + + # Reset row names + row.names(codes) <- NULL if (is.null(fields)) { return(codes) diff --git a/R/plot.R b/R/plot.R index 4ba3590..54913cd 100644 --- a/R/plot.R +++ b/R/plot.R @@ -149,14 +149,19 @@ get_extreme_positions <- function(inspections) # get_code_meanings ------------------------------------------------------------ get_code_meanings <- function() { + renamings <- list( + Table = "CodeTable", + Code = "Code", + Text_EN = "CodeMeaning" + ) + # Provide table that maps codes to their meanings - code_meanings <- kwb.utils::resetRowNames(do.call(rbind, lapply( - X = sprintf("T%d", 4:7), - FUN = kwb.en13508.2::getCodes, - fields = c("Table", "Code", "Text_EN") - ))) + code_meanings <- getCodes( + table = sprintf("T%d", 4:7), + fields = names(renamings) + ) - stats::setNames(code_meanings, c("CodeTable", "Code", "CodeMeaning")) + kwb.utils::renameColumns(code_meanings, renamings) } # order_by --------------------------------------------------------------------- From f38b650d4d1750fe580034f2e327f36c6ad2a63d Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 13:41:54 +0200 Subject: [PATCH 026/141] Simply use writeLines() --- index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.md b/index.md index 71306df..782a697 100644 --- a/index.md +++ b/index.md @@ -135,5 +135,5 @@ writeEuCodedFile(survey, output_file) The first 20 lines of the file produced read: ```r -kwb.utils::catLines(readLines(output_file, 20)) +writeLines(readLines(output_file, 20)) ``` From 26e98f043e5b84b36989b14fe2f9c2d8671e1c3d Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 16:36:16 +0200 Subject: [PATCH 027/141] Move functions from main.R to utils.R --- R/main.R | 129 ----------------------------------------- R/utils.R | 127 ++++++++++++++++++++++++++++++++++++++++ man/readPackageFile.Rd | 2 +- man/valuesToCsv.Rd | 2 +- 4 files changed, 129 insertions(+), 131 deletions(-) diff --git a/R/main.R b/R/main.R index c0d5267..56f3887 100644 --- a/R/main.R +++ b/R/main.R @@ -1,9 +1,3 @@ -# get_columns ------------------------------------------------------------------ -get_columns <- kwb.utils::selectColumns - -# get_elements ----------------------------------------------------------------- -get_elements <- kwb.utils::selectElements - # getCodes --------------------------------------------------------------------- #' Get EU Codes and Their Meaning @@ -83,126 +77,3 @@ inspectionDataFieldCodes <- function() }) } -# readPackageFile -------------------------------------------------------------- - -#' Read CSV File from Package's extdata Folder -#' -#' @param file file name (without path) -#' @param \dots additional arguments passed to \code{read.csv} -#' -#' @return data frame representing the content of \code{\link{file}} -#' -readPackageFile <- function(file, ...) -{ - kwb.utils::readPackageFile(file, package = "kwb.en13508.2", ...) -} - -# dataFrameContentToTextLines -------------------------------------------------- - -dataFrameContentToTextLines <- function(dframe, mycsv = FALSE, ...) -{ - if (mycsv) { - - dataFrameToCsvLines_v1(dframe, ...) - - } else { - - dataFrameToCsvLines_v2(dframe, ...) - } -} - -# dataFrameToCsvLines_v1 ------------------------------------------------------- - -dataFrameToCsvLines_v1 <- function(dframe, sep, ...) -{ - #cat(sprintf("\nsep in dataFrameToCsvLines_v1(): '%s'\n", sep)) - - n_columns <- ncol(dframe) - - output <- matrix("", nrow = nrow(dframe), ncol = n_columns) - - for (column_num in seq_len(n_columns)) { - - output[, column_num] <- valuesToCsv(dframe[[column_num]], sep = sep, ...) - } - - apply(output, 1, paste, collapse = sep) -} - -# dataFrameToCsvLines_v2 ------------------------------------------------------- - -dataFrameToCsvLines_v2 <- function(dframe, qchar = NULL, ...) -{ - # Do not pass qchar to write.table - - con <- textConnection("buffer", "w") - - utils::write.table(dframe, con, row.names = FALSE, col.names = FALSE, ...) - - close(con) - - buffer -} - -# valuesToCsv ------------------------------------------------------------------ - -#' Values to CSV -#' -#' @param x vector of values representing a row in a CSV file -#' @param dec decimal character -#' @param sep field separating character -#' @param na text to be used in case of NA values -#' @param qchar quoting character to be used to surround text fields containing -#' the field separator -#' @param qmethod method used to indicate that a quoting character within a -#' quoted text field is not the ending quote. Either "double" (double the -#' quote character) or "escape" (backslash in front of the quoting character). -#' -valuesToCsv <- function( - x, dec = ".", sep = ",", na = "", qchar = '"', - qmethod = c("double", "escape")[1] -) -{ - #cat(sprintf("\nsep in valuesToCsv(): '%s'\n", sep)) - - na.indices <- is.na(x) - - if (is.factor(x)) { - - x <- as.character(x) - } - - x <- if (mode(x) == "numeric") { - - # Replace decimal character if required - if (dec != ".") sub("\\.", dec, x) else x - - } else { - - quoteTextIfNeeded(x, sep, qchar, qmethod) - } - - x[na.indices] <- na - - x -} - -# quoteTextIfNeeded ------------------------------------------------------------ - -quoteTextIfNeeded <- function(x, sep, qchar, qmethod) -{ - pattern <- sprintf("\\%s|\\%s", sep, qchar) - - indices <- grep(pattern, x) - - if (length(indices) > 0) { - - surrogate <- paste0(if (qmethod == "double") qchar else '\\\\', qchar) - - quoted <- gsub(qchar, surrogate, x[indices]) - - x[indices] <- paste0(qchar, quoted, qchar) - } - - x -} diff --git a/R/utils.R b/R/utils.R index 2d445e6..ddb45b2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,3 +11,130 @@ asNumericIfRequired <- function(x, name = deparse(substitute(x)), dbg = TRUE) dbg = dbg ) } + +# dataFrameContentToTextLines -------------------------------------------------- +dataFrameContentToTextLines <- function(dframe, mycsv = FALSE, ...) +{ + if (mycsv) { + + dataFrameToCsvLines_v1(dframe, ...) + + } else { + + dataFrameToCsvLines_v2(dframe, ...) + } +} + +# dataFrameToCsvLines_v1 ------------------------------------------------------- +dataFrameToCsvLines_v1 <- function(dframe, sep, ...) +{ + #cat(sprintf("\nsep in dataFrameToCsvLines_v1(): '%s'\n", sep)) + + n_columns <- ncol(dframe) + + output <- matrix("", nrow = nrow(dframe), ncol = n_columns) + + for (column_num in seq_len(n_columns)) { + + output[, column_num] <- valuesToCsv(dframe[[column_num]], sep = sep, ...) + } + + apply(output, 1, paste, collapse = sep) +} + +# dataFrameToCsvLines_v2 ------------------------------------------------------- +dataFrameToCsvLines_v2 <- function(dframe, qchar = NULL, ...) +{ + # Do not pass qchar to write.table + + con <- textConnection("buffer", "w") + + utils::write.table(dframe, con, row.names = FALSE, col.names = FALSE, ...) + + close(con) + + buffer +} + +# get_columns ------------------------------------------------------------------ +get_columns <- kwb.utils::selectColumns + +# get_elements ----------------------------------------------------------------- +get_elements <- kwb.utils::selectElements + +# quoteTextIfNeeded ------------------------------------------------------------ +quoteTextIfNeeded <- function(x, sep, qchar, qmethod) +{ + pattern <- sprintf("\\%s|\\%s", sep, qchar) + + indices <- grep(pattern, x) + + if (length(indices) > 0) { + + surrogate <- paste0(if (qmethod == "double") qchar else '\\\\', qchar) + + quoted <- gsub(qchar, surrogate, x[indices]) + + x[indices] <- paste0(qchar, quoted, qchar) + } + + x +} + +# readPackageFile -------------------------------------------------------------- + +#' Read CSV File from Package's extdata Folder +#' +#' @param file file name (without path) +#' @param \dots additional arguments passed to \code{read.csv} +#' +#' @return data frame representing the content of \code{\link{file}} +#' +readPackageFile <- function(file, ...) +{ + kwb.utils::readPackageFile(file, package = "kwb.en13508.2", ...) +} + +# valuesToCsv ------------------------------------------------------------------ + +#' Values to CSV +#' +#' @param x vector of values representing a row in a CSV file +#' @param dec decimal character +#' @param sep field separating character +#' @param na text to be used in case of NA values +#' @param qchar quoting character to be used to surround text fields containing +#' the field separator +#' @param qmethod method used to indicate that a quoting character within a +#' quoted text field is not the ending quote. Either "double" (double the +#' quote character) or "escape" (backslash in front of the quoting character). +#' +valuesToCsv <- function( + x, dec = ".", sep = ",", na = "", qchar = '"', + qmethod = c("double", "escape")[1] +) +{ + #cat(sprintf("\nsep in valuesToCsv(): '%s'\n", sep)) + + na.indices <- is.na(x) + + if (is.factor(x)) { + + x <- as.character(x) + } + + x <- if (mode(x) == "numeric") { + + # Replace decimal character if required + if (dec != ".") sub("\\.", dec, x) else x + + } else { + + quoteTextIfNeeded(x, sep, qchar, qmethod) + } + + x[na.indices] <- na + + x +} + diff --git a/man/readPackageFile.Rd b/man/readPackageFile.Rd index 3d6e7c8..97942cf 100644 --- a/man/readPackageFile.Rd +++ b/man/readPackageFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/main.R +% Please edit documentation in R/utils.R \name{readPackageFile} \alias{readPackageFile} \title{Read CSV File from Package's extdata Folder} diff --git a/man/valuesToCsv.Rd b/man/valuesToCsv.Rd index 6160966..b03e0de 100644 --- a/man/valuesToCsv.Rd +++ b/man/valuesToCsv.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/main.R +% Please edit documentation in R/utils.R \name{valuesToCsv} \alias{valuesToCsv} \title{Values to CSV} From 12756fbc361725940839cebdb0ed2edbd9abdba7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 16:39:53 +0200 Subject: [PATCH 028/141] Use "L" to indicate integer constant --- R/import.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/import.R b/R/import.R index 7e8cabd..3513ede 100644 --- a/R/import.R +++ b/R/import.R @@ -349,8 +349,7 @@ extractInspectionData <- function(b.lines, header.info, captions) } # getValueFromKeyValueString --------------------------------------------------- - getValueFromKeyValueString <- function(keyvalue) { - sapply(strsplit(keyvalue, "="), "[", 2) + sapply(strsplit(keyvalue, "="), "[", 2L) } From c06aaa27b6f093767190bd9f64e9f472969398d9 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 16:40:15 +0200 Subject: [PATCH 029/141] Move removeEmptyLines() to utils.R --- R/import.R | 25 ------------------------- R/utils.R | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/R/import.R b/R/import.R index 3513ede..2bf1468 100644 --- a/R/import.R +++ b/R/import.R @@ -187,35 +187,13 @@ readEuCodedFile <- function( ) } -# removeEmptyLines ------------------------------------------------------------- - -removeEmptyLines <- function(x, dbg = TRUE) -{ - empty.line.indices <- grep("^$", x) - - numberOfEmptyLines <- length(empty.line.indices) - - if (numberOfEmptyLines > 0) { - - kwb.utils::.logstart(dbg, "Removing", numberOfEmptyLines, "empty lines") - - x <- x[-empty.line.indices] - - kwb.utils::.logok(dbg) - } - - x -} - # getHeaderLinesFromEuCodedLines ----------------------------------------------- - getHeaderLinesFromEuCodedLines <- function(lines) { grep("^#A", lines, value = TRUE) } # getHeaderInfoFromHeaderLines ------------------------------------------------- - getHeaderInfoFromHeaderLines <- function(header.lines, warn = TRUE) { # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( @@ -243,7 +221,6 @@ getHeaderInfoFromHeaderLines <- function(header.lines, warn = TRUE) } # findKeyAndExtractValue ------------------------------------------------------- - findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) { pattern <- paste0("^#", key) @@ -277,7 +254,6 @@ findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) } # getInspectionsFromEuLines ---------------------------------------------------- - getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) { inspections.complete <- NULL @@ -333,7 +309,6 @@ getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) } # extractInspectionData -------------------------------------------------------- - extractInspectionData <- function(b.lines, header.info, captions) { inspections <- kwb.utils::csvTextToDataFrame( diff --git a/R/utils.R b/R/utils.R index ddb45b2..cce49d4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -95,6 +95,25 @@ readPackageFile <- function(file, ...) kwb.utils::readPackageFile(file, package = "kwb.en13508.2", ...) } +# removeEmptyLines ------------------------------------------------------------- +removeEmptyLines <- function(x, dbg = TRUE) +{ + empty.line.indices <- grep("^$", x) + + numberOfEmptyLines <- length(empty.line.indices) + + if (numberOfEmptyLines > 0) { + + kwb.utils::.logstart(dbg, "Removing", numberOfEmptyLines, "empty lines") + + x <- x[-empty.line.indices] + + kwb.utils::.logok(dbg) + } + + x +} + # valuesToCsv ------------------------------------------------------------------ #' Values to CSV From e09f59ad6f835f6d17d8189abc6382445384b7ff Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 16:48:20 +0200 Subject: [PATCH 030/141] Reindent --- R/import.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/import.R b/R/import.R index 2bf1468..1aad32e 100644 --- a/R/import.R +++ b/R/import.R @@ -29,10 +29,10 @@ readEuCodedFiles <- function( input.file <- input.files[i] - kwb.utils::catIf( - dbg, sprintf("input file %d/%d: %s\n", i, length(input.files), input.file) - ) - + kwb.utils::catIf(dbg, sprintf( + "input file %d/%d: %s\n", i, length(input.files), input.file + )) + inspectionData <- try(readEuCodedFile(input.file, dbg = dbg, ...)) # Skip the following if an error occurred @@ -144,9 +144,9 @@ readEuCodedFile <- function( ) } - kwb.utils::catIf( - dbg, sprintf("%d inspections extracted. ", nrow(inspections)) - ) + kwb.utils::catIf(dbg, sprintf( + "%d inspections extracted. ", nrow(inspections) + )) } else { From 9788bea594722327aec38884472a09d807ca15ae Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 16:49:13 +0200 Subject: [PATCH 031/141] Use "L" as indicator for integer, return early --- R/import.R | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/R/import.R b/R/import.R index 1aad32e..3edbb7e 100644 --- a/R/import.R +++ b/R/import.R @@ -227,21 +227,18 @@ findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) index <- grep(pattern, keyvalues) - if (length(index) == 0) { + if (length(index) == 0L) { warnMessage <- sprintf( "Key '#%s' not found in the #A-header of the file.", key ) if (! is.na(default)) { - warnMessage <- paste(warnMessage, "I will use the default:", default) } if (warn) { - message(warnMessage) - warning(warnMessage) } @@ -249,7 +246,7 @@ findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) } else { - strsplit(keyvalues[index], "=")[[1]][2] + strsplit(keyvalues[index], "=")[[1L]][2L] } } @@ -258,7 +255,7 @@ getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) { inspections.complete <- NULL - header.line.number <- 1 + header.line.number <- 1L continue <- TRUE @@ -266,7 +263,7 @@ getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) aborted <- FALSE - while (! aborted && length(indices.B) > 0) { + while (! aborted && length(indices.B) > 0L) { b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) @@ -275,9 +272,9 @@ getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) if (kwb.utils::allAreEqual(b.captions)) { inspections <- extractInspectionData( - b.lines = eu_lines[indices.B + 1], + b.lines = eu_lines[indices.B + 1L], header.info = header.info, - captions = b.captions[[1]] + captions = b.captions[[1L]] ) inspections.complete <- kwb.utils::safeColumnBind( @@ -296,16 +293,16 @@ getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) aborted <- TRUE } - header.line.number <- header.line.number + 1 + header.line.number <- header.line.number + 1L - indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) + indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) } - if (! aborted) { - - inspections.complete - } - # else NULL implicitly + if (aborted) { + return(NULL) + } + + inspections.complete } # extractInspectionData -------------------------------------------------------- From fad8231ea8f3b1bd00beb6cc2dc38bc1ecae420f Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 18:39:17 +0200 Subject: [PATCH 032/141] Rename import.R to readEuCodedFiles.R --- R/{import.R => readEuCodedFiles.R} | 30 +++++++++++++++++++----------- man/readEuCodedFile.Rd | 2 +- man/readEuCodedFiles.Rd | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) rename R/{import.R => readEuCodedFiles.R} (95%) diff --git a/R/import.R b/R/readEuCodedFiles.R similarity index 95% rename from R/import.R rename to R/readEuCodedFiles.R index 3edbb7e..248c16d 100644 --- a/R/import.R +++ b/R/readEuCodedFiles.R @@ -35,18 +35,19 @@ readEuCodedFiles <- function( inspectionData <- try(readEuCodedFile(input.file, dbg = dbg, ...)) - # Skip the following if an error occurred - if (! kwb.utils::isTryError(inspectionData)) { - - # Append inspection data to result list - filename <- basename(input.file) - - if (append.file.names) { - inspectionData$inspections$file <- filename - } - - inspectionData + # Return NULL if an error occurred + if (kwb.utils::isTryError(inspectionData)) { + return(NULL) } + + if (append.file.names) { + inspectionData$inspections <- setFilename( + inspectionData$inspections, + basename(input.file) + ) + } + + inspectionData }) failed <- sapply(result, is.null) @@ -325,3 +326,10 @@ getValueFromKeyValueString <- function(keyvalue) { sapply(strsplit(keyvalue, "="), "[", 2L) } + +# setFilename ------------------------------------------------------------------ +setFilename <- function(data, name) +{ + data[["file"]] <- name + kwb.utils::moveColumnsToFront(data, "file") +} diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 0c7411b..52a7bca 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/import.R +% Please edit documentation in R/readEuCodedFiles.R \name{readEuCodedFile} \alias{readEuCodedFile} \title{Read CCTV Inspection Data in EN13508-2 Format} diff --git a/man/readEuCodedFiles.Rd b/man/readEuCodedFiles.Rd index 31c71d0..2efafe7 100644 --- a/man/readEuCodedFiles.Rd +++ b/man/readEuCodedFiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/import.R +% Please edit documentation in R/readEuCodedFiles.R \name{readEuCodedFiles} \alias{readEuCodedFiles} \title{Read Multiple CCTV Inspection Files} From 411a83d98b05a0a0a5072975219831c7bac94275 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 18:39:31 +0200 Subject: [PATCH 033/141] Use kwb.utils::orderBy() --- R/plot.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R/plot.R b/R/plot.R index 54913cd..5a234f6 100644 --- a/R/plot.R +++ b/R/plot.R @@ -123,7 +123,7 @@ get_extended_observations <- function(survey) key_columns <- c("inspno", "I", code_columns) - obs <- order_by(obs, key_columns) + obs <- kwb.utils::orderBy(obs, key_columns) kwb.utils::moveColumnsToFront(obs, columns = c(key_columns, "ldid", "ldidno")) } @@ -164,14 +164,6 @@ get_code_meanings <- function() kwb.utils::renameColumns(code_meanings, renamings) } -# order_by --------------------------------------------------------------------- -order_by <- function(df, columns) -{ - keys <- kwb.utils::selectColumns(df, columns) - - kwb.utils::resetRowNames(df[do.call(order, keys), ]) -} - # remove_point_damages --------------------------------------------------------- remove_point_damages <- function(x) { From 783a938b6a79f610a5451ac6dbc7c946473c3eee Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 18:42:36 +0200 Subject: [PATCH 034/141] Rename get_code_meanings() to getCodeMeanings() --- R/plot.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/plot.R b/R/plot.R index 5a234f6..5ef7015 100644 --- a/R/plot.R +++ b/R/plot.R @@ -105,7 +105,7 @@ get_extended_observations <- function(survey) obs <- kwb.utils::selectColumns(observations, obs_columns) # Add start and end positions and code meanings to observations - codes <- get_code_meanings() + codes <- getCodeMeanings() obs <- merge(obs, codes, by.x = "A", by.y = "Code", all.x = TRUE) obs <- merge(obs, ins, by = "inspno") @@ -146,8 +146,8 @@ get_extreme_positions <- function(inspections) } -# get_code_meanings ------------------------------------------------------------ -get_code_meanings <- function() +# getCodeMeanings -------------------------------------------------------------- +getCodeMeanings <- function() { renamings <- list( Table = "CodeTable", From cbd4fa3ef3e41f41407edee16617c91647397037 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Aug 2022 23:33:39 +0200 Subject: [PATCH 035/141] Clean export.R --- R/export.R | 215 ++++++++++++++++++---------------------- man/writeEuCodedFile.Rd | 2 +- 2 files changed, 98 insertions(+), 119 deletions(-) diff --git a/R/export.R b/R/export.R index 4577e92..8672d9d 100644 --- a/R/export.R +++ b/R/export.R @@ -17,13 +17,21 @@ utils::globalVariables(c("buffer")) # see toEuFormat_v1 #' @export #' euCodedFileHeader <- function( - separator = ";", decimal = ".", quote = '"', encoding = "ISO-8859-1", - language = "en", year = 2010 + separator = ";", + decimal = ".", + quote = '"', + encoding = "ISO-8859-1", + language = "en", + year = 2010L ) { list( - separator = separator, decimal = decimal, quote = quote, - encoding = encoding, language = language, year = year + separator = separator, + decimal = decimal, + quote = quote, + encoding = encoding, + language = language, + year = year ) } @@ -50,41 +58,44 @@ euCodedFileHeader <- function( #' writeEuCodedFiles <- function(survey, file, blocksize = 100, dbg = TRUE) { - stopifnot(kwb.utils::stringEndsWith(file, ".txt")) + stopifnot(endsWith(file, ".txt")) - N <- nrow(get_elements(survey, "inspections")) + header.info <- get_elements(survey, "header.info") + inspections <- get_elements(survey, "inspections") + observations <- get_elements(survey, "observations") + + inspnos <- get_columns(observations, "inspno") + + N <- nrow(inspections) + + # Function to create output file name + fileToOutputFile <- function(i, j) { + pattern <- paste0("%0", nchar(N), "d") + postfix <- sprintf(paste0("_", pattern, "_", pattern, ".txt"), i, j) + gsub("\\.txt$", postfix, file) + } for (block_number in seq_len(ceiling(N / blocksize))) { - i <- (block_number - 1) * blocksize + 1 - + i <- (block_number - 1L) * blocksize + 1L j <- min(block_number * blocksize, N) - pattern <- paste0("%0", nchar(N), "d") - - postfix <- sprintf(paste0("_", pattern, "_", pattern, ".txt"), i, j) - - output_file <- gsub("\\.txt$", postfix, file) - - kwb.utils::.logstart(dbg, "Writing", output_file) + output_file <- fileToOutputFile(file, i, j) - inspno <- get_columns(get_elements(survey, "observations"), "inspno") - - # Select inspections with numbers between i and j and select the - # corresponding observations - selected <- kwb.utils::inRange(inspno, i, j) - - writeEuCodedFile( - inspection.data = list( - header.info = get_elements(survey, "header.info"), - inspections = get_elements(survey, "inspections")[i:j, ], - observations = get_elements(survey, "observations")[selected, ] - ), - output.file = output_file, - dbg = dbg - ) - - kwb.utils::.logok(dbg) + kwb.utils::catAndRun(dbg = dbg, paste("Writing", output_file), { + + writeEuCodedFile( + inspection.data = list( + header.info = header.info, + # Select inspections with numbers between i and j + inspections = inspections[i:j, ], + # Select the corresponding observations + observations = observations[kwb.utils::inRange(inspnos, i, j), ] + ), + output.file = output_file, + dbg = dbg + ) + }) } } @@ -112,44 +123,39 @@ writeEuCodedFiles <- function(survey, file, blocksize = 100, dbg = TRUE) #' @export #' writeEuCodedFile <- function( - inspection.data, output.file = NULL, version = 3, dbg = TRUE, ... + inspection.data, output.file = NULL, version = 3L, dbg = TRUE, ... ) { #kwb.utils::assignPackageObjects("kwb.en13508.2") - kwb.utils::.logstart(dbg, "Formatting lines") - - output.lines <- if (version == 1) { - - toEuFormat_v1( - header.info = get_elements(inspection.data, "header.info"), - inspections = get_elements(inspection.data, "inspections"), - observations = get_elements(inspection.data, "observations") - ) - - } else if (version == 2) { + output.lines <- kwb.utils::catAndRun(dbg = dbg, "Formatting lines", { - toEuFormat_v2(inspection.data, mycsv = FALSE, ...) - - } else { + if (version == 1L) { + + toEuFormat_v1( + header.info = get_elements(inspection.data, "header.info"), + inspections = get_elements(inspection.data, "inspections"), + observations = get_elements(inspection.data, "observations") + ) + + } else if (version == 2L) { + + toEuFormat_v2(inspection.data, mycsv = FALSE, ...) + + } else { + + toEuFormat_v2(inspection.data, mycsv = TRUE, ...) + } - toEuFormat_v2(inspection.data, mycsv = TRUE, ...) - } + }) - kwb.utils::.logok(dbg) - - if (! is.null(output.file)) { - - kwb.utils::.logstart(dbg, "Writing lines to", output.file) + if (is.null(output.file)) { + return(output.lines) + } + kwb.utils::catAndRun(dbg = dbg, paste("Writing lines to", output.file), { writeLines(output.lines, output.file) - - kwb.utils::.logok(dbg) - - } else { - - output.lines - } + }) } # toEuFormat_v1 ---------------------------------------------------------------- @@ -170,23 +176,19 @@ toEuFormat_v1 <- function(header.info, inspections, observations) sep <- header.info$separator # Save the inspection numbers - inspno <- get_columns(observations, "inspno") + inspnos <- get_columns(observations, "inspno") # Remove the column containing the inspection numbers observations <- kwb.utils::removeColumns(observations, "inspno") tc <- textConnection("buffer", "w") + on.exit(close(tc)) #kwb.utils::assignPackageObjects("kwb.en13508.2") writeLines(getHeaderLinesFromHeaderInfo(header.info), tc) - insp.header.line <- inspectionHeaderLine( - header.fields = names(inspections), separator = sep - ) - - obs.header.line <- observationHeaderLine( - header.fields = names(observations), separator = sep - ) + insp.header.line <- inspectionHeaderLine(names(inspections), sep) + obs.header.line <- observationHeaderLine(names(observations), sep) insp.numbers <- rownames(inspections) @@ -199,10 +201,10 @@ toEuFormat_v1 <- function(header.info, inspections, observations) ) # Get index ranges of inspections (see kwb.event::hsEventsOnChange()) - n_obs <- length(inspno) - change_index <- which(inspno[1:(n_obs - 1)] != inspno[2:n_obs]) + 1 - begin_index <- c(1, change_index) - end_index = c(change_index - 1, n_obs) + n_obs <- length(inspnos) + change_index <- which(inspnos[1:(n_obs - 1L)] != inspnos[2:n_obs]) + 1L + begin_index <- c(1L, change_index) + end_index = c(change_index - 1L, n_obs) # Loop through the inspections for (i in seq_len(n)) { @@ -210,9 +212,7 @@ toEuFormat_v1 <- function(header.info, inspections, observations) kwb.utils::catIf(i %% 100 == 0, "i =", i, "\n") writeLines(insp.header.line, tc) - write_table(inspections[i, ], tc, sep) - writeLines(obs.header.line, tc) indices <- begin_index[i]:end_index[i] @@ -220,13 +220,10 @@ toEuFormat_v1 <- function(header.info, inspections, observations) write_table(observations[indices, ], tc, sep) if (i < n) { - writeLines("#Z", tc) } } - close(tc) - buffer } @@ -239,36 +236,23 @@ toEuFormat_v1 <- function(header.info, inspections, observations) #' getHeaderLinesFromHeaderInfo <- function(header.info) { - columns <- c("encoding", "language", "separator", "decimal", "quote", "year") + elements <- c("encoding", "language", "separator", "decimal", "quote", "year") - kwb.utils::checkForMissingColumns(header.info, columns) - - values <- c( - header.info$encoding, - header.info$language, - header.info$separator, - header.info$decimal, - header.info$quote, - header.info$year - ) + values <- unlist(kwb.utils::selectElements(header.info, elements)) paste(sprintf("#A%d", seq_along(values)), values, sep = "=") } # inspectionHeaderLine --------------------------------------------------------- - -inspectionHeaderLine <- function(header.fields, separator) +inspectionHeaderLine <- function(header.fields, sep) { - sprintf("#B01=%s", paste(header.fields, collapse = separator)) + sprintf("#B01=%s", paste(header.fields, collapse = sep)) } # observationHeaderLine -------------------------------------------------------- - -observationHeaderLine <- function(header.fields, separator) +observationHeaderLine <- function(header.fields, sep) { - sprintf("#C=%s", paste( - header.fields[header.fields != "inspno"], collapse = separator - )) + sprintf("#C=%s", paste(setdiff(header.fields, "inspno"), collapse = sep)) } # toEuFormat_v2 ---------------------------------------------------------------- @@ -322,29 +306,24 @@ toEuFormat_v2 <- function(inspection.data, mycsv, ...) # Number of C-blocks (= number of inspections) n <- length(c_sizes) - cat("ok.\n Writing B-blocks (inspection data) ... ") - - b_indices <- offset + 1 + 4 * (seq_len(n) - 1) + c(0, c_sizes[-n]) - - out_lines[b_indices] <- inspectionHeaderLine(names(inspections), sep) - - out_lines[b_indices + 1] <- to_csv(inspections) - - cat("ok.\n Writing C-blocks (observation data) ... ") - - out_lines[b_indices + 2] <- observationHeaderLine(names(observations), sep) - - z_indices <- b_indices[-1] - 1 - - skip_indices <- c(b_indices, b_indices + 1, b_indices + 2, z_indices) - - n_rows <- c_sizes[n] + 4 * n - 1 + offset - - c_body_indices <- setdiff(seq(offset + 1, n_rows), skip_indices) + cat("ok.\n") - out_lines[c_body_indices] <- to_csv(observations) + kwb.utils::catAndRun(" Writing B-blocks (inspection data) ... ", { + + b_indices <- offset + 1L + 4 * (seq_len(n) - 1L) + c(0L, c_sizes[-n]) + out_lines[b_indices] <- inspectionHeaderLine(names(inspections), sep) + out_lines[b_indices + 1L] <- to_csv(inspections) + }) - cat("ok.\n") + kwb.utils::catAndRun(" Writing C-blocks (observation data) ... ", { + + out_lines[b_indices + 2L] <- observationHeaderLine(names(observations), sep) + z_indices <- b_indices[-1L] - 1L + skip_indices <- c(b_indices, b_indices + 1L, b_indices + 2L, z_indices) + n_rows <- c_sizes[n] + 4 * n - 1L + offset + c_body_indices <- setdiff(seq(offset + 1L, n_rows), skip_indices) + out_lines[c_body_indices] <- to_csv(observations) + }) out_lines[z_indices] <- "#Z" diff --git a/man/writeEuCodedFile.Rd b/man/writeEuCodedFile.Rd index 642a019..6ac193f 100644 --- a/man/writeEuCodedFile.Rd +++ b/man/writeEuCodedFile.Rd @@ -7,7 +7,7 @@ writeEuCodedFile( inspection.data, output.file = NULL, - version = 3, + version = 3L, dbg = TRUE, ... ) From 6ef008dced7f1b0fff43f7d6e59621473234b184 Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 3 Aug 2022 08:19:24 +0200 Subject: [PATCH 036/141] Rename export.R to writeEuCodedFiles.R according to the main function contained --- R/{export.R => writeEuCodedFiles.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{export.R => writeEuCodedFiles.R} (100%) diff --git a/R/export.R b/R/writeEuCodedFiles.R similarity index 100% rename from R/export.R rename to R/writeEuCodedFiles.R From 3a077bb06b106cb61f0b30bb0949b865b1061894 Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 3 Aug 2022 08:21:59 +0200 Subject: [PATCH 037/141] Rename N to n_inspections --- R/writeEuCodedFiles.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/writeEuCodedFiles.R b/R/writeEuCodedFiles.R index 8672d9d..354cad9 100644 --- a/R/writeEuCodedFiles.R +++ b/R/writeEuCodedFiles.R @@ -66,19 +66,19 @@ writeEuCodedFiles <- function(survey, file, blocksize = 100, dbg = TRUE) inspnos <- get_columns(observations, "inspno") - N <- nrow(inspections) + n_inspections <- nrow(inspections) # Function to create output file name fileToOutputFile <- function(i, j) { - pattern <- paste0("%0", nchar(N), "d") + pattern <- paste0("%0", nchar(n_inspections), "d") postfix <- sprintf(paste0("_", pattern, "_", pattern, ".txt"), i, j) gsub("\\.txt$", postfix, file) } - for (block_number in seq_len(ceiling(N / blocksize))) { + for (block_number in seq_len(ceiling(n_inspections / blocksize))) { i <- (block_number - 1L) * blocksize + 1L - j <- min(block_number * blocksize, N) + j <- min(block_number * blocksize, n_inspections) output_file <- fileToOutputFile(file, i, j) From 3c9b456a6337ef23c9124842adef97da25e3ac0a Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 3 Aug 2022 08:39:16 +0200 Subject: [PATCH 038/141] Move functions to extra files --- R/euCodedFileHeader.R | 34 +++++++ R/toEuFormat.R | 173 ++++++++++++++++++++++++++++++++++ R/writeEuCodedFiles.R | 211 +----------------------------------------- 3 files changed, 208 insertions(+), 210 deletions(-) create mode 100644 R/euCodedFileHeader.R create mode 100644 R/toEuFormat.R diff --git a/R/euCodedFileHeader.R b/R/euCodedFileHeader.R new file mode 100644 index 0000000..77e6252 --- /dev/null +++ b/R/euCodedFileHeader.R @@ -0,0 +1,34 @@ +# euCodedFileHeader ------------------------------------------------------------ + +#' Generate List With EU Header Information +#' +#' @param separator default: ";" +#' @param decimal default: "." +#' @param quote default: '"' +#' @param encoding default: "ISO-8859-1" +#' @param language default: "en" +#' @param year default: 2010 +#' +#' @return list with elements \code{separator}, \code{decimal}, \code{quote}, +#' \code{encoding}, \code{language}, \code{year} +#' +#' @export +#' +euCodedFileHeader <- function( + separator = ";", + decimal = ".", + quote = '"', + encoding = "ISO-8859-1", + language = "en", + year = 2010L +) +{ + list( + separator = separator, + decimal = decimal, + quote = quote, + encoding = encoding, + language = language, + year = year + ) +} diff --git a/R/toEuFormat.R b/R/toEuFormat.R new file mode 100644 index 0000000..a44debe --- /dev/null +++ b/R/toEuFormat.R @@ -0,0 +1,173 @@ +utils::globalVariables(c("buffer")) # see toEuFormat_v1 + +# toEuFormat_v1 ---------------------------------------------------------------- + +#' Generate Lines in EU Export Format (v1) +#' +#' Generate lines in EU export format (version 1: slow) +#' +#' @param header.info according to list element "header.info" of list returned +#' by \code{\link{readEuCodedFile}} +#' @param inspections according to list element "inspections" of list returned +#' by \code{\link{readEuCodedFile}} +#' @param observations according to list element "observations" of list returned +#' by \code{\link{readEuCodedFile}} +#' +toEuFormat_v1 <- function(header.info, inspections, observations) +{ + sep <- header.info$separator + + # Save the inspection numbers + inspnos <- get_columns(observations, "inspno") + + # Remove the column containing the inspection numbers + observations <- kwb.utils::removeColumns(observations, "inspno") + + tc <- textConnection("buffer", "w") + on.exit(close(tc)) + + #kwb.utils::assignPackageObjects("kwb.en13508.2") + writeLines(getHeaderLinesFromHeaderInfo(header.info), tc) + + insp.header.line <- inspectionHeaderLine(names(inspections), sep) + obs.header.line <- observationHeaderLine(names(observations), sep) + + insp.numbers <- rownames(inspections) + + n <- nrow(inspections) + + # Define helper function + write_table <- function(x, tc, sep) utils::write.table( + x, tc, sep = sep, col.names = FALSE, row.names = FALSE, append = TRUE, + na = "" + ) + + # Get index ranges of inspections (see kwb.event::hsEventsOnChange()) + n_obs <- length(inspnos) + change_index <- which(inspnos[1:(n_obs - 1L)] != inspnos[2:n_obs]) + 1L + begin_index <- c(1L, change_index) + end_index = c(change_index - 1L, n_obs) + + # Loop through the inspections + for (i in seq_len(n)) { + + kwb.utils::catIf(i %% 100 == 0, "i =", i, "\n") + + writeLines(insp.header.line, tc) + write_table(inspections[i, ], tc, sep) + writeLines(obs.header.line, tc) + + indices <- begin_index[i]:end_index[i] + + write_table(observations[indices, ], tc, sep) + + if (i < n) { + writeLines("#Z", tc) + } + } + + buffer +} + +# getHeaderLinesFromHeaderInfo ------------------------------------------------- + +#' Get Header Lines From Header Info +#' +#' @param header.info list with elements \code{encoding}, \code{language}, +#' \code{separator}, \code{decimal}, \code{quote}, \code{year} +#' +getHeaderLinesFromHeaderInfo <- function(header.info) +{ + elements <- c("encoding", "language", "separator", "decimal", "quote", "year") + + values <- unlist(kwb.utils::selectElements(header.info, elements)) + + paste(sprintf("#A%d", seq_along(values)), values, sep = "=") +} + +# inspectionHeaderLine --------------------------------------------------------- +inspectionHeaderLine <- function(header.fields, sep) +{ + sprintf("#B01=%s", paste(header.fields, collapse = sep)) +} + +# observationHeaderLine -------------------------------------------------------- +observationHeaderLine <- function(header.fields, sep) +{ + sprintf("#C=%s", paste(setdiff(header.fields, "inspno"), collapse = sep)) +} + +# toEuFormat_v2 ---------------------------------------------------------------- + +#' Generate Lines in EU Export Format (v2) +#' +#' Generate lines in EU export format (version 2: faster than version 1) +#' +#' @param inspection.data inspection data as retrieved by e.g. +#' \code{\link{readEuCodedFile}} +#' @param mycsv logical. If TRUE "my" version of writing CSV is used (fast), +#' otherwise CSV is written by means of write.table (slow) +#' @param \dots further arguments passed to dataFrameContentToTextLines +#' +toEuFormat_v2 <- function(inspection.data, mycsv, ...) +{ + #kwb.utils::assignPackageObjects("kwb.en13508.2") + + # Provide list elements in variables header_info, inspections, observations + header_info <- get_elements(inspection.data, "header.info") + inspections <- get_elements(inspection.data, "inspections") + observations <- get_elements(inspection.data, "observations") + + # Save the inspection numbers in inspno + inspection_numbers <- get_columns(observations, "inspno") + + # Remove the inspection numbers + observations <- kwb.utils::removeColumns(observations, "inspno") + + # Build argument list for calling dataFrameContentToTextLines + elements <- c(sep = "separator", dec = "decimal", qchar = "quote") + arguments <- get_elements(header_info, elements) + + # Save the separator in its own variable for reusage + sep <- arguments$sep + + # Helper function to create CSV lines + to_csv <- function(x) do.call(dataFrameContentToTextLines, c( + arguments, list(dframe = x, na = "", mycsv = mycsv, ...) + )) + + # Start the output lines with the A-block + out_lines <- getHeaderLinesFromHeaderInfo(header_info) + + # Offset of further rows + offset <- length(out_lines) + + # Cumulative sizes (number of lines) of the C-blocks + c_sizes <- cumsum(unname(table(inspection_numbers))) + + # Number of C-blocks (= number of inspections) + n <- length(c_sizes) + + cat("ok.\n") + + kwb.utils::catAndRun(" Writing B-blocks (inspection data) ... ", { + + b_indices <- offset + 1L + 4 * (seq_len(n) - 1L) + c(0L, c_sizes[-n]) + out_lines[b_indices] <- inspectionHeaderLine(names(inspections), sep) + out_lines[b_indices + 1L] <- to_csv(inspections) + }) + + kwb.utils::catAndRun(" Writing C-blocks (observation data) ... ", { + + out_lines[b_indices + 2L] <- observationHeaderLine(names(observations), sep) + z_indices <- b_indices[-1L] - 1L + skip_indices <- c(b_indices, b_indices + 1L, b_indices + 2L, z_indices) + n_rows <- c_sizes[n] + 4 * n - 1L + offset + c_body_indices <- setdiff(seq(offset + 1L, n_rows), skip_indices) + out_lines[c_body_indices] <- to_csv(observations) + }) + + out_lines[z_indices] <- "#Z" + + out_lines +} diff --git a/R/writeEuCodedFiles.R b/R/writeEuCodedFiles.R index 354cad9..21d4e08 100644 --- a/R/writeEuCodedFiles.R +++ b/R/writeEuCodedFiles.R @@ -1,40 +1,3 @@ -utils::globalVariables(c("buffer")) # see toEuFormat_v1 - -# euCodedFileHeader ------------------------------------------------------------ - -#' Generate List With EU Header Information -#' -#' @param separator default: ";" -#' @param decimal default: "." -#' @param quote default: '"' -#' @param encoding default: "ISO-8859-1" -#' @param language default: "en" -#' @param year default: 2010 -#' -#' @return list with elements \code{separator}, \code{decimal}, \code{quote}, -#' \code{encoding}, \code{language}, \code{year} -#' -#' @export -#' -euCodedFileHeader <- function( - separator = ";", - decimal = ".", - quote = '"', - encoding = "ISO-8859-1", - language = "en", - year = 2010L -) -{ - list( - separator = separator, - decimal = decimal, - quote = quote, - encoding = encoding, - language = language, - year = year - ) -} - # writeEuCodedFiles ------------------------------------------------------------ #' Write Inspection Data to Files in EU Format @@ -68,7 +31,7 @@ writeEuCodedFiles <- function(survey, file, blocksize = 100, dbg = TRUE) n_inspections <- nrow(inspections) - # Function to create output file name + # Function to create output file name indicating first and last inspection fileToOutputFile <- function(i, j) { pattern <- paste0("%0", nchar(n_inspections), "d") postfix <- sprintf(paste0("_", pattern, "_", pattern, ".txt"), i, j) @@ -157,175 +120,3 @@ writeEuCodedFile <- function( writeLines(output.lines, output.file) }) } - -# toEuFormat_v1 ---------------------------------------------------------------- - -#' Generate Lines in EU Export Format (v1) -#' -#' Generate lines in EU export format (version 1: slow) -#' -#' @param header.info according to list element "header.info" of list returned -#' by \code{\link{readEuCodedFile}} -#' @param inspections according to list element "inspections" of list returned -#' by \code{\link{readEuCodedFile}} -#' @param observations according to list element "observations" of list returned -#' by \code{\link{readEuCodedFile}} -#' -toEuFormat_v1 <- function(header.info, inspections, observations) -{ - sep <- header.info$separator - - # Save the inspection numbers - inspnos <- get_columns(observations, "inspno") - - # Remove the column containing the inspection numbers - observations <- kwb.utils::removeColumns(observations, "inspno") - - tc <- textConnection("buffer", "w") - on.exit(close(tc)) - - #kwb.utils::assignPackageObjects("kwb.en13508.2") - writeLines(getHeaderLinesFromHeaderInfo(header.info), tc) - - insp.header.line <- inspectionHeaderLine(names(inspections), sep) - obs.header.line <- observationHeaderLine(names(observations), sep) - - insp.numbers <- rownames(inspections) - - n <- nrow(inspections) - - # Define helper function - write_table <- function(x, tc, sep) utils::write.table( - x, tc, sep = sep, col.names = FALSE, row.names = FALSE, append = TRUE, - na = "" - ) - - # Get index ranges of inspections (see kwb.event::hsEventsOnChange()) - n_obs <- length(inspnos) - change_index <- which(inspnos[1:(n_obs - 1L)] != inspnos[2:n_obs]) + 1L - begin_index <- c(1L, change_index) - end_index = c(change_index - 1L, n_obs) - - # Loop through the inspections - for (i in seq_len(n)) { - - kwb.utils::catIf(i %% 100 == 0, "i =", i, "\n") - - writeLines(insp.header.line, tc) - write_table(inspections[i, ], tc, sep) - writeLines(obs.header.line, tc) - - indices <- begin_index[i]:end_index[i] - - write_table(observations[indices, ], tc, sep) - - if (i < n) { - writeLines("#Z", tc) - } - } - - buffer -} - -# getHeaderLinesFromHeaderInfo ------------------------------------------------- - -#' Get Header Lines From Header Info -#' -#' @param header.info list with elements \code{encoding}, \code{language}, -#' \code{separator}, \code{decimal}, \code{quote}, \code{year} -#' -getHeaderLinesFromHeaderInfo <- function(header.info) -{ - elements <- c("encoding", "language", "separator", "decimal", "quote", "year") - - values <- unlist(kwb.utils::selectElements(header.info, elements)) - - paste(sprintf("#A%d", seq_along(values)), values, sep = "=") -} - -# inspectionHeaderLine --------------------------------------------------------- -inspectionHeaderLine <- function(header.fields, sep) -{ - sprintf("#B01=%s", paste(header.fields, collapse = sep)) -} - -# observationHeaderLine -------------------------------------------------------- -observationHeaderLine <- function(header.fields, sep) -{ - sprintf("#C=%s", paste(setdiff(header.fields, "inspno"), collapse = sep)) -} - -# toEuFormat_v2 ---------------------------------------------------------------- - -#' Generate Lines in EU Export Format (v2) -#' -#' Generate lines in EU export format (version 2: faster than version 1) -#' -#' @param inspection.data inspection data as retrieved by e.g. -#' \code{\link{readEuCodedFile}} -#' @param mycsv logical. If TRUE "my" version of writing CSV is used (fast), -#' otherwise CSV is written by means of write.table (slow) -#' @param \dots further arguments passed to dataFrameContentToTextLines -#' -toEuFormat_v2 <- function(inspection.data, mycsv, ...) -{ - #kwb.utils::assignPackageObjects("kwb.en13508.2") - - # Provide list elements in variables header_info, inspections, observations - header_info <- get_elements(inspection.data, "header.info") - inspections <- get_elements(inspection.data, "inspections") - observations <- get_elements(inspection.data, "observations") - - # Save the inspection numbers in inspno - inspection_numbers <- get_columns(observations, "inspno") - - # Remove the inspection numbers - observations <- kwb.utils::removeColumns(observations, "inspno") - - # Build argument list for calling dataFrameContentToTextLines - elements <- c(sep = "separator", dec = "decimal", qchar = "quote") - arguments <- get_elements(header_info, elements) - - # Save the separator in its own variable for reusage - sep <- arguments$sep - - # Helper function to create CSV lines - to_csv <- function(x) do.call(dataFrameContentToTextLines, c( - arguments, list(dframe = x, na = "", mycsv = mycsv, ...) - )) - - # Start the output lines with the A-block - out_lines <- getHeaderLinesFromHeaderInfo(header_info) - - # Offset of further rows - offset <- length(out_lines) - - # Cumulative sizes (number of lines) of the C-blocks - c_sizes <- cumsum(unname(table(inspection_numbers))) - - # Number of C-blocks (= number of inspections) - n <- length(c_sizes) - - cat("ok.\n") - - kwb.utils::catAndRun(" Writing B-blocks (inspection data) ... ", { - - b_indices <- offset + 1L + 4 * (seq_len(n) - 1L) + c(0L, c_sizes[-n]) - out_lines[b_indices] <- inspectionHeaderLine(names(inspections), sep) - out_lines[b_indices + 1L] <- to_csv(inspections) - }) - - kwb.utils::catAndRun(" Writing C-blocks (observation data) ... ", { - - out_lines[b_indices + 2L] <- observationHeaderLine(names(observations), sep) - z_indices <- b_indices[-1L] - 1L - skip_indices <- c(b_indices, b_indices + 1L, b_indices + 2L, z_indices) - n_rows <- c_sizes[n] + 4 * n - 1L + offset - c_body_indices <- setdiff(seq(offset + 1L, n_rows), skip_indices) - out_lines[c_body_indices] <- to_csv(observations) - }) - - out_lines[z_indices] <- "#Z" - - out_lines -} From 9e4bb155817e144bcb9db744d2eb0ebfb143999c Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 3 Aug 2022 09:03:02 +0200 Subject: [PATCH 039/141] Add toEuFormat() --- R/toEuFormat.R | 26 ++++++++++++++++++++++++++ R/writeEuCodedFiles.R | 19 +------------------ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/R/toEuFormat.R b/R/toEuFormat.R index a44debe..38ba787 100644 --- a/R/toEuFormat.R +++ b/R/toEuFormat.R @@ -1,5 +1,31 @@ utils::globalVariables(c("buffer")) # see toEuFormat_v1 +#' Generate Lines in EU Export Format +#' +#' @param inspection.data inspection data as retrieved by e.g. +#' \code{\link{readEuCodedFile}} +#' @param version version of implementation. One of \code{c(1, 2, 3)} +#' @param \dots passed to \code{toEuFormat_v2} +toEuFormat <- function(inspection.data, version = 3L) +{ + if (version == 1L) { + + toEuFormat_v1( + header.info = get_elements(inspection.data, "header.info"), + inspections = get_elements(inspection.data, "inspections"), + observations = get_elements(inspection.data, "observations") + ) + + } else if (version == 2L) { + + toEuFormat_v2(inspection.data, mycsv = FALSE, ...) + + } else { + + toEuFormat_v2(inspection.data, mycsv = TRUE, ...) + } +} + # toEuFormat_v1 ---------------------------------------------------------------- #' Generate Lines in EU Export Format (v1) diff --git a/R/writeEuCodedFiles.R b/R/writeEuCodedFiles.R index 21d4e08..81817b9 100644 --- a/R/writeEuCodedFiles.R +++ b/R/writeEuCodedFiles.R @@ -92,24 +92,7 @@ writeEuCodedFile <- function( #kwb.utils::assignPackageObjects("kwb.en13508.2") output.lines <- kwb.utils::catAndRun(dbg = dbg, "Formatting lines", { - - if (version == 1L) { - - toEuFormat_v1( - header.info = get_elements(inspection.data, "header.info"), - inspections = get_elements(inspection.data, "inspections"), - observations = get_elements(inspection.data, "observations") - ) - - } else if (version == 2L) { - - toEuFormat_v2(inspection.data, mycsv = FALSE, ...) - - } else { - - toEuFormat_v2(inspection.data, mycsv = TRUE, ...) - } - + toEuFormat(inspection.data, version) }) if (is.null(output.file)) { From 0295fe96e79b13cdfc01051174de2636e838123b Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 3 Aug 2022 09:07:56 +0200 Subject: [PATCH 040/141] Clean toEuFormat.R, improve names --- R/toEuFormat.R | 58 ++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/R/toEuFormat.R b/R/toEuFormat.R index 38ba787..cb3327a 100644 --- a/R/toEuFormat.R +++ b/R/toEuFormat.R @@ -41,8 +41,6 @@ toEuFormat <- function(inspection.data, version = 3L) #' toEuFormat_v1 <- function(header.info, inspections, observations) { - sep <- header.info$separator - # Save the inspection numbers inspnos <- get_columns(observations, "inspno") @@ -55,16 +53,23 @@ toEuFormat_v1 <- function(header.info, inspections, observations) #kwb.utils::assignPackageObjects("kwb.en13508.2") writeLines(getHeaderLinesFromHeaderInfo(header.info), tc) + sep <- header.info$separator + insp.header.line <- inspectionHeaderLine(names(inspections), sep) obs.header.line <- observationHeaderLine(names(observations), sep) insp.numbers <- rownames(inspections) - n <- nrow(inspections) + n_inspections <- nrow(inspections) # Define helper function - write_table <- function(x, tc, sep) utils::write.table( - x, tc, sep = sep, col.names = FALSE, row.names = FALSE, append = TRUE, + write_table <- function(x) utils::write.table( + x, + file = tc, + sep = sep, + col.names = FALSE, + row.names = FALSE, + append = TRUE, na = "" ) @@ -75,19 +80,19 @@ toEuFormat_v1 <- function(header.info, inspections, observations) end_index = c(change_index - 1L, n_obs) # Loop through the inspections - for (i in seq_len(n)) { + for (i in seq_len(n_inspections)) { - kwb.utils::catIf(i %% 100 == 0, "i =", i, "\n") + kwb.utils::catIf(i %% 100 == 0L, "i =", i, "\n") writeLines(insp.header.line, tc) - write_table(inspections[i, ], tc, sep) + write_table(inspections[i, ]) writeLines(obs.header.line, tc) indices <- begin_index[i]:end_index[i] - write_table(observations[indices, ], tc, sep) + write_table(observations[indices, ]) - if (i < n) { + if (i < n_inspections) { writeLines("#Z", tc) } } @@ -144,8 +149,8 @@ toEuFormat_v2 <- function(inspection.data, mycsv, ...) inspections <- get_elements(inspection.data, "inspections") observations <- get_elements(inspection.data, "observations") - # Save the inspection numbers in inspno - inspection_numbers <- get_columns(observations, "inspno") + # Save the inspection numbers in inspnos + inspnos <- get_columns(observations, "inspno") # Remove the inspection numbers observations <- kwb.utils::removeColumns(observations, "inspno") @@ -154,7 +159,7 @@ toEuFormat_v2 <- function(inspection.data, mycsv, ...) elements <- c(sep = "separator", dec = "decimal", qchar = "quote") arguments <- get_elements(header_info, elements) - # Save the separator in its own variable for reusage + # Save the separator in its own variable sep <- arguments$sep # Helper function to create CSV lines @@ -169,31 +174,34 @@ toEuFormat_v2 <- function(inspection.data, mycsv, ...) offset <- length(out_lines) # Cumulative sizes (number of lines) of the C-blocks - c_sizes <- cumsum(unname(table(inspection_numbers))) + c_sizes <- cumsum(unname(table(inspnos))) # Number of C-blocks (= number of inspections) - n <- length(c_sizes) + n_inspections <- length(c_sizes) cat("ok.\n") kwb.utils::catAndRun(" Writing B-blocks (inspection data) ... ", { - b_indices <- offset + 1L + 4 * (seq_len(n) - 1L) + c(0L, c_sizes[-n]) - out_lines[b_indices] <- inspectionHeaderLine(names(inspections), sep) - out_lines[b_indices + 1L] <- to_csv(inspections) + b_at <- offset + 1L + + 4L * (seq_len(n_inspections) - 1L) + + c(0L, c_sizes[-n_inspections]) + + out_lines[b_at] <- inspectionHeaderLine(names(inspections), sep) + out_lines[b_at + 1L] <- to_csv(inspections) }) kwb.utils::catAndRun(" Writing C-blocks (observation data) ... ", { - out_lines[b_indices + 2L] <- observationHeaderLine(names(observations), sep) - z_indices <- b_indices[-1L] - 1L - skip_indices <- c(b_indices, b_indices + 1L, b_indices + 2L, z_indices) - n_rows <- c_sizes[n] + 4 * n - 1L + offset - c_body_indices <- setdiff(seq(offset + 1L, n_rows), skip_indices) - out_lines[c_body_indices] <- to_csv(observations) + out_lines[b_at + 2L] <- observationHeaderLine(names(observations), sep) + z_at <- b_at[-1L] - 1L + skip_indices <- c(b_at, b_at + 1L, b_at + 2L, z_at) + n_rows <- c_sizes[n_inspections] + 4 * n_inspections - 1L + offset + c_body_at <- setdiff(seq(offset + 1L, n_rows), skip_indices) + out_lines[c_body_at] <- to_csv(observations) }) - out_lines[z_indices] <- "#Z" + out_lines[z_at] <- "#Z" out_lines } From 9a3b19ee2378ce134927c17dc77f1f9881ec8b27 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 09:53:19 +0200 Subject: [PATCH 041/141] Remove old code that was commented out --- R/extractObservationData.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index 71987e8..9bec94e 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -31,8 +31,6 @@ extractObservationData <- function(euLines, headerInfo, header.info) stopifnot(length(text) == sum(blockLengths) + 1L) - #result <- read.table(text = text, sep = ";", header = TRUE) - result <- readObservationsFromCsvText( text = text, sep = header.info$separator, From 2b84e2fe3a858cc5cb76fdd9e74062fc25acefb0 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 10:20:02 +0200 Subject: [PATCH 042/141] Test writeEuCodedFile(), add/pass "dbg" arg --- R/toEuFormat.R | 64 +++++++++++-------- R/writeEuCodedFiles.R | 14 ++-- .../testthat/test-function-writeEuCodedFile.R | 64 ++++++------------- 3 files changed, 64 insertions(+), 78 deletions(-) diff --git a/R/toEuFormat.R b/R/toEuFormat.R index cb3327a..03cefb9 100644 --- a/R/toEuFormat.R +++ b/R/toEuFormat.R @@ -6,23 +6,25 @@ utils::globalVariables(c("buffer")) # see toEuFormat_v1 #' \code{\link{readEuCodedFile}} #' @param version version of implementation. One of \code{c(1, 2, 3)} #' @param \dots passed to \code{toEuFormat_v2} -toEuFormat <- function(inspection.data, version = 3L) +#' @param dbg whether or not to show debug messages +toEuFormat <- function(inspection.data, version = 3L, ..., dbg = TRUE) { if (version == 1L) { toEuFormat_v1( header.info = get_elements(inspection.data, "header.info"), inspections = get_elements(inspection.data, "inspections"), - observations = get_elements(inspection.data, "observations") + observations = get_elements(inspection.data, "observations"), + dbg = dbg ) } else if (version == 2L) { - toEuFormat_v2(inspection.data, mycsv = FALSE, ...) + toEuFormat_v2(inspection.data, mycsv = FALSE, ..., dbg = dbg) } else { - toEuFormat_v2(inspection.data, mycsv = TRUE, ...) + toEuFormat_v2(inspection.data, mycsv = TRUE, ..., dbg = dbg) } } @@ -38,8 +40,8 @@ toEuFormat <- function(inspection.data, version = 3L) #' by \code{\link{readEuCodedFile}} #' @param observations according to list element "observations" of list returned #' by \code{\link{readEuCodedFile}} -#' -toEuFormat_v1 <- function(header.info, inspections, observations) +#' @param dbg whether or not to show debug messages +toEuFormat_v1 <- function(header.info, inspections, observations, dbg = TRUE) { # Save the inspection numbers inspnos <- get_columns(observations, "inspno") @@ -139,8 +141,8 @@ observationHeaderLine <- function(header.fields, sep) #' @param mycsv logical. If TRUE "my" version of writing CSV is used (fast), #' otherwise CSV is written by means of write.table (slow) #' @param \dots further arguments passed to dataFrameContentToTextLines -#' -toEuFormat_v2 <- function(inspection.data, mycsv, ...) +#' @param dbg whether or not to show debug messages +toEuFormat_v2 <- function(inspection.data, mycsv, ..., dbg = TRUE) { #kwb.utils::assignPackageObjects("kwb.en13508.2") @@ -179,27 +181,33 @@ toEuFormat_v2 <- function(inspection.data, mycsv, ...) # Number of C-blocks (= number of inspections) n_inspections <- length(c_sizes) - cat("ok.\n") - - kwb.utils::catAndRun(" Writing B-blocks (inspection data) ... ", { - - b_at <- offset + 1L + - 4L * (seq_len(n_inspections) - 1L) + - c(0L, c_sizes[-n_inspections]) - - out_lines[b_at] <- inspectionHeaderLine(names(inspections), sep) - out_lines[b_at + 1L] <- to_csv(inspections) - }) + kwb.utils::catIf(dbg, "ok.\n") + + kwb.utils::catAndRun( + " Writing B-blocks (inspection data) ... ", + dbg = dbg, + expr = { + b_at <- offset + 1L + + 4L * (seq_len(n_inspections) - 1L) + + c(0L, c_sizes[-n_inspections]) + + out_lines[b_at] <- inspectionHeaderLine(names(inspections), sep) + out_lines[b_at + 1L] <- to_csv(inspections) + } + ) - kwb.utils::catAndRun(" Writing C-blocks (observation data) ... ", { - - out_lines[b_at + 2L] <- observationHeaderLine(names(observations), sep) - z_at <- b_at[-1L] - 1L - skip_indices <- c(b_at, b_at + 1L, b_at + 2L, z_at) - n_rows <- c_sizes[n_inspections] + 4 * n_inspections - 1L + offset - c_body_at <- setdiff(seq(offset + 1L, n_rows), skip_indices) - out_lines[c_body_at] <- to_csv(observations) - }) + kwb.utils::catAndRun( + " Writing C-blocks (observation data) ... ", + dbg = dbg, + expr = { + out_lines[b_at + 2L] <- observationHeaderLine(names(observations), sep) + z_at <- b_at[-1L] - 1L + skip_indices <- c(b_at, b_at + 1L, b_at + 2L, z_at) + n_rows <- c_sizes[n_inspections] + 4 * n_inspections - 1L + offset + c_body_at <- setdiff(seq(offset + 1L, n_rows), skip_indices) + out_lines[c_body_at] <- to_csv(observations) + } + ) out_lines[z_at] <- "#Z" diff --git a/R/writeEuCodedFiles.R b/R/writeEuCodedFiles.R index 81817b9..2d8728c 100644 --- a/R/writeEuCodedFiles.R +++ b/R/writeEuCodedFiles.R @@ -91,15 +91,19 @@ writeEuCodedFile <- function( { #kwb.utils::assignPackageObjects("kwb.en13508.2") - output.lines <- kwb.utils::catAndRun(dbg = dbg, "Formatting lines", { - toEuFormat(inspection.data, version) - }) + output.lines <- kwb.utils::catAndRun( + "Formatting lines", + dbg = dbg, + toEuFormat(inspection.data, version, dbg = dbg) + ) if (is.null(output.file)) { return(output.lines) } - kwb.utils::catAndRun(dbg = dbg, paste("Writing lines to", output.file), { + kwb.utils::catAndRun( + paste("Writing lines to", output.file), + dbg = dbg, writeLines(output.lines, output.file) - }) + ) } diff --git a/tests/testthat/test-function-writeEuCodedFile.R b/tests/testthat/test-function-writeEuCodedFile.R index 9cf6343..25232ae 100644 --- a/tests/testthat/test-function-writeEuCodedFile.R +++ b/tests/testthat/test-function-writeEuCodedFile.R @@ -1,49 +1,23 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - +#library(testthat) +#kwb.utils::assignPackageObjects("kwb.en13508.2") test_that("writeEuCodedFile() works", { + + f <- kwb.en13508.2:::writeEuCodedFile + + expect_error(f(dbg = FALSE)) + expect_error(f(list(), dbg = FALSE), "No such element") - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = 1, ... = 1) - # inspection.data is not a list but: -# num 1 - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = 1:2, ... = 1) - # inspection.data is not a list but: -# int [1:2] 1 2 - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = "a", ... = 1) - # inspection.data is not a list but: -# chr "a" - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = c("a", "b"), ... = 1) - # inspection.data is not a list but: -# chr [1:2] "a" "b" - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = TRUE, ... = 1) - # inspection.data is not a list but: -# logi TRUE - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = FALSE, ... = 1) - # inspection.data is not a list but: -# logi FALSE - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = as.POSIXct("2018-06-03 23:50:00"), ... = 1) - # inspection.data is not a list but: -# POSIXct[1:1], format: "2018-06-03 23:50:00" - ) - expect_error( - kwb.en13508.2:::writeEuCodedFile(inspection.data = list(key = c("a", "b"), value = 1:2), ... = 1) - # No such list elements: 'header.info' -# Available elements: 'key', 'value' - ) + file <- system.file("extdata/example_13508_2.txt", package = "kwb.en13508.2") + # Read example file + inspection.data <- readEuCodedFile(file, dbg = FALSE) + + # Write to temporary file + output.file <- tempfile() + f(inspection.data, output.file, dbg = FALSE) + + # Read tempoary file + inspection.data.2 <- readEuCodedFile(output.file, dbg = FALSE) + + expect_identical(inspection.data, inspection.data.2) }) - From 32d86e74b15ce8493af7e6cff6a579fe15b457c3 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 13:02:53 +0200 Subject: [PATCH 043/141] Add getExampleData(), getExampleFile() and extdataFile() --- R/getExampleData.R | 11 +++++++++++ R/plot.R | 5 ++--- man/plotObservations.Rd | 5 ++--- 3 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 R/getExampleData.R diff --git a/R/getExampleData.R b/R/getExampleData.R new file mode 100644 index 0000000..ddddff6 --- /dev/null +++ b/R/getExampleData.R @@ -0,0 +1,11 @@ +getExampleData <- function(dbg = FALSE) +{ + readEuCodedFile(input.file = getExampleFile(), dbg = dbg) +} + +getExampleFile <- function() +{ + extdataFile("example_13508_2.txt") +} + +extdataFile <- kwb.utils::createFunctionExtdataFile("kwb.en13508.2") diff --git a/R/plot.R b/R/plot.R index 5ef7015..49b5a19 100644 --- a/R/plot.R +++ b/R/plot.R @@ -18,9 +18,8 @@ #' devtools::install_github("guiastrennec/ggplus") #' } #' -#' # Load example data -#' file <- system.file("extdata/example_13508_2.txt", package = "kwb.en13508.2") -#' survey <- kwb.en13508.2::readEuCodedFile(file) +#' # Get example data that are contained in the package +#' survey <- kwb.en13508.2:::getExampleData() #' #' # Create one plot per inspection in "survey" #' kwb.en13508.2::plotObservations(survey, to_pdf = FALSE) diff --git a/man/plotObservations.Rd b/man/plotObservations.Rd index e54038f..e6f5032 100644 --- a/man/plotObservations.Rd +++ b/man/plotObservations.Rd @@ -26,9 +26,8 @@ install.packages("ggplot2") devtools::install_github("guiastrennec/ggplus") } -# Load example data -file <- system.file("extdata/example_13508_2.txt", package = "kwb.en13508.2") -survey <- kwb.en13508.2::readEuCodedFile(file) +# Get example data that are contained in the package +survey <- kwb.en13508.2:::getExampleData() # Create one plot per inspection in "survey" kwb.en13508.2::plotObservations(survey, to_pdf = FALSE) From 66a48c122522e9ea2b3441cd0c7991eef05673ec Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 13:03:41 +0200 Subject: [PATCH 044/141] Update Rd files --- man/euCodedFileHeader.Rd | 4 ++-- man/getHeaderLinesFromHeaderInfo.Rd | 2 +- man/toEuFormat.Rd | 21 +++++++++++++++++++++ man/toEuFormat_v1.Rd | 6 ++++-- man/toEuFormat_v2.Rd | 6 ++++-- man/writeEuCodedFile.Rd | 2 +- man/writeEuCodedFiles.Rd | 2 +- 7 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 man/toEuFormat.Rd diff --git a/man/euCodedFileHeader.Rd b/man/euCodedFileHeader.Rd index 3c68b83..a490409 100644 --- a/man/euCodedFileHeader.Rd +++ b/man/euCodedFileHeader.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/export.R +% Please edit documentation in R/euCodedFileHeader.R \name{euCodedFileHeader} \alias{euCodedFileHeader} \title{Generate List With EU Header Information} @@ -10,7 +10,7 @@ euCodedFileHeader( quote = "\\"", encoding = "ISO-8859-1", language = "en", - year = 2010 + year = 2010L ) } \arguments{ diff --git a/man/getHeaderLinesFromHeaderInfo.Rd b/man/getHeaderLinesFromHeaderInfo.Rd index 986be19..9ea2a60 100644 --- a/man/getHeaderLinesFromHeaderInfo.Rd +++ b/man/getHeaderLinesFromHeaderInfo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/export.R +% Please edit documentation in R/toEuFormat.R \name{getHeaderLinesFromHeaderInfo} \alias{getHeaderLinesFromHeaderInfo} \title{Get Header Lines From Header Info} diff --git a/man/toEuFormat.Rd b/man/toEuFormat.Rd new file mode 100644 index 0000000..2b5c321 --- /dev/null +++ b/man/toEuFormat.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/toEuFormat.R +\name{toEuFormat} +\alias{toEuFormat} +\title{Generate Lines in EU Export Format} +\usage{ +toEuFormat(inspection.data, version = 3L, ..., dbg = TRUE) +} +\arguments{ +\item{inspection.data}{inspection data as retrieved by e.g. +\code{\link{readEuCodedFile}}} + +\item{version}{version of implementation. One of \code{c(1, 2, 3)}} + +\item{\dots}{passed to \code{toEuFormat_v2}} + +\item{dbg}{whether or not to show debug messages} +} +\description{ +Generate Lines in EU Export Format +} diff --git a/man/toEuFormat_v1.Rd b/man/toEuFormat_v1.Rd index b668e4a..79439cf 100644 --- a/man/toEuFormat_v1.Rd +++ b/man/toEuFormat_v1.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/export.R +% Please edit documentation in R/toEuFormat.R \name{toEuFormat_v1} \alias{toEuFormat_v1} \title{Generate Lines in EU Export Format (v1)} \usage{ -toEuFormat_v1(header.info, inspections, observations) +toEuFormat_v1(header.info, inspections, observations, dbg = TRUE) } \arguments{ \item{header.info}{according to list element "header.info" of list returned @@ -15,6 +15,8 @@ by \code{\link{readEuCodedFile}}} \item{observations}{according to list element "observations" of list returned by \code{\link{readEuCodedFile}}} + +\item{dbg}{whether or not to show debug messages} } \description{ Generate lines in EU export format (version 1: slow) diff --git a/man/toEuFormat_v2.Rd b/man/toEuFormat_v2.Rd index 1db4e09..77cae18 100644 --- a/man/toEuFormat_v2.Rd +++ b/man/toEuFormat_v2.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/export.R +% Please edit documentation in R/toEuFormat.R \name{toEuFormat_v2} \alias{toEuFormat_v2} \title{Generate Lines in EU Export Format (v2)} \usage{ -toEuFormat_v2(inspection.data, mycsv, ...) +toEuFormat_v2(inspection.data, mycsv, ..., dbg = TRUE) } \arguments{ \item{inspection.data}{inspection data as retrieved by e.g. @@ -14,6 +14,8 @@ toEuFormat_v2(inspection.data, mycsv, ...) otherwise CSV is written by means of write.table (slow)} \item{\dots}{further arguments passed to dataFrameContentToTextLines} + +\item{dbg}{whether or not to show debug messages} } \description{ Generate lines in EU export format (version 2: faster than version 1) diff --git a/man/writeEuCodedFile.Rd b/man/writeEuCodedFile.Rd index 6ac193f..3ca3901 100644 --- a/man/writeEuCodedFile.Rd +++ b/man/writeEuCodedFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/export.R +% Please edit documentation in R/writeEuCodedFiles.R \name{writeEuCodedFile} \alias{writeEuCodedFile} \title{Write Inspection Data to File in EU Format} diff --git a/man/writeEuCodedFiles.Rd b/man/writeEuCodedFiles.Rd index 3199dd6..cdb4f0a 100644 --- a/man/writeEuCodedFiles.Rd +++ b/man/writeEuCodedFiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/export.R +% Please edit documentation in R/writeEuCodedFiles.R \name{writeEuCodedFiles} \alias{writeEuCodedFiles} \title{Write Inspection Data to Files in EU Format} From 05174364707fd0aa1f772c5ac64354dfc7fe5770 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 13:04:24 +0200 Subject: [PATCH 045/141] Make all tests pass, silently. --- .../test-function-cleanDuplicatedColumns.R | 21 ++-- .../test-function-euCodedFileHeader.R | 7 +- .../test-function-findKeyAndExtractValue.R | 82 +++----------- tests/testthat/test-function-getCodes.R | 11 +- ...st-function-getHeaderInfoFromHeaderLines.R | 25 ++--- ...-function-getHeaderLinesFromEuCodedLines.R | 17 +-- .../test-function-getInspectionHeaderInfo.R | 20 ++-- .../test-function-getInspectionsFromEuLines.R | 78 ++------------ ...test-function-getObservationsFromEuLines.R | 25 +++-- .../test-function-inspectionDataFieldCodes.R | 12 +-- .../test-function-inspectionHeaderLine.R | 28 +---- .../test-function-observationHeaderLine.R | 33 +----- .../test-function-readAndMergeEuCodedFiles.R | 21 ++-- .../testthat/test-function-readEuCodedFile.R | 28 ++--- .../testthat/test-function-readEuCodedFiles.R | 17 ++- .../testthat/test-function-readPackageFile.R | 101 ++---------------- .../test-function-removeDuplicatedColumns.R | 17 +-- .../testthat/test-function-removeEmptyLines.R | 21 ++-- .../testthat/test-function-writeEuCodedFile.R | 14 ++- 19 files changed, 130 insertions(+), 448 deletions(-) diff --git a/tests/testthat/test-function-cleanDuplicatedColumns.R b/tests/testthat/test-function-cleanDuplicatedColumns.R index 074b16d..c3bcf47 100644 --- a/tests/testthat/test-function-cleanDuplicatedColumns.R +++ b/tests/testthat/test-function-cleanDuplicatedColumns.R @@ -1,17 +1,12 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("cleanDuplicatedColumns() works", { - kwb.en13508.2:::cleanDuplicatedColumns(x = 1) - kwb.en13508.2:::cleanDuplicatedColumns(x = 1:2) - kwb.en13508.2:::cleanDuplicatedColumns(x = "a") - kwb.en13508.2:::cleanDuplicatedColumns(x = c("a", "b")) - kwb.en13508.2:::cleanDuplicatedColumns(x = TRUE) - kwb.en13508.2:::cleanDuplicatedColumns(x = FALSE) - kwb.en13508.2:::cleanDuplicatedColumns(x = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::cleanDuplicatedColumns(x = list(key = c("a", "b"), value = 1:2)) + f <- kwb.en13508.2:::cleanDuplicatedColumns + + expect_error(f()) + x <- data.frame(a.x = 1:2, a.y = 1:2, id = 1:2) + + expect_output(result <- f(x)) + + expect_identical(result, stats::setNames(x[, -2L, ], c("a", "id"))) }) - diff --git a/tests/testthat/test-function-euCodedFileHeader.R b/tests/testthat/test-function-euCodedFileHeader.R index 04e3a46..69bfa56 100644 --- a/tests/testthat/test-function-euCodedFileHeader.R +++ b/tests/testthat/test-function-euCodedFileHeader.R @@ -1,10 +1,5 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("euCodedFileHeader() works", { - kwb.en13508.2:::euCodedFileHeader() + expect_type(kwb.en13508.2:::euCodedFileHeader(), "list") }) - diff --git a/tests/testthat/test-function-findKeyAndExtractValue.R b/tests/testthat/test-function-findKeyAndExtractValue.R index 0ee6454..25d33e3 100644 --- a/tests/testthat/test-function-findKeyAndExtractValue.R +++ b/tests/testthat/test-function-findKeyAndExtractValue.R @@ -1,73 +1,15 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("findKeyAndExtractValue() works", { - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1, key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = 1:2, key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = "a", key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = c("a", "b"), key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = TRUE, key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = FALSE, key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = as.POSIXct("2018-06-03 23:50:00"), key = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = 1) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = 1:2) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = "a") - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = c("a", "b")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = TRUE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = FALSE) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::findKeyAndExtractValue(keyvalues = list(key = c("a", "b"), value = 1:2), key = list(key = c("a", "b"), value = 1:2)) - + f <- kwb.en13508.2:::findKeyAndExtractValue + + expect_error(f()) + + expect_warning(suppressMessages( + result <- f(c("A1", "B", "C2"), "A") + )) + + expect_identical(result, NA) + + expect_identical(f(c("#ABC=1", "B", "C2"), "A"), "1") + expect_identical(f(c("#ABC=1", "#B=hallo", "C2"), "B"), "hallo") }) - diff --git a/tests/testthat/test-function-getCodes.R b/tests/testthat/test-function-getCodes.R index 6b42b76..518b288 100644 --- a/tests/testthat/test-function-getCodes.R +++ b/tests/testthat/test-function-getCodes.R @@ -1,10 +1,11 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# +#library(testthat) test_that("getCodes() works", { - kwb.en13508.2:::getCodes() + f <- kwb.en13508.2:::getCodes + + result <- f() + expect_true(is.data.frame(result)) + expect_true(all(c("Table", "Code", "Name") %in% names(result))) }) - diff --git a/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R b/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R index 7ae4cd1..1ac03e0 100644 --- a/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R +++ b/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R @@ -1,17 +1,14 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("getHeaderInfoFromHeaderLines() works", { - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = 1) - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = 1:2) - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = "a") - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = c("a", "b")) - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = TRUE) - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = FALSE) - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getHeaderInfoFromHeaderLines(header.lines = list(key = c("a", "b"), value = 1:2)) - + f <- kwb.en13508.2:::getHeaderInfoFromHeaderLines + + expect_error(f()) + + expect_warning(suppressMessages(f(c("a", "b")))) + + header.lines <- c("#A1=a", "#A2=b", "#A3=c", "#A4=d", "#A5=e", "#A6=f") + + result <- f(header.lines) + + expect_identical(unname(unlist(result)), letters[1:6]) }) - diff --git a/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R b/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R index 0aa6621..2ca681d 100644 --- a/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R +++ b/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R @@ -1,17 +1,8 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("getHeaderLinesFromEuCodedLines() works", { - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = 1) - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = 1:2) - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = "a") - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = c("a", "b")) - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = TRUE) - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = FALSE) - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getHeaderLinesFromEuCodedLines(lines = list(key = c("a", "b"), value = 1:2)) + f <- kwb.en13508.2:::getHeaderLinesFromEuCodedLines + + expect_error(f()) + expect_identical(f(c("#A123", "#B", "#C")), "#A123") }) - diff --git a/tests/testthat/test-function-getInspectionHeaderInfo.R b/tests/testthat/test-function-getInspectionHeaderInfo.R index a72a4fa..513b241 100644 --- a/tests/testthat/test-function-getInspectionHeaderInfo.R +++ b/tests/testthat/test-function-getInspectionHeaderInfo.R @@ -1,15 +1,13 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("getInspectionHeaderInfo() works", { - kwb.en13508.2:::getInspectionHeaderInfo(eu_lines = "a") - kwb.en13508.2:::getInspectionHeaderInfo(eu_lines = c("a", "b")) - # expect_error( - # kwb.en13508.2:::getInspectionHeaderInfo(eu_lines = 1) - # # invalid 'text' argument - # ) + f <- kwb.en13508.2:::getInspectionHeaderInfo + + expect_error(f()) + + lines <- c("#X", "#B01=a", "#B02=b", "#Y", "#Z") + expect_equal(f(lines), list( + a = list(line = 1L, rows = 2L), + b = list(line = 2L, rows = 3L) + )) }) - diff --git a/tests/testthat/test-function-getInspectionsFromEuLines.R b/tests/testthat/test-function-getInspectionsFromEuLines.R index 8478262..1cfc3d6 100644 --- a/tests/testthat/test-function-getInspectionsFromEuLines.R +++ b/tests/testthat/test-function-getInspectionsFromEuLines.R @@ -1,73 +1,13 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("getInspectionsFromEuLines() works", { - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1, header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = 1:2, header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = "a", header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = c("a", "b"), header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = TRUE, header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = FALSE, header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = as.POSIXct("2018-06-03 23:50:00"), header.info = list(key = c("a", "b"), value = 1:2)) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = 1) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = 1:2) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = "a") - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = c("a", "b")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = TRUE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = FALSE) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::getInspectionsFromEuLines(eu_lines = list(key = c("a", "b"), value = 1:2), header.info = list(key = c("a", "b"), value = 1:2)) + f <- kwb.en13508.2:::getInspectionsFromEuLines + + expect_error(f()) + eu_lines <- c( + "#A1=x", + "#A2=y" + ) + + expect_null(f(eu_lines)) }) - diff --git a/tests/testthat/test-function-getObservationsFromEuLines.R b/tests/testthat/test-function-getObservationsFromEuLines.R index 519d146..c1bfa9b 100644 --- a/tests/testthat/test-function-getObservationsFromEuLines.R +++ b/tests/testthat/test-function-getObservationsFromEuLines.R @@ -1,17 +1,16 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - +#kwb.utils::assignPackageObjects("kwb.en13508.2") test_that("getObservationsFromEuLines() works", { - expect_error( - kwb.en13508.2:::getObservationsFromEuLines(eu_lines = 1, header.info = 1) - # non-character argument - ) - expect_error( - kwb.en13508.2:::getObservationsFromEuLines(eu_lines = "a", header.info = 1) - # kwb.utils::allAreEqual(c_headers) is not TRUE + f <- kwb.en13508.2:::getObservationsFromEuLines + + expect_error(f()) + + eu_lines <- c( + "#C=A;B", + "1;2" ) - + + header.info <- euCodedFileHeader() + + expect_error(capture.output(f(eu_lines, header.info))) }) - diff --git a/tests/testthat/test-function-inspectionDataFieldCodes.R b/tests/testthat/test-function-inspectionDataFieldCodes.R index 22119cb..dd0e7c4 100644 --- a/tests/testthat/test-function-inspectionDataFieldCodes.R +++ b/tests/testthat/test-function-inspectionDataFieldCodes.R @@ -1,10 +1,10 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("inspectionDataFieldCodes() works", { - kwb.en13508.2:::inspectionDataFieldCodes() + f <- kwb.en13508.2:::inspectionDataFieldCodes + result <- f() + + expect_type(result, "list") + expect_true(kwb.utils::allAreIdentical(lapply(result, names))) + expect_identical(names(result[[1L]]), c("class", "meaning")) }) - diff --git a/tests/testthat/test-function-inspectionHeaderLine.R b/tests/testthat/test-function-inspectionHeaderLine.R index 122306e..7f925b2 100644 --- a/tests/testthat/test-function-inspectionHeaderLine.R +++ b/tests/testthat/test-function-inspectionHeaderLine.R @@ -1,29 +1,7 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("inspectionHeaderLine() works", { - kwb.en13508.2:::inspectionHeaderLine(header.fields = 1, separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = 1, separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = 1:2, separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = 1:2, separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = "a", separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = "a", separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = c("a", "b"), separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = c("a", "b"), separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = TRUE, separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = TRUE, separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = FALSE, separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = FALSE, separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = as.POSIXct("2018-06-03 23:50:00"), separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = as.POSIXct("2018-06-03 23:50:00"), separator = c("a", "b")) - kwb.en13508.2:::inspectionHeaderLine(header.fields = list(key = c("a", "b"), value = 1:2), separator = "a") - kwb.en13508.2:::inspectionHeaderLine(header.fields = list(key = c("a", "b"), value = 1:2), separator = c("a", "b")) - expect_error( - kwb.en13508.2:::inspectionHeaderLine(header.fields = 1, separator = 1) - # invalid 'collapse' argument - ) + f <- kwb.en13508.2:::inspectionHeaderLine + + expect_error(f()) }) - diff --git a/tests/testthat/test-function-observationHeaderLine.R b/tests/testthat/test-function-observationHeaderLine.R index 7ebcb06..b1fda23 100644 --- a/tests/testthat/test-function-observationHeaderLine.R +++ b/tests/testthat/test-function-observationHeaderLine.R @@ -1,31 +1,8 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("observationHeaderLine() works", { - kwb.en13508.2:::observationHeaderLine(header.fields = 1, separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = 1, separator = c("a", "b")) - kwb.en13508.2:::observationHeaderLine(header.fields = 1:2, separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = 1:2, separator = c("a", "b")) - kwb.en13508.2:::observationHeaderLine(header.fields = "a", separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = "a", separator = c("a", "b")) - kwb.en13508.2:::observationHeaderLine(header.fields = c("a", "b"), separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = c("a", "b"), separator = c("a", "b")) - kwb.en13508.2:::observationHeaderLine(header.fields = TRUE, separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = TRUE, separator = c("a", "b")) - kwb.en13508.2:::observationHeaderLine(header.fields = FALSE, separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = FALSE, separator = c("a", "b")) - kwb.en13508.2:::observationHeaderLine(header.fields = list(key = c("a", "b"), value = 1:2), separator = "a") - kwb.en13508.2:::observationHeaderLine(header.fields = list(key = c("a", "b"), value = 1:2), separator = c("a", "b")) - expect_error( - kwb.en13508.2:::observationHeaderLine(header.fields = 1, separator = 1) - # invalid 'collapse' argument - ) - expect_error( - kwb.en13508.2:::observationHeaderLine(header.fields = as.POSIXct("2018-06-03 23:50:00"), separator = 1) - # character string is not in a standard unambiguous format - ) - + f <- kwb.en13508.2:::observationHeaderLine + + expect_error(f()) + + expect_identical(f(c("a", "b", "c"), sep = ","), c("#C=a,b,c")) }) - diff --git a/tests/testthat/test-function-readAndMergeEuCodedFiles.R b/tests/testthat/test-function-readAndMergeEuCodedFiles.R index d407e5e..52a05f0 100644 --- a/tests/testthat/test-function-readAndMergeEuCodedFiles.R +++ b/tests/testthat/test-function-readAndMergeEuCodedFiles.R @@ -1,17 +1,10 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("readAndMergeEuCodedFiles() works", { - expect_error( - kwb.en13508.2:::readAndMergeEuCodedFiles(input.files = 1) - # a character vector argument expected - ) - expect_error( - kwb.en13508.2:::readAndMergeEuCodedFiles(input.files = "a") - # subscript out of bounds - ) - + f <- kwb.en13508.2:::readAndMergeEuCodedFiles + + expect_error(f()) + + file <- getExampleFile() + + result <- f(c(file, file)) }) - diff --git a/tests/testthat/test-function-readEuCodedFile.R b/tests/testthat/test-function-readEuCodedFile.R index ad90b79..e593e29 100644 --- a/tests/testthat/test-function-readEuCodedFile.R +++ b/tests/testthat/test-function-readEuCodedFile.R @@ -1,25 +1,13 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# +#kwb.utils::assignPackageObjects("kwb.en13508.2") test_that("readEuCodedFile() works", { - expect_error( - kwb.en13508.2:::readEuCodedFile(input.file = 1) - # 'con' is not a connection - ) - expect_error( - kwb.en13508.2:::readEuCodedFile(input.file = "a") - # cannot open the connection - ) - expect_error( - kwb.en13508.2:::readEuCodedFile(input.file = c("a", "b")) - # invalid 'description' argument - ) - expect_error( - kwb.en13508.2:::readEuCodedFile(input.file = list(key = c("a", "b"), value = 1:2)) - # argument 3 (type 'list') cannot be handled by 'cat' - ) + f <- kwb.en13508.2:::readEuCodedFile + + expect_error(f()) + result <- f(getExampleFile(), dbg = FALSE) + + expect_type(result, "list") + expect_identical(names(result), c("header.info", "inspections", "observations")) }) - diff --git a/tests/testthat/test-function-readEuCodedFiles.R b/tests/testthat/test-function-readEuCodedFiles.R index 4647540..b624355 100644 --- a/tests/testthat/test-function-readEuCodedFiles.R +++ b/tests/testthat/test-function-readEuCodedFiles.R @@ -1,13 +1,8 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("readEuCodedFiles() works", { - - expect_error( - kwb.en13508.2:::readEuCodedFiles(input.files = 1) - # a character vector argument expected - ) - + + f <- kwb.en13508.2:::readEuCodedFiles + + expect_error(f(dbg = FALSE)) + + }) - diff --git a/tests/testthat/test-function-readPackageFile.R b/tests/testthat/test-function-readPackageFile.R index f297fa1..83dc377 100644 --- a/tests/testthat/test-function-readPackageFile.R +++ b/tests/testthat/test-function-readPackageFile.R @@ -1,97 +1,10 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("readPackageFile() works", { - expect_error( - kwb.en13508.2:::readPackageFile(file = 1, ... = 1) - # No such file: '1' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = 1:2, ... = 1) - # No such file: '1' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes.csv' -# 'eucodes_de.csv' -# 'eucodes_de.csv'No such file: '2' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes.csv' -# 'eucodes_de.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = "a", ... = 1) - # No such file: 'a' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = c("a", "b"), ... = 1) - # No such file: 'a' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes.csv' -# 'eucodes_de.csv' -# 'eucodes_de.csv'No such file: 'b' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes.csv' -# 'eucodes_de.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = TRUE, ... = 1) - # No such file: 'TRUE' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = FALSE, ... = 1) - # No such file: 'FALSE' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = as.POSIXct("2018-06-03 23:50:00"), ... = 1) - # No such file: '2018-06-03 23:50:00' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes_de.csv' - ) - expect_error( - kwb.en13508.2:::readPackageFile(file = list(key = c("a", "b"), value = 1:2), ... = 1) - # No such file: 'c("a", "b")' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes.csv' -# 'eucodes_de.csv' -# 'eucodes_de.csv'No such file: '1:2' in -# '/home/hauke/R/i686-pc-linux-gnu-library/3.3/kwb.en13508.2/extdata'. -# Available files: -# 'eucodes.csv' -# 'eucodes.csv' -# 'eucodes_de.csv' -# 'eucodes_de.csv' - ) - + f <- kwb.en13508.2:::readPackageFile + + expect_error(f()) + expect_error(f("no-such-file.txt")) + + result <- f("eucodes.csv") + expect_true(is.data.frame(result)) }) - diff --git a/tests/testthat/test-function-removeDuplicatedColumns.R b/tests/testthat/test-function-removeDuplicatedColumns.R index a3681e4..26f27b6 100644 --- a/tests/testthat/test-function-removeDuplicatedColumns.R +++ b/tests/testthat/test-function-removeDuplicatedColumns.R @@ -1,17 +1,6 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("removeDuplicatedColumns() works", { - kwb.en13508.2:::removeDuplicatedColumns(x = 1) - kwb.en13508.2:::removeDuplicatedColumns(x = 1:2) - kwb.en13508.2:::removeDuplicatedColumns(x = "a") - kwb.en13508.2:::removeDuplicatedColumns(x = c("a", "b")) - kwb.en13508.2:::removeDuplicatedColumns(x = TRUE) - kwb.en13508.2:::removeDuplicatedColumns(x = FALSE) - kwb.en13508.2:::removeDuplicatedColumns(x = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::removeDuplicatedColumns(x = list(key = c("a", "b"), value = 1:2)) - + f <- kwb.en13508.2:::removeDuplicatedColumns + + expect_error(f()) }) - diff --git a/tests/testthat/test-function-removeEmptyLines.R b/tests/testthat/test-function-removeEmptyLines.R index d69dce4..fb92dce 100644 --- a/tests/testthat/test-function-removeEmptyLines.R +++ b/tests/testthat/test-function-removeEmptyLines.R @@ -1,17 +1,10 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("removeEmptyLines() works", { - kwb.en13508.2:::removeEmptyLines(x = 1) - kwb.en13508.2:::removeEmptyLines(x = 1:2) - kwb.en13508.2:::removeEmptyLines(x = "a") - kwb.en13508.2:::removeEmptyLines(x = c("a", "b")) - kwb.en13508.2:::removeEmptyLines(x = TRUE) - kwb.en13508.2:::removeEmptyLines(x = FALSE) - kwb.en13508.2:::removeEmptyLines(x = as.POSIXct("2018-06-03 23:50:00")) - kwb.en13508.2:::removeEmptyLines(x = list(key = c("a", "b"), value = 1:2)) - + f <- kwb.en13508.2:::removeEmptyLines + + expect_error(f()) + + expect_output(result <- f(c("a", "", "b"))) + + expect_identical(result, c("a", "b")) }) - diff --git a/tests/testthat/test-function-writeEuCodedFile.R b/tests/testthat/test-function-writeEuCodedFile.R index 25232ae..cee9dfb 100644 --- a/tests/testthat/test-function-writeEuCodedFile.R +++ b/tests/testthat/test-function-writeEuCodedFile.R @@ -7,17 +7,15 @@ test_that("writeEuCodedFile() works", { expect_error(f(dbg = FALSE)) expect_error(f(list(), dbg = FALSE), "No such element") - file <- system.file("extdata/example_13508_2.txt", package = "kwb.en13508.2") - - # Read example file - inspection.data <- readEuCodedFile(file, dbg = FALSE) + data.1 <- getExampleData() - # Write to temporary file + # Write example data to temporary file output.file <- tempfile() - f(inspection.data, output.file, dbg = FALSE) + + f(data.1, output.file, dbg = FALSE) # Read tempoary file - inspection.data.2 <- readEuCodedFile(output.file, dbg = FALSE) + data.2 <- readEuCodedFile(output.file, dbg = FALSE) - expect_identical(inspection.data, inspection.data.2) + expect_identical(data.1, data.2) }) From d68b1c1add9477dae96ae0912447a92e658de5f8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 14:59:03 +0200 Subject: [PATCH 046/141] Rename columns via new arg "short.names" --- R/readEuCodedFiles.R | 21 +++++++++++++++++-- man/readEuCodedFile.Rd | 6 ++++++ .../testthat/test-function-readEuCodedFile.R | 13 +++++++++--- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/R/readEuCodedFiles.R b/R/readEuCodedFiles.R index 248c16d..fe30c60 100644 --- a/R/readEuCodedFiles.R +++ b/R/readEuCodedFiles.R @@ -86,6 +86,10 @@ readEuCodedFiles <- function( #' @param encoding default: "latin1" #' @param read.inspections if \code{TRUE}, general inspection data (in #' #B-blocks) are read, otherwise skipped (use if function fails) +#' @param short.names if \code{TRUE} (default), the short names (codes) as defined +#' in EN13508.2 are used as column names, otherwise more meaningful names are used. +#' See columns\code{Code} and \code{Name}, respectively, in the data frame returned by +#' \code{getCodes()}. #' @param simple.algorithm if \code{TRUE} (default), a simple (and faster) #' algorithm is used to extract the general information about the inspections #' from the #B-headers. It requires that all #B-headers have the same number @@ -101,7 +105,7 @@ readEuCodedFiles <- function( #' @export #' readEuCodedFile <- function( - input.file, encoding = "latin1", read.inspections = TRUE, + input.file, encoding = "latin1", read.inspections = TRUE, short.names = TRUE, simple.algorithm = TRUE, warn = TRUE, dbg = TRUE ) { @@ -180,7 +184,12 @@ readEuCodedFile <- function( ) kwb.utils::.logok(dbg) - + + if (!short.names) { + inspections <- renameColumnsToMeaningful(inspections) + observations <- renameColumnsToMeaningful(observations) + } + list( header.info = header.info, inspections = inspections, @@ -188,6 +197,14 @@ readEuCodedFile <- function( ) } +# renameColumnsToMeaningful ---------------------------------------------------- +renameColumnsToMeaningful <- function(x) +{ + codeInfo <- kwb.utils::selectColumns(getCodes(), c("Code", "Name")) + renamings <- kwb.utils::toLookupList(data = codeInfo) + kwb.utils::renameColumns(x, renamings) +} + # getHeaderLinesFromEuCodedLines ----------------------------------------------- getHeaderLinesFromEuCodedLines <- function(lines) { diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 52a7bca..e1bfbbd 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -8,6 +8,7 @@ readEuCodedFile( input.file, encoding = "latin1", read.inspections = TRUE, + short.names = TRUE, simple.algorithm = TRUE, warn = TRUE, dbg = TRUE @@ -22,6 +23,11 @@ in the format described in DIN EN 13508-2} \item{read.inspections}{if \code{TRUE}, general inspection data (in #B-blocks) are read, otherwise skipped (use if function fails)} +\item{short.names}{if \code{TRUE} (default), the short names (codes) as defined +in EN13508.2 are used as column names, otherwise more meaningful names are used. +See columns\code{Code} and \code{Name}, respectively, in the data frame returned by +\code{getCodes()}.} + \item{simple.algorithm}{if \code{TRUE} (default), a simple (and faster) algorithm is used to extract the general information about the inspections from the #B-headers. It requires that all #B-headers have the same number diff --git a/tests/testthat/test-function-readEuCodedFile.R b/tests/testthat/test-function-readEuCodedFile.R index e593e29..b6ca8c5 100644 --- a/tests/testthat/test-function-readEuCodedFile.R +++ b/tests/testthat/test-function-readEuCodedFile.R @@ -6,8 +6,15 @@ test_that("readEuCodedFile() works", { expect_error(f()) - result <- f(getExampleFile(), dbg = FALSE) + result.1 <- f(getExampleFile(), dbg = FALSE) + result.2 <- f(getExampleFile(), dbg = FALSE, short.names = FALSE) - expect_type(result, "list") - expect_identical(names(result), c("header.info", "inspections", "observations")) + expect_type(result.1, "list") + expect_identical(names(result.1), c("header.info", "inspections", "observations")) + + expect_true(all(c("AAA", "AAB", "AAD") %in% names(result.1$inspections))) + expect_true(all(c("A", "B", "C") %in% names(result.1$observations))) + + expect_true(all(c("Node1Ref", "Node2Ref") %in% names(result.2$inspections))) + expect_true(all(c("MainCode", "Char1", "Char2") %in% names(result.2$observations))) }) From 7cd112f65d082b93b809f78fd1c27101614b5693 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 16:41:59 +0200 Subject: [PATCH 047/141] Optionally add globally unique inspection ID --- DESCRIPTION | 3 ++- R/createInspectionId.R | 24 ++++++++++++++++++++++++ R/readAndMergeEuCodedFiles.R | 32 +++++++++++++++++++++++--------- R/setGlobalInspectionID.R | 33 +++++++++++++++++++++++++++++++++ man/readAndMergeEuCodedFiles.Rd | 6 +++++- man/setGlobalInspectionID.Rd | 24 ++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 11 deletions(-) create mode 100644 R/createInspectionId.R create mode 100644 R/setGlobalInspectionID.R create mode 100644 man/setGlobalInspectionID.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6991432..8471942 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,8 @@ Description: functions to read and write CCTV inspections License: MIT + file LICENSE URL: https://github.com/kwb-r/kwb.en13508.2 BugReports: https://github.com/kwb-r/kwb.en13508.2/issues -Imports: +Imports: + digest, kwb.utils Suggests: testthat, diff --git a/R/createInspectionId.R b/R/createInspectionId.R new file mode 100644 index 0000000..643da7d --- /dev/null +++ b/R/createInspectionId.R @@ -0,0 +1,24 @@ +# createInspectionId ----------------------------------------------------------- +createInspectionId <- function( + inspections, + id.columns = c("Project", "InspDate", "InspTime", "Node1Ref", "Node2Ref"), + n.chars = 8L +) +{ + duplicateInfo <- kwb.utils::findPartialDuplicates(inspections, id.columns) + + if (! is.null(duplicateInfo)) { + print(duplicateInfo) + stop("There are duplicates in the key columns (see above)!") + } + + keyStrings <- kwb.utils::pasteColumns(inspections, id.columns, "|") + + stopifnot(!anyDuplicated(keyStrings)) + + ids <- kwb.utils::left(unlist(lapply(keyStrings, digest::digest)), n.chars) + + stopifnot(!anyDuplicated(ids)) + + ids +} diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 41598e5..f041a51 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -9,20 +9,34 @@ #' in the format described in DIN EN 13508-2 #' @param dbg if \code{TRUE} debug messages are shown #' @param \dots further arguments passed to \code{\link{readEuCodedFiles}} +#' @param add.inspid if \code{TRUE} (the default is \code{FALSE}) a globally +#' unique inspection ID (inspid) is added to the data frames in elements +#' "inspections" and "observations" of the returned list. #' @export #' -readAndMergeEuCodedFiles <- function(input.files, dbg = FALSE, ...) +readAndMergeEuCodedFiles <- function( + input.files, + dbg = FALSE, + ..., + add.inspid = FALSE +) { # by setting simple.algorithm = FALSE we get unique column names, e.g. "ADE" - # and "ADE.1" - mergeInspectionData( - readEuCodedFiles( - input.files = input.files, - simple.algorithm = FALSE, - dbg = dbg, - ... - ) + # and "ADE.1" + inspection.data.list <- readEuCodedFiles( + input.files = input.files, + simple.algorithm = FALSE, + dbg = dbg, + ... ) + + inspection.data <- mergeInspectionData(inspection.data.list) + + if (!add.inspid) { + return(inspection.data) + } + + setGlobalInspectionID(inspection.data) } # mergeInspectionData ---------------------------------------------------------- diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R new file mode 100644 index 0000000..7692830 --- /dev/null +++ b/R/setGlobalInspectionID.R @@ -0,0 +1,33 @@ +#' Set Global Inspection ID +#' +#' Convert inspections numbers (inspno) 1,2,3,... to globally unique identifiers +#' (inspid), such as "e4b48d86" +#' +#' @param inspection.data list with elements \code{header.info}, +#' \code{inspections}, \code{observations} +#' @param project name of project to which the data are related, such as: +#' "Lausanne" +#' @return list with the same elements as in \code{inspection.data} but with +#' columns \code{inspid} being added to the data frames "inspections" and +#' "observations" +setGlobalInspectionID <- function(inspection.data, project) +{ + header.info <- kwb.utils::selectElements(inspection.data, "header.info") + inspections <- kwb.utils::selectElements(inspection.data, "inspections") + observations <- kwb.utils::selectElements(inspection.data, "observations") + + inspections <- kwb.utils::removeEmptyColumns(inspections) + inspections[["Project"]] <- project + inspections[["inspid"]] <- createInspectionId(inspections) + + observations <- kwb.utils::removeEmptyColumns(observations) + i <- kwb.utils::selectColumns(observations, "inspno") + observations[["inspid"]] <- kwb.utils::selectColumns(inspections, "inspid")[i] + observations <- kwb.utils::removeColumns(observations, "inspno") + + list( + header.info = header.info, + inspections = kwb.utils::moveColumnsToFront(inspections, "inspid"), + observations = kwb.utils::moveColumnsToFront(observations, "inspid") + ) +} diff --git a/man/readAndMergeEuCodedFiles.Rd b/man/readAndMergeEuCodedFiles.Rd index 9f17ca3..007163a 100644 --- a/man/readAndMergeEuCodedFiles.Rd +++ b/man/readAndMergeEuCodedFiles.Rd @@ -4,7 +4,7 @@ \alias{readAndMergeEuCodedFiles} \title{Read and Merge Files in EN13508.2-Format} \usage{ -readAndMergeEuCodedFiles(input.files, dbg = FALSE, ...) +readAndMergeEuCodedFiles(input.files, dbg = FALSE, ..., add.inspid = FALSE) } \arguments{ \item{input.files}{full path to text file containing CCTV inspection results @@ -13,6 +13,10 @@ in the format described in DIN EN 13508-2} \item{dbg}{if \code{TRUE} debug messages are shown} \item{\dots}{further arguments passed to \code{\link{readEuCodedFiles}}} + +\item{add.inspid}{if \code{TRUE} (the default is \code{FALSE}) a globally +unique inspection ID (inspid) is added to the data frames in elements +"inspections" and "observations" of the returned list.} } \description{ Read files in EN13508.2-format using \code{\link{readEuCodedFiles}} and merge diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd new file mode 100644 index 0000000..4ec4ebe --- /dev/null +++ b/man/setGlobalInspectionID.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setGlobalInspectionID.R +\name{setGlobalInspectionID} +\alias{setGlobalInspectionID} +\title{Set Global Inspection ID} +\usage{ +setGlobalInspectionID(inspection.data, project) +} +\arguments{ +\item{inspection.data}{list with elements \code{header.info}, +\code{inspections}, \code{observations}} + +\item{project}{name of project to which the data are related, such as: +"Lausanne"} +} +\value{ +list with the same elements as in \code{inspection.data} but with + columns \code{inspid} being added to the data frames "inspections" and + "observations" +} +\description{ +Convert inspections numbers (inspno) 1,2,3,... to globally unique identifiers +(inspid), such as "e4b48d86" +} From 7ff4e784ef32e4fa6b30b3fd7b0694567fc6cf17 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 16:52:19 +0200 Subject: [PATCH 048/141] Add arg "project", stop if required but missing --- R/readAndMergeEuCodedFiles.R | 9 ++++++--- R/setGlobalInspectionID.R | 9 ++++++++- man/readAndMergeEuCodedFiles.Rd | 11 ++++++++++- man/setGlobalInspectionID.Rd | 2 +- 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index f041a51..877268d 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -12,13 +12,16 @@ #' @param add.inspid if \code{TRUE} (the default is \code{FALSE}) a globally #' unique inspection ID (inspid) is added to the data frames in elements #' "inspections" and "observations" of the returned list. +#' @param project name of project to which the data are related, such as: +#' "Lausanne" #' @export #' readAndMergeEuCodedFiles <- function( input.files, dbg = FALSE, ..., - add.inspid = FALSE + add.inspid = FALSE, + project = NULL ) { # by setting simple.algorithm = FALSE we get unique column names, e.g. "ADE" @@ -35,8 +38,8 @@ readAndMergeEuCodedFiles <- function( if (!add.inspid) { return(inspection.data) } - - setGlobalInspectionID(inspection.data) + + setGlobalInspectionID(inspection.data, project) } # mergeInspectionData ---------------------------------------------------------- diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 7692830..e2416b8 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -10,8 +10,15 @@ #' @return list with the same elements as in \code{inspection.data} but with #' columns \code{inspid} being added to the data frames "inspections" and #' "observations" -setGlobalInspectionID <- function(inspection.data, project) +setGlobalInspectionID <- function(inspection.data, project = NULL) { + if (is.null(project)) { + stop( + "Please specify the 'project' name (e.g. name of city or data provider)", + call. = FALSE + ) + } + header.info <- kwb.utils::selectElements(inspection.data, "header.info") inspections <- kwb.utils::selectElements(inspection.data, "inspections") observations <- kwb.utils::selectElements(inspection.data, "observations") diff --git a/man/readAndMergeEuCodedFiles.Rd b/man/readAndMergeEuCodedFiles.Rd index 007163a..476a035 100644 --- a/man/readAndMergeEuCodedFiles.Rd +++ b/man/readAndMergeEuCodedFiles.Rd @@ -4,7 +4,13 @@ \alias{readAndMergeEuCodedFiles} \title{Read and Merge Files in EN13508.2-Format} \usage{ -readAndMergeEuCodedFiles(input.files, dbg = FALSE, ..., add.inspid = FALSE) +readAndMergeEuCodedFiles( + input.files, + dbg = FALSE, + ..., + add.inspid = FALSE, + project = NULL +) } \arguments{ \item{input.files}{full path to text file containing CCTV inspection results @@ -17,6 +23,9 @@ in the format described in DIN EN 13508-2} \item{add.inspid}{if \code{TRUE} (the default is \code{FALSE}) a globally unique inspection ID (inspid) is added to the data frames in elements "inspections" and "observations" of the returned list.} + +\item{project}{name of project to which the data are related, such as: +"Lausanne"} } \description{ Read files in EN13508.2-format using \code{\link{readEuCodedFiles}} and merge diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index 4ec4ebe..621bcb8 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -4,7 +4,7 @@ \alias{setGlobalInspectionID} \title{Set Global Inspection ID} \usage{ -setGlobalInspectionID(inspection.data, project) +setGlobalInspectionID(inspection.data, project = NULL) } \arguments{ \item{inspection.data}{list with elements \code{header.info}, From 3cca31e8fd3698445d88600ada7f7f3f85bad947 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 18:03:13 +0200 Subject: [PATCH 049/141] Use lower case "project" instead of "Project" --- R/createInspectionId.R | 2 +- R/setGlobalInspectionID.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/createInspectionId.R b/R/createInspectionId.R index 643da7d..bd1e49a 100644 --- a/R/createInspectionId.R +++ b/R/createInspectionId.R @@ -1,7 +1,7 @@ # createInspectionId ----------------------------------------------------------- createInspectionId <- function( inspections, - id.columns = c("Project", "InspDate", "InspTime", "Node1Ref", "Node2Ref"), + id.columns = c("project", "InspDate", "InspTime", "Node1Ref", "Node2Ref"), n.chars = 8L ) { diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index e2416b8..768f893 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -24,7 +24,7 @@ setGlobalInspectionID <- function(inspection.data, project = NULL) observations <- kwb.utils::selectElements(inspection.data, "observations") inspections <- kwb.utils::removeEmptyColumns(inspections) - inspections[["Project"]] <- project + inspections[["project"]] <- project inspections[["inspid"]] <- createInspectionId(inspections) observations <- kwb.utils::removeEmptyColumns(observations) From 6780bce492ff0bc9b98c353b7e3684adcb7d4d62 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Aug 2022 23:55:06 +0200 Subject: [PATCH 050/141] Move functions from main.R to separate files --- R/getCodes.R | 41 +++++++++++++++++++ R/inspectionDataFieldCodes.R | 12 ++++++ R/main.R | 79 ------------------------------------ R/numberOfInspections.R | 16 ++++++++ man/getCodes.Rd | 2 +- man/numberOfInspections.Rd | 2 +- 6 files changed, 71 insertions(+), 81 deletions(-) create mode 100644 R/getCodes.R create mode 100644 R/inspectionDataFieldCodes.R delete mode 100644 R/main.R create mode 100644 R/numberOfInspections.R diff --git a/R/getCodes.R b/R/getCodes.R new file mode 100644 index 0000000..ba5ab18 --- /dev/null +++ b/R/getCodes.R @@ -0,0 +1,41 @@ +#' Get EU Codes and Their Meaning +#' +#' Get a data frame containing EU codes and their meaning in different languages +#' +#' @param table name or vector of names of tables in the EU norm for which to +#' get field information. Use \code{unique(getCodes()$Table)} to get the +#' possible table names. +#' @param fields set to a vector of field (column) names to restrict the columns +#' returned +#' +#' @return data frame +#' +#' @export +#' +getCodes <- function(table = NULL, fields = NULL) +{ + codes <- readPackageFile("eucodes.csv") + + # Check if all codes are unique + stopifnot(! any(duplicated(get_columns(codes, "Code")))) + + if (! is.null(table)) { + + subtables <- split(codes, get_columns(codes, "Table")) + + codes <- kwb.utils::selectElements(subtables, table) + + if (length(table) > 1L) { + codes <- kwb.utils::safeRowBindAll(codes) + } + } + + # Reset row names + row.names(codes) <- NULL + + if (is.null(fields)) { + return(codes) + } + + get_columns(codes, fields) +} diff --git a/R/inspectionDataFieldCodes.R b/R/inspectionDataFieldCodes.R new file mode 100644 index 0000000..ed1b0fa --- /dev/null +++ b/R/inspectionDataFieldCodes.R @@ -0,0 +1,12 @@ +inspectionDataFieldCodes <- function() +{ + codeInfo <- readPackageFile("eucodes_de.csv") + + codes <- get_columns(codeInfo, "Code") + + columns <- c("class", "meaning") + + lapply(kwb.utils::toNamedList(codes), function(code) { + as.list(get_columns(codeInfo[codes == code, ], columns)) + }) +} diff --git a/R/main.R b/R/main.R deleted file mode 100644 index 56f3887..0000000 --- a/R/main.R +++ /dev/null @@ -1,79 +0,0 @@ -# getCodes --------------------------------------------------------------------- - -#' Get EU Codes and Their Meaning -#' -#' Get a data frame containing EU codes and their meaning in different languages -#' -#' @param table name or vector of names of tables in the EU norm for which to -#' get field information. Use \code{unique(getCodes()$Table)} to get the -#' possible table names. -#' @param fields set to a vector of field (column) names to restrict the columns -#' returned -#' -#' @return data frame -#' -#' @export -#' -getCodes <- function(table = NULL, fields = NULL) -{ - codes <- readPackageFile("eucodes.csv") - - # Check if all codes are unique - stopifnot(! any(duplicated(get_columns(codes, "Code")))) - - if (! is.null(table)) { - - subtables <- split(codes, get_columns(codes, "Table")) - - codes <- kwb.utils::selectElements(subtables, table) - - if (length(table) > 1L) { - codes <- kwb.utils::safeRowBindAll(codes) - } - } - - # Reset row names - row.names(codes) <- NULL - - if (is.null(fields)) { - return(codes) - } - - get_columns(codes, fields) -} - -# numberOfInspections ---------------------------------------------------------- - -#' Number of Inspections -#' -#' Get number of inspections from list of inspection data -#' -#' @param x list of inspection data elements each of which was read from an EN -#' 13508-2-coded file by means of \code{\link{readEuCodedFile}} -#' -#' @return vector of integer representing the number of inspections in each -#' element of \code{inspectionDataList} -#' -#' @export -#' -numberOfInspections <- function(x) -{ - sapply(x, function(xx) nrow(get_elements(xx, "inspections"))) -} - -# inspectionDataFieldCodes ----------------------------------------------------- - -inspectionDataFieldCodes <- function() -{ - codeInfo <- readPackageFile("eucodes_de.csv") - - codes <- get_columns(codeInfo, "Code") - - columns <- c("class", "meaning") - - lapply(kwb.utils::toNamedList(codes), function(code) { - - as.list(get_columns(codeInfo[codes == code, ], columns)) - }) -} - diff --git a/R/numberOfInspections.R b/R/numberOfInspections.R new file mode 100644 index 0000000..1c0e79f --- /dev/null +++ b/R/numberOfInspections.R @@ -0,0 +1,16 @@ +#' Number of Inspections +#' +#' Get number of inspections from list of inspection data +#' +#' @param x list of inspection data elements each of which was read from an EN +#' 13508-2-coded file by means of \code{\link{readEuCodedFile}} +#' +#' @return vector of integer representing the number of inspections in each +#' element of \code{inspectionDataList} +#' +#' @export +#' +numberOfInspections <- function(x) +{ + sapply(x, function(xx) nrow(get_elements(xx, "inspections"))) +} diff --git a/man/getCodes.Rd b/man/getCodes.Rd index c71eef9..716c8fa 100644 --- a/man/getCodes.Rd +++ b/man/getCodes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/main.R +% Please edit documentation in R/getCodes.R \name{getCodes} \alias{getCodes} \title{Get EU Codes and Their Meaning} diff --git a/man/numberOfInspections.Rd b/man/numberOfInspections.Rd index 192dc46..101458c 100644 --- a/man/numberOfInspections.Rd +++ b/man/numberOfInspections.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/main.R +% Please edit documentation in R/numberOfInspections.R \name{numberOfInspections} \alias{numberOfInspections} \title{Number of Inspections} From b5e502c0e80c25bbafedfd07a470b5ec5dbb5b88 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 00:06:48 +0200 Subject: [PATCH 051/141] Move readEuCodedFile() to its own file --- R/readEuCodedFile.R | 275 ++++++++++++++++++++++++++++++++++++++++ R/readEuCodedFiles.R | 276 ----------------------------------------- man/readEuCodedFile.Rd | 2 +- 3 files changed, 276 insertions(+), 277 deletions(-) create mode 100644 R/readEuCodedFile.R diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R new file mode 100644 index 0000000..d816cca --- /dev/null +++ b/R/readEuCodedFile.R @@ -0,0 +1,275 @@ +# readEuCodedFile -------------------------------------------------------------- + +#' Read CCTV Inspection Data in EN13508-2 Format +#' +#' Read CCTV inspection data from file coded according to EN13508-2 +#' +#' @param input.file full path to text file containing CCTV inspection results +#' in the format described in DIN EN 13508-2 +#' @param encoding default: "latin1" +#' @param read.inspections if \code{TRUE}, general inspection data (in +#' #B-blocks) are read, otherwise skipped (use if function fails) +#' @param short.names if \code{TRUE} (default), the short names (codes) as defined +#' in EN13508.2 are used as column names, otherwise more meaningful names are used. +#' See columns\code{Code} and \code{Name}, respectively, in the data frame returned by +#' \code{getCodes()}. +#' @param simple.algorithm if \code{TRUE} (default), a simple (and faster) +#' algorithm is used to extract the general information about the inspections +#' from the #B-headers. It requires that all #B-headers have the same number +#' and order of fields. If \code{FALSE}, another algorithm being able to treat +#' differing #B-header rows is used. +#' @param warn if \code{TRUE}, warnings are shown (e.g. if not all #A-header +#' fields were found) +#' @param dbg if \code{TRUE}, debug messages are shown, else not +#' +#' @return list with elements \code{header.info}, \code{inspections}, +#' \code{observations} +#' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok +#' @export +#' +readEuCodedFile <- function( + input.file, encoding = "latin1", read.inspections = TRUE, short.names = TRUE, + simple.algorithm = TRUE, warn = TRUE, dbg = TRUE +) +{ + #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) + #kwb.utils::assignPackageObjects("kwb.en13508.2") + + eu_lines <- kwb.utils::catAndRun( + dbg = dbg, paste("Reading input file", input.file), + readLines(input.file, encoding = encoding) + ) + + eu_lines <- kwb.utils::catAndRun( + dbg = dbg, "Removing empty lines (if any)", + removeEmptyLines(eu_lines) + ) + + header.info <- kwb.utils::catAndRun( + dbg = dbg, + "Extracting file header", { + getHeaderInfoFromHeaderLines( + header.lines = getHeaderLinesFromEuCodedLines(eu_lines), + warn = warn + ) + }) + + kwb.utils::.logstart(dbg, "Extracting inspection records") + + if (read.inspections) { + + inspections <- if (simple.algorithm) { + getInspectionsFromEuLines(eu_lines, header.info, dbg = dbg > 1L) + } # else NULL + + # If the inspections could not be read with the simple algorithm (due to + # changing header rows) or if the user requests it, try it again with + # another algorithm + if (is.null(inspections)) { + + inspections <- getInspectionsFromEuLines.new( + eu_lines, header.info, dbg = dbg + ) + } + + kwb.utils::catIf(dbg, sprintf( + "%d inspections extracted. ", nrow(inspections) + )) + + } else { + + warning( + "I (yet) cannot read the inspection data (#B-blocks). ", + "So I just returned the number of inspections instead of a ", + "data frame with all information on the inspection!" + ) + + inspections <- length(grep("^#B01", eu_lines)) + } + + kwb.utils::.logok(dbg) + + kwb.utils::.logstart(dbg, "Extracting observation records") + + observations <- try( + getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), + silent = TRUE + ) + + if (kwb.utils::isTryError(observations)) { + headerInfo <- getHeaderInfo(eu_lines) + #View(headerInfo) + observations <- extractObservationData(eu_lines, headerInfo, header.info) + } + + kwb.utils::catIf( + dbg, sprintf("%d observations extracted. ", nrow(observations)) + ) + + kwb.utils::.logok(dbg) + + if (!short.names) { + inspections <- renameColumnsToMeaningful(inspections) + observations <- renameColumnsToMeaningful(observations) + } + + list( + header.info = header.info, + inspections = inspections, + observations = observations + ) +} + +# renameColumnsToMeaningful ---------------------------------------------------- +renameColumnsToMeaningful <- function(x) +{ + codeInfo <- kwb.utils::selectColumns(getCodes(), c("Code", "Name")) + renamings <- kwb.utils::toLookupList(data = codeInfo) + kwb.utils::renameColumns(x, renamings) +} + +# getHeaderLinesFromEuCodedLines ----------------------------------------------- +getHeaderLinesFromEuCodedLines <- function(lines) +{ + grep("^#A", lines, value = TRUE) +} + +# getHeaderInfoFromHeaderLines ------------------------------------------------- +getHeaderInfoFromHeaderLines <- function(header.lines, warn = TRUE) +{ + # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( + # kwb.utils::collapsed(gsub("^#", "", header.lines), "@"), + # separators = c("@", "=") + # )) + # + # renamed_fields <- kwb.utils::renameColumns(original_fields, list( + # A1 = "encoding", A2 = "language", A3 = "separator", A4 = "decimal", + # A5 = "quote", A6 = "year" + # )) + + # Set quote to "" instead of NA because read.table will give strange results + quote <- findKeyAndExtractValue(header.lines, "A5", warn = warn) + quote <- kwb.utils::defaultIfNA(quote, "") + + list( + encoding = findKeyAndExtractValue(header.lines, "A1", warn = warn), + language = findKeyAndExtractValue(header.lines, "A2", warn = warn), + separator = findKeyAndExtractValue(header.lines, "A3", warn = warn), + decimal = findKeyAndExtractValue(header.lines, "A4", warn = warn), + quote = quote, + year = findKeyAndExtractValue(header.lines, "A6", warn = warn) + ) +} + +# findKeyAndExtractValue ------------------------------------------------------- +findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) +{ + pattern <- paste0("^#", key) + + index <- grep(pattern, keyvalues) + + if (length(index) == 0L) { + + warnMessage <- sprintf( + "Key '#%s' not found in the #A-header of the file.", key + ) + + if (! is.na(default)) { + warnMessage <- paste(warnMessage, "I will use the default:", default) + } + + if (warn) { + message(warnMessage) + warning(warnMessage) + } + + default + + } else { + + strsplit(keyvalues[index], "=")[[1L]][2L] + } +} + +# getInspectionsFromEuLines ---------------------------------------------------- +getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) +{ + inspections.complete <- NULL + + header.line.number <- 1L + + continue <- TRUE + + indices.B <- grep("^#B01", eu_lines) + + aborted <- FALSE + + while (! aborted && length(indices.B) > 0L) { + + b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) + + b.captions <- strsplit(b.caption.lines, header.info$separator) + + if (kwb.utils::allAreEqual(b.captions)) { + + inspections <- extractInspectionData( + b.lines = eu_lines[indices.B + 1L], + header.info = header.info, + captions = b.captions[[1L]] + ) + + inspections.complete <- kwb.utils::safeColumnBind( + inspections.complete, inspections + ) + + } else { + + if (dbg) { + message( + "The #B-header lines differ within the file -> I will change the ", + "algorithm..." + ) + } + + aborted <- TRUE + } + + header.line.number <- header.line.number + 1L + + indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) + } + + if (aborted) { + return(NULL) + } + + inspections.complete +} + +# extractInspectionData -------------------------------------------------------- +extractInspectionData <- function(b.lines, header.info, captions) +{ + inspections <- kwb.utils::csvTextToDataFrame( + text = paste(b.lines, collapse = "\n"), + sep = header.info$separator, + dec = header.info$decimal, + quote = header.info$quote, + comment.char = "", + stringsAsFactors = FALSE + ) + + stats::setNames(inspections, captions) +} + +# getValueFromKeyValueString --------------------------------------------------- +getValueFromKeyValueString <- function(keyvalue) +{ + sapply(strsplit(keyvalue, "="), "[", 2L) +} + +# setFilename ------------------------------------------------------------------ +setFilename <- function(data, name) +{ + data[["file"]] <- name + kwb.utils::moveColumnsToFront(data, "file") +} diff --git a/R/readEuCodedFiles.R b/R/readEuCodedFiles.R index fe30c60..0d06114 100644 --- a/R/readEuCodedFiles.R +++ b/R/readEuCodedFiles.R @@ -74,279 +74,3 @@ readEuCodedFiles <- function( # Return the indices of the files that could not be read correctly structure(result, which_failed = which(failed)) } - -# readEuCodedFile -------------------------------------------------------------- - -#' Read CCTV Inspection Data in EN13508-2 Format -#' -#' Read CCTV inspection data from file coded according to EN13508-2 -#' -#' @param input.file full path to text file containing CCTV inspection results -#' in the format described in DIN EN 13508-2 -#' @param encoding default: "latin1" -#' @param read.inspections if \code{TRUE}, general inspection data (in -#' #B-blocks) are read, otherwise skipped (use if function fails) -#' @param short.names if \code{TRUE} (default), the short names (codes) as defined -#' in EN13508.2 are used as column names, otherwise more meaningful names are used. -#' See columns\code{Code} and \code{Name}, respectively, in the data frame returned by -#' \code{getCodes()}. -#' @param simple.algorithm if \code{TRUE} (default), a simple (and faster) -#' algorithm is used to extract the general information about the inspections -#' from the #B-headers. It requires that all #B-headers have the same number -#' and order of fields. If \code{FALSE}, another algorithm being able to treat -#' differing #B-header rows is used. -#' @param warn if \code{TRUE}, warnings are shown (e.g. if not all #A-header -#' fields were found) -#' @param dbg if \code{TRUE}, debug messages are shown, else not -#' -#' @return list with elements \code{header.info}, \code{inspections}, -#' \code{observations} -#' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok -#' @export -#' -readEuCodedFile <- function( - input.file, encoding = "latin1", read.inspections = TRUE, short.names = TRUE, - simple.algorithm = TRUE, warn = TRUE, dbg = TRUE -) -{ - #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) - #kwb.utils::assignPackageObjects("kwb.en13508.2") - - eu_lines <- kwb.utils::catAndRun( - dbg = dbg, paste("Reading input file", input.file), - readLines(input.file, encoding = encoding) - ) - - eu_lines <- kwb.utils::catAndRun( - dbg = dbg, "Removing empty lines (if any)", - removeEmptyLines(eu_lines) - ) - - header.info <- kwb.utils::catAndRun( - dbg = dbg, - "Extracting file header", { - getHeaderInfoFromHeaderLines( - header.lines = getHeaderLinesFromEuCodedLines(eu_lines), - warn = warn - ) - }) - - kwb.utils::.logstart(dbg, "Extracting inspection records") - - if (read.inspections) { - - inspections <- if (simple.algorithm) { - getInspectionsFromEuLines(eu_lines, header.info, dbg = dbg > 1L) - } # else NULL - - # If the inspections could not be read with the simple algorithm (due to - # changing header rows) or if the user requests it, try it again with - # another algorithm - if (is.null(inspections)) { - - inspections <- getInspectionsFromEuLines.new( - eu_lines, header.info, dbg = dbg - ) - } - - kwb.utils::catIf(dbg, sprintf( - "%d inspections extracted. ", nrow(inspections) - )) - - } else { - - warning( - "I (yet) cannot read the inspection data (#B-blocks). ", - "So I just returned the number of inspections instead of a ", - "data frame with all information on the inspection!" - ) - - inspections <- length(grep("^#B01", eu_lines)) - } - - kwb.utils::.logok(dbg) - - kwb.utils::.logstart(dbg, "Extracting observation records") - - observations <- try( - getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), - silent = TRUE - ) - - if (kwb.utils::isTryError(observations)) { - headerInfo <- getHeaderInfo(eu_lines) - #View(headerInfo) - observations <- extractObservationData(eu_lines, headerInfo, header.info) - } - - kwb.utils::catIf( - dbg, sprintf("%d observations extracted. ", nrow(observations)) - ) - - kwb.utils::.logok(dbg) - - if (!short.names) { - inspections <- renameColumnsToMeaningful(inspections) - observations <- renameColumnsToMeaningful(observations) - } - - list( - header.info = header.info, - inspections = inspections, - observations = observations - ) -} - -# renameColumnsToMeaningful ---------------------------------------------------- -renameColumnsToMeaningful <- function(x) -{ - codeInfo <- kwb.utils::selectColumns(getCodes(), c("Code", "Name")) - renamings <- kwb.utils::toLookupList(data = codeInfo) - kwb.utils::renameColumns(x, renamings) -} - -# getHeaderLinesFromEuCodedLines ----------------------------------------------- -getHeaderLinesFromEuCodedLines <- function(lines) -{ - grep("^#A", lines, value = TRUE) -} - -# getHeaderInfoFromHeaderLines ------------------------------------------------- -getHeaderInfoFromHeaderLines <- function(header.lines, warn = TRUE) -{ - # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( - # kwb.utils::collapsed(gsub("^#", "", header.lines), "@"), - # separators = c("@", "=") - # )) - # - # renamed_fields <- kwb.utils::renameColumns(original_fields, list( - # A1 = "encoding", A2 = "language", A3 = "separator", A4 = "decimal", - # A5 = "quote", A6 = "year" - # )) - - # Set quote to "" instead of NA because read.table will give strange results - quote <- findKeyAndExtractValue(header.lines, "A5", warn = warn) - quote <- kwb.utils::defaultIfNA(quote, "") - - list( - encoding = findKeyAndExtractValue(header.lines, "A1", warn = warn), - language = findKeyAndExtractValue(header.lines, "A2", warn = warn), - separator = findKeyAndExtractValue(header.lines, "A3", warn = warn), - decimal = findKeyAndExtractValue(header.lines, "A4", warn = warn), - quote = quote, - year = findKeyAndExtractValue(header.lines, "A6", warn = warn) - ) -} - -# findKeyAndExtractValue ------------------------------------------------------- -findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) -{ - pattern <- paste0("^#", key) - - index <- grep(pattern, keyvalues) - - if (length(index) == 0L) { - - warnMessage <- sprintf( - "Key '#%s' not found in the #A-header of the file.", key - ) - - if (! is.na(default)) { - warnMessage <- paste(warnMessage, "I will use the default:", default) - } - - if (warn) { - message(warnMessage) - warning(warnMessage) - } - - default - - } else { - - strsplit(keyvalues[index], "=")[[1L]][2L] - } -} - -# getInspectionsFromEuLines ---------------------------------------------------- -getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) -{ - inspections.complete <- NULL - - header.line.number <- 1L - - continue <- TRUE - - indices.B <- grep("^#B01", eu_lines) - - aborted <- FALSE - - while (! aborted && length(indices.B) > 0L) { - - b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) - - b.captions <- strsplit(b.caption.lines, header.info$separator) - - if (kwb.utils::allAreEqual(b.captions)) { - - inspections <- extractInspectionData( - b.lines = eu_lines[indices.B + 1L], - header.info = header.info, - captions = b.captions[[1L]] - ) - - inspections.complete <- kwb.utils::safeColumnBind( - inspections.complete, inspections - ) - - } else { - - if (dbg) { - message( - "The #B-header lines differ within the file -> I will change the ", - "algorithm..." - ) - } - - aborted <- TRUE - } - - header.line.number <- header.line.number + 1L - - indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) - } - - if (aborted) { - return(NULL) - } - - inspections.complete -} - -# extractInspectionData -------------------------------------------------------- -extractInspectionData <- function(b.lines, header.info, captions) -{ - inspections <- kwb.utils::csvTextToDataFrame( - text = paste(b.lines, collapse = "\n"), - sep = header.info$separator, - dec = header.info$decimal, - quote = header.info$quote, - comment.char = "", - stringsAsFactors = FALSE - ) - - stats::setNames(inspections, captions) -} - -# getValueFromKeyValueString --------------------------------------------------- -getValueFromKeyValueString <- function(keyvalue) -{ - sapply(strsplit(keyvalue, "="), "[", 2L) -} - -# setFilename ------------------------------------------------------------------ -setFilename <- function(data, name) -{ - data[["file"]] <- name - kwb.utils::moveColumnsToFront(data, "file") -} diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index e1bfbbd..2e817fe 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/readEuCodedFiles.R +% Please edit documentation in R/readEuCodedFile.R \name{readEuCodedFile} \alias{readEuCodedFile} \title{Read CCTV Inspection Data in EN13508-2 Format} From b66bd885cedf8f5e131b8380350a2f57fe18addd Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 00:08:54 +0200 Subject: [PATCH 052/141] Clean removeEmptyLines() Use simpler names, return early --- R/utils.R | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/R/utils.R b/R/utils.R index cce49d4..d1979b0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -98,20 +98,19 @@ readPackageFile <- function(file, ...) # removeEmptyLines ------------------------------------------------------------- removeEmptyLines <- function(x, dbg = TRUE) { - empty.line.indices <- grep("^$", x) + indices.empty <- grep("^$", x) - numberOfEmptyLines <- length(empty.line.indices) + n.empty <- length(indices.empty) - if (numberOfEmptyLines > 0) { - - kwb.utils::.logstart(dbg, "Removing", numberOfEmptyLines, "empty lines") - - x <- x[-empty.line.indices] - - kwb.utils::.logok(dbg) + if (n.empty == 0L) { + return(x) } - - x + + kwb.utils::catAndRun( + paste("Removing", n.empty, "empty lines"), + dbg = dbg, + x[-indices.empty] + ) } # valuesToCsv ------------------------------------------------------------------ From bb0760cb8d120a90609d6d7512645081b9a76d43 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 00:22:22 +0200 Subject: [PATCH 053/141] Add getFileHeaderFromEuLines(), replacing two other functions: getHeaderLinesFromEuCodedLines() and getHeaderInfroFromHeaderLines() --- R/extractObservationData.R | 2 +- R/readEuCodedFile.R | 22 ++++++------------- man/extractObservationData.Rd | 2 +- ...st-function-getHeaderInfoFromHeaderLines.R | 14 ------------ ...-function-getHeaderLinesFromEuCodedLines.R | 8 ------- 5 files changed, 9 insertions(+), 39 deletions(-) delete mode 100644 tests/testthat/test-function-getHeaderInfoFromHeaderLines.R delete mode 100644 tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R diff --git a/R/extractObservationData.R b/R/extractObservationData.R index 9bec94e..0d8ab31 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -3,7 +3,7 @@ #' @param euLines text lines read from EN13508.2-coded file #' @param headerInfo data frame with information about header lines #' @param header.info list as returned by -#' \code{kwb.en13508.2:::getHeaderInfoFromHeaderLines} +#' \code{kwb.en13508.2:::getFileHeaderFromEuLines} #' @return data frame with columns \code{A}, \code{B}, \code{C}, ... as defined #' in EN13508.2 and a column \code{inspno} referring to the inspection number. extractObservationData <- function(euLines, headerInfo, header.info) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index d816cca..4862614 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -46,13 +46,9 @@ readEuCodedFile <- function( ) header.info <- kwb.utils::catAndRun( - dbg = dbg, - "Extracting file header", { - getHeaderInfoFromHeaderLines( - header.lines = getHeaderLinesFromEuCodedLines(eu_lines), - warn = warn - ) - }) + dbg = dbg, "Extracting file header", + getFileHeaderFromEuLines(eu_lines) + ) kwb.utils::.logstart(dbg, "Extracting inspection records") @@ -128,15 +124,11 @@ renameColumnsToMeaningful <- function(x) kwb.utils::renameColumns(x, renamings) } -# getHeaderLinesFromEuCodedLines ----------------------------------------------- -getHeaderLinesFromEuCodedLines <- function(lines) -{ - grep("^#A", lines, value = TRUE) -} - -# getHeaderInfoFromHeaderLines ------------------------------------------------- -getHeaderInfoFromHeaderLines <- function(header.lines, warn = TRUE) +# getFileHeaderFromEuLines ----------------------------------------------------- +getFileHeaderFromEuLines <- function(eu_lines, warn = TRUE) { + header.lines <- grep("^#A", eu_lines, value = TRUE) + # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( # kwb.utils::collapsed(gsub("^#", "", header.lines), "@"), # separators = c("@", "=") diff --git a/man/extractObservationData.Rd b/man/extractObservationData.Rd index 410b85f..4c8a8cf 100644 --- a/man/extractObservationData.Rd +++ b/man/extractObservationData.Rd @@ -12,7 +12,7 @@ extractObservationData(euLines, headerInfo, header.info) \item{headerInfo}{data frame with information about header lines} \item{header.info}{list as returned by -\code{kwb.en13508.2:::getHeaderInfoFromHeaderLines}} +\code{kwb.en13508.2:::getFileHeaderFromEuLines}} } \value{ data frame with columns \code{A}, \code{B}, \code{C}, ... as defined diff --git a/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R b/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R deleted file mode 100644 index 1ac03e0..0000000 --- a/tests/testthat/test-function-getHeaderInfoFromHeaderLines.R +++ /dev/null @@ -1,14 +0,0 @@ -test_that("getHeaderInfoFromHeaderLines() works", { - - f <- kwb.en13508.2:::getHeaderInfoFromHeaderLines - - expect_error(f()) - - expect_warning(suppressMessages(f(c("a", "b")))) - - header.lines <- c("#A1=a", "#A2=b", "#A3=c", "#A4=d", "#A5=e", "#A6=f") - - result <- f(header.lines) - - expect_identical(unname(unlist(result)), letters[1:6]) -}) diff --git a/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R b/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R deleted file mode 100644 index 2ca681d..0000000 --- a/tests/testthat/test-function-getHeaderLinesFromEuCodedLines.R +++ /dev/null @@ -1,8 +0,0 @@ -test_that("getHeaderLinesFromEuCodedLines() works", { - - f <- kwb.en13508.2:::getHeaderLinesFromEuCodedLines - - expect_error(f()) - - expect_identical(f(c("#A123", "#B", "#C")), "#A123") -}) From c4bb5542c695d60d3f216e5a44d83d8eeff06599 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 00:56:13 +0200 Subject: [PATCH 054/141] Shorten code in getFileHeaderFromEuLines --- R/readEuCodedFile.R | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 4862614..2ffd4b2 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -127,10 +127,10 @@ renameColumnsToMeaningful <- function(x) # getFileHeaderFromEuLines ----------------------------------------------------- getFileHeaderFromEuLines <- function(eu_lines, warn = TRUE) { - header.lines <- grep("^#A", eu_lines, value = TRUE) + a.lines <- grep("^#A", eu_lines, value = TRUE) # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( - # kwb.utils::collapsed(gsub("^#", "", header.lines), "@"), + # kwb.utils::collapsed(gsub("^#", "", a.lines), "@"), # separators = c("@", "=") # )) # @@ -140,16 +140,15 @@ getFileHeaderFromEuLines <- function(eu_lines, warn = TRUE) # )) # Set quote to "" instead of NA because read.table will give strange results - quote <- findKeyAndExtractValue(header.lines, "A5", warn = warn) - quote <- kwb.utils::defaultIfNA(quote, "") - + quote <- findKeyAndExtractValue(a.lines, "A5", warn = warn) + list( - encoding = findKeyAndExtractValue(header.lines, "A1", warn = warn), - language = findKeyAndExtractValue(header.lines, "A2", warn = warn), - separator = findKeyAndExtractValue(header.lines, "A3", warn = warn), - decimal = findKeyAndExtractValue(header.lines, "A4", warn = warn), - quote = quote, - year = findKeyAndExtractValue(header.lines, "A6", warn = warn) + encoding = findKeyAndExtractValue(a.lines, "A1", warn = warn), + language = findKeyAndExtractValue(a.lines, "A2", warn = warn), + separator = findKeyAndExtractValue(a.lines, "A3", warn = warn), + decimal = findKeyAndExtractValue(a.lines, "A4", warn = warn), + quote = kwb.utils::defaultIfNA(quote, ""), + year = findKeyAndExtractValue(a.lines, "A6", warn = warn) ) } From 069924c90618fbff4196e473451f44abbc25892d Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 01:00:25 +0200 Subject: [PATCH 055/141] Refactor readEuCodedFile() introducing new functions - getInspectionRecordsFromEuLines(), - getObservationRecordsFromEuLines() --- R/readEuCodedFile.R | 119 +++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 2ffd4b2..6a5dff9 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -28,65 +28,96 @@ #' @export #' readEuCodedFile <- function( - input.file, encoding = "latin1", read.inspections = TRUE, short.names = TRUE, - simple.algorithm = TRUE, warn = TRUE, dbg = TRUE + input.file, + encoding = "latin1", + read.inspections = TRUE, + short.names = TRUE, + simple.algorithm = TRUE, + warn = TRUE, + dbg = TRUE ) { #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) #kwb.utils::assignPackageObjects("kwb.en13508.2") - eu_lines <- kwb.utils::catAndRun( - dbg = dbg, paste("Reading input file", input.file), + run <- function(...) kwb.utils::catAndRun(dbg = dbg, ...) + + eu_lines <- run( + paste("Reading input file", input.file), readLines(input.file, encoding = encoding) ) - eu_lines <- kwb.utils::catAndRun( - dbg = dbg, "Removing empty lines (if any)", + eu_lines <- run( + "Removing empty lines (if any)", removeEmptyLines(eu_lines) ) - header.info <- kwb.utils::catAndRun( - dbg = dbg, "Extracting file header", - getFileHeaderFromEuLines(eu_lines) + header.info <- run( + "Extracting file header", + getFileHeaderFromEuLines(eu_lines, warn) ) - kwb.utils::.logstart(dbg, "Extracting inspection records") + inspections <- run( + "Extracting inspection records", + getInspectionRecordsFromEuLines( + eu_lines, header.info, read.inspections, simple.algorithm, dbg + ) + ) - if (read.inspections) { - - inspections <- if (simple.algorithm) { - getInspectionsFromEuLines(eu_lines, header.info, dbg = dbg > 1L) - } # else NULL - - # If the inspections could not be read with the simple algorithm (due to - # changing header rows) or if the user requests it, try it again with - # another algorithm - if (is.null(inspections)) { - - inspections <- getInspectionsFromEuLines.new( - eu_lines, header.info, dbg = dbg - ) - } - - kwb.utils::catIf(dbg, sprintf( - "%d inspections extracted. ", nrow(inspections) - )) - - } else { - + observations <- run( + "Extracting observation records", + getObservationRecordsFromEuLines(eu_lines, header.info, dbg) + ) + + if (!short.names) { + inspections <- renameColumnsToMeaningful(inspections) + observations <- renameColumnsToMeaningful(observations) + } + + list( + header.info = header.info, + inspections = inspections, + observations = observations + ) +} + +# getInspectionRecordsFromEuLines ---------------------------------------------- +getInspectionRecordsFromEuLines <- function( + eu_lines, header.info, read.inspections, simple.algorithm, dbg +) +{ + if (!read.inspections) { + warning( "I (yet) cannot read the inspection data (#B-blocks). ", "So I just returned the number of inspections instead of a ", "data frame with all information on the inspection!" ) - inspections <- length(grep("^#B01", eu_lines)) + return(length(grep("^#B01", eu_lines))) } + + inspections <- if (simple.algorithm) { + getInspectionsFromEuLines(eu_lines, header.info, dbg = dbg > 1L) + } # else NULL - kwb.utils::.logok(dbg) + # If the inspections could not be read with the simple algorithm (due to + # changing header rows) or if the user requests it, try it again with + # another algorithm + if (is.null(inspections)) { + inspections <- getInspectionsFromEuLines.new( + eu_lines, header.info, dbg = dbg + ) + } - kwb.utils::.logstart(dbg, "Extracting observation records") + kwb.utils::catIf(dbg, paste(nrow(inspections), "inspections extracted. ")) + inspections +} + +# getObservationRecordsFromEuLines --------------------------------------------- +getObservationRecordsFromEuLines <- function(eu_lines, header.info, dbg) +{ observations <- try( getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), silent = TRUE @@ -94,26 +125,12 @@ readEuCodedFile <- function( if (kwb.utils::isTryError(observations)) { headerInfo <- getHeaderInfo(eu_lines) - #View(headerInfo) observations <- extractObservationData(eu_lines, headerInfo, header.info) } - kwb.utils::catIf( - dbg, sprintf("%d observations extracted. ", nrow(observations)) - ) + kwb.utils::catIf(dbg, paste(nrow(observations), "observations extracted. ")) - kwb.utils::.logok(dbg) - - if (!short.names) { - inspections <- renameColumnsToMeaningful(inspections) - observations <- renameColumnsToMeaningful(observations) - } - - list( - header.info = header.info, - inspections = inspections, - observations = observations - ) + observations } # renameColumnsToMeaningful ---------------------------------------------------- From bb12ff0aee7d5c2912e090f9a14d04c5b265dd14 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 01:05:03 +0200 Subject: [PATCH 056/141] Replace arg short.names with meaningful.names --- R/readEuCodedFile.R | 12 ++++++------ man/readEuCodedFile.Rd | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 6a5dff9..c138925 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -9,10 +9,10 @@ #' @param encoding default: "latin1" #' @param read.inspections if \code{TRUE}, general inspection data (in #' #B-blocks) are read, otherwise skipped (use if function fails) -#' @param short.names if \code{TRUE} (default), the short names (codes) as defined -#' in EN13508.2 are used as column names, otherwise more meaningful names are used. -#' See columns\code{Code} and \code{Name}, respectively, in the data frame returned by -#' \code{getCodes()}. +#' @param meaningful.names if \code{FALSE} (default), the short names (codes) as +#' defined in EN13508.2 are used as column names, otherwise more meaningful +#' names are used. See columns\code{Code} and \code{Name}, respectively, in +#' the data frame returned by \code{getCodes()}. #' @param simple.algorithm if \code{TRUE} (default), a simple (and faster) #' algorithm is used to extract the general information about the inspections #' from the #B-headers. It requires that all #B-headers have the same number @@ -31,7 +31,7 @@ readEuCodedFile <- function( input.file, encoding = "latin1", read.inspections = TRUE, - short.names = TRUE, + meaningful.names = FALSE, simple.algorithm = TRUE, warn = TRUE, dbg = TRUE @@ -69,7 +69,7 @@ readEuCodedFile <- function( getObservationRecordsFromEuLines(eu_lines, header.info, dbg) ) - if (!short.names) { + if (meaningful.names) { inspections <- renameColumnsToMeaningful(inspections) observations <- renameColumnsToMeaningful(observations) } diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 2e817fe..e46ef67 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -8,7 +8,7 @@ readEuCodedFile( input.file, encoding = "latin1", read.inspections = TRUE, - short.names = TRUE, + meaningful.names = FALSE, simple.algorithm = TRUE, warn = TRUE, dbg = TRUE @@ -23,10 +23,10 @@ in the format described in DIN EN 13508-2} \item{read.inspections}{if \code{TRUE}, general inspection data (in #B-blocks) are read, otherwise skipped (use if function fails)} -\item{short.names}{if \code{TRUE} (default), the short names (codes) as defined -in EN13508.2 are used as column names, otherwise more meaningful names are used. -See columns\code{Code} and \code{Name}, respectively, in the data frame returned by -\code{getCodes()}.} +\item{meaningful.names}{if \code{FALSE} (default), the short names (codes) as +defined in EN13508.2 are used as column names, otherwise more meaningful +names are used. See columns\code{Code} and \code{Name}, respectively, in +the data frame returned by \code{getCodes()}.} \item{simple.algorithm}{if \code{TRUE} (default), a simple (and faster) algorithm is used to extract the general information about the inspections From 6c215dcc4258b2c14ef1d67c40d629a967d13fcd Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 01:31:44 +0200 Subject: [PATCH 057/141] Move and rename functions - getInspectionsFromEuLines() -> getInspectionRecords_v1() - getInspectionsFromEuLines.new() -> getInspectionRecords_v2() --- R/getFileHeaderFromEuLines.R | 57 ++++++ R/getInspectionRecordsFromEuLines.R | 109 ++++++++++ R/getObservationRecordsFromEuLines.R | 17 ++ R/importInspections.R | 6 +- R/readEuCodedFile.R | 192 ------------------ R/readEuCodedFiles.R | 7 + ...> test-function-getInspectionRecords_v1.R} | 4 +- .../test-function-getInspectionRecords_v2.R | 6 + ...t-function-getInspectionsFromEuLines.new.R | 17 -- .../testthat/test-function-readEuCodedFile.R | 2 +- 10 files changed, 201 insertions(+), 216 deletions(-) create mode 100644 R/getFileHeaderFromEuLines.R create mode 100644 R/getInspectionRecordsFromEuLines.R create mode 100644 R/getObservationRecordsFromEuLines.R rename tests/testthat/{test-function-getInspectionsFromEuLines.R => test-function-getInspectionRecords_v1.R} (51%) create mode 100644 tests/testthat/test-function-getInspectionRecords_v2.R delete mode 100644 tests/testthat/test-function-getInspectionsFromEuLines.new.R diff --git a/R/getFileHeaderFromEuLines.R b/R/getFileHeaderFromEuLines.R new file mode 100644 index 0000000..320e1c2 --- /dev/null +++ b/R/getFileHeaderFromEuLines.R @@ -0,0 +1,57 @@ +# getFileHeaderFromEuLines ----------------------------------------------------- +getFileHeaderFromEuLines <- function(eu_lines, warn = TRUE) +{ + a.lines <- grep("^#A", eu_lines, value = TRUE) + + # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( + # kwb.utils::collapsed(gsub("^#", "", a.lines), "@"), + # separators = c("@", "=") + # )) + # + # renamed_fields <- kwb.utils::renameColumns(original_fields, list( + # A1 = "encoding", A2 = "language", A3 = "separator", A4 = "decimal", + # A5 = "quote", A6 = "year" + # )) + + # Set quote to "" instead of NA because read.table will give strange results + quote <- findKeyAndExtractValue(a.lines, "A5", warn = warn) + + list( + encoding = findKeyAndExtractValue(a.lines, "A1", warn = warn), + language = findKeyAndExtractValue(a.lines, "A2", warn = warn), + separator = findKeyAndExtractValue(a.lines, "A3", warn = warn), + decimal = findKeyAndExtractValue(a.lines, "A4", warn = warn), + quote = kwb.utils::defaultIfNA(quote, ""), + year = findKeyAndExtractValue(a.lines, "A6", warn = warn) + ) +} + +# findKeyAndExtractValue ------------------------------------------------------- +findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) +{ + pattern <- paste0("^#", key) + + index <- grep(pattern, keyvalues) + + if (length(index) == 0L) { + + warnMessage <- sprintf( + "Key '#%s' not found in the #A-header of the file.", key + ) + + if (! is.na(default)) { + warnMessage <- paste(warnMessage, "I will use the default:", default) + } + + if (warn) { + message(warnMessage) + warning(warnMessage) + } + + default + + } else { + + strsplit(keyvalues[index], "=")[[1L]][2L] + } +} diff --git a/R/getInspectionRecordsFromEuLines.R b/R/getInspectionRecordsFromEuLines.R new file mode 100644 index 0000000..7c23598 --- /dev/null +++ b/R/getInspectionRecordsFromEuLines.R @@ -0,0 +1,109 @@ +# getInspectionRecordsFromEuLines ---------------------------------------------- +getInspectionRecordsFromEuLines <- function( + eu_lines, header.info, read.inspections, simple.algorithm, dbg +) +{ + if (!read.inspections) { + + warning( + "I (yet) cannot read the inspection data (#B-blocks). ", + "So I just returned the number of inspections instead of a ", + "data frame with all information on the inspection!" + ) + + return(length(grep("^#B01", eu_lines))) + } + + inspections <- if (simple.algorithm) { + getInspectionRecords_v1(eu_lines, header.info, dbg = dbg > 1L) + } # else NULL + + # If the inspections could not be read with the simple algorithm (due to + # changing header rows) or if the user requests it, try it again with + # another algorithm + if (is.null(inspections)) { + inspections <- getInspectionRecords_v2( + eu_lines, header.info, dbg = dbg + ) + } + + kwb.utils::catIf(dbg, paste(nrow(inspections), "inspections extracted. ")) + + inspections +} + +# getInspectionRecords_v1 ------------------------------------------------------ +getInspectionRecords_v1 <- function(eu_lines, header.info, dbg = TRUE) +{ + inspections.complete <- NULL + + header.line.number <- 1L + + continue <- TRUE + + indices.B <- grep("^#B01", eu_lines) + + aborted <- FALSE + + while (! aborted && length(indices.B) > 0L) { + + b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) + + b.captions <- strsplit(b.caption.lines, header.info$separator) + + if (kwb.utils::allAreEqual(b.captions)) { + + inspections <- extractInspectionData( + b.lines = eu_lines[indices.B + 1L], + header.info = header.info, + captions = b.captions[[1L]] + ) + + inspections.complete <- kwb.utils::safeColumnBind( + inspections.complete, inspections + ) + + } else { + + if (dbg) { + message( + "The #B-header lines differ within the file -> I will change the ", + "algorithm..." + ) + } + + aborted <- TRUE + } + + header.line.number <- header.line.number + 1L + + indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) + } + + if (aborted) { + return(NULL) + } + + inspections.complete +} + +# getValueFromKeyValueString --------------------------------------------------- +getValueFromKeyValueString <- function(keyvalue) +{ + sapply(strsplit(keyvalue, "="), "[", 2L) +} + +# extractInspectionData -------------------------------------------------------- +extractInspectionData <- function(b.lines, header.info, captions) +{ + inspections <- kwb.utils::csvTextToDataFrame( + text = paste(b.lines, collapse = "\n"), + sep = header.info$separator, + dec = header.info$decimal, + quote = header.info$quote, + comment.char = "", + stringsAsFactors = FALSE + ) + + stats::setNames(inspections, captions) +} diff --git a/R/getObservationRecordsFromEuLines.R b/R/getObservationRecordsFromEuLines.R new file mode 100644 index 0000000..22dab09 --- /dev/null +++ b/R/getObservationRecordsFromEuLines.R @@ -0,0 +1,17 @@ +# getObservationRecordsFromEuLines --------------------------------------------- +getObservationRecordsFromEuLines <- function(eu_lines, header.info, dbg) +{ + observations <- try( + getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), + silent = TRUE + ) + + if (kwb.utils::isTryError(observations)) { + headerInfo <- getHeaderInfo(eu_lines) + observations <- extractObservationData(eu_lines, headerInfo, header.info) + } + + kwb.utils::catIf(dbg, paste(nrow(observations), "observations extracted. ")) + + observations +} diff --git a/R/importInspections.R b/R/importInspections.R index 530d48f..b47e329 100644 --- a/R/importInspections.R +++ b/R/importInspections.R @@ -1,6 +1,5 @@ -# getInspectionsFromEuLines.new ------------------------------------------------ - -getInspectionsFromEuLines.new <- function( +# getInspectionRecords_v2 ------------------------------------------------------ +getInspectionRecords_v2 <- function( eu_lines, header.info, dbg = TRUE, getInfo = getInspectionHeaderInfo2 ) { @@ -20,7 +19,6 @@ getInspectionsFromEuLines.new <- function( } # getInspectionHeaderInfo ------------------------------------------------------ - getInspectionHeaderInfo <- function(eu_lines) { # Get list of matching sub expressions diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index c138925..145de64 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -81,58 +81,6 @@ readEuCodedFile <- function( ) } -# getInspectionRecordsFromEuLines ---------------------------------------------- -getInspectionRecordsFromEuLines <- function( - eu_lines, header.info, read.inspections, simple.algorithm, dbg -) -{ - if (!read.inspections) { - - warning( - "I (yet) cannot read the inspection data (#B-blocks). ", - "So I just returned the number of inspections instead of a ", - "data frame with all information on the inspection!" - ) - - return(length(grep("^#B01", eu_lines))) - } - - inspections <- if (simple.algorithm) { - getInspectionsFromEuLines(eu_lines, header.info, dbg = dbg > 1L) - } # else NULL - - # If the inspections could not be read with the simple algorithm (due to - # changing header rows) or if the user requests it, try it again with - # another algorithm - if (is.null(inspections)) { - inspections <- getInspectionsFromEuLines.new( - eu_lines, header.info, dbg = dbg - ) - } - - kwb.utils::catIf(dbg, paste(nrow(inspections), "inspections extracted. ")) - - inspections -} - -# getObservationRecordsFromEuLines --------------------------------------------- -getObservationRecordsFromEuLines <- function(eu_lines, header.info, dbg) -{ - observations <- try( - getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), - silent = TRUE - ) - - if (kwb.utils::isTryError(observations)) { - headerInfo <- getHeaderInfo(eu_lines) - observations <- extractObservationData(eu_lines, headerInfo, header.info) - } - - kwb.utils::catIf(dbg, paste(nrow(observations), "observations extracted. ")) - - observations -} - # renameColumnsToMeaningful ---------------------------------------------------- renameColumnsToMeaningful <- function(x) { @@ -141,143 +89,3 @@ renameColumnsToMeaningful <- function(x) kwb.utils::renameColumns(x, renamings) } -# getFileHeaderFromEuLines ----------------------------------------------------- -getFileHeaderFromEuLines <- function(eu_lines, warn = TRUE) -{ - a.lines <- grep("^#A", eu_lines, value = TRUE) - - # original_fields <- do.call(kwb.utils::toLookupList, kwb.utils::toKeysAndValues( - # kwb.utils::collapsed(gsub("^#", "", a.lines), "@"), - # separators = c("@", "=") - # )) - # - # renamed_fields <- kwb.utils::renameColumns(original_fields, list( - # A1 = "encoding", A2 = "language", A3 = "separator", A4 = "decimal", - # A5 = "quote", A6 = "year" - # )) - - # Set quote to "" instead of NA because read.table will give strange results - quote <- findKeyAndExtractValue(a.lines, "A5", warn = warn) - - list( - encoding = findKeyAndExtractValue(a.lines, "A1", warn = warn), - language = findKeyAndExtractValue(a.lines, "A2", warn = warn), - separator = findKeyAndExtractValue(a.lines, "A3", warn = warn), - decimal = findKeyAndExtractValue(a.lines, "A4", warn = warn), - quote = kwb.utils::defaultIfNA(quote, ""), - year = findKeyAndExtractValue(a.lines, "A6", warn = warn) - ) -} - -# findKeyAndExtractValue ------------------------------------------------------- -findKeyAndExtractValue <- function(keyvalues, key, default = NA, warn = TRUE) -{ - pattern <- paste0("^#", key) - - index <- grep(pattern, keyvalues) - - if (length(index) == 0L) { - - warnMessage <- sprintf( - "Key '#%s' not found in the #A-header of the file.", key - ) - - if (! is.na(default)) { - warnMessage <- paste(warnMessage, "I will use the default:", default) - } - - if (warn) { - message(warnMessage) - warning(warnMessage) - } - - default - - } else { - - strsplit(keyvalues[index], "=")[[1L]][2L] - } -} - -# getInspectionsFromEuLines ---------------------------------------------------- -getInspectionsFromEuLines <- function(eu_lines, header.info, dbg = TRUE) -{ - inspections.complete <- NULL - - header.line.number <- 1L - - continue <- TRUE - - indices.B <- grep("^#B01", eu_lines) - - aborted <- FALSE - - while (! aborted && length(indices.B) > 0L) { - - b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) - - b.captions <- strsplit(b.caption.lines, header.info$separator) - - if (kwb.utils::allAreEqual(b.captions)) { - - inspections <- extractInspectionData( - b.lines = eu_lines[indices.B + 1L], - header.info = header.info, - captions = b.captions[[1L]] - ) - - inspections.complete <- kwb.utils::safeColumnBind( - inspections.complete, inspections - ) - - } else { - - if (dbg) { - message( - "The #B-header lines differ within the file -> I will change the ", - "algorithm..." - ) - } - - aborted <- TRUE - } - - header.line.number <- header.line.number + 1L - - indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) - } - - if (aborted) { - return(NULL) - } - - inspections.complete -} - -# extractInspectionData -------------------------------------------------------- -extractInspectionData <- function(b.lines, header.info, captions) -{ - inspections <- kwb.utils::csvTextToDataFrame( - text = paste(b.lines, collapse = "\n"), - sep = header.info$separator, - dec = header.info$decimal, - quote = header.info$quote, - comment.char = "", - stringsAsFactors = FALSE - ) - - stats::setNames(inspections, captions) -} - -# getValueFromKeyValueString --------------------------------------------------- -getValueFromKeyValueString <- function(keyvalue) -{ - sapply(strsplit(keyvalue, "="), "[", 2L) -} - -# setFilename ------------------------------------------------------------------ -setFilename <- function(data, name) -{ - data[["file"]] <- name - kwb.utils::moveColumnsToFront(data, "file") -} diff --git a/R/readEuCodedFiles.R b/R/readEuCodedFiles.R index 0d06114..9424dfe 100644 --- a/R/readEuCodedFiles.R +++ b/R/readEuCodedFiles.R @@ -74,3 +74,10 @@ readEuCodedFiles <- function( # Return the indices of the files that could not be read correctly structure(result, which_failed = which(failed)) } + +# setFilename ------------------------------------------------------------------ +setFilename <- function(data, name) +{ + data[["file"]] <- name + kwb.utils::moveColumnsToFront(data, "file") +} diff --git a/tests/testthat/test-function-getInspectionsFromEuLines.R b/tests/testthat/test-function-getInspectionRecords_v1.R similarity index 51% rename from tests/testthat/test-function-getInspectionsFromEuLines.R rename to tests/testthat/test-function-getInspectionRecords_v1.R index 1cfc3d6..1932c18 100644 --- a/tests/testthat/test-function-getInspectionsFromEuLines.R +++ b/tests/testthat/test-function-getInspectionRecords_v1.R @@ -1,6 +1,6 @@ -test_that("getInspectionsFromEuLines() works", { +test_that("getInspectionRecords_v1() works", { - f <- kwb.en13508.2:::getInspectionsFromEuLines + f <- kwb.en13508.2:::getInspectionRecords_v1 expect_error(f()) diff --git a/tests/testthat/test-function-getInspectionRecords_v2.R b/tests/testthat/test-function-getInspectionRecords_v2.R new file mode 100644 index 0000000..ed440b0 --- /dev/null +++ b/tests/testthat/test-function-getInspectionRecords_v2.R @@ -0,0 +1,6 @@ +test_that("getInspectionRecords_v2() works", { + + f <- kwb.en13508.2:::getInspectionRecords_v2 + + expect_error(f()) +}) diff --git a/tests/testthat/test-function-getInspectionsFromEuLines.new.R b/tests/testthat/test-function-getInspectionsFromEuLines.new.R deleted file mode 100644 index 30ae036..0000000 --- a/tests/testthat/test-function-getInspectionsFromEuLines.new.R +++ /dev/null @@ -1,17 +0,0 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - -test_that("getInspectionsFromEuLines.new() works", { - - expect_error( - kwb.en13508.2:::getInspectionsFromEuLines.new(eu_lines = 1, header.info = 1) - # invalid 'text' argument - ) - expect_error( - kwb.en13508.2:::getInspectionsFromEuLines.new(eu_lines = "a", header.info = 1) - # object 'results' not found - ) - -}) - diff --git a/tests/testthat/test-function-readEuCodedFile.R b/tests/testthat/test-function-readEuCodedFile.R index b6ca8c5..7d16f68 100644 --- a/tests/testthat/test-function-readEuCodedFile.R +++ b/tests/testthat/test-function-readEuCodedFile.R @@ -7,7 +7,7 @@ test_that("readEuCodedFile() works", { expect_error(f()) result.1 <- f(getExampleFile(), dbg = FALSE) - result.2 <- f(getExampleFile(), dbg = FALSE, short.names = FALSE) + result.2 <- f(getExampleFile(), dbg = FALSE, meaningful.names = TRUE) expect_type(result.1, "list") expect_identical(names(result.1), c("header.info", "inspections", "observations")) From a081b4d2f2788be7e209ad0b99b7ec0643098fff Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 01:41:30 +0200 Subject: [PATCH 058/141] Replace arg "getInfo" with "version" and rename functions: - getInspectionHeaderInfo() -> getInspectionHeaderInfo_v1() - getInspectionHeaderInfo2() -> getInspectionHeaderInfo_v2() --- R/importInspections.R | 17 ++++++++++++----- .../test-function-getInspectionHeaderInfo.R | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/R/importInspections.R b/R/importInspections.R index b47e329..92f3769 100644 --- a/R/importInspections.R +++ b/R/importInspections.R @@ -1,11 +1,17 @@ # getInspectionRecords_v2 ------------------------------------------------------ getInspectionRecords_v2 <- function( - eu_lines, header.info, dbg = TRUE, getInfo = getInspectionHeaderInfo2 + eu_lines, header.info, dbg = TRUE, version = 2L ) { + headerInfos <- if (version == 1L) { + getInspectionHeaderInfo_v1(eu_lines) + } else if (version == 2L) { + getInspectionHeaderInfo_v2(eu_lines) + } + x <- mergeInspectionBlocks(extractInspectionBlocks( eu_lines = eu_lines, - headerInfos = getInfo(eu_lines), + headerInfos = headerInfos, sep = get_elements(header.info, "separator"), dec = get_elements(header.info, "decimal"), quoteCharacter = get_elements(header.info, "quote"), @@ -18,8 +24,8 @@ getInspectionRecords_v2 <- function( ) } -# getInspectionHeaderInfo ------------------------------------------------------ -getInspectionHeaderInfo <- function(eu_lines) +# getInspectionHeaderInfo_v1 --------------------------------------------------- +getInspectionHeaderInfo_v1 <- function(eu_lines) { # Get list of matching sub expressions matches <- kwb.utils::subExpressionMatches("^#B(\\d\\d)=(.*)$", eu_lines) @@ -54,7 +60,8 @@ getInspectionHeaderInfo <- function(eu_lines) stats::setNames(header_rows, unique_headers) } -getInspectionHeaderInfo2 <- function(eu_lines) +# getInspectionHeaderInfo_v2 --------------------------------------------------- +getInspectionHeaderInfo_v2 <- function(eu_lines) { # Get list of matching sub expressions matches <- kwb.utils::extractSubstring("^#B(\\d\\d)=(.*)$", eu_lines, c( diff --git a/tests/testthat/test-function-getInspectionHeaderInfo.R b/tests/testthat/test-function-getInspectionHeaderInfo.R index 513b241..782e569 100644 --- a/tests/testthat/test-function-getInspectionHeaderInfo.R +++ b/tests/testthat/test-function-getInspectionHeaderInfo.R @@ -1,6 +1,6 @@ test_that("getInspectionHeaderInfo() works", { - f <- kwb.en13508.2:::getInspectionHeaderInfo + f <- kwb.en13508.2:::getInspectionHeaderInfo_v1 expect_error(f()) From 5bebb938c1f377742d0113f5d0eafc65c4e8bd71 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 01:49:48 +0200 Subject: [PATCH 059/141] Improve names, use intermediate variable --- R/importInspections.R | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/R/importInspections.R b/R/importInspections.R index 92f3769..ca44a82 100644 --- a/R/importInspections.R +++ b/R/importInspections.R @@ -8,19 +8,21 @@ getInspectionRecords_v2 <- function( } else if (version == 2L) { getInspectionHeaderInfo_v2(eu_lines) } - - x <- mergeInspectionBlocks(extractInspectionBlocks( + + blocks <- extractInspectionBlocks( eu_lines = eu_lines, headerInfos = headerInfos, sep = get_elements(header.info, "separator"), dec = get_elements(header.info, "decimal"), quoteCharacter = get_elements(header.info, "quote"), dbg = dbg - )) + ) + + merged <- mergeInspectionBlocks(inspectionBlocks) structure( - kwb.utils::removeColumns(x, "row"), - B.rows = data.frame(inspno = seq_len(nrow(x)), rows = x$row) + kwb.utils::removeColumns(merged, "row"), + B.rows = data.frame(inspno = seq_len(nrow(merged)), rows = x$row) ) } @@ -149,7 +151,6 @@ extractInspectionBlocks <- function( } # textblockToDataframe --------------------------------------------------------- - textblockToDataframe <- function( textblock, sep, dec, quoteCharacter, captionLine, rowNumbers, dbg = TRUE ) @@ -196,7 +197,6 @@ textblockToDataframe <- function( } # getColumnsToRemove ----------------------------------------------------------- - getColumnsToRemove <- function(x, captions, duplicates, dbg = TRUE) { columnsToRemove <- numeric() @@ -239,7 +239,6 @@ getColumnsToRemove <- function(x, captions, duplicates, dbg = TRUE) } # mergeInspectionBlocks -------------------------------------------------------- - mergeInspectionBlocks <- function(inspectionBlocks) { indices <- seq_along(inspectionBlocks) @@ -293,7 +292,6 @@ mergeInspectionBlocks <- function(inspectionBlocks) } # removeDuplicatedColumns ------------------------------------------------------ - removeDuplicatedColumns <- function(x, dbg = TRUE) { captions <- names(x) @@ -325,7 +323,6 @@ removeDuplicatedColumns <- function(x, dbg = TRUE) } # cleanDuplicatedColumns ------------------------------------------------------- - cleanDuplicatedColumns <- function(x) { captions <- names(x) From 32806d81779df01734aa2301cebfe5954668e408 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 01:56:44 +0200 Subject: [PATCH 060/141] Move function to its own file, rename files --- R/getInspectionRecordsFromEuLines.R | 76 ------------------- R/getInspectionRecords_v1.R | 75 ++++++++++++++++++ ...nspections.R => getInspectionRecords_v2.R} | 2 +- 3 files changed, 76 insertions(+), 77 deletions(-) create mode 100644 R/getInspectionRecords_v1.R rename R/{importInspections.R => getInspectionRecords_v2.R} (99%) diff --git a/R/getInspectionRecordsFromEuLines.R b/R/getInspectionRecordsFromEuLines.R index 7c23598..f96d80d 100644 --- a/R/getInspectionRecordsFromEuLines.R +++ b/R/getInspectionRecordsFromEuLines.R @@ -31,79 +31,3 @@ getInspectionRecordsFromEuLines <- function( inspections } - -# getInspectionRecords_v1 ------------------------------------------------------ -getInspectionRecords_v1 <- function(eu_lines, header.info, dbg = TRUE) -{ - inspections.complete <- NULL - - header.line.number <- 1L - - continue <- TRUE - - indices.B <- grep("^#B01", eu_lines) - - aborted <- FALSE - - while (! aborted && length(indices.B) > 0L) { - - b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) - - b.captions <- strsplit(b.caption.lines, header.info$separator) - - if (kwb.utils::allAreEqual(b.captions)) { - - inspections <- extractInspectionData( - b.lines = eu_lines[indices.B + 1L], - header.info = header.info, - captions = b.captions[[1L]] - ) - - inspections.complete <- kwb.utils::safeColumnBind( - inspections.complete, inspections - ) - - } else { - - if (dbg) { - message( - "The #B-header lines differ within the file -> I will change the ", - "algorithm..." - ) - } - - aborted <- TRUE - } - - header.line.number <- header.line.number + 1L - - indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) - } - - if (aborted) { - return(NULL) - } - - inspections.complete -} - -# getValueFromKeyValueString --------------------------------------------------- -getValueFromKeyValueString <- function(keyvalue) -{ - sapply(strsplit(keyvalue, "="), "[", 2L) -} - -# extractInspectionData -------------------------------------------------------- -extractInspectionData <- function(b.lines, header.info, captions) -{ - inspections <- kwb.utils::csvTextToDataFrame( - text = paste(b.lines, collapse = "\n"), - sep = header.info$separator, - dec = header.info$decimal, - quote = header.info$quote, - comment.char = "", - stringsAsFactors = FALSE - ) - - stats::setNames(inspections, captions) -} diff --git a/R/getInspectionRecords_v1.R b/R/getInspectionRecords_v1.R new file mode 100644 index 0000000..b5f0d3f --- /dev/null +++ b/R/getInspectionRecords_v1.R @@ -0,0 +1,75 @@ +# getInspectionRecords_v1 ------------------------------------------------------ +getInspectionRecords_v1 <- function(eu_lines, header.info, dbg = TRUE) +{ + inspections.complete <- NULL + + header.line.number <- 1L + + continue <- TRUE + + indices.B <- grep("^#B01", eu_lines) + + aborted <- FALSE + + while (! aborted && length(indices.B) > 0L) { + + b.caption.lines <- getValueFromKeyValueString(eu_lines[indices.B]) + + b.captions <- strsplit(b.caption.lines, header.info$separator) + + if (kwb.utils::allAreEqual(b.captions)) { + + inspections <- extractInspectionData( + b.lines = eu_lines[indices.B + 1L], + header.info = header.info, + captions = b.captions[[1L]] + ) + + inspections.complete <- kwb.utils::safeColumnBind( + inspections.complete, inspections + ) + + } else { + + if (dbg) { + message( + "The #B-header lines differ within the file -> I will change the ", + "algorithm..." + ) + } + + aborted <- TRUE + } + + header.line.number <- header.line.number + 1L + + indices.B <- grep(sprintf("^#B%02d", header.line.number), eu_lines) + } + + if (aborted) { + return(NULL) + } + + inspections.complete +} + +# getValueFromKeyValueString --------------------------------------------------- +getValueFromKeyValueString <- function(keyvalue) +{ + sapply(strsplit(keyvalue, "="), "[", 2L) +} + +# extractInspectionData -------------------------------------------------------- +extractInspectionData <- function(b.lines, header.info, captions) +{ + inspections <- kwb.utils::csvTextToDataFrame( + text = paste(b.lines, collapse = "\n"), + sep = header.info$separator, + dec = header.info$decimal, + quote = header.info$quote, + comment.char = "", + stringsAsFactors = FALSE + ) + + stats::setNames(inspections, captions) +} diff --git a/R/importInspections.R b/R/getInspectionRecords_v2.R similarity index 99% rename from R/importInspections.R rename to R/getInspectionRecords_v2.R index ca44a82..a9ed9d0 100644 --- a/R/importInspections.R +++ b/R/getInspectionRecords_v2.R @@ -9,7 +9,7 @@ getInspectionRecords_v2 <- function( getInspectionHeaderInfo_v2(eu_lines) } - blocks <- extractInspectionBlocks( + inspectionBlocks <- extractInspectionBlocks( eu_lines = eu_lines, headerInfos = headerInfos, sep = get_elements(header.info, "separator"), From c694b7b836b560ef44c4a16fd0ac77e5b2e5231d Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 02:08:12 +0200 Subject: [PATCH 061/141] Fix :bug:: Consider variable renaming --- R/getInspectionRecords_v2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index a9ed9d0..dcb201b 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -22,7 +22,7 @@ getInspectionRecords_v2 <- function( structure( kwb.utils::removeColumns(merged, "row"), - B.rows = data.frame(inspno = seq_len(nrow(merged)), rows = x$row) + B.rows = data.frame(inspno = seq_len(nrow(merged)), rows = merged[["row"]]) ) } From fc5534757655ef75347917b134f63bfe5772d47d Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Aug 2022 07:17:40 +0200 Subject: [PATCH 062/141] Move two versions of functions into two files to be able to compare with diff tool --- R/getInspectionHeaderInfo_v1.R | 37 ++++++++++++++++++ R/getInspectionHeaderInfo_v2.R | 38 +++++++++++++++++++ R/getInspectionRecords_v2.R | 68 ---------------------------------- 3 files changed, 75 insertions(+), 68 deletions(-) create mode 100644 R/getInspectionHeaderInfo_v1.R create mode 100644 R/getInspectionHeaderInfo_v2.R diff --git a/R/getInspectionHeaderInfo_v1.R b/R/getInspectionHeaderInfo_v1.R new file mode 100644 index 0000000..9422b4f --- /dev/null +++ b/R/getInspectionHeaderInfo_v1.R @@ -0,0 +1,37 @@ +# getInspectionHeaderInfo_v1 --------------------------------------------------- +getInspectionHeaderInfo_v1 <- function(eu_lines) +{ + pattern <- "^#B(\\d\\d)=(.*)$" + + # Get list of matching sub expressions + matches <- kwb.utils::subExpressionMatches(pattern, eu_lines) + + # Indices of header lines + header_indices <- which(! sapply(matches, is.null)) + + # Keep only the sub expressions of matching rows + matches <- matches[header_indices] + + # Number of header (#B01 = 1, #B02 = 2) + header_numbers <- as.numeric(sapply(matches, "[[", 1)) + + # Only the header (right of equal sign) + header_lines <- sapply(matches, "[[", 2) + + unique_headers <- unique(header_lines) + + # For each different type of header, determine the line numbers in which it + # occurs + header_rows <- lapply(unique_headers, function(header) { + + indices <- which(header_lines == header) + + header_number <- unique(header_numbers[indices]) + + stopifnot(length(header_number) == 1) + + list(line = header_number, rows = header_indices[indices]) + }) + + stats::setNames(header_rows, unique_headers) +} diff --git a/R/getInspectionHeaderInfo_v2.R b/R/getInspectionHeaderInfo_v2.R new file mode 100644 index 0000000..af4d806 --- /dev/null +++ b/R/getInspectionHeaderInfo_v2.R @@ -0,0 +1,38 @@ +# getInspectionHeaderInfo_v2 --------------------------------------------------- +getInspectionHeaderInfo_v2 <- function(eu_lines) +{ + pattern <- "^#B(\\d\\d)=(.*)$" + + # Get list of matching sub expressions + matches <- kwb.utils::extractSubstring( + pattern = pattern, + x = eu_lines, + index = c(number = 1L, fields = 2L) + ) + + matches$row <- seq_along(eu_lines) + + # Indices of header lines + header_indices <- which(!kwb.utils::isNaOrEmpty(matches$fields)) + + # Number of header (#B01 = 1, #B02 = 2) + header_numbers <- as.integer(matches$number[header_indices]) + + # Only the header (right of equal sign) + header_lines <- matches$fields[header_indices] + + unique_headers <- unique(header_lines) + + # For each different type of header, determine the line numbers in which it + # occurs + header_rows <- lapply(unique_headers, function(header) { + + indices <- which(header_lines == header) + + stopifnot(kwb.utils::allAreEqual(header_numbers[indices])) + + list(line = header_numbers[indices[1L]], rows = header_indices[indices]) + }) + + stats::setNames(header_rows, unique_headers) +} diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index dcb201b..9795cbb 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -26,74 +26,6 @@ getInspectionRecords_v2 <- function( ) } -# getInspectionHeaderInfo_v1 --------------------------------------------------- -getInspectionHeaderInfo_v1 <- function(eu_lines) -{ - # Get list of matching sub expressions - matches <- kwb.utils::subExpressionMatches("^#B(\\d\\d)=(.*)$", eu_lines) - - # Indices of header lines - header_indices <- which(! sapply(matches, is.null)) - - # Keep only the sub expressions of matching rows - matches <- matches[header_indices] - - # Number of header (#B01 = 1, #B02 = 2) - header_numbers <- as.numeric(sapply(matches, "[[", 1)) - - # Only the header (right of equal sign) - header_lines <- sapply(matches, "[[", 2) - - unique_headers <- unique(header_lines) - - # For each different type of header, determine the line numbers in which it - # occurs - header_rows <- lapply(unique_headers, function(header) { - - indices <- which(header_lines == header) - - header_number <- unique(header_numbers[indices]) - - stopifnot(length(header_number) == 1) - - list(line = header_number, rows = header_indices[indices]) - }) - - stats::setNames(header_rows, unique_headers) -} - -# getInspectionHeaderInfo_v2 --------------------------------------------------- -getInspectionHeaderInfo_v2 <- function(eu_lines) -{ - # Get list of matching sub expressions - matches <- kwb.utils::extractSubstring("^#B(\\d\\d)=(.*)$", eu_lines, c( - number = 1L, fields = 2L - )) - - matches$row <- seq_along(eu_lines) - - # Indices of header lines - header_indices <- which(!kwb.utils::isNaOrEmpty(matches$fields)) - - # Number of header (#B01 = 1, #B02 = 2) - header_numbers <- as.integer(matches$number[header_indices]) - - # Only the header (right of equal sign) - header_lines <- matches$fields[header_indices] - - unique_headers <- unique(header_lines) - - # For each different type of header, determine the line numbers in which it - # occurs - header_rows <- lapply(unique_headers, function(header) { - indices <- which(header_lines == header) - stopifnot(kwb.utils::allAreEqual(header_numbers[indices])) - list(line = header_numbers[indices[1L]], rows = header_indices[indices]) - }) - - stats::setNames(header_rows, unique_headers) -} - # extractInspectionBlocks ------------------------------------------------------ #' @importFrom kwb.utils isTryError extractInspectionBlocks <- function( From 3d9db6af823327112b4c8b11e0df6173bef6e270 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 8 Aug 2022 02:05:58 +0200 Subject: [PATCH 063/141] Use "L" for integer constants --- R/getInspectionHeaderInfo_v1.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/getInspectionHeaderInfo_v1.R b/R/getInspectionHeaderInfo_v1.R index 9422b4f..ce5ca1f 100644 --- a/R/getInspectionHeaderInfo_v1.R +++ b/R/getInspectionHeaderInfo_v1.R @@ -13,10 +13,10 @@ getInspectionHeaderInfo_v1 <- function(eu_lines) matches <- matches[header_indices] # Number of header (#B01 = 1, #B02 = 2) - header_numbers <- as.numeric(sapply(matches, "[[", 1)) + header_numbers <- as.integer(sapply(matches, "[[", 1L)) # Only the header (right of equal sign) - header_lines <- sapply(matches, "[[", 2) + header_lines <- sapply(matches, "[[", 2L) unique_headers <- unique(header_lines) @@ -28,7 +28,7 @@ getInspectionHeaderInfo_v1 <- function(eu_lines) header_number <- unique(header_numbers[indices]) - stopifnot(length(header_number) == 1) + stopifnot(length(header_number) == 1L) list(line = header_number, rows = header_indices[indices]) }) From 84a6e2c475e33f513988d6c776a2211169680fe4 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 8 Aug 2022 02:07:08 +0200 Subject: [PATCH 064/141] Use nzchar(), add commented microbenchmark code --- R/getInspectionHeaderInfo_v2.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/getInspectionHeaderInfo_v2.R b/R/getInspectionHeaderInfo_v2.R index af4d806..21fda20 100644 --- a/R/getInspectionHeaderInfo_v2.R +++ b/R/getInspectionHeaderInfo_v2.R @@ -1,9 +1,18 @@ +# eu_lines <- readLines(kwb.en13508.2:::getExampleFile()) +# +# microbenchmark::microbenchmark( +# x = kwb.en13508.2:::getInspectionHeaderInfo_v1(eu_lines), +# y = kwb.en13508.2:::getInspectionHeaderInfo_v2(eu_lines), +# times = 1000, +# check = "identical" +# ) + # getInspectionHeaderInfo_v2 --------------------------------------------------- getInspectionHeaderInfo_v2 <- function(eu_lines) { pattern <- "^#B(\\d\\d)=(.*)$" - # Get list of matching sub expressions + # Get data frame with one row per line and matching sub expressions matches <- kwb.utils::extractSubstring( pattern = pattern, x = eu_lines, @@ -13,7 +22,7 @@ getInspectionHeaderInfo_v2 <- function(eu_lines) matches$row <- seq_along(eu_lines) # Indices of header lines - header_indices <- which(!kwb.utils::isNaOrEmpty(matches$fields)) + header_indices <- which(nzchar(matches$fields)) # Number of header (#B01 = 1, #B02 = 2) header_numbers <- as.integer(matches$number[header_indices]) From 14817d077126ead78c8bcc0cb76bae487d32d7c3 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 8 Aug 2022 02:09:41 +0200 Subject: [PATCH 065/141] Rename arg "quoteCharacter" to "quote" and clean getInspectionRecords_v2.R --- R/getInspectionRecords_v2.R | 80 +++++++++---------- .../test-function-textblockToDataframe.R | 17 +--- 2 files changed, 40 insertions(+), 57 deletions(-) diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index 9795cbb..28b8b60 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -14,7 +14,7 @@ getInspectionRecords_v2 <- function( headerInfos = headerInfos, sep = get_elements(header.info, "separator"), dec = get_elements(header.info, "decimal"), - quoteCharacter = get_elements(header.info, "quote"), + quote = get_elements(header.info, "quote"), dbg = dbg ) @@ -29,7 +29,7 @@ getInspectionRecords_v2 <- function( # extractInspectionBlocks ------------------------------------------------------ #' @importFrom kwb.utils isTryError extractInspectionBlocks <- function( - eu_lines, headerInfos, sep, dec, quoteCharacter, dbg = TRUE + eu_lines, headerInfos, sep, dec, quote, dbg = TRUE ) { blocks <- list() @@ -40,43 +40,38 @@ extractInspectionBlocks <- function( #print(i) #i <- 5 - row_numbers <- headerInfos[[i]]$rows + 1 + row_numbers <- headerInfos[[i]]$rows + 1L - textblock <- paste(eu_lines[row_numbers], collapse = "\n") + textblock <- eu_lines[row_numbers] - try_result <- try(silent = TRUE, x <- textblockToDataframe( - textblock, sep, dec, quoteCharacter, captionLine = unique_headers[i], + x <- try(silent = TRUE, textblockToDataframe( + textblock, sep, dec, quote, captionLine = unique_headers[i], rowNumbers = row_numbers, dbg = dbg )) - if (! kwb.utils::isTryError(try_result)) { - - line_number <- headerInfos[[i]]$line - - if (length(blocks) < line_number) { - - blocks[[line_number]] <- list(line = line_number, dataFrames = list()) - } - - last_index <- length(blocks[[line_number]]$dataFrames) - - blocks[[line_number]]$dataFrames[[last_index + 1]] <- x - - } else { - - # Handle the error - stop( - sprintf( + # Handle possible error + if (kwb.utils::isTryError(x)) { + kwb.utils::stopFormatted( + paste0( "\nError reading #B-block number %d (lines %s):\n>>>\n%s\n<<<\n", - i, kwb.utils::collapsed(row_numbers, ", "), textblock - ), - sprintf( - "Original error message: >>>%s<<<\n", - attr(try_result, "condition")$message + "Original error message: >>>%s<<<\n" ), - call. = FALSE + i, + kwb.utils::collapsed(row_numbers, ", "), + paste(textblock, collapse = "\n"), + attr(x, "condition")$message ) } + + line_number <- headerInfos[[i]]$line + + if (length(blocks) < line_number) { + blocks[[line_number]] <- list(line = line_number, dataFrames = list()) + } + + last_index <- length(blocks[[line_number]]$dataFrames) + + blocks[[line_number]]$dataFrames[[last_index + 1L]] <- x } blocks @@ -84,38 +79,37 @@ extractInspectionBlocks <- function( # textblockToDataframe --------------------------------------------------------- textblockToDataframe <- function( - textblock, sep, dec, quoteCharacter, captionLine, rowNumbers, dbg = TRUE + textblock, sep, dec, quote, captionLine, rowNumbers, dbg = TRUE ) { #kwb.utils::catLines(textblock) x <- utils::read.table( - text = textblock, sep = sep, dec = dec, quote = quoteCharacter, + text = textblock, sep = sep, dec = dec, quote = quote, comment.char = "", stringsAsFactors = FALSE #, fill = TRUE ) - captions <- strsplit(captionLine, sep)[[1]] + captions <- strsplit(captionLine, sep)[[1L]] # the number of captions must be equal to the number of columns in x if (length(captions) != ncol(x)) { - textmessage <- sprintf( + kwb.utils::stopFormatted( paste0( "The number of captions (%d) is not equal to the number of columns ", "in the data block (%d). \nCaptions: %s\nFirst data row: %s\n" ), - length(captions), ncol(x), kwb.utils::stringList(captions), - kwb.utils::stringList(x[1, ]) + length(captions), + ncol(x), + kwb.utils::stringList(captions), + kwb.utils::stringList(x[1L, ]) ) - - stop(textmessage, call. = FALSE) - - } else { - - names(x) <- captions } - + + # Name the columns according to the captions + names(x) <- captions + # Check for duplicated columns and remove duplicated columns if all values # within the columns are identical to the corresponding values in the original # column diff --git a/tests/testthat/test-function-textblockToDataframe.R b/tests/testthat/test-function-textblockToDataframe.R index 79686d6..7fe6573 100644 --- a/tests/testthat/test-function-textblockToDataframe.R +++ b/tests/testthat/test-function-textblockToDataframe.R @@ -1,17 +1,6 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# - test_that("textblockToDataframe() works", { - expect_error( - kwb.en13508.2:::textblockToDataframe(textblock = 1, sep = 1) - # invalid 'text' argument - ) - expect_error( - kwb.en13508.2:::textblockToDataframe(textblock = "a", sep = 1) - # argument "quoteCharacter" is missing, with no default - ) - + f <- kwb.en13508.2:::textblockToDataframe + + expect_error(f()) }) - From 2bee26fca612f08e3bf4f8eb490008fcb2f02e0e Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 8 Aug 2022 02:11:15 +0200 Subject: [PATCH 066/141] Clean toEuFormat_v1() - use kwb.utils::findChanges() - improve naming --- R/toEuFormat.R | 81 +++++++++++--------- tests/testthat/test-function-toEuFormat_v1.R | 32 ++++---- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/R/toEuFormat.R b/R/toEuFormat.R index 03cefb9..0870c0e 100644 --- a/R/toEuFormat.R +++ b/R/toEuFormat.R @@ -43,8 +43,10 @@ toEuFormat <- function(inspection.data, version = 3L, ..., dbg = TRUE) #' @param dbg whether or not to show debug messages toEuFormat_v1 <- function(header.info, inspections, observations, dbg = TRUE) { - # Save the inspection numbers - inspnos <- get_columns(observations, "inspno") + #kwb.utils::assignPackageObjects("kwb.en13508.2") + + # Get row ranges of observations per inspection + changes <- kwb.utils::findChanges(get_columns(observations, "inspno")) # Remove the column containing the inspection numbers observations <- kwb.utils::removeColumns(observations, "inspno") @@ -52,50 +54,55 @@ toEuFormat_v1 <- function(header.info, inspections, observations, dbg = TRUE) tc <- textConnection("buffer", "w") on.exit(close(tc)) - #kwb.utils::assignPackageObjects("kwb.en13508.2") - writeLines(getHeaderLinesFromHeaderInfo(header.info), tc) - sep <- header.info$separator - - insp.header.line <- inspectionHeaderLine(names(inspections), sep) - obs.header.line <- observationHeaderLine(names(observations), sep) - - insp.numbers <- rownames(inspections) - - n_inspections <- nrow(inspections) + + a.header <- getHeaderLinesFromHeaderInfo(header.info) + b.header <- inspectionHeaderLine(names(inspections), sep) + c.header <- observationHeaderLine(names(observations), sep) # Define helper function - write_table <- function(x) utils::write.table( - x, - file = tc, - sep = sep, - col.names = FALSE, - row.names = FALSE, - append = TRUE, - na = "" - ) + writeTable <- function(header, data, add.z = TRUE) { + writeLines(header, tc) + utils::write.table( + data, + file = tc, + sep = sep, + col.names = FALSE, + row.names = FALSE, + append = TRUE, + na = "" + ) + if (add.z) { + writeLines("#Z", tc) + } + } - # Get index ranges of inspections (see kwb.event::hsEventsOnChange()) - n_obs <- length(inspnos) - change_index <- which(inspnos[1:(n_obs - 1L)] != inspnos[2:n_obs]) + 1L - begin_index <- c(1L, change_index) - end_index = c(change_index - 1L, n_obs) + # Write #A section + writeLines(a.header, tc) + + # Number of inspections + n_inspections <- nrow(inspections) # Loop through the inspections - for (i in seq_len(n_inspections)) { - - kwb.utils::catIf(i %% 100 == 0L, "i =", i, "\n") + for (inspno in seq_len(n_inspections)) { - writeLines(insp.header.line, tc) - write_table(inspections[i, ]) - writeLines(obs.header.line, tc) + kwb.utils::catIf(inspno %% 100 == 0L, "i =", i, "\n") - indices <- begin_index[i]:end_index[i] + # Write #B section + writeTable(header = b.header, data = inspections[inspno, , drop = FALSE]) - write_table(observations[indices, ]) - - if (i < n_inspections) { - writeLines("#Z", tc) + # Get observations for the current inspection + i <- which(changes[["value"]] == inspno) + + if (len <- length(i)) { + + stopifnot(len == 1L) + + # Extract rows of observations + data <- observations[changes[["starts_at"]][i]:changes[["ends_at"]][i], ] + + # Write #C section + writeTable(header = c.header, data = data, add.z = inspno < n_inspections) } } diff --git a/tests/testthat/test-function-toEuFormat_v1.R b/tests/testthat/test-function-toEuFormat_v1.R index 4eeae7f..8f0db14 100644 --- a/tests/testthat/test-function-toEuFormat_v1.R +++ b/tests/testthat/test-function-toEuFormat_v1.R @@ -1,17 +1,23 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# +#library(testthat) -test_that("toEuFormat_v1() works", { +test_that("toEuFormat_v1() and toEuFormat_v2() work", { - expect_error( - kwb.en13508.2:::toEuFormat_v1(header.info = 1, inspections = 1) - # $ operator is invalid for atomic vectors - ) - expect_error( - kwb.en13508.2:::toEuFormat_v1(header.info = list(key = c("a", "b"), value = 1:2), inspections = 1) - # argument "observations" is missing, with no default - ) + f1 <- kwb.en13508.2:::toEuFormat_v1 + f2 <- kwb.en13508.2:::toEuFormat_v1 + + expect_error(f()) -}) + header <- kwb.en13508.2::euCodedFileHeader() + + inspections <- data.frame(AAA = 1:3) + observations <- data.frame(inspno = 1:3, A = 1:3, B = 2:4) + r11 <- f1(header, inspections, observations) + r12 <- f2(header, inspections, observations) + + r21 <- f1(header, inspections, observations[c(1L, 3L), ]) + r22 <- f2(header, inspections, observations[c(1L, 3L), ]) + + expect_identical(r11, r12) + expect_identical(r21, r22) +}) From 82df3dc2172cedd37aaba641b5611f5e7169e503 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 6 Dec 2022 11:37:48 +0100 Subject: [PATCH 067/141] Pass "dbg" to removeEmptyLines() --- R/readEuCodedFile.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 145de64..9d3c59b 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -49,7 +49,7 @@ readEuCodedFile <- function( eu_lines <- run( "Removing empty lines (if any)", - removeEmptyLines(eu_lines) + removeEmptyLines(eu_lines, dbg = dbg) ) header.info <- run( From 32bb7e2cd827f17f82d38e240b29cead37c80ae8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 6 Dec 2022 11:59:24 +0100 Subject: [PATCH 068/141] Add material codes (Table C.4), save as UTF-8 --- inst/extdata/eucodes.csv | 171 ++++++++++++++++++++++----------------- 1 file changed, 99 insertions(+), 72 deletions(-) diff --git a/inst/extdata/eucodes.csv b/inst/extdata/eucodes.csv index 6934da8..fa7d95a 100644 --- a/inst/extdata/eucodes.csv +++ b/inst/extdata/eucodes.csv @@ -8,112 +8,139 @@ B3,F,Remarks,Remarks,Anmerkungen, B3,G,Circum1,Circumferential location 1,Lage am Umfang 1, B3,H,Circum2,Circumferential location 2,Lage am Umfang 2, B3,I,LongOrVertLoc,Longitudinal or vertical location,horizontale oder vertikale Lage, -B3,J,ContDefectCode,Continuous defect code,Kode fr Streckenschaden, +B3,J,ContDefectCode,Continuous defect code,Kode für Streckenschaden, B3,K,Joint,Joint,Verbindung, B3,L,DescLocField,Descriptive location field,Beschreibung der Schadensstelle, B3,M,PhotoRef,Photograph reference,Fotoreferenz, B3,N,VideoRef,Video reference,Videoreferenz, -C1,AAA,PipeRef,Pipeline length reference,Haltungsbezeichnung,Rfrence de tronon -C1,AAB,StartNodeRef,Start node reference,Anfangsknotenbezeichnung,Rfrence du noeud de dpart -C1,AAC,StartNodeCoord,Start node coordinate,Anfangsknotenkoordinaten,Coordonnes du noeud de dpart -C1,AAD,Node1Ref,Node 1 reference,Bezeichnung des Knotens 1,Rfrence du noeud 1 -C1,AAE,Node1Coord,Node 1 node coordinate,Koordinaten des Knoten 1,Coordonnes du noeud 1 -C1,AAF,Node2Ref,Node 2 reference,Bezeichnung des Knotens 2,Rfrence du noeud 2 -C1,AAG,Node2Coord,Node 2 node coordinate,Koordinaten des Knoten 2,Coordonnes du noeud 2 -C1,AAH,LongLocStart,Longitudinal location of start of lateral,Beginn einer Nebenleitung in Lngsrichtung,Emplacement longitudinal du point de dpart de la canalisation latrale -C1,AAI,CircLocLatStart,Circumferential location of start of lateral,Lage des Beginns einer Nebenleitung am Umfang,Emplacement circonfrentiel du point de dpart de la canalisation latrale -C1,AAJ,Location,Location,rtliche Lage,Emplacement -C1,AAK,Direction,Direction of inspection,Richtung der Inspektion,Sens de l'coulement -C1,AAL,LocationType,Location type,Angaben zur rtlichen Lage,Type d'emplacement -C1,AAM,Authority,Employing authority,Auftraggeber,Autorit responsable +C1,AAA,PipeRef,Pipeline length reference,Haltungsbezeichnung,Référence de tronçon +C1,AAB,StartNodeRef,Start node reference,Anfangsknotenbezeichnung,Référence du noeud de départ +C1,AAC,StartNodeCoord,Start node coordinate,Anfangsknotenkoordinaten,Coordonnées du noeud de départ +C1,AAD,Node1Ref,Node 1 reference,Bezeichnung des Knotens 1,Référence du noeud 1 +C1,AAE,Node1Coord,Node 1 node coordinate,Koordinaten des Knoten 1,Coordonnées du noeud 1 +C1,AAF,Node2Ref,Node 2 reference,Bezeichnung des Knotens 2,Référence du noeud 2 +C1,AAG,Node2Coord,Node 2 node coordinate,Koordinaten des Knoten 2,Coordonnées du noeud 2 +C1,AAH,LongLocStart,Longitudinal location of start of lateral,Beginn einer Nebenleitung in Längsrichtung,Emplacement longitudinal du point de départ de la canalisation latérale +C1,AAI,CircLocLatStart,Circumferential location of start of lateral,Lage des Beginns einer Nebenleitung am Umfang,Emplacement circonférentiel du point de départ de la canalisation latérale +C1,AAJ,Location,Location,Örtliche Lage,Emplacement +C1,AAK,Direction,Direction of inspection,Richtung der Inspektion,Sens de l'écoulement +C1,AAL,LocationType,Location type,Angaben zur örtlichen Lage,Type d'emplacement +C1,AAM,Authority,Employing authority,Auftraggeber,Autorité responsable C1,AAN,Town,Town or village,Gemeinde,Ville ou village C1,AAO,District,District,Bezirk,Quartier -C1,AAP,SewSysName,Name of sewer system,Bezeichnung des Entwsserungssystems,Nom du rseau d'assainissement -C1,AAQ,Ownership,Land ownership,Eigentumsverhltnisse Eigentumsverhltnisse,Proprit foncire +C1,AAP,SewSysName,Name of sewer system,Bezeichnung des Entwässerungssystems,Nom du réseau d'assainissement +C1,AAQ,Ownership,Land ownership,Eigentumsverhältnisse Eigentumsverhältnisse,Propriété foncière C1,AAR,,Not used,, C1,AAS,,Not used,, C1,AAT,,,Bezeichnung des Knotens 3, C1,AAU,,,Koordinaten des Knotens 2, C1,AAV,,,Anfangspunkt der Inspektion der Nebenleitung, C2,ABA,Standard,Standard,Norm,Norme -C2,ABB,OrigCodeSys,Original coding system,Ursprngliches Kodiersystem,Systme de codage initial -C2,ABC,LongRefPoint,Longitudinal reference point,Bezugspunkt in Lngsrichtung,Point de rfrence longitudinal +C2,ABB,OrigCodeSys,Original coding system,Ursprüngliches Kodiersystem,Système de codage initial +C2,ABC,LongRefPoint,Longitudinal reference point,Bezugspunkt in Längsrichtung,Point de référence longitudinal C2,ABD,,Not used,nicht belegt, -C2,ABE,InspMethod,Method of inspection,Inspektionsverfahren,Mthode d'inspection +C2,ABE,InspMethod,Method of inspection,Inspektionsverfahren,Méthode d'inspection C2,ABF,InspDate,Date of inspection,Datum der Inspektion,Date d'inspection C2,ABG,InspTime,Time of inspection,Uhrzeit der Inspektion,Heure d'inspection C2,ABH,InspectorName,Name of inspector,Name des Untersuchenden,Nom de l'inspecteur -C2,ABI,InspectorJob,Inspectors job reference,Inspektionskennung,Rfrence de fonction de l'inspecteur -C2,ABJ,EmployerJob,Employers job reference,Auftragskennung,Rfrence de fonction de l'employeur -C2,ABK,VideoMedia,Video image storage media,Video-Speichermedium,Stockage des images vido +C2,ABI,InspectorJob,Inspectors job reference,Inspektionskennung,Référence de fonction de l'inspecteur +C2,ABJ,EmployerJob,Employers job reference,Auftragskennung,Référence de fonction de l'employeur +C2,ABK,VideoMedia,Video image storage media,Video-Speichermedium,Stockage des images vidéo C2,ABL,PhotoFormat,Photograph image storage format,Foto-Speichermedium,Stockage des photographies -C2,ABM,VideoLoc,Video image location system,Videozhler,Systme de position sur la bande vido -C2,ABN,PhotoVolRef,Photograph volume reference,Fotoablagereferenz,Rfrence de photographie -C2,ABO,VideoVolRef,Video volume reference,Videoablagereferenz,Rfrence de vido +C2,ABM,VideoLoc,Video image location system,Videozähler,Système de position sur la bande vidéo +C2,ABN,PhotoVolRef,Photograph volume reference,Fotoablagereferenz,Référence de photographie +C2,ABO,VideoVolRef,Video volume reference,Videoablagereferenz,Référence de vidéo C2,ABP,InspPurpose,Purpose of inspection,Inspektionszweck,Objet de l'inspection -C2,ABQ,ExpectedLength,Anticipated length of inspection,Erwartete Inspektionslnge,Etendue d'inspection prvue +C2,ABQ,ExpectedLength,Anticipated length of inspection,Erwartete Inspektionslänge,Etendue d'inspection prévue C2,ABR,VideoFormat,Video Image Format,Video-Speicherformat, C2,ABS,VideoFile,Video Image Filename,Video-Speicherdateiname, C2,ABT,PhotoFile,Photograph Image Filename,Stand der Inspektion, C3,ACA,Shape,Shape,Querschnittsform,Forme -C3,ACB,Height,Height,Hhe,Hauteur +C3,ACB,Height,Height,Höhe,Hauteur C3,ACC,Width,Width,Breite,Largeur -C3,ACD,Material,Material,Werkstoff,Matriau -C3,ACE,LiningType,Lining Type,Auskleidung,Type de revtement -C3,ACF,LinigMaterial,Lining material,Auskleidungswerkstoff,Matriau de revtement -C3,ACG,PipeUnitLength,Pipe unit length,Rohrlnge,Longueur unitaire de conduite -C3,ACH,DepthStartNode,Depth at start node,Tiefe am Anfangsknoten,Profondeur au noeud de dpart -C3,ACI,DepthFinishNode,Depth at finish node,Tiefe am Endknoten,Profondeur au noeud d'arrive +C3,ACD,Material,Material,Werkstoff,Matériau +C3,ACE,LiningType,Lining Type,Auskleidung,Type de revêtement +C3,ACF,LinigMaterial,Lining material,Auskleidungswerkstoff,Matériau de revêtement +C3,ACG,PipeUnitLength,Pipe unit length,Rohrlänge,Longueur unitaire de conduite +C3,ACH,DepthStartNode,Depth at start node,Tiefe am Anfangsknoten,Profondeur au noeud de départ +C3,ACI,DepthFinishNode,Depth at finish node,Tiefe am Endknoten,Profondeur au noeud d'arrivée C3,ACJ,SewerType,Type of sewer,Art der Abwasserleitung oder des Abwasserkanals,Type de collecteur C3,ACK,SewerUse,Use of sewer,Benutzung der Abwasserleitung oder des Abwasserkanals,Utilisation du collecteur -C3,ACL,Strategic,Strategic,Strategische Bedeutung,Position stratgique +C3,ACL,Strategic,Strategic,Strategische Bedeutung,Position stratégique C3,ACM,Cleaning,Cleaning,Reinigung,Nettoyage -C3,ACN,OperationYear,Year came into operation,Jahr der Inbetriebnahme,Anne de mise en service -C5,ADA,Precipitation,Precipitation,Niederschlag,Prcipitations -C5,ADB,Temperature,Temperature,Temperatur,Temprature -C5,ADC,FlowControlMeasures,Flow control measures,Wasserhaltung,Rgulation du dbit +C3,ACN,OperationYear,Year came into operation,Jahr der Inbetriebnahme,Année de mise en service +C4,AA,,en(Asbestzement),Asbestzement,fr(Asbestzement) +C4,AB,,en(Bitumen),Bitumen,fr(Bitumen) +C4,AC,,en(Teerfaser),Teerfaser,fr(Teerfaser) +C4,AD,,en(Ziegelwerk),Ziegelwerk,fr(Ziegelwerk) +C4,AE,,en(Steinzeug),Steinzeug,fr(Steinzeug) +C4,AF,,en(Zementmörtel),Zementmörtel,fr(Zementmörtel) +C4,AG,,en(Beton),Beton,fr(Beton) +C4,AH,,en(Stahlbeton),Stahlbeton,fr(Stahlbeton) +C4,AI,,en(Spritzbeton),Spritzbeton,fr(Spritzbeton) +C4,AJ,,en(Betonsegmente),Betonsegmente,fr(Betonsegmente) +C4,AK,,en(Faserzement),Faserzement,fr(Faserzement) +C4,AL,,en(faserverstärkter Kunststoff),faserverstärkter Kunststoff,fr(faserverstärkter Kunststoff) +C4,AM,,en(Gusseisen),Gusseisen,fr(Gusseisen) +C4,AN,,en(Grauguss),Grauguss,fr(Grauguss) +C4,AO,,en(duktiles Gusseisen),duktiles Gusseisen,fr(duktiles Gusseisen) +C4,AP,,en(Stahl),Stahl,fr(Stahl) +C4,AQ,,en(nicht identifizierte Eisen- oder Stahlart),nicht identifizierte Eisen- oder Stahlart,fr(nicht identifizierte Eisen- oder Stahlart) +C4,AR,,en(Mauerwerk (im Verband)),Mauerwerk (im Verband),fr(Mauerwerk (im Verband)) +C4,AS,,en(Mauerwerk (nicht im Verband)),Mauerwerk (nicht im Verband),fr(Mauerwerk (nicht im Verband)) +C4,AT,,en(Epoxid),Epoxid,fr(Epoxid) +C4,AU,,en(Polyester),Polyester,fr(Polyester) +C4,AV,,en(Polyethylen),Polyethylen,fr(Polyethylen) +C4,AW,,en(Polypropylen),Polypropylen,fr(Polypropylen) +C4,AX,,en(PVC-U),PVC-U,fr(PVC-U) +C4,AY,,en(nicht identifizierter Kunststoff),nicht identifizierter Kunststoff,fr(nicht identifizierter Kunststoff) +C4,AZ,,en(nicht identifizierter Werkstoff),nicht identifizierter Werkstoff,fr(nicht identifizierter Werkstoff) +C4,Z,,en(anderer Werkstoff),anderer Werkstoff,fr(anderer Werkstoff) +C5,ADA,Precipitation,Precipitation,Niederschlag,Précipitations +C5,ADB,Temperature,Temperature,Temperatur,Température +C5,ADC,FlowControlMeasures,Flow control measures,Wasserhaltung,Régulation du débit C5,ADD,,is not used,wird nicht verwendet, -C5,ADE,Remarks,General remark,Allgemeine Anmerkungen,Remarque gnrale -C6,AEA,,Change in Video volume reference,Aenderung Videoablagereferenz,Rfrence de vido -C6,AEB,,Change in Photograph volume reference,Aenderung Fotoablagereferenz,Rfrence de photographie +C5,ADE,Remarks,General remark,Allgemeine Anmerkungen,Remarque générale +C6,AEA,,Change in Video volume reference,Aenderung Videoablagereferenz,Référence de vidéo +C6,AEB,,Change in Photograph volume reference,Aenderung Fotoablagereferenz,Référence de photographie C6,AEC,,Change in Shape,Aenderung Form,Section transversale -C6,AED,,Change in Material,Aenderung Werkstoff,Matriau -C6,AEE,,Change in Lining,Aenderung Auskleidung,Revtement -C6,AEF,,Change in Pipe unit length,Aenderung Rohrlnge,Longueur unitaire de conduite -C6,AEG,,Change in Precipitation,Aenderung Niederschlag,Prcipitations -T4,BAA,,Deformation,Verformung,Dformation +C6,AED,,Change in Material,Aenderung Werkstoff,Matériau +C6,AEE,,Change in Lining,Aenderung Auskleidung,Revêtement +C6,AEF,,Change in Pipe unit length,Aenderung Rohrlänge,Longueur unitaire de conduite +C6,AEG,,Change in Precipitation,Aenderung Niederschlag,Précipitations +T4,BAA,,Deformation,Verformung,Déformation T4,BAB,,Fissure,Rissbildung,Fissure T4,BAC,,Break/Collapse,Rohrbruch/Einsturz,Rupture/effondrement -T4,BAD,,Defective brickwork or masonry,Defektes Mauerwerk,Briquetage ou lments de maonnerie dfectueux -T4,BAE,,Missing mortar,Fehlender Mrtel,Mortier manquant -T4,BAF,,Surface damage,Oberflchenschaden,Dgradation de surface -T4,BAG,,Intruding connection,Einragender Anschluss,Branchement pntrant -T4,BAH,,Defective connection,Schadhafter Anschluss,Raccordement dfectueux -T4,BAI,,Intruding sealing material,Einragendes Dichtungsmaterial,Joint d'tanchit apparent -T4,BAJ,,Displaced joint,Verschobene Verbindung,Dplacement d'assemblage -T4,BAK,,Lining defect,Schadhafte Innenauskleidung,Dfaut de revtement -T4,BAL,,Defective repair,Schadhafte Reparatur,Rparation dfectueuse -T4,BAM,,Weld failure,Schadhafte Schweinaht,Dfaut de soudage -T4,BAN,,Porous pipe,Porses Rohr,Conduite poreuse -T4,BAO,,Soil visible through defect,Boden sichtbar,Sol visible par le dfaut -T4,BAP,,Void visible through defect,Hohlraum sichtbar,Vide visible par le dfaut +T4,BAD,,Defective brickwork or masonry,Defektes Mauerwerk,Briquetage ou éléments de maçonnerie défectueux +T4,BAE,,Missing mortar,Fehlender Mörtel,Mortier manquant +T4,BAF,,Surface damage,Oberflächenschaden,Dégradation de surface +T4,BAG,,Intruding connection,Einragender Anschluss,Branchement pénétrant +T4,BAH,,Defective connection,Schadhafter Anschluss,Raccordement défectueux +T4,BAI,,Intruding sealing material,Einragendes Dichtungsmaterial,Joint d'étanchéité apparent +T4,BAJ,,Displaced joint,Verschobene Verbindung,Déplacement d'assemblage +T4,BAK,,Lining defect,Schadhafte Innenauskleidung,Défaut de revêtement +T4,BAL,,Defective repair,Schadhafte Reparatur,Réparation défectueuse +T4,BAM,,Weld failure,Schadhafte Schweißnaht,Défaut de soudage +T4,BAN,,Porous pipe,Poröses Rohr,Conduite poreuse +T4,BAO,,Soil visible through defect,Boden sichtbar,Sol visible par le défaut +T4,BAP,,Void visible through defect,Hohlraum sichtbar,Vide visible par le défaut T5,BBA,,Roots,Wurzeln,Racines -T5,BBB,,Attached deposits,Anhaftende Stoffe,Dpts adhrents -T5,BBC,,Settled deposits,Ablagerungen,Dpts -T5,BBD,,Ingress of soil,Eindringen von Bodenmaterial,Entre de terre +T5,BBB,,Attached deposits,Anhaftende Stoffe,Dépôts adhérents +T5,BBC,,Settled deposits,Ablagerungen,Dépôts +T5,BBD,,Ingress of soil,Eindringen von Bodenmaterial,Entrée de terre T5,BBE,,Other obstacles,Andere Hindernisse,Autres obstacles T5,BBF,,Infiltration,Infiltration,Infiltration T5,BBG,,Exfiltration,Exfiltration,Exfiltration T5,BBH,,Vermin,Ungeziefer,Vermine T6,BCA,,Connection,Anschluss,Raccordement -T6,BCB,,Point repair,Punktuelle Reparatur,Rparation ponctuelle -T6,BCC,,Curvature of sewer,Krmmung der Leitung,Courbure du collecteur -T6,BCD,,Start node type,Anfangsknoten,Type du noeud de dpart -T6,BCE,,Finish node,Endknoten,Rfrence du noeud d'arrive -T7,BDA,,General photograph,Allgemeines Foto,Photographie gnrale -T7,BDB,,General remark,Allgemeine Anmerkung,Remarque gnrale -T7,BDC,,Inspection abandoned,Inspektion abgebrochen,Inspection abandonne +T6,BCB,,Point repair,Punktuelle Reparatur,Réparation ponctuelle +T6,BCC,,Curvature of sewer,Krümmung der Leitung,Courbure du collecteur +T6,BCD,,Start node type,Anfangsknoten,Type du noeud de départ +T6,BCE,,Finish node,Endknoten,Référence du noeud d'arrivée +T7,BDA,,General photograph,Allgemeines Foto,Photographie générale +T7,BDB,,General remark,Allgemeine Anmerkung,Remarque générale +T7,BDC,,Inspection abandoned,Inspektion abgebrochen,Inspection abandonnée T7,BDD,,Water level,Wasserspiegel,Niveau d'eau -T7,BDE,,Flow in incoming pipe,Zufluss aus einem Anschluss,coulement dans une canalisation entrante -T7,BDF,,Atmosphere within the pipeline,Atmosphre in der Leitung,Atmosphre au sein de la canalisation -T7,BDG,,Loss of vision,Keine Sicht,Perte de visibilit +T7,BDE,,Flow in incoming pipe,Zufluss aus einem Anschluss,Écoulement dans une canalisation entrante +T7,BDF,,Atmosphere within the pipeline,Atmosphäre in der Leitung,Atmosphère au sein de la canalisation +T7,BDG,,Loss of vision,Keine Sicht,Perte de visibilité From 9dbd8b069c47fdc42442b421b674ac281d7b5784 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 6 Dec 2022 12:57:02 +0100 Subject: [PATCH 069/141] Add English material names as found in preliminary English version of EN 13508-2 (EN_13508-2_2008_En.pdf) --- inst/extdata/eucodes.csv | 54 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/inst/extdata/eucodes.csv b/inst/extdata/eucodes.csv index fa7d95a..45351c4 100644 --- a/inst/extdata/eucodes.csv +++ b/inst/extdata/eucodes.csv @@ -69,33 +69,33 @@ C3,ACK,SewerUse,Use of sewer,Benutzung der Abwasserleitung oder des Abwasserkana C3,ACL,Strategic,Strategic,Strategische Bedeutung,Position stratégique C3,ACM,Cleaning,Cleaning,Reinigung,Nettoyage C3,ACN,OperationYear,Year came into operation,Jahr der Inbetriebnahme,Année de mise en service -C4,AA,,en(Asbestzement),Asbestzement,fr(Asbestzement) -C4,AB,,en(Bitumen),Bitumen,fr(Bitumen) -C4,AC,,en(Teerfaser),Teerfaser,fr(Teerfaser) -C4,AD,,en(Ziegelwerk),Ziegelwerk,fr(Ziegelwerk) -C4,AE,,en(Steinzeug),Steinzeug,fr(Steinzeug) -C4,AF,,en(Zementmörtel),Zementmörtel,fr(Zementmörtel) -C4,AG,,en(Beton),Beton,fr(Beton) -C4,AH,,en(Stahlbeton),Stahlbeton,fr(Stahlbeton) -C4,AI,,en(Spritzbeton),Spritzbeton,fr(Spritzbeton) -C4,AJ,,en(Betonsegmente),Betonsegmente,fr(Betonsegmente) -C4,AK,,en(Faserzement),Faserzement,fr(Faserzement) -C4,AL,,en(faserverstärkter Kunststoff),faserverstärkter Kunststoff,fr(faserverstärkter Kunststoff) -C4,AM,,en(Gusseisen),Gusseisen,fr(Gusseisen) -C4,AN,,en(Grauguss),Grauguss,fr(Grauguss) -C4,AO,,en(duktiles Gusseisen),duktiles Gusseisen,fr(duktiles Gusseisen) -C4,AP,,en(Stahl),Stahl,fr(Stahl) -C4,AQ,,en(nicht identifizierte Eisen- oder Stahlart),nicht identifizierte Eisen- oder Stahlart,fr(nicht identifizierte Eisen- oder Stahlart) -C4,AR,,en(Mauerwerk (im Verband)),Mauerwerk (im Verband),fr(Mauerwerk (im Verband)) -C4,AS,,en(Mauerwerk (nicht im Verband)),Mauerwerk (nicht im Verband),fr(Mauerwerk (nicht im Verband)) -C4,AT,,en(Epoxid),Epoxid,fr(Epoxid) -C4,AU,,en(Polyester),Polyester,fr(Polyester) -C4,AV,,en(Polyethylen),Polyethylen,fr(Polyethylen) -C4,AW,,en(Polypropylen),Polypropylen,fr(Polypropylen) -C4,AX,,en(PVC-U),PVC-U,fr(PVC-U) -C4,AY,,en(nicht identifizierter Kunststoff),nicht identifizierter Kunststoff,fr(nicht identifizierter Kunststoff) -C4,AZ,,en(nicht identifizierter Werkstoff),nicht identifizierter Werkstoff,fr(nicht identifizierter Werkstoff) -C4,Z,,en(anderer Werkstoff),anderer Werkstoff,fr(anderer Werkstoff) +C4,AA,,Asbestos cement,Asbestzement,fr(Asbestos cement) +C4,AB,,Bitumen,Bitumen,fr(Bitumen) +C4,AC,,Pitch fibre,Teerfaser,fr(Pitch fibre) +C4,AD,,Brickwork,Ziegelwerk,fr(Brickwork) +C4,AE,,Clay,Steinzeug,fr(Clay) +C4,AF,,Cement mortar,Zementmörtel,fr(Cement mortar) +C4,AG,,Concrete,Beton,fr(Concrete) +C4,AH,,Reinforced concrete,Stahlbeton,fr(Reinforced concrete) +C4,AI,,Sprayed concrete,Spritzbeton,fr(Sprayed concrete) +C4,AJ,,Concrete segments,Betonsegmente,fr(Concrete segments) +C4,AK,,Fibre cement,Faserzement,fr(Fibre cement) +C4,AL,,Fibre reinforced plastics,faserverstaerkter Kunststoff,fr(Fibre reinforced plastics) +C4,AM,,Cast iron,Gusseisen,fr(Cast iron) +C4,AN,,Grey cast iron,Grauguss,fr(Grey cast iron) +C4,AO,,Ductile cast iron,duktiles Gusseisen,fr(Ductile cast iron) +C4,AP,,Steel,Stahl,fr(Steel) +C4,AQ,,Unidentified type of Iron or steel,nicht identifizierte Eisen- oder Stahlart,fr(Unidentified type of Iron or steel) +C4,AR,,Masonry (coursed),Mauerwerk (im Verband),fr(Masonry (coursed)) +C4,AS,,Masonry (uncoursed),Mauerwerk (nicht im Verband),fr(Masonry (uncoursed)) +C4,AT,,Epoxy,Epoxid,fr(Epoxy) +C4,AU,,Polyester,Polyester,fr(Polyester) +C4,AV,,Polyethylene,Polyethylen,fr(Polyethylene) +C4,AW,,Polypropylene,Polypropylen,fr(Polypropylene) +C4,AX,,PVC-U,PVC-U,fr(PVC-U) +C4,AY,,Unidentified type of plastics,nicht identifizierter Kunststoff,fr(Unidentified type of plastics) +C4,AZ,,Unidentified material,nicht identifizierter Werkstoff,fr(Unidentified material) +C4,Z,,Other material,anderer Werkstoff,fr(Other material) C5,ADA,Precipitation,Precipitation,Niederschlag,Précipitations C5,ADB,Temperature,Temperature,Temperatur,Température C5,ADC,FlowControlMeasures,Flow control measures,Wasserhaltung,Régulation du débit From d32105cf505b7f638c1853d061cee34949287754 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 19:19:11 +0200 Subject: [PATCH 070/141] Update RoxygenNote in DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8471942..ce6029a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,4 +24,4 @@ Suggests: Remotes: github::kwb-r/kwb.utils Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 From 888b60d3c0999e81330731a8c91bd59f8f90d54e Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 19:19:47 +0200 Subject: [PATCH 071/141] Stop if columns required for id are missing --- R/createInspectionId.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/createInspectionId.R b/R/createInspectionId.R index bd1e49a..4de0f84 100644 --- a/R/createInspectionId.R +++ b/R/createInspectionId.R @@ -5,6 +5,8 @@ createInspectionId <- function( n.chars = 8L ) { + kwb.utils::checkForMissingColumns(inspections, id.columns) + duplicateInfo <- kwb.utils::findPartialDuplicates(inspections, id.columns) if (! is.null(duplicateInfo)) { From 9f0689f8e9de75004b0e887d5e33b059610e6dee Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 19:20:43 +0200 Subject: [PATCH 072/141] Create time "on the fly" if time column missing --- R/setGlobalInspectionID.R | 57 ++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 768f893..bac52d1 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -7,10 +7,18 @@ #' \code{inspections}, \code{observations} #' @param project name of project to which the data are related, such as: #' "Lausanne" +#' @param defaultTime default time string to use if column InspTime is not +#' available. Default: "12:00". A random number will be generated for the +#' seconds, just to increase the chance that setting the time is enough to +#' generate a unique key. #' @return list with the same elements as in \code{inspection.data} but with #' columns \code{inspid} being added to the data frames "inspections" and #' "observations" -setGlobalInspectionID <- function(inspection.data, project = NULL) +setGlobalInspectionID <- function( + inspection.data, + project = NULL, + default.time = "12:00" +) { if (is.null(project)) { stop( @@ -19,22 +27,53 @@ setGlobalInspectionID <- function(inspection.data, project = NULL) ) } - header.info <- kwb.utils::selectElements(inspection.data, "header.info") - inspections <- kwb.utils::selectElements(inspection.data, "inspections") - observations <- kwb.utils::selectElements(inspection.data, "observations") + fetch <- kwb.utils::createAccessor(inspection.data) + + # Just a shortcut + removeEmpty <- function(df) kwb.utils::removeEmptyColumns(df, dbg = FALSE) - inspections <- kwb.utils::removeEmptyColumns(inspections) + inspections <- removeEmpty(fetch("inspections")) + observations <- removeEmpty(fetch("observations")) + inspections[["project"]] <- project + + # The following function requires the column "InspTime". If this column does + # not exist, create it with a default value + if (is.null(inspections[["InspTime"]])) { + + message( + "There is no column 'InspTime' (inspection time). ", + "I will create this column\n", + "and set it to '", default.time, "' (plus random seconds) ", + "for each inspection.\n", + "You may change this time value by setting the argument 'default.time'." + ) + + # We have to fix the random number generator otherwise the times are not + # reproducible! + set.seed(123L) + + # Generate a random number for the seconds + inspections[["InspTime"]] <- sprintf( + "%s:%02d", + default.time, + sample(0:59, size = nrow(inspections), replace = TRUE) + ) + } + + # Create the inspection IDs and store them in column "inspid" inspections[["inspid"]] <- createInspectionId(inspections) - observations <- kwb.utils::removeEmptyColumns(observations) i <- kwb.utils::selectColumns(observations, "inspno") observations[["inspid"]] <- kwb.utils::selectColumns(inspections, "inspid")[i] observations <- kwb.utils::removeColumns(observations, "inspno") + # Just a shortcut + inspidFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspid") + list( - header.info = header.info, - inspections = kwb.utils::moveColumnsToFront(inspections, "inspid"), - observations = kwb.utils::moveColumnsToFront(observations, "inspid") + header.info = fetch("header.info"), + inspections = inspidFirst(inspections), + observations = inspidFirst(observations) ) } From ed99f94b6d425a671e42debc5343536c9f1c4c20 Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 13 Sep 2023 07:41:16 +0200 Subject: [PATCH 073/141] Run kwb.pkgbuild::use_ghactions() --- .Rbuildignore | 1 + .github/workflows/R-CMD-check.yaml | 83 ++++++++++++++++++++++++++++ .github/workflows/pkgdown.yaml | 53 ++++++++++++++++++ .github/workflows/pr-commands.yaml | 51 +++++++++++++++++ .github/workflows/test-coverage.yaml | 48 ++++++++++++++++ 5 files changed, 236 insertions(+) create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 .github/workflows/pr-commands.yaml create mode 100644 .github/workflows/test-coverage.yaml diff --git a/.Rbuildignore b/.Rbuildignore index 6f183bc..1c2b618 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,4 @@ README\.md$ ^\.gitignore$ ^docs$ ^.gitlab-ci\.yml$ +^\.github$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..20bd49a --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,83 @@ +on: + push: + branches: + - master + - main + - dev + pull_request: + branches: + - master + - main + - dev + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: windows-latest, r: 'devel'} + - {os: windows-latest, r: 'oldrel'} + - {os: windows-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + use-public-rspm: true + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), "depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ matrix.config.r }}-3-${{ hashFiles('depends.Rds') }} + restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-3- + + - name: Install system dependencies + if: runner.os == 'Linux' + env: + RHUB_PLATFORM: linux-x86_64-ubuntu-gcc + run: | + Rscript -e "remotes::install_github('r-hub/sysreqs')" + sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") + sudo -s eval "$sysreqs" + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..ac3ba07 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,53 @@ +on: + push: + branches: + - main + - master + - dev + +name: pkgdown + +jobs: + pkgdown: + runs-on: windows-latest + env: + CURL_SSL_BACKEND: "openssl" + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + install.packages("pkgdown", type = "binary") + shell: Rscript {0} + + - name: Install package + run: R CMD INSTALL . + shell: cmd + + - name: Deploy package + run: | + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml new file mode 100644 index 0000000..0d3cb71 --- /dev/null +++ b/.github/workflows/pr-commands.yaml @@ -0,0 +1,51 @@ +on: + issue_comment: + types: [created] +name: Commands +jobs: + document: + if: startsWith(github.event.comment.body, '/document') + name: document + runs-on: macOS-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/pr-fetch@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: r-lib/actions/setup-r@master + - name: Install dependencies + run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' + - name: Document + run: Rscript -e 'roxygen2::roxygenise()' + - name: commit + run: | + git add man/\* NAMESPACE + git commit -m 'Document' + - uses: r-lib/actions/pr-push@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + style: + if: startsWith(github.event.comment.body, '/style') + name: style + runs-on: macOS-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/pr-fetch@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: r-lib/actions/setup-r@master + - name: Install dependencies + run: Rscript -e 'install.packages("styler")' + - name: Style + run: Rscript -e 'styler::style_pkg()' + - name: commit + run: | + git add \*.R + git commit -m 'Style' + - uses: r-lib/actions/pr-push@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..0f014df --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,48 @@ +on: + push: + branches: + - master + - main + pull_request: + branches: + - master + - main + +name: test-coverage + +jobs: + test-coverage: + runs-on: windows-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install dependencies + run: | + install.packages(c("remotes")) + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} From 75ddb2ceea9bb96dc299a8c27bf5d804e16ee8dd Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 13 Sep 2023 07:42:25 +0200 Subject: [PATCH 074/141] Run kwb.pkgbuild::use_index_md() --- .Rbuildignore | 2 + README.md | 132 +++++--------------------------------------------- index.md | 128 ++++-------------------------------------------- 3 files changed, 22 insertions(+), 240 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 1c2b618..e8a021a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,5 @@ README\.md$ ^docs$ ^.gitlab-ci\.yml$ ^\.github$ +^README\.md$ +^index\.md$ diff --git a/README.md b/README.md index a9d64d4..b0b5984 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,14 @@ -# kwb.en13508.2 +[![R-CMD-check](https://github.com/KWB-R/kwb.en13508.2/workflows/R-CMD-check/badge.svg)](https://github.com/KWB-R/kwb.en13508.2/actions?query=workflow%3AR-CMD-check) +[![pkgdown](https://github.com/KWB-R/kwb.en13508.2/workflows/pkgdown/badge.svg)](https://github.com/KWB-R/kwb.en13508.2/actions?query=workflow%3Apkgdown) +[![codecov](https://codecov.io/github/KWB-R/kwb.en13508.2/branch/main/graphs/badge.svg)](https://codecov.io/github/KWB-R/kwb.en13508.2) +[![Project Status](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) +[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/kwb.en13508.2)]() +[![R-Universe_Status_Badge](https://kwb-r.r-universe.dev/badges/kwb.en13508.2)](https://kwb-r.r-universe.dev/) -[![Appveyor build status](https://ci.appveyor.com/api/projects/status/i5xx4npr86rg783h/branch/master?svg=true)](https://ci.appveyor.com/project/KWB-R/kwb-en13508-2/branch/master) -[![Build Status](https://travis-ci.org/KWB-R/kwb.en13508.2.svg?branch=master)](https://travis-ci.org/KWB-R/kwb.en13508.2) -[![codecov](https://codecov.io/github/KWB-R/kwb.en13508.2/branch/master/graphs/badge.svg)](https://codecov.io/github/KWB-R/kwb.en13508.2) -[![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://www.tidyverse.org/lifecycle/#stable) -[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/kwb.en13508.2)](http://cran.r-project.org/package=kwb.en13508.2) +# kwb.en13508.2 -This R package provides functions to read and write data on CCTV sewerage -inspections. The format expected when reading and generated when writing is the -text format that is described in the European Norm -[EN 13508-2](http://www.dwa.de/dwa/shop/shop.nsf/Produktanzeige?openform&produktid=P-DWAA-8KTG5R). +functions to read and write CCTV inspections +according to EN13508-2. ## Installation @@ -28,119 +27,10 @@ if (! require("remotes")) { } # Install KWB package 'kwb.en13508.2' from GitHub - -remotes::install_github("kwb-r/kwb.en13508.2") -``` - -## Load the Package - -```r -library(kwb.en13508.2) -``` - -## Read a File Encoded in EN 13508.2 Format - -You can read a file formatted in EN 13508.2 text format using -`readEuCodedFile()`. - -For the purpose of demonstration, we load an example file that is contained -in the package: - -```r -# Set the relative path to the example file -file_path <- "extdata/example_13508_2.txt" - -# Set the absolute path to the example file in the package folder -input_file <- system.file(file_path, package = "kwb.en13508.2") - -# Read the example file -survey <- readEuCodedFile(input_file) -``` - -The result is a list with the three components representing the A-, B- and C- -parts, respectively, of the file: - -* `header.info`: A-part containing information on the file format -* `inspections`: B-parts containing information on the inspections as a whole -* `observations`: C-parts containing information on the detailed observations -that were made during the inspections - -### Part A: Information on the file format - -```r -survey$header.info -``` - -### Part B: Inspections - -```r -head(survey$inspections) -``` - -The codes used as column names correspond to the codes defined in the norm. -We provide a function `getCodes()` in the package that returns a table mapping -these codes to their meanings: - -```r -# Get all codes and meanings defined in the European Norm -code_info <- getCodes(fields = c("Code", "Text_EN")) - -# Show the meanings for the codes that are used in the table of inspections -code_info[code_info$Code %in% names(survey$inspections), ] -``` - - -### Part C: Observations - -```r -head(survey$observations) -``` - -Again, let's have a look at what the columns mean: - -```r -# Show the meanings for the codes that are used in the table of observations -code_info[code_info$Code %in% names(survey$observations), ] -``` - -The column `inspno` groups together observations that belong to the same -inspection. The number in the column refers to the row number in the table of -inspections (`survey$inspections`). For example, to get all the observations -that belong to the third inspection, you may filter `survey$observations` like -in the following. For a more compact output we exclude the tenth column ("F") -containing the remarks: - -```r -survey$observations[survey$observations$inspno == 3, -10] -``` - -The information on the inspection is found in the `inspno`-th row -(here: third row) of `config$inspections`: - -```r -survey$inspections[3, ] -``` - -## Write a File Encoded in EN 13508.2 Format - -Once you have prepared a list with the three components `header.info`, -`inspections` and `observations` as described above, you can use the function -`writeEuCodedFile()` to write a file formatted in EN 13508.2-format: - -```r -# Define the path to an output file -output_file <- file.path(tempdir(), "example_en13508.2.txt") - -# Write the CCTV survey data to the file -writeEuCodedFile(survey, output_file) -``` -The first 20 lines of the file produced read: - -```r -kwb.utils::catLines(readLines(output_file, 20)) +remotes::install_github("KWB-R/kwb.en13508.2") ``` -# Documentation +## Documentation Release: [https://kwb-r.github.io/kwb.en13508.2](https://kwb-r.github.io/kwb.en13508.2) diff --git a/index.md b/index.md index 782a697..1d35728 100644 --- a/index.md +++ b/index.md @@ -1,13 +1,12 @@ -[![Appveyor build status](https://ci.appveyor.com/api/projects/status/i5xx4npr86rg783h/branch/master?svg=true)](https://ci.appveyor.com/project/KWB-R/kwb-en13508-2/branch/master) -[![Build Status](https://travis-ci.org/KWB-R/kwb.en13508.2.svg?branch=master)](https://travis-ci.org/KWB-R/kwb.en13508.2) -[![codecov](https://codecov.io/github/KWB-R/kwb.en13508.2/branch/master/graphs/badge.svg)](https://codecov.io/github/KWB-R/kwb.en13508.2) -[![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://www.tidyverse.org/lifecycle/#stable) -[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/kwb.en13508.2)](http://cran.r-project.org/package=kwb.en13508.2) +[![R-CMD-check](https://github.com/KWB-R/kwb.en13508.2/workflows/R-CMD-check/badge.svg)](https://github.com/KWB-R/kwb.en13508.2/actions?query=workflow%3AR-CMD-check) +[![pkgdown](https://github.com/KWB-R/kwb.en13508.2/workflows/pkgdown/badge.svg)](https://github.com/KWB-R/kwb.en13508.2/actions?query=workflow%3Apkgdown) +[![codecov](https://codecov.io/github/KWB-R/kwb.en13508.2/branch/main/graphs/badge.svg)](https://codecov.io/github/KWB-R/kwb.en13508.2) +[![Project Status](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) +[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/kwb.en13508.2)]() +[![R-Universe_Status_Badge](https://kwb-r.r-universe.dev/badges/kwb.en13508.2)](https://kwb-r.r-universe.dev/) -This R package provides functions to read and write data on CCTV sewerage -inspections. The format expected when reading and generated when writing is the -text format that is described in the European Norm -[EN 13508-2](http://www.dwa.de/dwa/shop/shop.nsf/Produktanzeige?openform&produktid=P-DWAA-8KTG5R). +functions to read and write CCTV inspections +according to EN13508-2. ## Installation @@ -26,114 +25,5 @@ if (! require("remotes")) { } # Install KWB package 'kwb.en13508.2' from GitHub - -remotes::install_github("kwb-r/kwb.en13508.2") -``` - -## Load the Package - -```r -library(kwb.en13508.2) -``` - -## Read a File Encoded in EN 13508.2 Format - -You can read a file formatted in EN 13508.2 text format using -`readEuCodedFile()`. - -For the purpose of demonstration, we load an example file that is contained -in the package: - -```r -# Set the relative path to the example file -file_path <- "extdata/example_13508_2.txt" - -# Set the absolute path to the example file in the package folder -input_file <- system.file(file_path, package = "kwb.en13508.2") - -# Read the example file -survey <- readEuCodedFile(input_file) -``` - -The result is a list with the three components representing the A-, B- and C- -parts, respectively, of the file: - -* `header.info`: A-part containing information on the file format -* `inspections`: B-parts containing information on the inspections as a whole -* `observations`: C-parts containing information on the detailed observations -that were made during the inspections - -### Part A: Information on the file format - -```r -survey$header.info -``` - -### Part B: Inspections - -```r -head(survey$inspections) -``` - -The codes used as column names correspond to the codes defined in the norm. -We provide a function `getCodes()` in the package that returns a table mapping -these codes to their meanings: - -```r -# Get all codes and meanings defined in the European Norm -code_info <- getCodes(fields = c("Code", "Text_EN")) - -# Show the meanings for the codes that are used in the table of inspections -code_info[code_info$Code %in% names(survey$inspections), ] -``` - - -### Part C: Observations - -```r -head(survey$observations) -``` - -Again, let's have a look at what the columns mean: - -```r -# Show the meanings for the codes that are used in the table of observations -code_info[code_info$Code %in% names(survey$observations), ] -``` - -The column `inspno` groups together observations that belong to the same -inspection. The number in the column refers to the row number in the table of -inspections (`survey$inspections`). For example, to get all the observations -that belong to the third inspection, you may filter `survey$observations` like -in the following. For a more compact output we exclude the tenth column ("F") -containing the remarks: - -```r -survey$observations[survey$observations$inspno == 3, -10] -``` - -The information on the inspection is found in the `inspno`-th row -(here: third row) of `config$inspections`: - -```r -survey$inspections[3, ] -``` - -## Write a File Encoded in EN 13508.2 Format - -Once you have prepared a list with the three components `header.info`, -`inspections` and `observations` as described above, you can use the function -`writeEuCodedFile()` to write a file formatted in EN 13508.2-format: - -```r -# Define the path to an output file -output_file <- file.path(tempdir(), "example_en13508.2.txt") - -# Write the CCTV survey data to the file -writeEuCodedFile(survey, output_file) -``` -The first 20 lines of the file produced read: - -```r -writeLines(readLines(output_file, 20)) +remotes::install_github("KWB-R/kwb.en13508.2") ``` From d3fc9161c6014ecf08b58fac583376e74557a912 Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 13 Sep 2023 07:49:49 +0200 Subject: [PATCH 075/141] Manually adapt files similar to what Michael Rustler did here: https://github.com/KWB-R/kwb.umberto/pull/13 --- LICENSE | 2 +- LICENSE.md | 2 +- NEWS.md | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index 5e594a6..8e334f6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2015-2019 Kompetenzzentrum Wasser Berlin gGmbH (KWB) +Copyright (c) 2017-2023 Kompetenzzentrum Wasser Berlin gGmbH (KWB) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/LICENSE.md b/LICENSE.md index 597c16b..90998a0 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2015-2019 Kompetenzzentrum Wasser Berlin gGmbH +Copyright (c) 2017-2023 Kompetenzzentrum Wasser Berlin gGmbH Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NEWS.md b/NEWS.md index cc3f964..35bb3ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# Latest changes + +* * Harmonise with [kwb.pkgbuild](https://kwb-r.github.io/kwb.pkgbuild) + # kwb.en13508.2 0.2.0.9000 * getObservationsFromEuLines: Order columns by name, put column “inspno” first. From 2b5e776a4d9be39db096e2d696740b40c2e7a3bd Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 13 Sep 2023 07:58:26 +0200 Subject: [PATCH 076/141] Fix documentation of argument --- R/setGlobalInspectionID.R | 2 +- man/setGlobalInspectionID.Rd | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index bac52d1..981ca5f 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -7,7 +7,7 @@ #' \code{inspections}, \code{observations} #' @param project name of project to which the data are related, such as: #' "Lausanne" -#' @param defaultTime default time string to use if column InspTime is not +#' @param default.time default time string to use if column InspTime is not #' available. Default: "12:00". A random number will be generated for the #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index 621bcb8..5e7bde6 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -4,7 +4,7 @@ \alias{setGlobalInspectionID} \title{Set Global Inspection ID} \usage{ -setGlobalInspectionID(inspection.data, project = NULL) +setGlobalInspectionID(inspection.data, project = NULL, default.time = "12:00") } \arguments{ \item{inspection.data}{list with elements \code{header.info}, @@ -12,6 +12,11 @@ setGlobalInspectionID(inspection.data, project = NULL) \item{project}{name of project to which the data are related, such as: "Lausanne"} + +\item{default.time}{default time string to use if column InspTime is not +available. Default: "12:00". A random number will be generated for the +seconds, just to increase the chance that setting the time is enough to +generate a unique key.} } \value{ list with the same elements as in \code{inspection.data} but with From 97ab211e334cee7ff2b7153bdcf056569d6b2419 Mon Sep 17 00:00:00 2001 From: hsonne Date: Wed, 13 Sep 2023 08:08:34 +0200 Subject: [PATCH 077/141] Remove Travis and Appveyor configuration files --- .travis.yml | 31 ------------------------------- appveyor.yml | 45 --------------------------------------------- 2 files changed, 76 deletions(-) delete mode 100644 .travis.yml delete mode 100644 appveyor.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9837450..0000000 --- a/.travis.yml +++ /dev/null @@ -1,31 +0,0 @@ -############################################################################## -### Autogenerated with R package kwb.pkgbuild v0.1.1 -### (installed from 'Github (kwb-r/kwb.pkgbuild@0ac3694)' source code on 2019-09-06) -### by calling the function kwb.pkgbuild::use_autopkgdown("kwb.en13508.2") -### (file created at: 2019-09-09 15:29:20) -############################################################################## - - -language: r -sudo: required -cache: packages -r_packages: -- remotes -- covr -matrix: - include: - - r: devel - - r: release - after_success: - - Rscript -e 'covr::codecov()' - before_deploy: - - Rscript -e 'remotes::install_cran("pkgdown")' - deploy: - provider: script - script: Rscript -e 'pkgdown::deploy_site_github(verbose = TRUE)' - skip_cleanup: 'true' - on: - branch: - - master - - dev - - r: oldrel diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index e8efd3b..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,45 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' - -install: - ps: Bootstrap - -cache: - - C:\RLibrary - -# Adapt as necessary starting from here - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits From 0837c5b07246c4be5a141a47d5adbe6c6c097e45 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 5 Oct 2023 18:32:30 +0200 Subject: [PATCH 078/141] Consider existing time columns but with gaps --- R/setGlobalInspectionID.R | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 981ca5f..1441e88 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -39,25 +39,31 @@ setGlobalInspectionID <- function( # The following function requires the column "InspTime". If this column does # not exist, create it with a default value - if (is.null(inspections[["InspTime"]])) { + timeColumn <- "InspTime" + inspections <- kwb.utils::hsAddMissingCols(inspections, timeColumn) + + hasNoTime <- is.na(inspections[[timeColumn]]) + + if (any(hasNoTime)) { + + n_missing <- sum(hasNoTime) message( - "There is no column 'InspTime' (inspection time). ", - "I will create this column\n", - "and set it to '", default.time, "' (plus random seconds) ", - "for each inspection.\n", - "You may change this time value by setting the argument 'default.time'." + "There are ", n_missing, " missing inspection times. I will set missing ", + "inspection times to '", default.time, "' (plus random seconds) for ", + "each inspection. You may change this time value by setting the ", + "argument 'default.time'." ) - + # We have to fix the random number generator otherwise the times are not # reproducible! set.seed(123L) # Generate a random number for the seconds - inspections[["InspTime"]] <- sprintf( + inspections[[timeColumn]][hasNoTime] <- sprintf( "%s:%02d", default.time, - sample(0:59, size = nrow(inspections), replace = TRUE) + sample(0:59, size = n_missing, replace = TRUE) ) } From ea793948cffd1cf5559ec5b838a3dc4784f462eb Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 5 Oct 2023 18:35:23 +0200 Subject: [PATCH 079/141] Remove misleading "for each inspection" --- R/setGlobalInspectionID.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 1441e88..c828dac 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -50,9 +50,8 @@ setGlobalInspectionID <- function( message( "There are ", n_missing, " missing inspection times. I will set missing ", - "inspection times to '", default.time, "' (plus random seconds) for ", - "each inspection. You may change this time value by setting the ", - "argument 'default.time'." + "inspection times to '", default.time, "' (plus random seconds). You ", + "may change this time value by setting the argument 'default.time'." ) # We have to fix the random number generator otherwise the times are not From 0963e35cc59c6947899249bcdfb208d00468fa90 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 6 Oct 2023 19:03:46 +0200 Subject: [PATCH 080/141] Allow for C(hange)-codes in continuous defects and use accessor functions fetch(). Implement two simple tests. --- R/getLineDamageInfo.R | 28 ++++++----- .../test-function-getLineDamageInfo.R | 48 +++++++++++++++---- 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/R/getLineDamageInfo.R b/R/getLineDamageInfo.R index 12382db..5e562a7 100644 --- a/R/getLineDamageInfo.R +++ b/R/getLineDamageInfo.R @@ -14,25 +14,24 @@ #' @export getLineDamageInfo <- function(observations, dbg = TRUE) { - getcol <- kwb.utils::selectColumns - if (! "J" %in% names(observations)) { message("No column 'J' (line damages) found in table of observations.") return(NULL) } + + fetch <- kwb.utils::createAccessor(observations) - I <- asNumericIfRequired(getcol(observations, "I"), dbg = dbg) - - J <- getcol(observations, "J") + I <- asNumericIfRequired(fetch("I"), dbg = dbg) + J <- fetch("J") # Check if the values in J match the expected pattern stopOnInvalidLineDamageCodes(J) # Split line damage identifier in J into "A" or "B" (ld) and number (ldno) x <- kwb.utils::noFactorDataFrame( - ino = getcol(observations, "inspno"), - ld = substr(J, 1, 1), - ldno = substr(J, 2, nchar(J)) + ino = fetch("inspno"), + ld = substr(J, 1L, 1L), + ldno = substr(J, 2L, nchar(J)) ) info <- merge( @@ -43,11 +42,16 @@ getLineDamageInfo <- function(observations, dbg = TRUE) # Order by inspection number and line damage number info <- kwb.utils::orderBy(info, c("ino", "ldno")) + fetch <- kwb.utils::createAccessor(info) + + beg_x <- I[fetch("beg.at")] + end_x <- I[fetch("end.at")] + kwb.utils::setColumns( info, - beg.x = I[getcol(info, "beg.at")], - end.x = I[getcol(info, "end.at")], - length = I[getcol(info, "end.at")] - I[getcol(info, "beg.at")], + beg.x = beg_x, + end.x = end_x, + length = end_x - beg_x, dbg = FALSE ) } @@ -55,7 +59,7 @@ getLineDamageInfo <- function(observations, dbg = TRUE) # stopOnInvalidLineDamageCodes ------------------------------------------------- #' @importFrom kwb.utils stopFormatted stringList -stopOnInvalidLineDamageCodes <- function(J, pattern = "^$|^[AB]\\d+$") +stopOnInvalidLineDamageCodes <- function(J, pattern = "^$|^[ABC]\\d+$") { unique_values <- unique(J) diff --git a/tests/testthat/test-function-getLineDamageInfo.R b/tests/testthat/test-function-getLineDamageInfo.R index f18f385..c69db23 100644 --- a/tests/testthat/test-function-getLineDamageInfo.R +++ b/tests/testthat/test-function-getLineDamageInfo.R @@ -1,16 +1,44 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2022-07-27 07:01:23. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("getLineDamageInfo() works", { - expect_error( - kwb.rerau:::getLineDamageInfo() - # argument "observations" is missing, with no default + f <- kwb.en13508.2::getLineDamageInfo + + expect_error(f()) + + expect_message(result <- f(data.frame())) + expect_null(result) + + observations <- read.csv(text = " + inspno,I,J + 1,1.0,A1 + 1,2.0, + 1,3.0,B1" + ) + + expected <- read.csv(colClasses = c(ldno = "character"), text = " + ino,ldno,beg.at,end.at,beg.x,end.x,length + 1,1,1,3,1.0,3.0,2.0" ) + expect_identical(f(observations), expected) + + observations <- read.csv(text = " + inspno,I,J + 1,0.0, + 1,1.0,A2 + 1,2.0, + 1,3.0,C2 + 1,4.0,C2 + 1,5.0, + 1,6.0,B2 + 1,7.0," + ) + + expected <- read.csv(colClasses = c(ldno = "character"), text = " + ino,ldno,beg.at,end.at,beg.x,end.x,length + 1,2,2,7,1.0,6.0,5.0" + ) + + expect_identical(f(observations), expected) }) - From 537dcdf09ea0fb6d91c73ccb52242dee6f77b31f Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 24 Nov 2023 15:48:04 +0100 Subject: [PATCH 081/141] Add csv file with mainCode/char1/char2 combis --- inst/extdata/eucodes_full.csv | 474 ++++++++++++++++++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 inst/extdata/eucodes_full.csv diff --git a/inst/extdata/eucodes_full.csv b/inst/extdata/eucodes_full.csv new file mode 100644 index 0000000..cd98f90 --- /dev/null +++ b/inst/extdata/eucodes_full.csv @@ -0,0 +1,474 @@ +# This file was created with the following R code: +# +# paths <- kwb.utils::resolve(list( +# validation = "~/../Downloads/S/sema-plus_lausanne/inspection-data-validation", +# transition = "/1a_Environment_mdb/SEMA_Transition_versions", +# mdb = "/SEMA_Transition_v03_2013_04_23.mdb" +# )) +# +# all_codes <- paths$mdb %>% +# kwb.db::hsGetTable("qry_EuCodesAll", use2007Driver = TRUE) %>% +# kwb.utils::renameColumns(list( +# emcID = "main_code", +# C1 = "char_1", +# C2 = "char_2", +# emcName = "main_code_name", +# C1Name = "char_1_name", +# C2Name = "char_2_name" +# )) +# +# write.csv( +# all_codes, +# "condition-assessment/config/code-info.csv", +# fileEncoding = "UTF-8", +# row.names = FALSE, +# na = "" +# ) +# +"main_code","char_1","char_2","main_code_name","char_1_name","char_2_name" +"AEC","A",,"Form","kreisförmig", +"AEC","B",,"Form","rechteckig", +"AEC","C",,"Form","eiförmig", +"AEC","D",,"Form","U-förmig", +"AEC","E",,"Form","bogenförmig", +"AEC","F",,"Form","oval", +"AEC","X",,"Form","lokaler Querschnitt", +"AEC","Y",,"Form","unbekannt (bei Uebersetzung)", +"AEC","Z",,"Form","andere Form", +"AED","Y",,"Werkstoff","unbekannt (bei Uebersetzung)", +"BAA","A",,"Verformung","vertikal", +"BAA","B",,"Verformung","horizontal", +"BAA","Y",,"Verformung","gesamter Umfang (bei Uebersetzung)", +"BAB","A","A","Rissbildung","Oberflächenriss (Haarriss)","in Längsrichtung" +"BAB","A","B","Rissbildung","Oberflächenriss (Haarriss)","am Rohrumfang" +"BAB","A","C","Rissbildung","Oberflächenriss (Haarriss)","komplexe Rissbildung" +"BAB","A","D","Rissbildung","Oberflächenriss (Haarriss)","gewundene oder spiralförmige Rissbildung" +"BAB","A","E","Rissbildung","Oberflächenriss (Haarriss)","sternförmige Rissbildung" +"BAB","A","Y","Rissbildung","Oberflächenriss (Haarriss)","unbekannt (bei Uebersetzung)" +"BAB","B","A","Rissbildung","Riss","in Längsrichtung" +"BAB","B","B","Rissbildung","Riss","am Rohrumfang" +"BAB","B","C","Rissbildung","Riss","komplexe Rissbildung" +"BAB","B","D","Rissbildung","Riss","gewundene oder spiralförmige Rissbildung" +"BAB","B","E","Rissbildung","Riss","sternförmige Rissbildung" +"BAB","B","Y","Rissbildung","Riss","unbekannt (bei Uebersetzung)" +"BAB","C","A","Rissbildung","Klaffender Riss","in Längsrichtung" +"BAB","C","B","Rissbildung","Klaffender Riss","am Rohrumfang" +"BAB","C","C","Rissbildung","Klaffender Riss","komplexe Rissbildung" +"BAB","C","D","Rissbildung","Klaffender Riss","gewundene oder spiralförmige Rissbildung" +"BAB","C","E","Rissbildung","Klaffender Riss","sternförmige Rissbildung" +"BAB","C","Y","Rissbildung","Klaffender Riss","unbekannt (bei Uebersetzung)" +"BAB","Y","A","Rissbildung","unbekannt (bei Uebersetzung)","in Längsrichtung" +"BAB","Y","B","Rissbildung","unbekannt (bei Uebersetzung)","am Rohrumfang" +"BAB","Y","C","Rissbildung","unbekannt (bei Uebersetzung)","komplexe Rissbildung" +"BAB","Y","D","Rissbildung","unbekannt (bei Uebersetzung)","gewundene oder spiralförmige Rissbildung" +"BAB","Y","E","Rissbildung","unbekannt (bei Uebersetzung)","sternförmige Rissbildung" +"BAB","Y","Y","Rissbildung","unbekannt (bei Uebersetzung)","unbekannt (bei Uebersetzung)" +"BAC","A",,"Rohrbruch/Einsturz","Bruch", +"BAC","B",,"Rohrbruch/Einsturz","Fehlen von Teilen", +"BAC","C",,"Rohrbruch/Einsturz","Einsturz", +"BAD","A","-","Defektes Mauerwerk","verschoben","wenn Charakterisierung 1 <> ""B""" +"BAD","A","A","Defektes Mauerwerk","verschoben","weitere Mauerwerksschicht sichtbar" +"BAD","A","B","Defektes Mauerwerk","verschoben","es ist nichts zu sehen" +"BAD","A","Y","Defektes Mauerwerk","verschoben","unbekannt (bei Uebersetzung)" +"BAD","B","-","Defektes Mauerwerk","fehlend","wenn Charakterisierung 1 <> ""B""" +"BAD","B","A","Defektes Mauerwerk","fehlend","weitere Mauerwerksschicht sichtbar" +"BAD","B","B","Defektes Mauerwerk","fehlend","es ist nichts zu sehen" +"BAD","B","Y","Defektes Mauerwerk","fehlend","unbekannt (bei Uebersetzung)" +"BAD","C","-","Defektes Mauerwerk","Sohle abgesackt","wenn Charakterisierung 1 <> ""B""" +"BAD","C","A","Defektes Mauerwerk","Sohle abgesackt","weitere Mauerwerksschicht sichtbar" +"BAD","C","B","Defektes Mauerwerk","Sohle abgesackt","es ist nichts zu sehen" +"BAD","C","Y","Defektes Mauerwerk","Sohle abgesackt","unbekannt (bei Uebersetzung)" +"BAD","D","-","Defektes Mauerwerk","Einsturz","wenn Charakterisierung 1 <> ""B""" +"BAD","D","A","Defektes Mauerwerk","Einsturz","weitere Mauerwerksschicht sichtbar" +"BAD","D","B","Defektes Mauerwerk","Einsturz","es ist nichts zu sehen" +"BAD","D","Y","Defektes Mauerwerk","Einsturz","unbekannt (bei Uebersetzung)" +"BAE",,,"Fehlender Mörtel",, +"BAF","-","A","Oberflächenschaden","bei Uebersetzung in Abhaengigkeit von Quantifizierung","mechanisch" +"BAF","-","B","Oberflächenschaden","bei Uebersetzung in Abhaengigkeit von Quantifizierung","chemisch — allgemein" +"BAF","-","C","Oberflächenschaden","bei Uebersetzung in Abhaengigkeit von Quantifizierung","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","-","D","Oberflächenschaden","bei Uebersetzung in Abhaengigkeit von Quantifizierung","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","-","E","Oberflächenschaden","bei Uebersetzung in Abhaengigkeit von Quantifizierung","Ursache nicht eindeutig feststellbar" +"BAF","-","Z","Oberflächenschaden","bei Uebersetzung in Abhaengigkeit von Quantifizierung","andere Ursachen" +"BAF","A","A","Oberflächenschaden","erhöhte Rauheit","mechanisch" +"BAF","A","B","Oberflächenschaden","erhöhte Rauheit","chemisch — allgemein" +"BAF","A","C","Oberflächenschaden","erhöhte Rauheit","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","A","D","Oberflächenschaden","erhöhte Rauheit","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","A","E","Oberflächenschaden","erhöhte Rauheit","Ursache nicht eindeutig feststellbar" +"BAF","A","Z","Oberflächenschaden","erhöhte Rauheit","andere Ursachen" +"BAF","B","A","Oberflächenschaden","Abplatzung","mechanisch" +"BAF","B","B","Oberflächenschaden","Abplatzung","chemisch — allgemein" +"BAF","B","C","Oberflächenschaden","Abplatzung","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","B","D","Oberflächenschaden","Abplatzung","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","B","E","Oberflächenschaden","Abplatzung","Ursache nicht eindeutig feststellbar" +"BAF","B","Z","Oberflächenschaden","Abplatzung","andere Ursachen" +"BAF","C","A","Oberflächenschaden","Zuschlagstoffe sichtbar","mechanisch" +"BAF","C","B","Oberflächenschaden","Zuschlagstoffe sichtbar","chemisch — allgemein" +"BAF","C","C","Oberflächenschaden","Zuschlagstoffe sichtbar","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","C","D","Oberflächenschaden","Zuschlagstoffe sichtbar","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","C","E","Oberflächenschaden","Zuschlagstoffe sichtbar","Ursache nicht eindeutig feststellbar" +"BAF","C","Z","Oberflächenschaden","Zuschlagstoffe sichtbar","andere Ursachen" +"BAF","D","A","Oberflächenschaden","Zuschlagstoffe einragend","mechanisch" +"BAF","D","B","Oberflächenschaden","Zuschlagstoffe einragend","chemisch — allgemein" +"BAF","D","C","Oberflächenschaden","Zuschlagstoffe einragend","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","D","D","Oberflächenschaden","Zuschlagstoffe einragend","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","D","E","Oberflächenschaden","Zuschlagstoffe einragend","Ursache nicht eindeutig feststellbar" +"BAF","D","Z","Oberflächenschaden","Zuschlagstoffe einragend","andere Ursachen" +"BAF","E","A","Oberflächenschaden","Zuschlagstoffe fehlen","mechanisch" +"BAF","E","B","Oberflächenschaden","Zuschlagstoffe fehlen","chemisch — allgemein" +"BAF","E","C","Oberflächenschaden","Zuschlagstoffe fehlen","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","E","D","Oberflächenschaden","Zuschlagstoffe fehlen","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","E","E","Oberflächenschaden","Zuschlagstoffe fehlen","Ursache nicht eindeutig feststellbar" +"BAF","E","Z","Oberflächenschaden","Zuschlagstoffe fehlen","andere Ursachen" +"BAF","F","A","Oberflächenschaden","Bewehrung sichtbar","mechanisch" +"BAF","F","B","Oberflächenschaden","Bewehrung sichtbar","chemisch — allgemein" +"BAF","F","C","Oberflächenschaden","Bewehrung sichtbar","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","F","D","Oberflächenschaden","Bewehrung sichtbar","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","F","E","Oberflächenschaden","Bewehrung sichtbar","Ursache nicht eindeutig feststellbar" +"BAF","F","Z","Oberflächenschaden","Bewehrung sichtbar","andere Ursachen" +"BAF","G","A","Oberflächenschaden","Bewehrung einragend","mechanisch" +"BAF","G","B","Oberflächenschaden","Bewehrung einragend","chemisch — allgemein" +"BAF","G","C","Oberflächenschaden","Bewehrung einragend","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","G","D","Oberflächenschaden","Bewehrung einragend","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","G","E","Oberflächenschaden","Bewehrung einragend","Ursache nicht eindeutig feststellbar" +"BAF","G","Z","Oberflächenschaden","Bewehrung einragend","andere Ursachen" +"BAF","H","A","Oberflächenschaden","Bewehrung korrodiert","mechanisch" +"BAF","H","B","Oberflächenschaden","Bewehrung korrodiert","chemisch — allgemein" +"BAF","H","C","Oberflächenschaden","Bewehrung korrodiert","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","H","D","Oberflächenschaden","Bewehrung korrodiert","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","H","E","Oberflächenschaden","Bewehrung korrodiert","Ursache nicht eindeutig feststellbar" +"BAF","H","Z","Oberflächenschaden","Bewehrung korrodiert","andere Ursachen" +"BAF","I","A","Oberflächenschaden","fehlende Wand","mechanisch" +"BAF","I","B","Oberflächenschaden","fehlende Wand","chemisch — allgemein" +"BAF","I","C","Oberflächenschaden","fehlende Wand","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","I","D","Oberflächenschaden","fehlende Wand","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","I","E","Oberflächenschaden","fehlende Wand","Ursache nicht eindeutig feststellbar" +"BAF","I","Z","Oberflächenschaden","fehlende Wand","andere Ursachen" +"BAF","J","A","Oberflächenschaden","Korrosionserscheinungen an der Oberfläche","mechanisch" +"BAF","J","B","Oberflächenschaden","Korrosionserscheinungen an der Oberfläche","chemisch — allgemein" +"BAF","J","C","Oberflächenschaden","Korrosionserscheinungen an der Oberfläche","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","J","D","Oberflächenschaden","Korrosionserscheinungen an der Oberfläche","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","J","E","Oberflächenschaden","Korrosionserscheinungen an der Oberfläche","Ursache nicht eindeutig feststellbar" +"BAF","J","Z","Oberflächenschaden","Korrosionserscheinungen an der Oberfläche","andere Ursachen" +"BAF","K","A","Oberflächenschaden","Blasen (Beulen)","mechanisch" +"BAF","K","B","Oberflächenschaden","Blasen (Beulen)","chemisch — allgemein" +"BAF","K","C","Oberflächenschaden","Blasen (Beulen)","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","K","D","Oberflächenschaden","Blasen (Beulen)","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","K","E","Oberflächenschaden","Blasen (Beulen)","Ursache nicht eindeutig feststellbar" +"BAF","K","Z","Oberflächenschaden","Blasen (Beulen)","andere Ursachen" +"BAF","Z","A","Oberflächenschaden","andere Oberflächenschäden","mechanisch" +"BAF","Z","B","Oberflächenschaden","andere Oberflächenschäden","chemisch — allgemein" +"BAF","Z","C","Oberflächenschaden","andere Oberflächenschäden","chemisch — Beschädigung im oberen Teil des Rohres" +"BAF","Z","D","Oberflächenschaden","andere Oberflächenschäden","chemisch — Beschädigung im unteren Teil des Rohres" +"BAF","Z","E","Oberflächenschaden","andere Oberflächenschäden","Ursache nicht eindeutig feststellbar" +"BAF","Z","Z","Oberflächenschaden","andere Oberflächenschäden","andere Ursachen" +"BAG",,,"Einragender Anschluss",, +"BAH","A",,"Schadhafter Anschluss","Lage des Anschlusses um das Rohr ist falsch", +"BAH","B",,"Schadhafter Anschluss","zurückliegender Anschluss", +"BAH","C",,"Schadhafter Anschluss","Anschluss unvollständig eingebunden", +"BAH","D",,"Schadhafter Anschluss","Anschluss beschädigt", +"BAH","E",,"Schadhafter Anschluss","Anschluss verstopft", +"BAH","Y",,"Schadhafter Anschluss","unbekannt (bei Uebersetzung)", +"BAH","Z",,"Schadhafter Anschluss","andere", +"BAI","A","-","Einragendes Dichtungsmaterial","Dichtring","wenn Charakterisierung 1 = ""Z""" +"BAI","A","A","Einragendes Dichtungsmaterial","Dichtring","sichtbar verschoben, jedoch nicht in die Rohrleitung hineinragend" +"BAI","A","B","Einragendes Dichtungsmaterial","Dichtring","eintragend, aber nicht gebrochen" +"BAI","A","C","Einragendes Dichtungsmaterial","Dichtring","einragend, aber nicht gebrochen" +"BAI","A","D","Einragendes Dichtungsmaterial","Dichtring","einragend und gebrochen" +"BAI","A","Y","Einragendes Dichtungsmaterial","Dichtring","unbekannt (bei Uebersetzung)" +"BAI","Z","-","Einragendes Dichtungsmaterial","andere Dichtungsart","wenn Charakterisierung 1 = ""Z""" +"BAI","Z","A","Einragendes Dichtungsmaterial","andere Dichtungsart","sichtbar verschoben, jedoch nicht in die Rohrleitung hineinragend" +"BAI","Z","B","Einragendes Dichtungsmaterial","andere Dichtungsart","eintragend, aber nicht gebrochen" +"BAI","Z","C","Einragendes Dichtungsmaterial","andere Dichtungsart","einragend, aber nicht gebrochen" +"BAI","Z","D","Einragendes Dichtungsmaterial","andere Dichtungsart","einragend und gebrochen" +"BAI","Z","Y","Einragendes Dichtungsmaterial","andere Dichtungsart","unbekannt (bei Uebersetzung)" +"BAJ","A",,"Verschobene Verbindung","in Längsrichtung", +"BAJ","B",,"Verschobene Verbindung","radial", +"BAJ","C",,"Verschobene Verbindung","im Winkel", +"BAK","A","A","Schadhafte Innenauskleidung","Innenauskleidung abgelöst","in Längsrichtung" +"BAK","A","B","Schadhafte Innenauskleidung","Innenauskleidung abgelöst","radial am Umfang" +"BAK","A","C","Schadhafte Innenauskleidung","Innenauskleidung abgelöst","komplex" +"BAK","A","D","Schadhafte Innenauskleidung","Innenauskleidung abgelöst","spiralförmig" +"BAK","B","A","Schadhafte Innenauskleidung","Innenauskleidung verfärbt","in Längsrichtung" +"BAK","B","B","Schadhafte Innenauskleidung","Innenauskleidung verfärbt","radial am Umfang" +"BAK","B","C","Schadhafte Innenauskleidung","Innenauskleidung verfärbt","komplex" +"BAK","B","D","Schadhafte Innenauskleidung","Innenauskleidung verfärbt","spiralförmig" +"BAK","C","A","Schadhafte Innenauskleidung","Endstelle der Auskleidung schadhaft","in Längsrichtung" +"BAK","C","B","Schadhafte Innenauskleidung","Endstelle der Auskleidung schadhaft","radial am Umfang" +"BAK","C","C","Schadhafte Innenauskleidung","Endstelle der Auskleidung schadhaft","komplex" +"BAK","C","D","Schadhafte Innenauskleidung","Endstelle der Auskleidung schadhaft","spiralförmig" +"BAK","D","A","Schadhafte Innenauskleidung","Falten in der Auskleidung","in Längsrichtung" +"BAK","D","B","Schadhafte Innenauskleidung","Falten in der Auskleidung","radial am Umfang" +"BAK","D","C","Schadhafte Innenauskleidung","Falten in der Auskleidung","komplex" +"BAK","D","D","Schadhafte Innenauskleidung","Falten in der Auskleidung","spiralförmig" +"BAK","E","A","Schadhafte Innenauskleidung","Blasen oder Beulen in der Auskleidung nach innen","in Längsrichtung" +"BAK","E","B","Schadhafte Innenauskleidung","Blasen oder Beulen in der Auskleidung nach innen","radial am Umfang" +"BAK","E","C","Schadhafte Innenauskleidung","Blasen oder Beulen in der Auskleidung nach innen","komplex" +"BAK","E","D","Schadhafte Innenauskleidung","Blasen oder Beulen in der Auskleidung nach innen","spiralförmig" +"BAK","F","A","Schadhafte Innenauskleidung","Beulen außen","in Längsrichtung" +"BAK","F","B","Schadhafte Innenauskleidung","Beulen außen","radial am Umfang" +"BAK","F","C","Schadhafte Innenauskleidung","Beulen außen","komplex" +"BAK","F","D","Schadhafte Innenauskleidung","Beulen außen","spiralförmig" +"BAK","G","A","Schadhafte Innenauskleidung","Ablösen der Innenhaut/Beschichtung","in Längsrichtung" +"BAK","G","B","Schadhafte Innenauskleidung","Ablösen der Innenhaut/Beschichtung","radial am Umfang" +"BAK","G","C","Schadhafte Innenauskleidung","Ablösen der Innenhaut/Beschichtung","komplex" +"BAK","G","D","Schadhafte Innenauskleidung","Ablösen der Innenhaut/Beschichtung","spiralförmig" +"BAK","H","A","Schadhafte Innenauskleidung","Ablösen der Abdeckung der Verbindungsnaht","in Längsrichtung" +"BAK","H","B","Schadhafte Innenauskleidung","Ablösen der Abdeckung der Verbindungsnaht","radial am Umfang" +"BAK","H","C","Schadhafte Innenauskleidung","Ablösen der Abdeckung der Verbindungsnaht","komplex" +"BAK","H","D","Schadhafte Innenauskleidung","Ablösen der Abdeckung der Verbindungsnaht","spiralförmig" +"BAK","I","A","Schadhafte Innenauskleidung","Riss oder Spalt","in Längsrichtung" +"BAK","I","B","Schadhafte Innenauskleidung","Riss oder Spalt","radial am Umfang" +"BAK","I","C","Schadhafte Innenauskleidung","Riss oder Spalt","komplex" +"BAK","I","D","Schadhafte Innenauskleidung","Riss oder Spalt","spiralförmig" +"BAK","J","A","Schadhafte Innenauskleidung","Loch in der Auskleidung","in Längsrichtung" +"BAK","J","B","Schadhafte Innenauskleidung","Loch in der Auskleidung","radial am Umfang" +"BAK","J","C","Schadhafte Innenauskleidung","Loch in der Auskleidung","komplex" +"BAK","J","D","Schadhafte Innenauskleidung","Loch in der Auskleidung","spiralförmig" +"BAK","K","A","Schadhafte Innenauskleidung","Auskleidungsverbindung defekt","in Längsrichtung" +"BAK","K","B","Schadhafte Innenauskleidung","Auskleidungsverbindung defekt","radial am Umfang" +"BAK","K","C","Schadhafte Innenauskleidung","Auskleidungsverbindung defekt","komplex" +"BAK","K","D","Schadhafte Innenauskleidung","Auskleidungsverbindung defekt","spiralförmig" +"BAK","L","A","Schadhafte Innenauskleidung","Auskleidungswerkstoff erscheint weich","in Längsrichtung" +"BAK","L","B","Schadhafte Innenauskleidung","Auskleidungswerkstoff erscheint weich","radial am Umfang" +"BAK","L","C","Schadhafte Innenauskleidung","Auskleidungswerkstoff erscheint weich","komplex" +"BAK","L","D","Schadhafte Innenauskleidung","Auskleidungswerkstoff erscheint weich","spiralförmig" +"BAK","M","A","Schadhafte Innenauskleidung","Harz fehlt im Laminat","in Längsrichtung" +"BAK","M","B","Schadhafte Innenauskleidung","Harz fehlt im Laminat","radial am Umfang" +"BAK","M","C","Schadhafte Innenauskleidung","Harz fehlt im Laminat","komplex" +"BAK","M","D","Schadhafte Innenauskleidung","Harz fehlt im Laminat","spiralförmig" +"BAK","N","A","Schadhafte Innenauskleidung","Ende der Auskleidung ist nicht abgedichtet, um das Rohr oder den Schacht aufzunehmen","in Längsrichtung" +"BAK","N","B","Schadhafte Innenauskleidung","Ende der Auskleidung ist nicht abgedichtet, um das Rohr oder den Schacht aufzunehmen","radial am Umfang" +"BAK","N","C","Schadhafte Innenauskleidung","Ende der Auskleidung ist nicht abgedichtet, um das Rohr oder den Schacht aufzunehmen","komplex" +"BAK","N","D","Schadhafte Innenauskleidung","Ende der Auskleidung ist nicht abgedichtet, um das Rohr oder den Schacht aufzunehmen","spiralförmig" +"BAK","Z","A","Schadhafte Innenauskleidung","anderer Auskleidungsschaden","in Längsrichtung" +"BAK","Z","B","Schadhafte Innenauskleidung","anderer Auskleidungsschaden","radial am Umfang" +"BAK","Z","C","Schadhafte Innenauskleidung","anderer Auskleidungsschaden","komplex" +"BAK","Z","D","Schadhafte Innenauskleidung","anderer Auskleidungsschaden","spiralförmig" +"BAL","A","-","Schadhafte Reparatur","Wand fehlt teilweise","""Art des Verlaufs"" nicht zutreffend" +"BAL","A","A","Schadhafte Reparatur","Wand fehlt teilweise","in Längsrichtung" +"BAL","A","B","Schadhafte Reparatur","Wand fehlt teilweise","radial am Umfang" +"BAL","A","C","Schadhafte Reparatur","Wand fehlt teilweise","komplex" +"BAL","A","D","Schadhafte Reparatur","Wand fehlt teilweise","spiralförmig" +"BAL","B","-","Schadhafte Reparatur","Reparatur zur Abdichtung eines Loches ist schadhaft","""Art des Verlaufs"" nicht zutreffend" +"BAL","B","A","Schadhafte Reparatur","Reparatur zur Abdichtung eines Loches ist schadhaft","in Längsrichtung" +"BAL","B","B","Schadhafte Reparatur","Reparatur zur Abdichtung eines Loches ist schadhaft","radial am Umfang" +"BAL","B","C","Schadhafte Reparatur","Reparatur zur Abdichtung eines Loches ist schadhaft","komplex" +"BAL","B","D","Schadhafte Reparatur","Reparatur zur Abdichtung eines Loches ist schadhaft","spiralförmig" +"BAL","C","-","Schadhafte Reparatur","Ablösen des Reparaturwerkstoffs vom Basisrohr","""Art des Verlaufs"" nicht zutreffend" +"BAL","C","A","Schadhafte Reparatur","Ablösen des Reparaturwerkstoffs vom Basisrohr","in Längsrichtung" +"BAL","C","B","Schadhafte Reparatur","Ablösen des Reparaturwerkstoffs vom Basisrohr","radial am Umfang" +"BAL","C","C","Schadhafte Reparatur","Ablösen des Reparaturwerkstoffs vom Basisrohr","komplex" +"BAL","C","D","Schadhafte Reparatur","Ablösen des Reparaturwerkstoffs vom Basisrohr","spiralförmig" +"BAL","D","-","Schadhafte Reparatur","fehlender Reparaturwerkstoff an der Kontaktfläche","""Art des Verlaufs"" nicht zutreffend" +"BAL","D","A","Schadhafte Reparatur","fehlender Reparaturwerkstoff an der Kontaktfläche","in Längsrichtung" +"BAL","D","B","Schadhafte Reparatur","fehlender Reparaturwerkstoff an der Kontaktfläche","radial am Umfang" +"BAL","D","C","Schadhafte Reparatur","fehlender Reparaturwerkstoff an der Kontaktfläche","komplex" +"BAL","D","D","Schadhafte Reparatur","fehlender Reparaturwerkstoff an der Kontaktfläche","spiralförmig" +"BAL","E","-","Schadhafte Reparatur","überschüssiger Reparaturwerkstoff, der ein Hindernis darstellt","""Art des Verlaufs"" nicht zutreffend" +"BAL","E","A","Schadhafte Reparatur","überschüssiger Reparaturwerkstoff, der ein Hindernis darstellt","in Längsrichtung" +"BAL","E","B","Schadhafte Reparatur","überschüssiger Reparaturwerkstoff, der ein Hindernis darstellt","radial am Umfang" +"BAL","E","C","Schadhafte Reparatur","überschüssiger Reparaturwerkstoff, der ein Hindernis darstellt","komplex" +"BAL","E","D","Schadhafte Reparatur","überschüssiger Reparaturwerkstoff, der ein Hindernis darstellt","spiralförmig" +"BAL","F","-","Schadhafte Reparatur","Loch im Reparaturwerkstoff","""Art des Verlaufs"" nicht zutreffend" +"BAL","F","A","Schadhafte Reparatur","Loch im Reparaturwerkstoff","in Längsrichtung" +"BAL","F","B","Schadhafte Reparatur","Loch im Reparaturwerkstoff","radial am Umfang" +"BAL","F","C","Schadhafte Reparatur","Loch im Reparaturwerkstoff","komplex" +"BAL","F","D","Schadhafte Reparatur","Loch im Reparaturwerkstoff","spiralförmig" +"BAL","G","-","Schadhafte Reparatur","Riss im Reparaturwerkstoff","""Art des Verlaufs"" nicht zutreffend" +"BAL","G","A","Schadhafte Reparatur","Riss im Reparaturwerkstoff","in Längsrichtung" +"BAL","G","B","Schadhafte Reparatur","Riss im Reparaturwerkstoff","radial am Umfang" +"BAL","G","C","Schadhafte Reparatur","Riss im Reparaturwerkstoff","komplex" +"BAL","G","D","Schadhafte Reparatur","Riss im Reparaturwerkstoff","spiralförmig" +"BAL","Y","-","Schadhafte Reparatur","unbekannt (bei Uebersetzung)","""Art des Verlaufs"" nicht zutreffend" +"BAL","Y","A","Schadhafte Reparatur","unbekannt (bei Uebersetzung)","in Längsrichtung" +"BAL","Y","B","Schadhafte Reparatur","unbekannt (bei Uebersetzung)","radial am Umfang" +"BAL","Y","C","Schadhafte Reparatur","unbekannt (bei Uebersetzung)","komplex" +"BAL","Y","D","Schadhafte Reparatur","unbekannt (bei Uebersetzung)","spiralförmig" +"BAL","Z","-","Schadhafte Reparatur","andere","""Art des Verlaufs"" nicht zutreffend" +"BAL","Z","A","Schadhafte Reparatur","andere","in Längsrichtung" +"BAL","Z","B","Schadhafte Reparatur","andere","radial am Umfang" +"BAL","Z","C","Schadhafte Reparatur","andere","komplex" +"BAL","Z","D","Schadhafte Reparatur","andere","spiralförmig" +"BAM","A",,"Schadhafte Schweißnaht","in Längsrichtung", +"BAM","B",,"Schadhafte Schweißnaht","am Umfang", +"BAM","C",,"Schadhafte Schweißnaht","spiralförmiger Verlauf", +"BAN",,,"Poröses Rohr",, +"BAO",,,"Boden sichtbar",, +"BAP",,,"Hohlraum sichtbar",, +"BBA","A",,"Wurzeln","Pfahlwurzel", +"BBA","B",,"Wurzeln","einzelne feine Wurzeln", +"BBA","C",,"Wurzeln","komplexes Wurzelwerk", +"BBA","Y",,"Wurzeln","unbekannt (bei Uebersetzung)", +"BBB","A",,"Anhaftende Stoffe","Inkrustation", +"BBB","B",,"Anhaftende Stoffe","Fett", +"BBB","C",,"Anhaftende Stoffe","Fäulnis", +"BBB","Z",,"Anhaftende Stoffe","andere", +"BBC","A",,"Ablagerungen","feines Material", +"BBC","B",,"Ablagerungen","grobes Material", +"BBC","C",,"Ablagerungen","hartes oder verdichtetes Material", +"BBC","Z",,"Ablagerungen","andere", +"BBD","A",,"Eindringen von Bodenmaterial","Sand", +"BBD","B",,"Eindringen von Bodenmaterial","Torf", +"BBD","C",,"Eindringen von Bodenmaterial","Feinmaterial", +"BBD","D",,"Eindringen von Bodenmaterial","Grobmaterial", +"BBD","Y",,"Eindringen von Bodenmaterial","unbekannt (bei Uebersetzung)", +"BBD","Z",,"Eindringen von Bodenmaterial","andere", +"BBE","A",,"Andere Hindernisse","Ziegel oder Mauerwerk liegen/liegt in der Rohrsohle", +"BBE","B",,"Andere Hindernisse","Bruchstücke einer Abwasserleitung oder eines Abwasserkanals liegen in der Rohrsohle", +"BBE","C",,"Andere Hindernisse","anderer Gegenstand liegt in der Rohrsohle", +"BBE","D",,"Andere Hindernisse","Gegenstand ragt durch die Wand ein", +"BBE","E",,"Andere Hindernisse","Gegenstand in Rohrverbindung eingekeilt", +"BBE","F",,"Andere Hindernisse","Gegenstand dringt durch einen Anschluss/Abzweig ein", +"BBE","G",,"Andere Hindernisse","fremde Leitungen oder Kabel durchqueren die Rohrleitung", +"BBE","H",,"Andere Hindernisse","Gegenstand/Objekt in den Rohrkörper eingebaut", +"BBE","Y",,"Andere Hindernisse","unbekannt (bei Uebersetzung)", +"BBE","Z",,"Andere Hindernisse","andere", +"BBF","A",,"Infiltration","Schwitzen", +"BBF","B",,"Infiltration","Tropfen", +"BBF","C",,"Infiltration","Fließen", +"BBF","D",,"Infiltration","Spritzen", +"BBG",,,"Exfiltration",, +"BBH","A","A","Ungeziefer","Ratte","in der Rohrleitung" +"BBH","A","B","Ungeziefer","Ratte","in einem Anschluss" +"BBH","A","C","Ungeziefer","Ratte","in einer offenen Rohrverbindung" +"BBH","A","Z","Ungeziefer","Ratte","andere" +"BBH","B","A","Ungeziefer","Küchenschabe/Kakerlake","in der Rohrleitung" +"BBH","B","B","Ungeziefer","Küchenschabe/Kakerlake","in einem Anschluss" +"BBH","B","C","Ungeziefer","Küchenschabe/Kakerlake","in einer offenen Rohrverbindung" +"BBH","B","Z","Ungeziefer","Küchenschabe/Kakerlake","andere" +"BBH","Z","A","Ungeziefer","andere","in der Rohrleitung" +"BBH","Z","B","Ungeziefer","andere","in einem Anschluss" +"BBH","Z","C","Ungeziefer","andere","in einer offenen Rohrverbindung" +"BBH","Z","Z","Ungeziefer","andere","andere" +"BCA","A","A","Anschluss","Abzweig","Anschluss offen" +"BCA","A","B","Anschluss","Abzweig","Anschluss geschlossen" +"BCA","B","A","Anschluss","Sattelanschluss — gebohrt","Anschluss offen" +"BCA","B","B","Anschluss","Sattelanschluss — gebohrt","Anschluss geschlossen" +"BCA","C","A","Anschluss","Sattelanschluss — gemeißelt","Anschluss offen" +"BCA","C","B","Anschluss","Sattelanschluss — gemeißelt","Anschluss geschlossen" +"BCA","D","A","Anschluss","einfacher Anschluss — gebohrt","Anschluss offen" +"BCA","D","B","Anschluss","einfacher Anschluss — gebohrt","Anschluss geschlossen" +"BCA","E","A","Anschluss","einfacher Anschluss — gemeißelt","Anschluss offen" +"BCA","E","B","Anschluss","einfacher Anschluss — gemeißelt","Anschluss geschlossen" +"BCA","F","A","Anschluss","? siehe DWA-M 152 (Nov. 2009), S. 24","Anschluss offen" +"BCA","F","B","Anschluss","? siehe DWA-M 152 (Nov. 2009), S. 24","Anschluss geschlossen" +"BCA","G","A","Anschluss","unbekannter Anschluss","Anschluss offen" +"BCA","G","B","Anschluss","unbekannter Anschluss","Anschluss geschlossen" +"BCA","Y","A","Anschluss","unbekannt (bei Uebersetzung)","Anschluss offen" +"BCA","Y","B","Anschluss","unbekannt (bei Uebersetzung)","Anschluss geschlossen" +"BCA","Z","A","Anschluss","andere","Anschluss offen" +"BCA","Z","B","Anschluss","andere","Anschluss geschlossen" +"BCB","A",,"Punktuelle Reparatur","Rohr ausgetauscht", +"BCB","B",,"Punktuelle Reparatur","örtlich begrenzte Innenauskleidung des Rohrs", +"BCB","C",,"Punktuelle Reparatur","Mörtelinjizierung", +"BCB","D",,"Punktuelle Reparatur","Injizierung mit anderem Dichtmittel", +"BCB","E",,"Punktuelle Reparatur","Loch repariert", +"BCB","F",,"Punktuelle Reparatur","örtlich begrenzte Innenauskleidung des Anschlusses", +"BCB","G",,"Punktuelle Reparatur","andere Reparatur des Anschlusses", +"BCB","Y",,"Punktuelle Reparatur","unbekannt (bei Uebersetzung)", +"BCB","Z",,"Punktuelle Reparatur","anderes grabenloses Reparaturverfahren", +"BCC","A","A","Krümmung der Leitung","nach links","nach oben" +"BCC","A","B","Krümmung der Leitung","nach links","nach unten" +"BCC","A","Y","Krümmung der Leitung","nach links","unbekannt (bei Uebersetzung)" +"BCC","B","A","Krümmung der Leitung","nach rechts","nach oben" +"BCC","B","B","Krümmung der Leitung","nach rechts","nach unten" +"BCC","B","Y","Krümmung der Leitung","nach rechts","unbekannt (bei Uebersetzung)" +"BCC","Y","A","Krümmung der Leitung","unbekannt (bei Uebersetzung)","nach oben" +"BCC","Y","B","Krümmung der Leitung","unbekannt (bei Uebersetzung)","nach unten" +"BCC","Y","Y","Krümmung der Leitung","unbekannt (bei Uebersetzung)","unbekannt (bei Uebersetzung)" +"BCD","A","P","Anfangsknoten","Schacht","fuer Uebersetzung von PA" +"BCD","B","P","Anfangsknoten","Inspektionsöffnung","fuer Uebersetzung von PA" +"BCD","C","P","Anfangsknoten","Reinigungsöffnung","fuer Uebersetzung von PA" +"BCD","D","P","Anfangsknoten","Lampenschacht","fuer Uebersetzung von PA" +"BCD","E","P","Anfangsknoten","Auslauf","fuer Uebersetzung von PA" +"BCD","F","P","Anfangsknoten","Verbindung von Kanälen ohne Schacht oder Inspektionsöffnung","fuer Uebersetzung von PA" +"BCD","X","P","Anfangsknoten","ein spezieller vom Auftraggeber definierter Typ","fuer Uebersetzung von PA" +"BCD","Z","P","Anfangsknoten","anderes Bauwerk","fuer Uebersetzung von PA" +"BCE","A","P","Endknoten","Schacht","fuer Uebersetzung von PE" +"BCE","B","P","Endknoten","Inspektionsöffnung","fuer Uebersetzung von PE" +"BCE","C","P","Endknoten","Reinigungsöffnung","fuer Uebersetzung von PE" +"BCE","D","P","Endknoten","Lampenschacht","fuer Uebersetzung von PE" +"BCE","E","P","Endknoten","Auslauf","fuer Uebersetzung von PE" +"BCE","F","P","Endknoten","Verbindung von Kanälen ohne Schacht oder Inspektionsöffnung","fuer Uebersetzung von PE" +"BCE","X","P","Endknoten","ein spezieller vom Auftraggeber definierter Typ","fuer Uebersetzung von PE" +"BCE","Z","P","Endknoten","anderes Bauwerk","fuer Uebersetzung von PE" +"BDA",,,"Allgemeines Foto",, +"BDB",,,"Allgemeine Anmerkung",, +"BDC","A","-","Inspektion abgebrochen","Hindernis","bei Uebersetzung stattdessen Anmerkung" +"BDC","A","A","Inspektion abgebrochen","Hindernis","Inspektionsziel vor Erreichen des Endknotens erreicht" +"BDC","A","B","Inspektion abgebrochen","Hindernis","Inspektion auf Anweisung des Auftraggebers abgebrochen" +"BDC","A","C","Inspektion abgebrochen","Hindernis","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion abgeschlossen" +"BDC","A","D","Inspektion abgebrochen","Hindernis","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion nicht abgeschlossen" +"BDC","A","E","Inspektion abgebrochen","Hindernis","es ist nicht bekannt, ob die Inspektion der gesamten Leitung bei Berücksichtigung einer früheren Teilinspektion abgeschlossen ist" +"BDC","A","Z","Inspektion abgebrochen","Hindernis","andere" +"BDC","B","-","Inspektion abgebrochen","hoher Wasserstand","bei Uebersetzung stattdessen Anmerkung" +"BDC","B","A","Inspektion abgebrochen","hoher Wasserstand","Inspektionsziel vor Erreichen des Endknotens erreicht" +"BDC","B","B","Inspektion abgebrochen","hoher Wasserstand","Inspektion auf Anweisung des Auftraggebers abgebrochen" +"BDC","B","C","Inspektion abgebrochen","hoher Wasserstand","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion abgeschlossen" +"BDC","B","D","Inspektion abgebrochen","hoher Wasserstand","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion nicht abgeschlossen" +"BDC","B","E","Inspektion abgebrochen","hoher Wasserstand","es ist nicht bekannt, ob die Inspektion der gesamten Leitung bei Berücksichtigung einer früheren Teilinspektion abgeschlossen ist" +"BDC","B","Z","Inspektion abgebrochen","hoher Wasserstand","andere" +"BDC","C","-","Inspektion abgebrochen","Versagen der Ausrüstung","bei Uebersetzung stattdessen Anmerkung" +"BDC","C","A","Inspektion abgebrochen","Versagen der Ausrüstung","Inspektionsziel vor Erreichen des Endknotens erreicht" +"BDC","C","B","Inspektion abgebrochen","Versagen der Ausrüstung","Inspektion auf Anweisung des Auftraggebers abgebrochen" +"BDC","C","C","Inspektion abgebrochen","Versagen der Ausrüstung","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion abgeschlossen" +"BDC","C","D","Inspektion abgebrochen","Versagen der Ausrüstung","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion nicht abgeschlossen" +"BDC","C","E","Inspektion abgebrochen","Versagen der Ausrüstung","es ist nicht bekannt, ob die Inspektion der gesamten Leitung bei Berücksichtigung einer früheren Teilinspektion abgeschlossen ist" +"BDC","C","Z","Inspektion abgebrochen","Versagen der Ausrüstung","andere" +"BDC","Y","-","Inspektion abgebrochen","fuer Uebersetzung von IAB","bei Uebersetzung stattdessen Anmerkung" +"BDC","Y","A","Inspektion abgebrochen","fuer Uebersetzung von IAB","Inspektionsziel vor Erreichen des Endknotens erreicht" +"BDC","Y","B","Inspektion abgebrochen","fuer Uebersetzung von IAB","Inspektion auf Anweisung des Auftraggebers abgebrochen" +"BDC","Y","C","Inspektion abgebrochen","fuer Uebersetzung von IAB","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion abgeschlossen" +"BDC","Y","D","Inspektion abgebrochen","fuer Uebersetzung von IAB","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion nicht abgeschlossen" +"BDC","Y","E","Inspektion abgebrochen","fuer Uebersetzung von IAB","es ist nicht bekannt, ob die Inspektion der gesamten Leitung bei Berücksichtigung einer früheren Teilinspektion abgeschlossen ist" +"BDC","Y","Z","Inspektion abgebrochen","fuer Uebersetzung von IAB","andere" +"BDC","Z","-","Inspektion abgebrochen","andere","bei Uebersetzung stattdessen Anmerkung" +"BDC","Z","A","Inspektion abgebrochen","andere","Inspektionsziel vor Erreichen des Endknotens erreicht" +"BDC","Z","B","Inspektion abgebrochen","andere","Inspektion auf Anweisung des Auftraggebers abgebrochen" +"BDC","Z","C","Inspektion abgebrochen","andere","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion abgeschlossen" +"BDC","Z","D","Inspektion abgebrochen","andere","die Inspektion der Leitung/des Kanals ist bei Berücksichtigung einer früheren Teilinspektion nicht abgeschlossen" +"BDC","Z","E","Inspektion abgebrochen","andere","es ist nicht bekannt, ob die Inspektion der gesamten Leitung bei Berücksichtigung einer früheren Teilinspektion abgeschlossen ist" +"BDC","Z","Z","Inspektion abgebrochen","andere","andere" +"BDD","-",,"Wasserspiegel","unbekannt (bei Uebersetzung)", +"BDD","A",,"Wasserspiegel","klar", +"BDD","B",,"Wasserspiegel","Anwendung des Kodes nicht fortgeführt", +"BDD","C",,"Wasserspiegel","trüb", +"BDD","D",,"Wasserspiegel","gefärbt", +"BDD","E",,"Wasserspiegel","trüb und gefärbt", +"BDD","Y",,"Wasserspiegel","unbekannt (bei Uebersetzung)", +"BDE","A","A","Zufluss aus einem Anschluss","klar","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","A","B","Zufluss aus einem Anschluss","klar","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","A","C","Zufluss aus einem Anschluss","klar","kein Fehlanschluss erkennbar" +"BDE","A","Y","Zufluss aus einem Anschluss","klar","unbekannt (bei Uebersetzung)" +"BDE","B","A","Zufluss aus einem Anschluss","Anwendung des Kodes nicht fortgeführt","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","B","B","Zufluss aus einem Anschluss","Anwendung des Kodes nicht fortgeführt","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","B","C","Zufluss aus einem Anschluss","Anwendung des Kodes nicht fortgeführt","kein Fehlanschluss erkennbar" +"BDE","B","Y","Zufluss aus einem Anschluss","Anwendung des Kodes nicht fortgeführt","unbekannt (bei Uebersetzung)" +"BDE","C","A","Zufluss aus einem Anschluss","trüb","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","C","B","Zufluss aus einem Anschluss","trüb","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","C","C","Zufluss aus einem Anschluss","trüb","kein Fehlanschluss erkennbar" +"BDE","C","Y","Zufluss aus einem Anschluss","trüb","unbekannt (bei Uebersetzung)" +"BDE","D","A","Zufluss aus einem Anschluss","gefärbt","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","D","B","Zufluss aus einem Anschluss","gefärbt","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","D","C","Zufluss aus einem Anschluss","gefärbt","kein Fehlanschluss erkennbar" +"BDE","D","Y","Zufluss aus einem Anschluss","gefärbt","unbekannt (bei Uebersetzung)" +"BDE","E","A","Zufluss aus einem Anschluss","trüb und gefärbt","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","E","B","Zufluss aus einem Anschluss","trüb und gefärbt","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","E","C","Zufluss aus einem Anschluss","trüb und gefärbt","kein Fehlanschluss erkennbar" +"BDE","E","Y","Zufluss aus einem Anschluss","trüb und gefärbt","unbekannt (bei Uebersetzung)" +"BDE","Y","A","Zufluss aus einem Anschluss","unbekannt (bei Uebersetzung)","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","Y","B","Zufluss aus einem Anschluss","unbekannt (bei Uebersetzung)","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","Y","C","Zufluss aus einem Anschluss","unbekannt (bei Uebersetzung)","kein Fehlanschluss erkennbar" +"BDE","Y","Y","Zufluss aus einem Anschluss","unbekannt (bei Uebersetzung)","unbekannt (bei Uebersetzung)" +"BDE","YY","A","Zufluss aus einem Anschluss","nicht erkennbar","falsch angeschlossen, da Schmutzwasser in Regenwasserleitung/- kanal abfließt" +"BDE","YY","B","Zufluss aus einem Anschluss","nicht erkennbar","falsch angeschlossen, da Regenwasser in Schmutzwasserleitung/- kanal abfließt" +"BDE","YY","C","Zufluss aus einem Anschluss","nicht erkennbar","kein Fehlanschluss erkennbar" +"BDE","YY","Y","Zufluss aus einem Anschluss","nicht erkennbar","unbekannt (bei Uebersetzung)" +"BDF","A",,"Atmosphäre in der Leitung","Sauerstoffmangel", +"BDF","B",,"Atmosphäre in der Leitung","Schwefelwasserstoff", +"BDF","C",,"Atmosphäre in der Leitung","Methan", +"BDF","Z",,"Atmosphäre in der Leitung","andere", +"BDG","A",,"Keine Sicht","Kamera unter Wasser", +"BDG","B",,"Keine Sicht","Verschlammung", +"BDG","C",,"Keine Sicht","Dämpfe", +"BDG","Z",,"Keine Sicht","andere", From fcf534b87d4d022d5d533caba3fd2224463ea6ac Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 24 Nov 2023 15:49:07 +0100 Subject: [PATCH 082/141] Remove commented header from csv file --- inst/extdata/eucodes_full.csv | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/inst/extdata/eucodes_full.csv b/inst/extdata/eucodes_full.csv index cd98f90..b53d226 100644 --- a/inst/extdata/eucodes_full.csv +++ b/inst/extdata/eucodes_full.csv @@ -1,30 +1,3 @@ -# This file was created with the following R code: -# -# paths <- kwb.utils::resolve(list( -# validation = "~/../Downloads/S/sema-plus_lausanne/inspection-data-validation", -# transition = "/1a_Environment_mdb/SEMA_Transition_versions", -# mdb = "/SEMA_Transition_v03_2013_04_23.mdb" -# )) -# -# all_codes <- paths$mdb %>% -# kwb.db::hsGetTable("qry_EuCodesAll", use2007Driver = TRUE) %>% -# kwb.utils::renameColumns(list( -# emcID = "main_code", -# C1 = "char_1", -# C2 = "char_2", -# emcName = "main_code_name", -# C1Name = "char_1_name", -# C2Name = "char_2_name" -# )) -# -# write.csv( -# all_codes, -# "condition-assessment/config/code-info.csv", -# fileEncoding = "UTF-8", -# row.names = FALSE, -# na = "" -# ) -# "main_code","char_1","char_2","main_code_name","char_1_name","char_2_name" "AEC","A",,"Form","kreisförmig", "AEC","B",,"Form","rechteckig", From 116edac0584fe09fcfa9fa82a89458fae69cf9e8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 24 Nov 2023 15:50:14 +0100 Subject: [PATCH 083/141] Indicate language in file name --- inst/extdata/{eucodes_full.csv => eucodes_full_de.csv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename inst/extdata/{eucodes_full.csv => eucodes_full_de.csv} (100%) diff --git a/inst/extdata/eucodes_full.csv b/inst/extdata/eucodes_full_de.csv similarity index 100% rename from inst/extdata/eucodes_full.csv rename to inst/extdata/eucodes_full_de.csv From 976ce7323e8f9a339f41dffa6df540a5f11845f7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 24 Nov 2023 21:05:55 +0100 Subject: [PATCH 084/141] Add arg. "snakeCase", extract readRenamings() --- R/readEuCodedFile.R | 28 ++++++++++++++++++++++++---- inst/extdata/column-names.csv | 22 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 inst/extdata/column-names.csv diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 9d3c59b..13ba871 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -82,10 +82,30 @@ readEuCodedFile <- function( } # renameColumnsToMeaningful ---------------------------------------------------- -renameColumnsToMeaningful <- function(x) +renameColumnsToMeaningful <- function(x, snakeCase = FALSE) { - codeInfo <- kwb.utils::selectColumns(getCodes(), c("Code", "Name")) - renamings <- kwb.utils::toLookupList(data = codeInfo) - kwb.utils::renameColumns(x, renamings) + result <- kwb.utils::renameColumns(x, renamings = readRenamings( + fileName = "eucodes.csv", + columnFrom = "Code", + columnTo = "Name" + )) + + if (snakeCase) { + result <- kwb.utils::renameColumns(x, renamings = readRenamings( + fileName = "column-names.csv", + columnFrom = "name_1", + columnTo = "name_2" + )) + } + + result } +# readRenamings ---------------------------------------------------------------- +readRenamings <- function(fileName, columnFrom, columnTo) +{ + data <- readPackageFile(fileName) + data <- kwb.utils::selectColumns(data, c(columnFrom, columnTo)) + isComplete <- rowSums(nchar(as.matrix(data)) > 0L) == 2L + kwb.utils::toLookupList(data = data[isComplete, ]) +} diff --git a/inst/extdata/column-names.csv b/inst/extdata/column-names.csv new file mode 100644 index 0000000..7964612 --- /dev/null +++ b/inst/extdata/column-names.csv @@ -0,0 +1,22 @@ +type,name_1,name_2 +inspection,inspid,inspection_id +inspection,InspDate,inspection_date +inspection,InspTime,inspection_time +inspection,Node1Ref,node_1_ref +inspection,Node2Ref,node_2_ref +inspection,PipeRef,pipe_ref +inspection,SewSysName,sewer_system +inspection,Material,material +inspection,Width,width +inspection,Height,height +inspection,ExpectedLength,expected_length +observation,inspid,inspection_id +observation,LongOrVertLoc,position +observation,MainCode,main_code +observation,Char1,char_1 +observation,Char2,char_2 +observation,ContDefectCode,cont_defect +observation,Quant1,quant_1 +observation,Quant2,quant_2 +observation,Circum1,circum_1 +observation,Remarks,remarks From f066ca8c9a366f1dcc5543e82e071aaa887c8755 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 17:32:29 +0100 Subject: [PATCH 085/141] Give the pattern as string constant --- R/getHeaderInfo.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/getHeaderInfo.R b/R/getHeaderInfo.R index 7cf722f..169c265 100644 --- a/R/getHeaderInfo.R +++ b/R/getHeaderInfo.R @@ -1,10 +1,10 @@ # getHeaderInfo ---------------------------------------------------------------- getHeaderInfo <- function(euLines) { - pattern <- paste0("^#", c("A", "B", "C", "Z"), collapse = "|") - - headerIndices <- grep(pattern, euLines) - + # Indices of header lines (starting with #A, #B, #C, or #Z + headerIndices <- grep("^#[ABCZ]", euLines) + + # Corresponding header lines headerLines <- euLines[headerIndices] keyValue = strsplit(headerLines, "=") From 3b11eee76d7362b6025e2ec22ea3881e9b06419e Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 17:33:11 +0100 Subject: [PATCH 086/141] Describe more fields in "column-names.csv" --- inst/extdata/column-names.csv | 40 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/inst/extdata/column-names.csv b/inst/extdata/column-names.csv index 7964612..aff7a37 100644 --- a/inst/extdata/column-names.csv +++ b/inst/extdata/column-names.csv @@ -1,22 +1,48 @@ type,name_1,name_2 inspection,inspid,inspection_id +inspection,Cleaning,cleaning +inspection,DepthFinishNode,depth_finish_node +inspection,DepthStartNode,depth_start_node +inspection,Direction,direction +inspection,ExpectedLength,expected_length +inspection,Height,height +inspection,InspectorJob,inspector_job +inspection,InspectorName,inspector_name inspection,InspDate,inspection_date +inspection,InspMethod,inspection_method +inspection,InspPurpose,inspection_purpose inspection,InspTime,inspection_time +inspection,LiningMaterial,lining_material +inspection,LiningType,lining_type +inspection,Location,location +inspection,Material,material +inspection,Node1Coord,node_1_coord +inspection,Node2Coord,node_2_coord inspection,Node1Ref,node_1_ref inspection,Node2Ref,node_2_ref +inspection,PhotoFile,photo_file inspection,PipeRef,pipe_ref +inspection,Precipitation,precipitation +inspection,SewerUse,sewer_use inspection,SewSysName,sewer_system -inspection,Material,material +inspection,Shape,shape +inspection,Standard,standard +inspection,StartNodeCoord,start_node_coord +inspection,StartNodeRef,start_node_ref +inspection,Temperature,temperature +inspection,Town,town +inspection,VideoFile,video_file inspection,Width,width -inspection,Height,height -inspection,ExpectedLength,expected_length -observation,inspid,inspection_id -observation,LongOrVertLoc,position -observation,MainCode,main_code observation,Char1,char_1 observation,Char2,char_2 +observation,Circum1,circum_1 +observation,Circum2,circum_2 observation,ContDefectCode,cont_defect +observation,Joint,joint +observation,LongOrVertLoc,position +observation,MainCode,main_code observation,Quant1,quant_1 observation,Quant2,quant_2 -observation,Circum1,circum_1 +observation,PhotoRef,photo_ref observation,Remarks,remarks +observation,VideoRef,video_ref From 52752a7350a638e8bb141ee2cecd0940a135097c Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 17:33:42 +0100 Subject: [PATCH 087/141] Fix typo: "Linig" -> "Lining" --- inst/extdata/eucodes.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/eucodes.csv b/inst/extdata/eucodes.csv index 45351c4..6a972a2 100644 --- a/inst/extdata/eucodes.csv +++ b/inst/extdata/eucodes.csv @@ -60,7 +60,7 @@ C3,ACB,Height,Height,Höhe,Hauteur C3,ACC,Width,Width,Breite,Largeur C3,ACD,Material,Material,Werkstoff,Matériau C3,ACE,LiningType,Lining Type,Auskleidung,Type de revêtement -C3,ACF,LinigMaterial,Lining material,Auskleidungswerkstoff,Matériau de revêtement +C3,ACF,LiningMaterial,Lining material,Auskleidungswerkstoff,Matériau de revêtement C3,ACG,PipeUnitLength,Pipe unit length,Rohrlänge,Longueur unitaire de conduite C3,ACH,DepthStartNode,Depth at start node,Tiefe am Anfangsknoten,Profondeur au noeud de départ C3,ACI,DepthFinishNode,Depth at finish node,Tiefe am Endknoten,Profondeur au noeud d'arrivée From 1b2224836feffe04e35307822d52099a7ee88065 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 17:43:37 +0100 Subject: [PATCH 088/141] Improve names, use accessor, output "context" --- R/extractObservationData.R | 49 +++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index 0d8ab31..d34076d 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -1,3 +1,5 @@ +# extractObservationData ------------------------------------------------------- + #' Extract Observations from EN13508.2-coded file #' #' @param euLines text lines read from EN13508.2-coded file @@ -8,22 +10,25 @@ #' in EN13508.2 and a column \code{inspno} referring to the inspection number. extractObservationData <- function(euLines, headerInfo, header.info) { - kwb.utils::checkForMissingColumns(headerInfo, c("uniqueKey", "type", "value")) + # Create accessor to data frame headerInfo + fetch <- kwb.utils::createAccessor(headerInfo) - uniqueKeys <- unique(headerInfo[["uniqueKey"]][headerInfo[["type"]] == "C"]) + keys <- unique(fetch("uniqueKey")[fetch("type") == "C"]) colClasses <- sapply( - inspectionDataFieldCodes(), kwb.utils::selectElements, "class" + X = inspectionDataFieldCodes(), + FUN = kwb.utils::selectElements, + elements = "class" ) - dataBlocks <- lapply(uniqueKeys, function(uniqueKey) { + dataBlocks <- lapply(keys, function(key) { - #uniqueKey <- uniqueKeys[1L] - blocks <- extractObservationBlocks(euLines, headerInfo, uniqueKey) + #key <- keys[1L] + blocks <- extractObservationBlocks(euLines, headerInfo, key) - rowsWithKey <- which(headerInfo[["uniqueKey"]] == uniqueKey) + rowsWithKey <- which(fetch("uniqueKey") == key) - captionLine <- headerInfo[["value"]][rowsWithKey][1L] + captionLine <- fetch("value")[rowsWithKey][1L] text <- c(captionLine, do.call(c, blocks)) @@ -40,20 +45,20 @@ extractObservationData <- function(euLines, headerInfo, header.info) header = TRUE ) - result$inspno <- rep(headerInfo$inspno[rowsWithKey], blockLengths) + result[["inspno"]] <- rep(fetch("inspno")[rowsWithKey], blockLengths) - removeEmptyRecords(result) + removeEmptyRecords(result, context = key) }) inspectionData <- kwb.utils::safeRowBindAll(dataBlocks) - inspectionData <- inspectionData[, order(names(inspectionData))] - inspectionData <- kwb.utils::orderBy(inspectionData, c("inspno", "I")) kwb.utils::moveColumnsToFront(inspectionData, "inspno") } +# extractObservationBlocks ----------------------------------------------------- + #' Extract Lines Between #C-Header and #Z End Tag #' #' @param euLines text lines read from EN13508.2-coded file @@ -93,24 +98,24 @@ extractObservationBlocks <- function(euLines, headerInfo, uniqueKey) } # removeEmptyRecords ----------------------------------------------------------- -removeEmptyRecords <- function(data) +removeEmptyRecords <- function(data, context) { - textValues <- as.matrix(kwb.utils::removeColumns(data, "inspno")) - - mode(textValues) <- "character" - - nCharacters <- kwb.utils::defaultIfNA(nchar(textValues), 0L) - - isEmpty <- rowSums(nCharacters) == 0L + # Convert data frame to a matrix of text values (excluding "inspno") + x <- as.matrix(kwb.utils::removeColumns(data, "inspno")) + mode(x) <- "character" + + # Which rows are full of empty text values? + isEmpty <- rowSums(kwb.utils::defaultIfNA(nchar(x), 0L)) == 0L if (any(isEmpty)) { message(sprintf( paste( "Removing %d empty records from observations table (inspection ", - "number(s): %s)" + "number(s): %s, context: %s)" ), sum(isEmpty), - paste(unique(data[["inspno"]][isEmpty]), collapse = ", ") + paste(unique(data[["inspno"]][isEmpty]), collapse = ", "), + context )) } From c12c9e702e6f961e4bfd928e8cb91a282629d4de Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 17:44:09 +0100 Subject: [PATCH 089/141] Fix typo --- R/readAndMergeEuCodedFiles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 877268d..2bfb574 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -108,7 +108,7 @@ warnOnDifferingHeaders <- function(x) ))) warning( - "The file headers are differing in the folowing fields:\n\n", + "The file headers are differing in the following fields:\n\n", text, "\n\nI will use the first header.", call. = FALSE From 35a002b708c7afcc7ccb19d854119f02193bc7f9 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 17:44:55 +0100 Subject: [PATCH 090/141] By default, suppress warning on diff. headers --- R/readAndMergeEuCodedFiles.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 2bfb574..a913062 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -56,14 +56,16 @@ readAndMergeEuCodedFiles <- function( #' #' @export #' -mergeInspectionData <- function(x) +mergeInspectionData <- function(x, warn = FALSE) { if (length(x) == 1L) { return (x[[1L]]) } # Check if there are differences in the file headers - warnOnDifferingHeaders(x) + if (warn) { + warnOnDifferingHeaders(x) + } # Prepare vector of offsets to be added to the inspection number (= row number # in list element "inspections") From 13fc03542feed574767767301451a85022c817f7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 18:56:49 +0100 Subject: [PATCH 091/141] Add argument "warn" to mergeInspectionData() --- R/readAndMergeEuCodedFiles.R | 3 ++- man/mergeInspectionData.Rd | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index a913062..71b2797 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -50,7 +50,8 @@ readAndMergeEuCodedFiles <- function( #' #' @param x list of elements each of which represents inspection data read from #' an EN13508.2-encoded file by means of \code{\link{readEuCodedFile}}. -#' +#' @param warn logical indicating whether to warn about different header +#' information. By default, warnings are not shown. #' @return list with elements \code{header.info}, \code{inspections}, #' \code{observations}. #' diff --git a/man/mergeInspectionData.Rd b/man/mergeInspectionData.Rd index 0e49b55..badc0d6 100644 --- a/man/mergeInspectionData.Rd +++ b/man/mergeInspectionData.Rd @@ -4,11 +4,14 @@ \alias{mergeInspectionData} \title{Merge Inspection Data} \usage{ -mergeInspectionData(x) +mergeInspectionData(x, warn = FALSE) } \arguments{ \item{x}{list of elements each of which represents inspection data read from an EN13508.2-encoded file by means of \code{\link{readEuCodedFile}}.} + +\item{warn}{logical indicating whether to warn about different header +information. By default, warnings are not shown.} } \value{ list with elements \code{header.info}, \code{inspections}, From 12013d6c085ea2acd2632395fb8dbf9357e21af4 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 18:58:23 +0100 Subject: [PATCH 092/141] Add test files generated with kwb.test::create_test_files() and modified --- tests/testthat/test-function-createInspectionId.R | 8 ++++++++ tests/testthat/test-function-extractObservationBlocks.R | 8 ++++++++ tests/testthat/test-function-extractObservationData.R | 7 +++++++ tests/testthat/test-function-getCodeMeanings.R | 8 ++++++++ tests/testthat/test-function-getFileHeaderFromEuLines.R | 7 +++++++ tests/testthat/test-function-getHeaderInfo.R | 7 +++++++ tests/testthat/test-function-getInspectionHeaderInfo_v1.R | 7 +++++++ tests/testthat/test-function-getInspectionHeaderInfo_v2.R | 7 +++++++ .../test-function-getInspectionRecordsFromEuLines.R | 7 +++++++ .../test-function-getObservationRecordsFromEuLines.R | 7 +++++++ tests/testthat/test-function-get_extended_observations.R | 7 +++++++ tests/testthat/test-function-get_extreme_positions.R | 7 +++++++ tests/testthat/test-function-plotObservations.R | 7 +++++++ .../testthat/test-function-readObservationsFromCsvText.R | 7 +++++++ tests/testthat/test-function-readRenamings.R | 7 +++++++ tests/testthat/test-function-removeEmptyRecords.R | 7 +++++++ tests/testthat/test-function-remove_point_damages.R | 7 +++++++ tests/testthat/test-function-renameColumnsToMeaningful.R | 7 +++++++ tests/testthat/test-function-setFilename.R | 7 +++++++ tests/testthat/test-function-setGlobalInspectionID.R | 7 +++++++ tests/testthat/test-function-toEuFormat.R | 7 +++++++ 21 files changed, 150 insertions(+) create mode 100644 tests/testthat/test-function-createInspectionId.R create mode 100644 tests/testthat/test-function-extractObservationBlocks.R create mode 100644 tests/testthat/test-function-extractObservationData.R create mode 100644 tests/testthat/test-function-getCodeMeanings.R create mode 100644 tests/testthat/test-function-getFileHeaderFromEuLines.R create mode 100644 tests/testthat/test-function-getHeaderInfo.R create mode 100644 tests/testthat/test-function-getInspectionHeaderInfo_v1.R create mode 100644 tests/testthat/test-function-getInspectionHeaderInfo_v2.R create mode 100644 tests/testthat/test-function-getInspectionRecordsFromEuLines.R create mode 100644 tests/testthat/test-function-getObservationRecordsFromEuLines.R create mode 100644 tests/testthat/test-function-get_extended_observations.R create mode 100644 tests/testthat/test-function-get_extreme_positions.R create mode 100644 tests/testthat/test-function-plotObservations.R create mode 100644 tests/testthat/test-function-readObservationsFromCsvText.R create mode 100644 tests/testthat/test-function-readRenamings.R create mode 100644 tests/testthat/test-function-removeEmptyRecords.R create mode 100644 tests/testthat/test-function-remove_point_damages.R create mode 100644 tests/testthat/test-function-renameColumnsToMeaningful.R create mode 100644 tests/testthat/test-function-setFilename.R create mode 100644 tests/testthat/test-function-setGlobalInspectionID.R create mode 100644 tests/testthat/test-function-toEuFormat.R diff --git a/tests/testthat/test-function-createInspectionId.R b/tests/testthat/test-function-createInspectionId.R new file mode 100644 index 0000000..74888ae --- /dev/null +++ b/tests/testthat/test-function-createInspectionId.R @@ -0,0 +1,8 @@ +#library(testthat) +test_that("createInspectionId() works", { + + f <- kwb.en13508.2:::createInspectionId + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-extractObservationBlocks.R b/tests/testthat/test-function-extractObservationBlocks.R new file mode 100644 index 0000000..02b3d09 --- /dev/null +++ b/tests/testthat/test-function-extractObservationBlocks.R @@ -0,0 +1,8 @@ +#library(testthat) +test_that("extractObservationBlocks() works", { + + f <- kwb.en13508.2:::extractObservationBlocks + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-extractObservationData.R b/tests/testthat/test-function-extractObservationData.R new file mode 100644 index 0000000..f4c62d6 --- /dev/null +++ b/tests/testthat/test-function-extractObservationData.R @@ -0,0 +1,7 @@ +test_that("extractObservationData() works", { + + f <- kwb.en13508.2:::extractObservationData + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-getCodeMeanings.R b/tests/testthat/test-function-getCodeMeanings.R new file mode 100644 index 0000000..6a1862d --- /dev/null +++ b/tests/testthat/test-function-getCodeMeanings.R @@ -0,0 +1,8 @@ +test_that("getCodeMeanings() works", { + + f <- kwb.en13508.2:::getCodeMeanings + + result <- f() + + expect_identical(names(result), c("CodeTable", "Code", "CodeMeaning")) +}) diff --git a/tests/testthat/test-function-getFileHeaderFromEuLines.R b/tests/testthat/test-function-getFileHeaderFromEuLines.R new file mode 100644 index 0000000..217c84b --- /dev/null +++ b/tests/testthat/test-function-getFileHeaderFromEuLines.R @@ -0,0 +1,7 @@ +test_that("getFileHeaderFromEuLines() works", { + + f <- kwb.en13508.2:::getFileHeaderFromEuLines + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-getHeaderInfo.R b/tests/testthat/test-function-getHeaderInfo.R new file mode 100644 index 0000000..4de0601 --- /dev/null +++ b/tests/testthat/test-function-getHeaderInfo.R @@ -0,0 +1,7 @@ +test_that("getHeaderInfo() works", { + + f <- kwb.en13508.2:::getHeaderInfo + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-getInspectionHeaderInfo_v1.R b/tests/testthat/test-function-getInspectionHeaderInfo_v1.R new file mode 100644 index 0000000..8701005 --- /dev/null +++ b/tests/testthat/test-function-getInspectionHeaderInfo_v1.R @@ -0,0 +1,7 @@ +test_that("getInspectionHeaderInfo_v1() works", { + + f <- kwb.en13508.2:::getInspectionHeaderInfo_v1 + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-getInspectionHeaderInfo_v2.R b/tests/testthat/test-function-getInspectionHeaderInfo_v2.R new file mode 100644 index 0000000..7040468 --- /dev/null +++ b/tests/testthat/test-function-getInspectionHeaderInfo_v2.R @@ -0,0 +1,7 @@ +test_that("getInspectionHeaderInfo_v2() works", { + + f <- kwb.en13508.2:::getInspectionHeaderInfo_v2 + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-getInspectionRecordsFromEuLines.R b/tests/testthat/test-function-getInspectionRecordsFromEuLines.R new file mode 100644 index 0000000..3879e94 --- /dev/null +++ b/tests/testthat/test-function-getInspectionRecordsFromEuLines.R @@ -0,0 +1,7 @@ +test_that("getInspectionRecordsFromEuLines() works", { + + f <- kwb.en13508.2:::getInspectionRecordsFromEuLines + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-getObservationRecordsFromEuLines.R b/tests/testthat/test-function-getObservationRecordsFromEuLines.R new file mode 100644 index 0000000..1933f12 --- /dev/null +++ b/tests/testthat/test-function-getObservationRecordsFromEuLines.R @@ -0,0 +1,7 @@ +test_that("getObservationRecordsFromEuLines() works", { + + f <- kwb.en13508.2:::getObservationRecordsFromEuLines + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-get_extended_observations.R b/tests/testthat/test-function-get_extended_observations.R new file mode 100644 index 0000000..0d874d5 --- /dev/null +++ b/tests/testthat/test-function-get_extended_observations.R @@ -0,0 +1,7 @@ +test_that("get_extended_observations() works", { + + f <- kwb.en13508.2:::get_extended_observations + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-get_extreme_positions.R b/tests/testthat/test-function-get_extreme_positions.R new file mode 100644 index 0000000..3c8b523 --- /dev/null +++ b/tests/testthat/test-function-get_extreme_positions.R @@ -0,0 +1,7 @@ +test_that("get_extreme_positions() works", { + + f <- kwb.en13508.2:::get_extreme_positions + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-plotObservations.R b/tests/testthat/test-function-plotObservations.R new file mode 100644 index 0000000..45d887d --- /dev/null +++ b/tests/testthat/test-function-plotObservations.R @@ -0,0 +1,7 @@ +test_that("plotObservations() works", { + + f <- kwb.en13508.2::plotObservations + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-readObservationsFromCsvText.R b/tests/testthat/test-function-readObservationsFromCsvText.R new file mode 100644 index 0000000..9b2477a --- /dev/null +++ b/tests/testthat/test-function-readObservationsFromCsvText.R @@ -0,0 +1,7 @@ +test_that("readObservationsFromCsvText() works", { + + f <- kwb.en13508.2:::readObservationsFromCsvText + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-readRenamings.R b/tests/testthat/test-function-readRenamings.R new file mode 100644 index 0000000..b6c5301 --- /dev/null +++ b/tests/testthat/test-function-readRenamings.R @@ -0,0 +1,7 @@ +test_that("readRenamings() works", { + + f <- kwb.en13508.2:::readRenamings + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-removeEmptyRecords.R b/tests/testthat/test-function-removeEmptyRecords.R new file mode 100644 index 0000000..72f6427 --- /dev/null +++ b/tests/testthat/test-function-removeEmptyRecords.R @@ -0,0 +1,7 @@ +test_that("removeEmptyRecords() works", { + + f <- kwb.en13508.2:::removeEmptyRecords + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-remove_point_damages.R b/tests/testthat/test-function-remove_point_damages.R new file mode 100644 index 0000000..90465eb --- /dev/null +++ b/tests/testthat/test-function-remove_point_damages.R @@ -0,0 +1,7 @@ +test_that("remove_point_damages() works", { + + f <- kwb.en13508.2:::remove_point_damages + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-renameColumnsToMeaningful.R b/tests/testthat/test-function-renameColumnsToMeaningful.R new file mode 100644 index 0000000..2e1ade7 --- /dev/null +++ b/tests/testthat/test-function-renameColumnsToMeaningful.R @@ -0,0 +1,7 @@ +test_that("renameColumnsToMeaningful() works", { + + f <- kwb.en13508.2:::renameColumnsToMeaningful + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-setFilename.R b/tests/testthat/test-function-setFilename.R new file mode 100644 index 0000000..8854951 --- /dev/null +++ b/tests/testthat/test-function-setFilename.R @@ -0,0 +1,7 @@ +test_that("setFilename() works", { + + f <- kwb.en13508.2:::setFilename + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R new file mode 100644 index 0000000..5b40ac8 --- /dev/null +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -0,0 +1,7 @@ +test_that("setGlobalInspectionID() works", { + + f <- kwb.en13508.2:::setGlobalInspectionID + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-toEuFormat.R b/tests/testthat/test-function-toEuFormat.R new file mode 100644 index 0000000..baee2ae --- /dev/null +++ b/tests/testthat/test-function-toEuFormat.R @@ -0,0 +1,7 @@ +test_that("toEuFormat() works", { + + f <- kwb.en13508.2:::toEuFormat + + expect_error(f()) + +}) From bc2cfc7dc3acdfe39afe8babba2d594d0bae80ca Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 19:33:53 +0100 Subject: [PATCH 093/141] Update DESCRIPTION, testthat.R --- DESCRIPTION | 3 ++- tests/testthat.R | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce6029a..f56aa49 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,9 +19,10 @@ Imports: digest, kwb.utils Suggests: - testthat, + testthat (>= 3.0.0), ggplot2 Remotes: github::kwb-r/kwb.utils Encoding: UTF-8 RoxygenNote: 7.2.3 +Config/testthat/edition: 3 diff --git a/tests/testthat.R b/tests/testthat.R index 5243687..8b288ba 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,3 +1,11 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + library(testthat) library(kwb.en13508.2) From 21c3254b985e1ca1a113a9162ec7cf60ace5d45f Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 19:36:06 +0100 Subject: [PATCH 094/141] Add argument "snake.case" to readEuCodedFile() --- R/readEuCodedFile.R | 47 ++++++++++++++++++++++++------------------ man/readEuCodedFile.Rd | 6 +++++- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 13ba871..d314c2a 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -21,7 +21,8 @@ #' @param warn if \code{TRUE}, warnings are shown (e.g. if not all #A-header #' fields were found) #' @param dbg if \code{TRUE}, debug messages are shown, else not -#' +#' @param snake.case logical indicating whether or not to provide names in +#' snake_case (in contrast to CamelCase) if \code{meaningful.names = TRUE}. #' @return list with elements \code{header.info}, \code{inspections}, #' \code{observations} #' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok @@ -34,7 +35,8 @@ readEuCodedFile <- function( meaningful.names = FALSE, simple.algorithm = TRUE, warn = TRUE, - dbg = TRUE + dbg = TRUE, + snake.case = FALSE ) { #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) @@ -44,7 +46,7 @@ readEuCodedFile <- function( eu_lines <- run( paste("Reading input file", input.file), - readLines(input.file, encoding = encoding) + readLines(input.file, encoding = encoding, warn = FALSE) ) eu_lines <- run( @@ -66,12 +68,17 @@ readEuCodedFile <- function( observations <- run( "Extracting observation records", - getObservationRecordsFromEuLines(eu_lines, header.info, dbg) + getObservationRecordsFromEuLines( + eu_lines = eu_lines, + header.info = header.info, + dbg = dbg, + file = input.file + ) ) if (meaningful.names) { - inspections <- renameColumnsToMeaningful(inspections) - observations <- renameColumnsToMeaningful(observations) + inspections <- renameColumnsToMeaningful(inspections, snake.case) + observations <- renameColumnsToMeaningful(observations, snake.case) } list( @@ -82,19 +89,19 @@ readEuCodedFile <- function( } # renameColumnsToMeaningful ---------------------------------------------------- -renameColumnsToMeaningful <- function(x, snakeCase = FALSE) +renameColumnsToMeaningful <- function(x, snake.case = FALSE) { result <- kwb.utils::renameColumns(x, renamings = readRenamings( - fileName = "eucodes.csv", - columnFrom = "Code", - columnTo = "Name" + file.name = "eucodes.csv", + column.from = "Code", + column.to = "Name" )) - if (snakeCase) { - result <- kwb.utils::renameColumns(x, renamings = readRenamings( - fileName = "column-names.csv", - columnFrom = "name_1", - columnTo = "name_2" + if (snake.case) { + result <- kwb.utils::renameColumns(result, renamings = readRenamings( + file.name = "column-names.csv", + column.from = "name_1", + column.to = "name_2" )) } @@ -102,10 +109,10 @@ renameColumnsToMeaningful <- function(x, snakeCase = FALSE) } # readRenamings ---------------------------------------------------------------- -readRenamings <- function(fileName, columnFrom, columnTo) +readRenamings <- function(file.name, column.from, column.to) { - data <- readPackageFile(fileName) - data <- kwb.utils::selectColumns(data, c(columnFrom, columnTo)) - isComplete <- rowSums(nchar(as.matrix(data)) > 0L) == 2L - kwb.utils::toLookupList(data = data[isComplete, ]) + data <- readPackageFile(file.name) + data <- kwb.utils::selectColumns(data, c(column.from, column.to)) + is.complete <- rowSums(nchar(as.matrix(data)) > 0L) == 2L + kwb.utils::toLookupList(data = data[is.complete, ]) } diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index e46ef67..8af1308 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -11,7 +11,8 @@ readEuCodedFile( meaningful.names = FALSE, simple.algorithm = TRUE, warn = TRUE, - dbg = TRUE + dbg = TRUE, + snake.case = FALSE ) } \arguments{ @@ -38,6 +39,9 @@ differing #B-header rows is used.} fields were found)} \item{dbg}{if \code{TRUE}, debug messages are shown, else not} + +\item{snake.case}{logical indicating whether or not to provide names in +snake_case (in contrast to CamelCase) if \code{meaningful.names = TRUE}.} } \value{ list with elements \code{header.info}, \code{inspections}, From 0d6772f6e7d4fdd8dad3e437c78014fe24d77b3c Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 19:37:50 +0100 Subject: [PATCH 095/141] Add argument "file" to extractObservationData() and to getObservationRecordsFromEuLines() --- R/extractObservationData.R | 28 ++++++++++++++++++++-------- R/getObservationRecordsFromEuLines.R | 14 ++++++++++++-- man/extractObservationData.Rd | 4 +++- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index d34076d..c843c7e 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -6,9 +6,15 @@ #' @param headerInfo data frame with information about header lines #' @param header.info list as returned by #' \code{kwb.en13508.2:::getFileHeaderFromEuLines} +#' @param file optional. Name of the file from which \code{euLines} were read. #' @return data frame with columns \code{A}, \code{B}, \code{C}, ... as defined #' in EN13508.2 and a column \code{inspno} referring to the inspection number. -extractObservationData <- function(euLines, headerInfo, header.info) +extractObservationData <- function( + euLines, + headerInfo, + header.info, + file = "" +) { # Create accessor to data frame headerInfo fetch <- kwb.utils::createAccessor(headerInfo) @@ -47,7 +53,7 @@ extractObservationData <- function(euLines, headerInfo, header.info) result[["inspno"]] <- rep(fetch("inspno")[rowsWithKey], blockLengths) - removeEmptyRecords(result, context = key) + removeEmptyRecords(result, file = file) }) inspectionData <- kwb.utils::safeRowBindAll(dataBlocks) @@ -98,7 +104,7 @@ extractObservationBlocks <- function(euLines, headerInfo, uniqueKey) } # removeEmptyRecords ----------------------------------------------------------- -removeEmptyRecords <- function(data, context) +removeEmptyRecords <- function(data, file) { # Convert data frame to a matrix of text values (excluding "inspno") x <- as.matrix(kwb.utils::removeColumns(data, "inspno")) @@ -108,14 +114,20 @@ removeEmptyRecords <- function(data, context) isEmpty <- rowSums(kwb.utils::defaultIfNA(nchar(x), 0L)) == 0L if (any(isEmpty)) { + n <- sum(isEmpty) message(sprintf( - paste( - "Removing %d empty records from observations table (inspection ", - "number(s): %s, context: %s)" + paste0( + "Removing %d empty %s from observations table\n", + " inspection %s: %s\n", + " file: \"%s\"\n", + " folder: \"%s\"" ), - sum(isEmpty), + n, + ifelse(n > 1L, "records", "record"), + ifelse(n > 1L, "numbers", "number"), paste(unique(data[["inspno"]][isEmpty]), collapse = ", "), - context + basename(file), + dirname(file) )) } diff --git a/R/getObservationRecordsFromEuLines.R b/R/getObservationRecordsFromEuLines.R index 22dab09..450bdf5 100644 --- a/R/getObservationRecordsFromEuLines.R +++ b/R/getObservationRecordsFromEuLines.R @@ -1,5 +1,10 @@ # getObservationRecordsFromEuLines --------------------------------------------- -getObservationRecordsFromEuLines <- function(eu_lines, header.info, dbg) +getObservationRecordsFromEuLines <- function( + eu_lines, + header.info, + dbg, + file = "" +) { observations <- try( getObservationsFromEuLines(eu_lines, header.info, dbg = dbg), @@ -8,7 +13,12 @@ getObservationRecordsFromEuLines <- function(eu_lines, header.info, dbg) if (kwb.utils::isTryError(observations)) { headerInfo <- getHeaderInfo(eu_lines) - observations <- extractObservationData(eu_lines, headerInfo, header.info) + observations <- extractObservationData( + eu_lines, + headerInfo, + header.info, + file = file + ) } kwb.utils::catIf(dbg, paste(nrow(observations), "observations extracted. ")) diff --git a/man/extractObservationData.Rd b/man/extractObservationData.Rd index 4c8a8cf..28c7db7 100644 --- a/man/extractObservationData.Rd +++ b/man/extractObservationData.Rd @@ -4,7 +4,7 @@ \alias{extractObservationData} \title{Extract Observations from EN13508.2-coded file} \usage{ -extractObservationData(euLines, headerInfo, header.info) +extractObservationData(euLines, headerInfo, header.info, file = "") } \arguments{ \item{euLines}{text lines read from EN13508.2-coded file} @@ -13,6 +13,8 @@ extractObservationData(euLines, headerInfo, header.info) \item{header.info}{list as returned by \code{kwb.en13508.2:::getFileHeaderFromEuLines}} + +\item{file}{optional. Name of the file from which \code{euLines} were read.} } \value{ data frame with columns \code{A}, \code{B}, \code{C}, ... as defined From b7f0288c2ebbd9b688241eb1746be1ad64c726ba Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 19:40:46 +0100 Subject: [PATCH 096/141] Expect snake_case names when creating unique id and change the default time to "22:33" to make it more clear that the time is fictional. --- R/createInspectionId.R | 8 +++++++- R/setGlobalInspectionID.R | 30 ++++++++++++++++-------------- man/setGlobalInspectionID.Rd | 4 ++-- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/R/createInspectionId.R b/R/createInspectionId.R index 4de0f84..f1f6d4a 100644 --- a/R/createInspectionId.R +++ b/R/createInspectionId.R @@ -1,7 +1,13 @@ # createInspectionId ----------------------------------------------------------- createInspectionId <- function( inspections, - id.columns = c("project", "InspDate", "InspTime", "Node1Ref", "Node2Ref"), + id.columns = c( + "project", + "inspection_date", + "inspection_time", + "node_1_ref", + "node_2_ref" + ), n.chars = 8L ) { diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index c828dac..08977a6 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -8,7 +8,7 @@ #' @param project name of project to which the data are related, such as: #' "Lausanne" #' @param default.time default time string to use if column InspTime is not -#' available. Default: "12:00". A random number will be generated for the +#' available. Default: "22:33". A random number will be generated for the #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. #' @return list with the same elements as in \code{inspection.data} but with @@ -17,7 +17,7 @@ setGlobalInspectionID <- function( inspection.data, project = NULL, - default.time = "12:00" + default.time = "22:33" ) { if (is.null(project)) { @@ -37,21 +37,19 @@ setGlobalInspectionID <- function( inspections[["project"]] <- project - # The following function requires the column "InspTime". If this column does - # not exist, create it with a default value - timeColumn <- "InspTime" + # The following function requires the column "inspection_time". If this + # column does not exist, create it with a default value + timeColumn <- "inspection_time" inspections <- kwb.utils::hsAddMissingCols(inspections, timeColumn) hasNoTime <- is.na(inspections[[timeColumn]]) if (any(hasNoTime)) { - n_missing <- sum(hasNoTime) - message( - "There are ", n_missing, " missing inspection times. I will set missing ", - "inspection times to '", default.time, "' (plus random seconds). You ", - "may change this time value by setting the argument 'default.time'." + "Setting ", n_missing, " missing inspection times to '", default.time, + "' (plus random seconds). You may change this time value by setting the ", + "argument 'default.time'." ) # We have to fix the random number generator otherwise the times are not @@ -66,15 +64,19 @@ setGlobalInspectionID <- function( ) } - # Create the inspection IDs and store them in column "inspid" - inspections[["inspid"]] <- createInspectionId(inspections) + # Create the inspection IDs and store them in column "inspection_id" + inspections[["inspection_id"]] <- createInspectionId(inspections) i <- kwb.utils::selectColumns(observations, "inspno") - observations[["inspid"]] <- kwb.utils::selectColumns(inspections, "inspid")[i] + + observations[["inspection_id"]] <- kwb.utils::selectColumns( + inspections, "inspection_id" + )[i] + observations <- kwb.utils::removeColumns(observations, "inspno") # Just a shortcut - inspidFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspid") + inspidFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id") list( header.info = fetch("header.info"), diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index 5e7bde6..cfa0d83 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -4,7 +4,7 @@ \alias{setGlobalInspectionID} \title{Set Global Inspection ID} \usage{ -setGlobalInspectionID(inspection.data, project = NULL, default.time = "12:00") +setGlobalInspectionID(inspection.data, project = NULL, default.time = "22:33") } \arguments{ \item{inspection.data}{list with elements \code{header.info}, @@ -14,7 +14,7 @@ setGlobalInspectionID(inspection.data, project = NULL, default.time = "12:00") "Lausanne"} \item{default.time}{default time string to use if column InspTime is not -available. Default: "12:00". A random number will be generated for the +available. Default: "22:33". A random number will be generated for the seconds, just to increase the chance that setting the time is enough to generate a unique key.} } From 97dc24d21a2087ae6453c134ea59894ff43eb740 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 20:53:52 +0100 Subject: [PATCH 097/141] Remove some empty lines --- R/importObservations.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/importObservations.R b/R/importObservations.R index c398f51..d422906 100644 --- a/R/importObservations.R +++ b/R/importObservations.R @@ -1,5 +1,4 @@ # getObservationsFromEuLines --------------------------------------------------- - getObservationsFromEuLines <- function( eu_lines, header.info, old.version = FALSE, dbg = TRUE ) @@ -78,7 +77,6 @@ getObservationsFromEuLines <- function( } # get_observations ------------------------------------------------------------- - get_observations <- function(caption_line, c_body, header_info) { # Select and rename elements from "header_info" into list "arguments" @@ -139,7 +137,6 @@ get_observations <- function(caption_line, c_body, header_info) } # readObservationsFromCsvText -------------------------------------------------- - readObservationsFromCsvText <- function(text, sep, dec, quote, colClasses, ...) { # If colClasses is specified, reduce it to the columns that actually occur @@ -171,7 +168,6 @@ readObservationsFromCsvText <- function(text, sep, dec, quote, colClasses, ...) } # getInspectionNumbers --------------------------------------------------------- - getInspectionNumbers <- function(indices.C, indices.B01, indices.B, indices.Z) { # To find the number of the inspection corresponding to the observation block @@ -210,13 +206,11 @@ getInspectionNumbers <- function(indices.C, indices.B01, indices.B, indices.Z) } # getInspectionNumbers.old ----------------------------------------------------- - getInspectionNumbers.old <- function( indices.C, indices.Z, numberOfInspections, maxline ) { block.begs <- indices.C + 1 - block.ends <- indices.Z - 1 missingBlockEnds <- numberOfInspections - length(block.ends) From d1f85b139bc4bbc93c2eae5cca1e030c52273d88 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 21:00:33 +0100 Subject: [PATCH 098/141] Move getvalueFromKeyValueString() to utils.R --- R/getInspectionRecords_v1.R | 6 ------ R/utils.R | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/getInspectionRecords_v1.R b/R/getInspectionRecords_v1.R index b5f0d3f..8e2e34c 100644 --- a/R/getInspectionRecords_v1.R +++ b/R/getInspectionRecords_v1.R @@ -53,12 +53,6 @@ getInspectionRecords_v1 <- function(eu_lines, header.info, dbg = TRUE) inspections.complete } -# getValueFromKeyValueString --------------------------------------------------- -getValueFromKeyValueString <- function(keyvalue) -{ - sapply(strsplit(keyvalue, "="), "[", 2L) -} - # extractInspectionData -------------------------------------------------------- extractInspectionData <- function(b.lines, header.info, captions) { diff --git a/R/utils.R b/R/utils.R index d1979b0..c950b14 100644 --- a/R/utils.R +++ b/R/utils.R @@ -62,6 +62,12 @@ get_columns <- kwb.utils::selectColumns # get_elements ----------------------------------------------------------------- get_elements <- kwb.utils::selectElements +# getValueFromKeyValueString --------------------------------------------------- +getValueFromKeyValueString <- function(keyvalue) +{ + sapply(strsplit(keyvalue, "="), "[", 2L) +} + # quoteTextIfNeeded ------------------------------------------------------------ quoteTextIfNeeded <- function(x, sep, qchar, qmethod) { From 8e2d239124cb33c0be3e476166aa0ad188388251 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 21:05:30 +0100 Subject: [PATCH 099/141] Specify quantification 1 as numeric and not as character. Let's see whether there are cases where a non-numeric quantification is given... --- inst/extdata/eucodes_de.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/eucodes_de.csv b/inst/extdata/eucodes_de.csv index c9b0f88..6dddb80 100644 --- a/inst/extdata/eucodes_de.csv +++ b/inst/extdata/eucodes_de.csv @@ -2,7 +2,7 @@ Code,Name,class,meaning A,Code,character,Hauptkode B,char1,character,Charakterisierung 1 C,char2,character,Charakterisierung 2 -D,Dimension1,character,Quantifizierung 1 +D,Dimension1,numeric,Quantifizierung 1 E,Dimension2,numeric,Quantifizierung 2 F,Remarks,character,Anmerkungen G,ClockRefAtFrom,integer,Lage am Umfang 1 From 1c87aab21081606ed3d2d807a6c6fba2417a3936 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 21:39:20 +0100 Subject: [PATCH 100/141] Test for camel/snake case column names --- .../test-function-readAndMergeEuCodedFiles.R | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-function-readAndMergeEuCodedFiles.R b/tests/testthat/test-function-readAndMergeEuCodedFiles.R index 52a05f0..c55e7c5 100644 --- a/tests/testthat/test-function-readAndMergeEuCodedFiles.R +++ b/tests/testthat/test-function-readAndMergeEuCodedFiles.R @@ -1,10 +1,34 @@ +#library(testthat) test_that("readAndMergeEuCodedFiles() works", { f <- kwb.en13508.2:::readAndMergeEuCodedFiles expect_error(f()) - file <- getExampleFile() + file <- kwb.en13508.2:::getExampleFile() + files <- c(file, file) + + result_camel <- f(files, meaningful.names = TRUE) + result_snake <- f(files, meaningful.names = TRUE, snake.case = TRUE) - result <- f(c(file, file)) + elements_ok <- function(x) expect_identical(names(x), c( + "header.info", "inspections", "observations" + )) + + elements_ok(result_camel) + elements_ok(result_snake) + + expect_true(all( + c("MainCode", "Char1", "Char2") %in% names(result_camel$observations) + )) + + expect_true(all( + c("main_code", "char_1", "char_2") %in% names(result_snake$observations) + )) + + expect_true(!anyNA(result_camel$observations$Char1)) + expect_true(!anyNA(result_camel$observations$Char2)) + + expect_true(!anyNA(result_snake$observations$char_1)) + expect_true(!anyNA(result_snake$observations$char_2)) }) From e20aaa07667e9dae2d9493001a056ce29916c806 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 21:43:26 +0100 Subject: [PATCH 101/141] Rename file according to contained function --- R/{importObservations.R => getObservationsFromEuLines.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{importObservations.R => getObservationsFromEuLines.R} (100%) diff --git a/R/importObservations.R b/R/getObservationsFromEuLines.R similarity index 100% rename from R/importObservations.R rename to R/getObservationsFromEuLines.R From 2396d3adb0e3021a03be6606ce129a2287fcb0d2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 21:55:21 +0100 Subject: [PATCH 102/141] Replace NA with "" in character columns --- R/readAndMergeEuCodedFiles.R | 14 +++++++++++--- R/utils.R | 13 +++++++++++++ ...nction-replaceNaWithEmptyStringInCharColumns.R | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test-function-replaceNaWithEmptyStringInCharColumns.R diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 71b2797..3888f6f 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -86,10 +86,18 @@ mergeInspectionData <- function(x, warn = FALSE) obs }) + header.info <- get_elements(x[[1L]], "header.info") + inspections <- kwb.utils::safeRowBindOfListElements(x, "inspections") + observations <- kwb.utils::safeRowBindAll(observations) + + # Replace NA with "" in columns of type character + inspections <- replaceNaWithEmptyStringInCharColumns(inspections) + observations <- replaceNaWithEmptyStringInCharColumns(observations) + list( - header.info = get_elements(x[[1L]], "header.info"), - inspections = kwb.utils::safeRowBindOfListElements(x, "inspections"), - observations = kwb.utils::safeRowBindAll(observations) + header.info = header.info, + inspections = inspections, + observations = observations ) } diff --git a/R/utils.R b/R/utils.R index c950b14..4c52181 100644 --- a/R/utils.R +++ b/R/utils.R @@ -119,6 +119,19 @@ removeEmptyLines <- function(x, dbg = TRUE) ) } +# replaceNaWithEmptyStringInCharColumns ---------------------------------------- +replaceNaWithEmptyStringInCharColumns <- function(x) +{ + is_char <- sapply(x, is.character) + + x[is_char] <- lapply(x[is_char], function(y) { + y[is.na(y)] <- "" + y + }) + + x +} + # valuesToCsv ------------------------------------------------------------------ #' Values to CSV diff --git a/tests/testthat/test-function-replaceNaWithEmptyStringInCharColumns.R b/tests/testthat/test-function-replaceNaWithEmptyStringInCharColumns.R new file mode 100644 index 0000000..85275de --- /dev/null +++ b/tests/testthat/test-function-replaceNaWithEmptyStringInCharColumns.R @@ -0,0 +1,15 @@ +#library(testthat) +test_that("replaceNaWithEmptyStringInCharColumns() works", { + + f <- kwb.en13508.2:::replaceNaWithEmptyStringInCharColumns + + expect_error(f()) + + x <- data.frame( + a = 1:2, + b = c("x", NA) + ) + + expect_identical(f(x)[["b"]], c("x", "")) + +}) From 672e2984a88b001433226ae2ded3f6a7b216119a Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 21:58:08 +0100 Subject: [PATCH 103/141] Add two more tests --- tests/testthat/test-function-getExampleData.R | 11 +++++++++++ tests/testthat/test-function-getExampleFile.R | 7 +++++++ 2 files changed, 18 insertions(+) create mode 100644 tests/testthat/test-function-getExampleData.R create mode 100644 tests/testthat/test-function-getExampleFile.R diff --git a/tests/testthat/test-function-getExampleData.R b/tests/testthat/test-function-getExampleData.R new file mode 100644 index 0000000..590bbdb --- /dev/null +++ b/tests/testthat/test-function-getExampleData.R @@ -0,0 +1,11 @@ +test_that("getExampleData() works", { + + f <- kwb.en13508.2:::getExampleData + + result <- f() + + expect_identical( + names(result), + c("header.info", "inspections", "observations") + ) +}) diff --git a/tests/testthat/test-function-getExampleFile.R b/tests/testthat/test-function-getExampleFile.R new file mode 100644 index 0000000..2a4570e --- /dev/null +++ b/tests/testthat/test-function-getExampleFile.R @@ -0,0 +1,7 @@ +test_that("getExampleFile() works", { + + f <- kwb.en13508.2:::getExampleFile + + expect_true(file.exists(f())) + +}) From ea0f154755422342db025d331d7d2130d26952d2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 22:11:51 +0100 Subject: [PATCH 104/141] Simplify name --- R/setGlobalInspectionID.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 08977a6..586c8ad 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -76,11 +76,11 @@ setGlobalInspectionID <- function( observations <- kwb.utils::removeColumns(observations, "inspno") # Just a shortcut - inspidFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id") + idFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id") list( header.info = fetch("header.info"), - inspections = inspidFirst(inspections), - observations = inspidFirst(observations) + inspections = idFirst(inspections), + observations = idFirst(observations) ) } From ede8f9df9272bc6c279a15faf8feef70770097a1 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 28 Nov 2023 22:12:19 +0100 Subject: [PATCH 105/141] Check not only for NA but also for empty string --- R/setGlobalInspectionID.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 586c8ad..da88349 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -42,7 +42,7 @@ setGlobalInspectionID <- function( timeColumn <- "inspection_time" inspections <- kwb.utils::hsAddMissingCols(inspections, timeColumn) - hasNoTime <- is.na(inspections[[timeColumn]]) + hasNoTime <- kwb.utils::isNaOrEmpty(inspections[[timeColumn]]) if (any(hasNoTime)) { n_missing <- sum(hasNoTime) From 35b4d66a8fe28550aee70c89df92decc465f5a35 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Nov 2023 08:31:20 +0100 Subject: [PATCH 106/141] Safely select columns with get_columns() --- R/extractObservationData.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index c843c7e..d1640d1 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -75,19 +75,21 @@ extractObservationData <- function( #' below the #C-headers of type specified in \code{uniqueKey} extractObservationBlocks <- function(euLines, headerInfo, uniqueKey) { - kwb.utils::checkForMissingColumns(headerInfo, c("uniqueKey", "type")) + keys <- get_columns(headerInfo, "uniqueKey") + types <- get_columns(headerInfo, "type") - keyMatches <- headerInfo[["uniqueKey"]] == uniqueKey - x <- headerInfo[keyMatches | headerInfo[["type"]] == "Z", ] + x <- headerInfo[keys == uniqueKey | types == "Z", , drop = FALSE] - changes <- kwb.utils::findChanges(x$type) + changes <- kwb.utils::findChanges(get_columns(x, "type")) if (changes$value[1L] == "Z") { changes <- changes[-1L, ] } - from <- x$row[changes$starts_at[changes$value == "C"]] + 1L - to <- x$row[changes$starts_at[changes$value == "Z"]] - 1L + rows <- get_columns(x, "row") + + from <- rows[changes$starts_at[changes$value == "C"]] + 1L + to <- rows[changes$starts_at[changes$value == "Z"]] - 1L # Add a last "to" value if the last EU-line is not "#Z" if (length(to) != length(from)) { From f72e7d1d84471470a6713849acd77741d0466022 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Nov 2023 08:32:47 +0100 Subject: [PATCH 107/141] Add arg. "as.text" to extractObservationData() and safely select elements from header.info --- R/extractObservationData.R | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index d1640d1..c4683f7 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -7,25 +7,34 @@ #' @param header.info list as returned by #' \code{kwb.en13508.2:::getFileHeaderFromEuLines} #' @param file optional. Name of the file from which \code{euLines} were read. +#' @param as.is whether or not to keep columns in their original (text) type. +#' The default is \code{FALSE}, i.e. columns that are expected to contain +#' numeric values are converted to numeric, respecting the decimal separator +#' that is given in \code{header.info} #' @return data frame with columns \code{A}, \code{B}, \code{C}, ... as defined #' in EN13508.2 and a column \code{inspno} referring to the inspection number. extractObservationData <- function( euLines, headerInfo, header.info, - file = "" + file = "", + as.text = FALSE ) { - # Create accessor to data frame headerInfo + # Create accessor function to headerInfo fetch <- kwb.utils::createAccessor(headerInfo) - + keys <- unique(fetch("uniqueKey")[fetch("type") == "C"]) - colClasses <- sapply( - X = inspectionDataFieldCodes(), - FUN = kwb.utils::selectElements, - elements = "class" - ) + colClasses <- if (as.text) { + "character" + } else { + sapply( + X = inspectionDataFieldCodes(), + FUN = kwb.utils::selectElements, + elements = "class" + ) + } dataBlocks <- lapply(keys, function(key) { @@ -44,9 +53,9 @@ extractObservationData <- function( result <- readObservationsFromCsvText( text = text, - sep = header.info$separator, - dec = header.info$decimal, - quote = header.info$quote, + sep = get_elements(header.info, "separator"), + dec = get_elements(header.info, "decimal"), + quote = get_elements(header.info, "quote"), colClasses = colClasses, header = TRUE ) From 34e3a8845246082b633becd48141913e6690f8f7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Nov 2023 09:17:32 +0100 Subject: [PATCH 108/141] Fix implementation of as.text = TRUE and test it --- R/extractObservationData.R | 24 +++++----- man/extractObservationData.Rd | 13 +++++- .../test-function-extractObservationData.R | 45 +++++++++++++++++++ 3 files changed, 69 insertions(+), 13 deletions(-) diff --git a/R/extractObservationData.R b/R/extractObservationData.R index c4683f7..b43b6da 100644 --- a/R/extractObservationData.R +++ b/R/extractObservationData.R @@ -7,10 +7,10 @@ #' @param header.info list as returned by #' \code{kwb.en13508.2:::getFileHeaderFromEuLines} #' @param file optional. Name of the file from which \code{euLines} were read. -#' @param as.is whether or not to keep columns in their original (text) type. -#' The default is \code{FALSE}, i.e. columns that are expected to contain -#' numeric values are converted to numeric, respecting the decimal separator -#' that is given in \code{header.info} +#' @param as.text whether or not to keep columns in their original (character) +#' type. The default is \code{FALSE}, i.e. columns that are expected to +#' contain numeric values are converted to numeric, respecting the decimal +#' separator that is given in \code{header.info} #' @return data frame with columns \code{A}, \code{B}, \code{C}, ... as defined #' in EN13508.2 and a column \code{inspno} referring to the inspection number. extractObservationData <- function( @@ -26,14 +26,14 @@ extractObservationData <- function( keys <- unique(fetch("uniqueKey")[fetch("type") == "C"]) - colClasses <- if (as.text) { - "character" - } else { - sapply( - X = inspectionDataFieldCodes(), - FUN = kwb.utils::selectElements, - elements = "class" - ) + colClasses <- sapply( + X = inspectionDataFieldCodes(), + FUN = kwb.utils::selectElements, + elements = "class" + ) + + if (as.text) { + colClasses[] <- "character" } dataBlocks <- lapply(keys, function(key) { diff --git a/man/extractObservationData.Rd b/man/extractObservationData.Rd index 28c7db7..ce3f6d8 100644 --- a/man/extractObservationData.Rd +++ b/man/extractObservationData.Rd @@ -4,7 +4,13 @@ \alias{extractObservationData} \title{Extract Observations from EN13508.2-coded file} \usage{ -extractObservationData(euLines, headerInfo, header.info, file = "") +extractObservationData( + euLines, + headerInfo, + header.info, + file = "", + as.text = FALSE +) } \arguments{ \item{euLines}{text lines read from EN13508.2-coded file} @@ -15,6 +21,11 @@ extractObservationData(euLines, headerInfo, header.info, file = "") \code{kwb.en13508.2:::getFileHeaderFromEuLines}} \item{file}{optional. Name of the file from which \code{euLines} were read.} + +\item{as.text}{whether or not to keep columns in their original (character) +type. The default is \code{FALSE}, i.e. columns that are expected to +contain numeric values are converted to numeric, respecting the decimal +separator that is given in \code{header.info}} } \value{ data frame with columns \code{A}, \code{B}, \code{C}, ... as defined diff --git a/tests/testthat/test-function-extractObservationData.R b/tests/testthat/test-function-extractObservationData.R index f4c62d6..c74b519 100644 --- a/tests/testthat/test-function-extractObservationData.R +++ b/tests/testthat/test-function-extractObservationData.R @@ -1,7 +1,52 @@ +#library(testthat) test_that("extractObservationData() works", { f <- kwb.en13508.2:::extractObservationData expect_error(f()) + euLines <- c( + "#C=I,A,B", #1 + "1.0,a,b", #2 + "#Z", #3 + "#C=I,A", #4 + "1.1,a", #5 + "#Z" #6 + ) + + headerInfo <- data.frame( + row = as.integer(c(1, 3, 4, 6)), + type = c("C", "Z", "C", "Z"), + value = c("I,A,B", "", "I,A", ""), + uniqueKey = c("1", "", "2", ""), + inspno = 1L + ) + + header.info <- list( + separator = ",", + quote = '"', + decimal = "." + ) + + result <- f(euLines, headerInfo, header.info) + result_text <- f(euLines, headerInfo, header.info, as.text = TRUE) + + expect_s3_class(result, "data.frame") + expect_s3_class(result_text, "data.frame") + + classes <- function(x) sapply(x, class) + + expect_identical(classes(result), c( + inspno = "integer", + A = "character", + B = "character", + I = "numeric" + )) + + expect_identical(classes(result_text), c( + inspno = "integer", + A = "character", + B = "character", + I = "character" + )) }) From d4930920a38ded34dd6b0f59c4571a8eda0d7994 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Nov 2023 09:18:17 +0100 Subject: [PATCH 109/141] Add test for extractObservationBlocks() --- .../test-function-extractObservationBlocks.R | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/testthat/test-function-extractObservationBlocks.R b/tests/testthat/test-function-extractObservationBlocks.R index 02b3d09..a0bdd25 100644 --- a/tests/testthat/test-function-extractObservationBlocks.R +++ b/tests/testthat/test-function-extractObservationBlocks.R @@ -5,4 +5,31 @@ test_that("extractObservationBlocks() works", { expect_error(f()) + euLines <- c( + "#C=A,B,C", #1 + "1,2,3", #2 + "#Z", #3 + "#C=A,B", #4 + "11,22", #5 + "#Z" #6 + ) + + headerInfo <- data.frame( + row = as.integer(c(1, 3, 4, 6)), + type = c("C", "Z", "C", "Z"), + uniqueKey = c("1", "", "2", "") + ) + + expect_identical(f(euLines, headerInfo, uniqueKey = "no-such-key"), list()) + + expect_identical( + f(euLines, headerInfo, uniqueKey = "1"), + list("1,2,3") + ) + + expect_identical( + f(euLines, headerInfo, uniqueKey = "2"), + list("11,22") + ) + }) From 4c8787c3b2c3eee0035d67c9e423aed0f14f75b7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 30 Nov 2023 09:19:37 +0100 Subject: [PATCH 110/141] Allow to pass argument "as.text" through --- R/getObservationRecordsFromEuLines.R | 6 ++++-- R/readEuCodedFile.R | 8 ++++++-- man/readEuCodedFile.Rd | 6 +++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/R/getObservationRecordsFromEuLines.R b/R/getObservationRecordsFromEuLines.R index 450bdf5..151411f 100644 --- a/R/getObservationRecordsFromEuLines.R +++ b/R/getObservationRecordsFromEuLines.R @@ -3,7 +3,8 @@ getObservationRecordsFromEuLines <- function( eu_lines, header.info, dbg, - file = "" + file = "", + ... ) { observations <- try( @@ -17,7 +18,8 @@ getObservationRecordsFromEuLines <- function( eu_lines, headerInfo, header.info, - file = file + file = file, + ... ) } diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index d314c2a..108b9c7 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -23,6 +23,8 @@ #' @param dbg if \code{TRUE}, debug messages are shown, else not #' @param snake.case logical indicating whether or not to provide names in #' snake_case (in contrast to CamelCase) if \code{meaningful.names = TRUE}. +#' @param \dots further arguments to be passed to +#' \code{kwb.en13508.2:::getObservationRecordsFromEuLines} #' @return list with elements \code{header.info}, \code{inspections}, #' \code{observations} #' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok @@ -36,7 +38,8 @@ readEuCodedFile <- function( simple.algorithm = TRUE, warn = TRUE, dbg = TRUE, - snake.case = FALSE + snake.case = FALSE, + ... ) { #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) @@ -72,7 +75,8 @@ readEuCodedFile <- function( eu_lines = eu_lines, header.info = header.info, dbg = dbg, - file = input.file + file = input.file, + ... ) ) diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 8af1308..cab233e 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -12,7 +12,8 @@ readEuCodedFile( simple.algorithm = TRUE, warn = TRUE, dbg = TRUE, - snake.case = FALSE + snake.case = FALSE, + ... ) } \arguments{ @@ -42,6 +43,9 @@ fields were found)} \item{snake.case}{logical indicating whether or not to provide names in snake_case (in contrast to CamelCase) if \code{meaningful.names = TRUE}.} + +\item{\dots}{further arguments to be passed to +\code{kwb.en13508.2:::getObservationRecordsFromEuLines}} } \value{ list with elements \code{header.info}, \code{inspections}, From 730288062781267d88de5846a1bc9fd49f17a7ee Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 2 Dec 2023 09:45:27 +0100 Subject: [PATCH 111/141] Provide argument "default.time" at high level and set it to "22:22" to indicate "invented" --- R/readAndMergeEuCodedFiles.R | 6 ++++-- R/setGlobalInspectionID.R | 4 ++-- man/readAndMergeEuCodedFiles.Rd | 5 ++++- man/setGlobalInspectionID.Rd | 4 ++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 3888f6f..db66b32 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -14,6 +14,7 @@ #' "inspections" and "observations" of the returned list. #' @param project name of project to which the data are related, such as: #' "Lausanne" +#' @param default.time passed to \code{\link{setGlobalInspectionID}} #' @export #' readAndMergeEuCodedFiles <- function( @@ -21,7 +22,8 @@ readAndMergeEuCodedFiles <- function( dbg = FALSE, ..., add.inspid = FALSE, - project = NULL + project = NULL, + default.time = "22:22" ) { # by setting simple.algorithm = FALSE we get unique column names, e.g. "ADE" @@ -39,7 +41,7 @@ readAndMergeEuCodedFiles <- function( return(inspection.data) } - setGlobalInspectionID(inspection.data, project) + setGlobalInspectionID(inspection.data, project, default.time = default.time) } # mergeInspectionData ---------------------------------------------------------- diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index da88349..2ebc2a9 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -8,7 +8,7 @@ #' @param project name of project to which the data are related, such as: #' "Lausanne" #' @param default.time default time string to use if column InspTime is not -#' available. Default: "22:33". A random number will be generated for the +#' available. Default: "22:22". A random number will be generated for the #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. #' @return list with the same elements as in \code{inspection.data} but with @@ -17,7 +17,7 @@ setGlobalInspectionID <- function( inspection.data, project = NULL, - default.time = "22:33" + default.time = "22:22" ) { if (is.null(project)) { diff --git a/man/readAndMergeEuCodedFiles.Rd b/man/readAndMergeEuCodedFiles.Rd index 476a035..0909f97 100644 --- a/man/readAndMergeEuCodedFiles.Rd +++ b/man/readAndMergeEuCodedFiles.Rd @@ -9,7 +9,8 @@ readAndMergeEuCodedFiles( dbg = FALSE, ..., add.inspid = FALSE, - project = NULL + project = NULL, + default.time = "22:22" ) } \arguments{ @@ -26,6 +27,8 @@ unique inspection ID (inspid) is added to the data frames in elements \item{project}{name of project to which the data are related, such as: "Lausanne"} + +\item{default.time}{passed to \code{\link{setGlobalInspectionID}}} } \description{ Read files in EN13508.2-format using \code{\link{readEuCodedFiles}} and merge diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index cfa0d83..644ff40 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -4,7 +4,7 @@ \alias{setGlobalInspectionID} \title{Set Global Inspection ID} \usage{ -setGlobalInspectionID(inspection.data, project = NULL, default.time = "22:33") +setGlobalInspectionID(inspection.data, project = NULL, default.time = "22:22") } \arguments{ \item{inspection.data}{list with elements \code{header.info}, @@ -14,7 +14,7 @@ setGlobalInspectionID(inspection.data, project = NULL, default.time = "22:33") "Lausanne"} \item{default.time}{default time string to use if column InspTime is not -available. Default: "22:33". A random number will be generated for the +available. Default: "22:22". A random number will be generated for the seconds, just to increase the chance that setting the time is enough to generate a unique key.} } From 4aacb95649f5f98b0ae3cd22022db0275a23de9a Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 2 Dec 2023 09:51:32 +0100 Subject: [PATCH 112/141] Add argument "name.convention" replacing arguments "meaningful.names", "snake.case" Also rename renameColumnsToMeaningful() to applyNameConvention() --- R/readEuCodedFile.R | 26 +++++++++---------- man/readEuCodedFile.Rd | 14 ++++------ .../test-function-applyNameConvention.R | 7 +++++ .../test-function-readAndMergeEuCodedFiles.R | 14 +++++----- .../testthat/test-function-readEuCodedFile.R | 18 ++++++++++--- .../test-function-renameColumnsToMeaningful.R | 7 ----- 6 files changed, 46 insertions(+), 40 deletions(-) create mode 100644 tests/testthat/test-function-applyNameConvention.R delete mode 100644 tests/testthat/test-function-renameColumnsToMeaningful.R diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 108b9c7..94f4e49 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -9,10 +9,10 @@ #' @param encoding default: "latin1" #' @param read.inspections if \code{TRUE}, general inspection data (in #' #B-blocks) are read, otherwise skipped (use if function fails) -#' @param meaningful.names if \code{FALSE} (default), the short names (codes) as -#' defined in EN13508.2 are used as column names, otherwise more meaningful -#' names are used. See columns\code{Code} and \code{Name}, respectively, in -#' the data frame returned by \code{getCodes()}. +#' @param name.convention one of \code{c("norm", "camel", "snake")} specifying +#' the set of names used in the returned tables. \code{"norm"}: as specified +#' in the norm EN13508.2, \code{"camel"}: \code{CamelCase}, \code{"snake"}: +#' \code{snake_case} #' @param simple.algorithm if \code{TRUE} (default), a simple (and faster) #' algorithm is used to extract the general information about the inspections #' from the #B-headers. It requires that all #B-headers have the same number @@ -21,8 +21,6 @@ #' @param warn if \code{TRUE}, warnings are shown (e.g. if not all #A-header #' fields were found) #' @param dbg if \code{TRUE}, debug messages are shown, else not -#' @param snake.case logical indicating whether or not to provide names in -#' snake_case (in contrast to CamelCase) if \code{meaningful.names = TRUE}. #' @param \dots further arguments to be passed to #' \code{kwb.en13508.2:::getObservationRecordsFromEuLines} #' @return list with elements \code{header.info}, \code{inspections}, @@ -34,16 +32,17 @@ readEuCodedFile <- function( input.file, encoding = "latin1", read.inspections = TRUE, - meaningful.names = FALSE, + name.convention = c("norm", "camel", "snake")[1L], simple.algorithm = TRUE, warn = TRUE, dbg = TRUE, - snake.case = FALSE, ... ) { #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) #kwb.utils::assignPackageObjects("kwb.en13508.2") + + name.convention <- match.arg(name.convention, c("norm", "camel", "snake")) run <- function(...) kwb.utils::catAndRun(dbg = dbg, ...) @@ -80,9 +79,10 @@ readEuCodedFile <- function( ) ) - if (meaningful.names) { - inspections <- renameColumnsToMeaningful(inspections, snake.case) - observations <- renameColumnsToMeaningful(observations, snake.case) + if (name.convention != "norm") { + snake.case <- name.convention == "snake" + inspections <- applyNameConvention(inspections, snake.case) + observations <- applyNameConvention(observations, snake.case) } list( @@ -92,8 +92,8 @@ readEuCodedFile <- function( ) } -# renameColumnsToMeaningful ---------------------------------------------------- -renameColumnsToMeaningful <- function(x, snake.case = FALSE) +# applyNameConvention ---------------------------------------------------- +applyNameConvention <- function(x, snake.case = FALSE) { result <- kwb.utils::renameColumns(x, renamings = readRenamings( file.name = "eucodes.csv", diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index cab233e..240c3b5 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -8,11 +8,10 @@ readEuCodedFile( input.file, encoding = "latin1", read.inspections = TRUE, - meaningful.names = FALSE, + name.convention = c("norm", "camel", "snake")[1L], simple.algorithm = TRUE, warn = TRUE, dbg = TRUE, - snake.case = FALSE, ... ) } @@ -25,10 +24,10 @@ in the format described in DIN EN 13508-2} \item{read.inspections}{if \code{TRUE}, general inspection data (in #B-blocks) are read, otherwise skipped (use if function fails)} -\item{meaningful.names}{if \code{FALSE} (default), the short names (codes) as -defined in EN13508.2 are used as column names, otherwise more meaningful -names are used. See columns\code{Code} and \code{Name}, respectively, in -the data frame returned by \code{getCodes()}.} +\item{name.convention}{one of \code{c("norm", "camel", "snake")} specifying +the set of names used in the returned tables. \code{"norm"}: as specified +in the norm EN13508.2, \code{"camel"}: \code{CamelCase}, \code{"snake"}: +\code{snake_case}} \item{simple.algorithm}{if \code{TRUE} (default), a simple (and faster) algorithm is used to extract the general information about the inspections @@ -41,9 +40,6 @@ fields were found)} \item{dbg}{if \code{TRUE}, debug messages are shown, else not} -\item{snake.case}{logical indicating whether or not to provide names in -snake_case (in contrast to CamelCase) if \code{meaningful.names = TRUE}.} - \item{\dots}{further arguments to be passed to \code{kwb.en13508.2:::getObservationRecordsFromEuLines}} } diff --git a/tests/testthat/test-function-applyNameConvention.R b/tests/testthat/test-function-applyNameConvention.R new file mode 100644 index 0000000..c2c45c9 --- /dev/null +++ b/tests/testthat/test-function-applyNameConvention.R @@ -0,0 +1,7 @@ +test_that("applyNameConvention() works", { + + f <- kwb.en13508.2:::applyNameConvention + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-readAndMergeEuCodedFiles.R b/tests/testthat/test-function-readAndMergeEuCodedFiles.R index c55e7c5..766e1a9 100644 --- a/tests/testthat/test-function-readAndMergeEuCodedFiles.R +++ b/tests/testthat/test-function-readAndMergeEuCodedFiles.R @@ -8,15 +8,15 @@ test_that("readAndMergeEuCodedFiles() works", { file <- kwb.en13508.2:::getExampleFile() files <- c(file, file) - result_camel <- f(files, meaningful.names = TRUE) - result_snake <- f(files, meaningful.names = TRUE, snake.case = TRUE) + result_camel <- f(files, name.convention = "camel") + result_snake <- f(files, name.convention = "snake") - elements_ok <- function(x) expect_identical(names(x), c( - "header.info", "inspections", "observations" - )) + check_top_level <- function(x) { + expect_identical(names(x), c("header.info", "inspections", "observations")) + } - elements_ok(result_camel) - elements_ok(result_snake) + check_top_level(result_camel) + check_top_level(result_snake) expect_true(all( c("MainCode", "Char1", "Char2") %in% names(result_camel$observations) diff --git a/tests/testthat/test-function-readEuCodedFile.R b/tests/testthat/test-function-readEuCodedFile.R index 7d16f68..1016fa4 100644 --- a/tests/testthat/test-function-readEuCodedFile.R +++ b/tests/testthat/test-function-readEuCodedFile.R @@ -1,5 +1,5 @@ #kwb.utils::assignPackageObjects("kwb.en13508.2") - +#library(testthat) test_that("readEuCodedFile() works", { f <- kwb.en13508.2:::readEuCodedFile @@ -7,14 +7,24 @@ test_that("readEuCodedFile() works", { expect_error(f()) result.1 <- f(getExampleFile(), dbg = FALSE) - result.2 <- f(getExampleFile(), dbg = FALSE, meaningful.names = TRUE) + result.2 <- f(getExampleFile(), dbg = FALSE, name.convention = "camel") + result.3 <- f(getExampleFile(), dbg = FALSE, name.convention = "snake") + + check_top_level <- function(x) { + expect_type(x, "list") + expect_identical(names(x), c("header.info", "inspections", "observations")) + } - expect_type(result.1, "list") - expect_identical(names(result.1), c("header.info", "inspections", "observations")) + check_top_level(result.1) + check_top_level(result.2) + check_top_level(result.3) expect_true(all(c("AAA", "AAB", "AAD") %in% names(result.1$inspections))) expect_true(all(c("A", "B", "C") %in% names(result.1$observations))) expect_true(all(c("Node1Ref", "Node2Ref") %in% names(result.2$inspections))) expect_true(all(c("MainCode", "Char1", "Char2") %in% names(result.2$observations))) + + expect_true(all(c("node_1_ref", "node_2_ref") %in% names(result.3$inspections))) + expect_true(all(c("main_code", "char_1", "char_2") %in% names(result.3$observations))) }) diff --git a/tests/testthat/test-function-renameColumnsToMeaningful.R b/tests/testthat/test-function-renameColumnsToMeaningful.R deleted file mode 100644 index 2e1ade7..0000000 --- a/tests/testthat/test-function-renameColumnsToMeaningful.R +++ /dev/null @@ -1,7 +0,0 @@ -test_that("renameColumnsToMeaningful() works", { - - f <- kwb.en13508.2:::renameColumnsToMeaningful - - expect_error(f()) - -}) From ebb2835d24211f75a9ea983be59f63feecec293f Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 2 Dec 2023 10:16:31 +0100 Subject: [PATCH 113/141] Improve createInspectionId() and rename to createHashFromColumns() --- R/createHashFromColumns.R | 20 ++++++++++++ R/createInspectionId.R | 32 ------------------- R/setGlobalInspectionID.R | 11 ++++++- tests/testthat/createHashFromColumns.R | 8 +++++ .../test-function-createInspectionId.R | 8 ----- 5 files changed, 38 insertions(+), 41 deletions(-) create mode 100644 R/createHashFromColumns.R delete mode 100644 R/createInspectionId.R create mode 100644 tests/testthat/createHashFromColumns.R delete mode 100644 tests/testthat/test-function-createInspectionId.R diff --git a/R/createHashFromColumns.R b/R/createHashFromColumns.R new file mode 100644 index 0000000..1a9a799 --- /dev/null +++ b/R/createHashFromColumns.R @@ -0,0 +1,20 @@ +# createHashFromColumns -------------------------------------------------------- +createHashFromColumns <- function(data, columns, n.chars = 8L) +{ + duplicates <- kwb.utils::findPartialDuplicates(data, columns) + + if (!is.null(duplicates)) { + print(duplicates) + stop("There are duplicates in the key columns (see above)!") + } + + keys <- kwb.utils::pasteColumns(data, columns, "|") + + stopifnot(!anyDuplicated(keys)) + + hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), n.chars) + + stopifnot(!anyDuplicated(hashes)) + + hashes +} diff --git a/R/createInspectionId.R b/R/createInspectionId.R deleted file mode 100644 index f1f6d4a..0000000 --- a/R/createInspectionId.R +++ /dev/null @@ -1,32 +0,0 @@ -# createInspectionId ----------------------------------------------------------- -createInspectionId <- function( - inspections, - id.columns = c( - "project", - "inspection_date", - "inspection_time", - "node_1_ref", - "node_2_ref" - ), - n.chars = 8L -) -{ - kwb.utils::checkForMissingColumns(inspections, id.columns) - - duplicateInfo <- kwb.utils::findPartialDuplicates(inspections, id.columns) - - if (! is.null(duplicateInfo)) { - print(duplicateInfo) - stop("There are duplicates in the key columns (see above)!") - } - - keyStrings <- kwb.utils::pasteColumns(inspections, id.columns, "|") - - stopifnot(!anyDuplicated(keyStrings)) - - ids <- kwb.utils::left(unlist(lapply(keyStrings, digest::digest)), n.chars) - - stopifnot(!anyDuplicated(ids)) - - ids -} diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 2ebc2a9..8339f91 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -65,7 +65,16 @@ setGlobalInspectionID <- function( } # Create the inspection IDs and store them in column "inspection_id" - inspections[["inspection_id"]] <- createInspectionId(inspections) + inspections[["inspection_id"]] <- createHashFromColumns( + data = inspections, + columns = c( + "project", + "inspection_date", + "inspection_time", + "node_1_ref", + "node_2_ref" + ) + ) i <- kwb.utils::selectColumns(observations, "inspno") diff --git a/tests/testthat/createHashFromColumns.R b/tests/testthat/createHashFromColumns.R new file mode 100644 index 0000000..d38516b --- /dev/null +++ b/tests/testthat/createHashFromColumns.R @@ -0,0 +1,8 @@ +#library(testthat) +test_that("createHashFromColumns() works", { + + f <- kwb.en13508.2:::createHashFromColumns + + expect_error(f()) + +}) diff --git a/tests/testthat/test-function-createInspectionId.R b/tests/testthat/test-function-createInspectionId.R deleted file mode 100644 index 74888ae..0000000 --- a/tests/testthat/test-function-createInspectionId.R +++ /dev/null @@ -1,8 +0,0 @@ -#library(testthat) -test_that("createInspectionId() works", { - - f <- kwb.en13508.2:::createInspectionId - - expect_error(f()) - -}) From 27a6b0a124b004672e5428d58f533ca130d74bd6 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 2 Dec 2023 10:57:12 +0100 Subject: [PATCH 114/141] Add argument "name.convention" --- R/readAndMergeEuCodedFiles.R | 12 ++++++-- R/setGlobalInspectionID.R | 52 ++++++++++++++++++++++++--------- man/readAndMergeEuCodedFiles.Rd | 3 ++ man/setGlobalInspectionID.Rd | 13 +++++++-- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index db66b32..82220f9 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -8,6 +8,7 @@ #' @param input.files full path to text file containing CCTV inspection results #' in the format described in DIN EN 13508-2 #' @param dbg if \code{TRUE} debug messages are shown +#' @param name.convention passed to \code{\link{readEuCodedFiles}} #' @param \dots further arguments passed to \code{\link{readEuCodedFiles}} #' @param add.inspid if \code{TRUE} (the default is \code{FALSE}) a globally #' unique inspection ID (inspid) is added to the data frames in elements @@ -20,6 +21,7 @@ readAndMergeEuCodedFiles <- function( input.files, dbg = FALSE, + name.convention = "norm", ..., add.inspid = FALSE, project = NULL, @@ -30,8 +32,9 @@ readAndMergeEuCodedFiles <- function( # and "ADE.1" inspection.data.list <- readEuCodedFiles( input.files = input.files, - simple.algorithm = FALSE, dbg = dbg, + name.convention = name.convention, + simple.algorithm = FALSE, ... ) @@ -41,7 +44,12 @@ readAndMergeEuCodedFiles <- function( return(inspection.data) } - setGlobalInspectionID(inspection.data, project, default.time = default.time) + setGlobalInspectionID( + inspection.data, + project, + default.time = default.time, + name.convention = name.convention + ) } # mergeInspectionData ---------------------------------------------------------- diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 8339f91..9f59ff1 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -7,17 +7,19 @@ #' \code{inspections}, \code{observations} #' @param project name of project to which the data are related, such as: #' "Lausanne" -#' @param default.time default time string to use if column InspTime is not -#' available. Default: "22:22". A random number will be generated for the +#' @param default.time default time string to use if column is +#' not available. Default: "22:22". A random number will be generated for the #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. +#' @param name.convention one of \code{c("norm", "camel", "snake")} #' @return list with the same elements as in \code{inspection.data} but with #' columns \code{inspid} being added to the data frames "inspections" and #' "observations" setGlobalInspectionID <- function( inspection.data, project = NULL, - default.time = "22:22" + default.time = "22:22", + name.convention = "norm" ) { if (is.null(project)) { @@ -27,19 +29,22 @@ setGlobalInspectionID <- function( ) } - fetch <- kwb.utils::createAccessor(inspection.data) - # Just a shortcut removeEmpty <- function(df) kwb.utils::removeEmptyColumns(df, dbg = FALSE) - inspections <- removeEmpty(fetch("inspections")) - observations <- removeEmpty(fetch("observations")) + inspections <- removeEmpty(get_elements(inspection.data, "inspections")) + observations <- removeEmpty(get_elements(inspection.data, "observations")) inspections[["project"]] <- project # The following function requires the column "inspection_time". If this # column does not exist, create it with a default value - timeColumn <- "inspection_time" + timeColumn <- get_elements(elements = name.convention, list( + norm = "ABG", + camel = "InspTime", + snake = "inspection_time" + )) + inspections <- kwb.utils::hsAddMissingCols(inspections, timeColumn) hasNoTime <- kwb.utils::isNaOrEmpty(inspections[[timeColumn]]) @@ -63,17 +68,36 @@ setGlobalInspectionID <- function( sample(0:59, size = n_missing, replace = TRUE) ) } - - # Create the inspection IDs and store them in column "inspection_id" - inspections[["inspection_id"]] <- createHashFromColumns( - data = inspections, - columns = c( + + # Columns from which to generate the hash code + columns <- get_elements(elements = name.convention, list( + norm = c( + "project", + "ABF", + "ABG", + "AAD", + "AAF" + ), + camel = c( + "project", + "InspDate", + "InspTime", + "Node1Ref", + "Node2Ref" + ), + snake = c( "project", "inspection_date", "inspection_time", "node_1_ref", "node_2_ref" ) + )) + + # Create the inspection IDs and store them in column "inspection_id" + inspections[["inspection_id"]] <- createHashFromColumns( + data = inspections, + columns = columns ) i <- kwb.utils::selectColumns(observations, "inspno") @@ -88,7 +112,7 @@ setGlobalInspectionID <- function( idFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id") list( - header.info = fetch("header.info"), + header.info = get_elements(inspection.data, "header.info"), inspections = idFirst(inspections), observations = idFirst(observations) ) diff --git a/man/readAndMergeEuCodedFiles.Rd b/man/readAndMergeEuCodedFiles.Rd index 0909f97..2b785a9 100644 --- a/man/readAndMergeEuCodedFiles.Rd +++ b/man/readAndMergeEuCodedFiles.Rd @@ -7,6 +7,7 @@ readAndMergeEuCodedFiles( input.files, dbg = FALSE, + name.convention = "norm", ..., add.inspid = FALSE, project = NULL, @@ -19,6 +20,8 @@ in the format described in DIN EN 13508-2} \item{dbg}{if \code{TRUE} debug messages are shown} +\item{name.convention}{passed to \code{\link{readEuCodedFiles}}} + \item{\dots}{further arguments passed to \code{\link{readEuCodedFiles}}} \item{add.inspid}{if \code{TRUE} (the default is \code{FALSE}) a globally diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index 644ff40..fc68193 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -4,7 +4,12 @@ \alias{setGlobalInspectionID} \title{Set Global Inspection ID} \usage{ -setGlobalInspectionID(inspection.data, project = NULL, default.time = "22:22") +setGlobalInspectionID( + inspection.data, + project = NULL, + default.time = "22:22", + name.convention = "norm" +) } \arguments{ \item{inspection.data}{list with elements \code{header.info}, @@ -13,10 +18,12 @@ setGlobalInspectionID(inspection.data, project = NULL, default.time = "22:22") \item{project}{name of project to which the data are related, such as: "Lausanne"} -\item{default.time}{default time string to use if column InspTime is not -available. Default: "22:22". A random number will be generated for the +\item{default.time}{default time string to use if column is +not available. Default: "22:22". A random number will be generated for the seconds, just to increase the chance that setting the time is enough to generate a unique key.} + +\item{name.convention}{one of \code{c("norm", "camel", "snake")}} } \value{ list with the same elements as in \code{inspection.data} but with From 99ec0be4863ad5d960e9bff5ca1b67e604132d21 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Jan 2024 18:42:46 +0100 Subject: [PATCH 115/141] Stop with improved error message --- R/readEuCodedFiles.R | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/R/readEuCodedFiles.R b/R/readEuCodedFiles.R index 9424dfe..f15955f 100644 --- a/R/readEuCodedFiles.R +++ b/R/readEuCodedFiles.R @@ -33,11 +33,14 @@ readEuCodedFiles <- function( "input file %d/%d: %s\n", i, length(input.files), input.file )) - inspectionData <- try(readEuCodedFile(input.file, dbg = dbg, ...)) + inspectionData <- try( + readEuCodedFile(input.file, dbg = dbg, ...), + silent = TRUE + ) # Return NULL if an error occurred if (kwb.utils::isTryError(inspectionData)) { - return(NULL) + return(inspectionData) } if (append.file.names) { @@ -49,18 +52,29 @@ readEuCodedFiles <- function( inspectionData }) - - failed <- sapply(result, is.null) + + failed <- sapply(result, kwb.utils::isTryError) # Give a warning about occurred errors if (any(failed)) { - - warning(call. = FALSE, sprintf( - "readEuCodedFile() returned with error for the following %d files:\n%s", - sum(failed), kwb.utils::stringList(basename(input.files[failed])) - )) + stop( + call. = FALSE, + "Import aborted due to errors in ", + sum(failed), + " files:\n\n*** ", + paste0( + basename(input.files[failed]), + ":\n", + as.character(result[failed]), + collapse = "\n*** " + ), + "\nYou may either\n", + "- correct the files,\n", + "- rename them so that they do not end in '.txt' any more, or\n", + "- remove them from the input folder." + ) } - + # Create valid list element names elements <- kwb.utils::substSpecialChars(basename(input.files)) From 51c819cfb33c0a18299c14cf809f5cd9dac28feb Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Jan 2024 19:21:05 +0100 Subject: [PATCH 116/141] Simplify error message (may have been confusing) --- R/setGlobalInspectionID.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 9f59ff1..08c5644 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -50,12 +50,14 @@ setGlobalInspectionID <- function( hasNoTime <- kwb.utils::isNaOrEmpty(inspections[[timeColumn]]) if (any(hasNoTime)) { + n_missing <- sum(hasNoTime) - message( - "Setting ", n_missing, " missing inspection times to '", default.time, - "' (plus random seconds). You may change this time value by setting the ", - "argument 'default.time'." - ) + + message(sprintf( + "Setting %d missing inspection times to '%s' (plus random seconds).", + n_missing, + default.time + )) # We have to fix the random number generator otherwise the times are not # reproducible! From edd32d398a6c21c9b03213768e91ce90a91df9c2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Jan 2024 16:56:44 +0100 Subject: [PATCH 117/141] Invert logic, use "result" as variable name --- R/readAndMergeEuCodedFiles.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index 82220f9..cf119f1 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -38,18 +38,18 @@ readAndMergeEuCodedFiles <- function( ... ) - inspection.data <- mergeInspectionData(inspection.data.list) + result <- mergeInspectionData(inspection.data.list) - if (!add.inspid) { - return(inspection.data) + if (add.inspid) { + result <- setGlobalInspectionID( + result, + project, + default.time = default.time, + name.convention = name.convention + ) } - setGlobalInspectionID( - inspection.data, - project, - default.time = default.time, - name.convention = name.convention - ) + result } # mergeInspectionData ---------------------------------------------------------- From d2324325d90817b9e9a4deba3207690d6247fa0b Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Jan 2024 16:57:44 +0100 Subject: [PATCH 118/141] Allow to write duplicates to a file --- R/createHashFromColumns.R | 16 +++++-- R/setGlobalInspectionID.R | 45 ++++++++++++++++--- man/setGlobalInspectionID.Rd | 6 ++- .../test-function-setGlobalInspectionID.R | 23 ++++++++++ 4 files changed, 78 insertions(+), 12 deletions(-) diff --git a/R/createHashFromColumns.R b/R/createHashFromColumns.R index 1a9a799..a62572c 100644 --- a/R/createHashFromColumns.R +++ b/R/createHashFromColumns.R @@ -1,18 +1,26 @@ # createHashFromColumns -------------------------------------------------------- -createHashFromColumns <- function(data, columns, n.chars = 8L) +createHashFromColumns <- function(data, columns, nchars = 8L, silent = FALSE) { duplicates <- kwb.utils::findPartialDuplicates(data, columns) if (!is.null(duplicates)) { - print(duplicates) - stop("There are duplicates in the key columns (see above)!") + + if (!silent) { + message( + "Cannot create unique hashes due to duplicates in the key columns (", + kwb.utils::stringList(columns), + ")! Returning -1L. Check attribute 'duplicates'." + ) + } + + return(structure(-1L, duplicates = duplicates)) } keys <- kwb.utils::pasteColumns(data, columns, "|") stopifnot(!anyDuplicated(keys)) - hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), n.chars) + hashes <- kwb.utils::left(unlist(lapply(keys, digest::digest)), nchars) stopifnot(!anyDuplicated(hashes)) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index 08c5644..dabf66b 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -12,6 +12,8 @@ #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. #' @param name.convention one of \code{c("norm", "camel", "snake")} +#' @param file optional. Path to file to which duplicates are are written (if +#' any). Default: \code{"setGlobalInspectionID_duplicates.txt"} #' @return list with the same elements as in \code{inspection.data} but with #' columns \code{inspid} being added to the data frames "inspections" and #' "observations" @@ -19,7 +21,8 @@ setGlobalInspectionID <- function( inspection.data, project = NULL, default.time = "22:22", - name.convention = "norm" + name.convention = "norm", + file = NULL ) { if (is.null(project)) { @@ -97,10 +100,16 @@ setGlobalInspectionID <- function( )) # Create the inspection IDs and store them in column "inspection_id" - inspections[["inspection_id"]] <- createHashFromColumns( - data = inspections, - columns = columns + hashes <- createHashFromColumns( + data = inspections, + columns = columns, + silent = TRUE ) + + # Check for duplicates in the hashes + stop_on_hash_duplicates(hashes, file = file) + + inspections[["inspection_id"]] <- hashes i <- kwb.utils::selectColumns(observations, "inspno") @@ -111,11 +120,33 @@ setGlobalInspectionID <- function( observations <- kwb.utils::removeColumns(observations, "inspno") # Just a shortcut - idFirst <- function(df) kwb.utils::moveColumnsToFront(df, "inspection_id") + id_first <- function(x) kwb.utils::moveColumnsToFront(x, "inspection_id") list( header.info = get_elements(inspection.data, "header.info"), - inspections = idFirst(inspections), - observations = idFirst(observations) + inspections = id_first(inspections), + observations = id_first(observations) ) } + +# stop_on_hash_duplicates ------------------------------------------------------ +stop_on_hash_duplicates <- function(hashes, file = NULL) +{ + if (identical(kwb.utils::removeAttributes(hashes), -1L)) { + + duplicates <- kwb.utils::getAttribute(hashes, "duplicates") + + if (is.null(file)) { + print(duplicates) + } else { + writeLines(capture.output(print(duplicates)), file) + } + + stop( + "There were duplicates in the key columns (see ", + ifelse(is.null(file), "above", dQuote(file, '"')), + ").", + call. = FALSE + ) + } +} diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index fc68193..3691dae 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -8,7 +8,8 @@ setGlobalInspectionID( inspection.data, project = NULL, default.time = "22:22", - name.convention = "norm" + name.convention = "norm", + file = NULL ) } \arguments{ @@ -24,6 +25,9 @@ seconds, just to increase the chance that setting the time is enough to generate a unique key.} \item{name.convention}{one of \code{c("norm", "camel", "snake")}} + +\item{file}{optional. Path to file to which duplicates are are written (if +any). Default: \code{"setGlobalInspectionID_duplicates.txt"}} } \value{ list with the same elements as in \code{inspection.data} but with diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R index 5b40ac8..8c8996c 100644 --- a/tests/testthat/test-function-setGlobalInspectionID.R +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -1,7 +1,30 @@ +library(testthat) + test_that("setGlobalInspectionID() works", { f <- kwb.en13508.2:::setGlobalInspectionID expect_error(f()) + file <- file.path(tempdir(), "duplicates.txt") + + expect_error(f( + inspection.data = list( + inspections = data.frame( + inspection_date = "2024-01-03", + inspection_time = "22:22", + node_1_ref = "A", + node_2_ref = "B" + )[c(1, 1), ], + observations = data.frame( + inspno = 1L + ), + header.info = list() + ), + project = "Lausanne", + name.convention = "snake", + file = file + )) + + expect_true(file.exists(file)) }) From 2ee4a2c324bd8f302e70d302dcaec03cfa3d0f39 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Jan 2024 17:01:12 +0100 Subject: [PATCH 119/141] Import capture.output() --- NAMESPACE | 1 + R/setGlobalInspectionID.R | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 98bbc30..3c3bf7d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,3 +22,4 @@ importFrom(kwb.utils,setColumns) importFrom(kwb.utils,stopFormatted) importFrom(kwb.utils,stringList) importFrom(kwb.utils,substSpecialChars) +importFrom(utils,capture.output) diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index dabf66b..b79ac02 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -130,6 +130,8 @@ setGlobalInspectionID <- function( } # stop_on_hash_duplicates ------------------------------------------------------ + +#'@importFrom utils capture.output stop_on_hash_duplicates <- function(hashes, file = NULL) { if (identical(kwb.utils::removeAttributes(hashes), -1L)) { @@ -139,7 +141,7 @@ stop_on_hash_duplicates <- function(hashes, file = NULL) if (is.null(file)) { print(duplicates) } else { - writeLines(capture.output(print(duplicates)), file) + writeLines(utils::capture.output(print(duplicates)), file) } stop( From f3b82bbc4783518f7dabcea6bebdce947be23e66 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 4 Jan 2024 17:09:05 +0100 Subject: [PATCH 120/141] Add argument "error.file", rename from "file" --- R/readAndMergeEuCodedFiles.R | 8 ++++++-- R/setGlobalInspectionID.R | 16 ++++++++-------- man/readAndMergeEuCodedFiles.Rd | 6 +++++- man/setGlobalInspectionID.Rd | 6 +++--- .../test-function-setGlobalInspectionID.R | 6 +++--- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index cf119f1..e678395 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -16,6 +16,8 @@ #' @param project name of project to which the data are related, such as: #' "Lausanne" #' @param default.time passed to \code{\link{setGlobalInspectionID}} +#' @param error.file optional. Path to error file, passed to +#' \code{\link{setGlobalInspectionID}}. #' @export #' readAndMergeEuCodedFiles <- function( @@ -25,7 +27,8 @@ readAndMergeEuCodedFiles <- function( ..., add.inspid = FALSE, project = NULL, - default.time = "22:22" + default.time = "22:22", + error.file = NULL ) { # by setting simple.algorithm = FALSE we get unique column names, e.g. "ADE" @@ -45,7 +48,8 @@ readAndMergeEuCodedFiles <- function( result, project, default.time = default.time, - name.convention = name.convention + name.convention = name.convention, + error.file = error.file ) } diff --git a/R/setGlobalInspectionID.R b/R/setGlobalInspectionID.R index b79ac02..fe854b5 100644 --- a/R/setGlobalInspectionID.R +++ b/R/setGlobalInspectionID.R @@ -12,8 +12,8 @@ #' seconds, just to increase the chance that setting the time is enough to #' generate a unique key. #' @param name.convention one of \code{c("norm", "camel", "snake")} -#' @param file optional. Path to file to which duplicates are are written (if -#' any). Default: \code{"setGlobalInspectionID_duplicates.txt"} +#' @param error.file optional. Path to file to which duplicates are are written +#' (if any). Default: \code{"setGlobalInspectionID_duplicates.txt"} #' @return list with the same elements as in \code{inspection.data} but with #' columns \code{inspid} being added to the data frames "inspections" and #' "observations" @@ -22,7 +22,7 @@ setGlobalInspectionID <- function( project = NULL, default.time = "22:22", name.convention = "norm", - file = NULL + error.file = NULL ) { if (is.null(project)) { @@ -107,7 +107,7 @@ setGlobalInspectionID <- function( ) # Check for duplicates in the hashes - stop_on_hash_duplicates(hashes, file = file) + stop_on_hash_duplicates(hashes, error.file = error.file) inspections[["inspection_id"]] <- hashes @@ -132,21 +132,21 @@ setGlobalInspectionID <- function( # stop_on_hash_duplicates ------------------------------------------------------ #'@importFrom utils capture.output -stop_on_hash_duplicates <- function(hashes, file = NULL) +stop_on_hash_duplicates <- function(hashes, error.file = NULL) { if (identical(kwb.utils::removeAttributes(hashes), -1L)) { duplicates <- kwb.utils::getAttribute(hashes, "duplicates") - if (is.null(file)) { + if (is.null(error.file)) { print(duplicates) } else { - writeLines(utils::capture.output(print(duplicates)), file) + writeLines(utils::capture.output(print(duplicates)), error.file) } stop( "There were duplicates in the key columns (see ", - ifelse(is.null(file), "above", dQuote(file, '"')), + ifelse(is.null(error.file), "above", dQuote(error.file, '"')), ").", call. = FALSE ) diff --git a/man/readAndMergeEuCodedFiles.Rd b/man/readAndMergeEuCodedFiles.Rd index 2b785a9..b768bd6 100644 --- a/man/readAndMergeEuCodedFiles.Rd +++ b/man/readAndMergeEuCodedFiles.Rd @@ -11,7 +11,8 @@ readAndMergeEuCodedFiles( ..., add.inspid = FALSE, project = NULL, - default.time = "22:22" + default.time = "22:22", + error.file = NULL ) } \arguments{ @@ -32,6 +33,9 @@ unique inspection ID (inspid) is added to the data frames in elements "Lausanne"} \item{default.time}{passed to \code{\link{setGlobalInspectionID}}} + +\item{error.file}{optional. Path to error file, passed to +\code{\link{setGlobalInspectionID}}.} } \description{ Read files in EN13508.2-format using \code{\link{readEuCodedFiles}} and merge diff --git a/man/setGlobalInspectionID.Rd b/man/setGlobalInspectionID.Rd index 3691dae..ce89c19 100644 --- a/man/setGlobalInspectionID.Rd +++ b/man/setGlobalInspectionID.Rd @@ -9,7 +9,7 @@ setGlobalInspectionID( project = NULL, default.time = "22:22", name.convention = "norm", - file = NULL + error.file = NULL ) } \arguments{ @@ -26,8 +26,8 @@ generate a unique key.} \item{name.convention}{one of \code{c("norm", "camel", "snake")}} -\item{file}{optional. Path to file to which duplicates are are written (if -any). Default: \code{"setGlobalInspectionID_duplicates.txt"}} +\item{error.file}{optional. Path to file to which duplicates are are written +(if any). Default: \code{"setGlobalInspectionID_duplicates.txt"}} } \value{ list with the same elements as in \code{inspection.data} but with diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R index 8c8996c..bfc2607 100644 --- a/tests/testthat/test-function-setGlobalInspectionID.R +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -6,7 +6,7 @@ test_that("setGlobalInspectionID() works", { expect_error(f()) - file <- file.path(tempdir(), "duplicates.txt") + error.file <- file.path(tempdir(), "duplicates.txt") expect_error(f( inspection.data = list( @@ -23,8 +23,8 @@ test_that("setGlobalInspectionID() works", { ), project = "Lausanne", name.convention = "snake", - file = file + error.file = error.file )) - expect_true(file.exists(file)) + expect_true(file.exists(error.file)) }) From ba3320571406703dc223c7dd1e283001dcfc45c8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 22 Mar 2024 13:30:29 +0100 Subject: [PATCH 121/141] Add argument "naToEmpty" --- R/readAndMergeEuCodedFiles.R | 13 +++++++++---- man/mergeInspectionData.Rd | 6 +++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/R/readAndMergeEuCodedFiles.R b/R/readAndMergeEuCodedFiles.R index e678395..b61fa68 100644 --- a/R/readAndMergeEuCodedFiles.R +++ b/R/readAndMergeEuCodedFiles.R @@ -65,13 +65,16 @@ readAndMergeEuCodedFiles <- function( #' @param x list of elements each of which represents inspection data read from #' an EN13508.2-encoded file by means of \code{\link{readEuCodedFile}}. #' @param warn logical indicating whether to warn about different header -#' information. By default, warnings are not shown. +#' information. By default, warnings are not shown. +#' @param naToEmpty logical indicating whether or not to replace \code{NA} with +#' an empty string constant \code{""} in all columns of type character. The +#' default is \code{TRUE}. #' @return list with elements \code{header.info}, \code{inspections}, #' \code{observations}. #' #' @export #' -mergeInspectionData <- function(x, warn = FALSE) +mergeInspectionData <- function(x, warn = FALSE, naToEmpty = TRUE) { if (length(x) == 1L) { return (x[[1L]]) @@ -105,8 +108,10 @@ mergeInspectionData <- function(x, warn = FALSE) observations <- kwb.utils::safeRowBindAll(observations) # Replace NA with "" in columns of type character - inspections <- replaceNaWithEmptyStringInCharColumns(inspections) - observations <- replaceNaWithEmptyStringInCharColumns(observations) + if (naToEmpty) { + inspections <- replaceNaWithEmptyStringInCharColumns(inspections) + observations <- replaceNaWithEmptyStringInCharColumns(observations) + } list( header.info = header.info, diff --git a/man/mergeInspectionData.Rd b/man/mergeInspectionData.Rd index badc0d6..d6f2bc1 100644 --- a/man/mergeInspectionData.Rd +++ b/man/mergeInspectionData.Rd @@ -4,7 +4,7 @@ \alias{mergeInspectionData} \title{Merge Inspection Data} \usage{ -mergeInspectionData(x, warn = FALSE) +mergeInspectionData(x, warn = FALSE, naToEmpty = TRUE) } \arguments{ \item{x}{list of elements each of which represents inspection data read from @@ -12,6 +12,10 @@ an EN13508.2-encoded file by means of \code{\link{readEuCodedFile}}.} \item{warn}{logical indicating whether to warn about different header information. By default, warnings are not shown.} + +\item{naToEmpty}{logical indicating whether or not to replace \code{NA} with +an empty string constant \code{""} in all columns of type character. The +default is \code{TRUE}.} } \value{ list with elements \code{header.info}, \code{inspections}, From 445cf13f01fc1ffeccda5c097a257edf53933f6b Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 22 Mar 2024 17:17:03 +0100 Subject: [PATCH 122/141] Allow to specify the file encoding --- R/readEuCodedFile.R | 58 ++++++++++++++++++++++++++++++++---------- man/readEuCodedFile.Rd | 8 +++++- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 94f4e49..6c38fd6 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -6,7 +6,11 @@ #' #' @param input.file full path to text file containing CCTV inspection results #' in the format described in DIN EN 13508-2 -#' @param encoding default: "latin1" +#' @param encoding default: "latin1", passed to \code{\link{readLines}}, see +#' there. +#' @param file.encoding Encoding to be assumed for the \code{input.file}. +#' The default is \code{NULL} in which case the name of the encoding is read +#' from the \code{#A1} field of the \code{input.file}. #' @param read.inspections if \code{TRUE}, general inspection data (in #' #B-blocks) are read, otherwise skipped (use if function fails) #' @param name.convention one of \code{c("norm", "camel", "snake")} specifying @@ -29,26 +33,37 @@ #' @export #' readEuCodedFile <- function( - input.file, - encoding = "latin1", - read.inspections = TRUE, - name.convention = c("norm", "camel", "snake")[1L], - simple.algorithm = TRUE, - warn = TRUE, - dbg = TRUE, - ... + input.file, + encoding = "latin1", + file.encoding = NULL, + read.inspections = TRUE, + name.convention = c("norm", "camel", "snake")[1L], + simple.algorithm = TRUE, + warn = TRUE, + dbg = TRUE, + ... ) { #kwb.utils::assignArgumentDefaults(kwb.en13508.2::readEuCodedFile) #kwb.utils::assignPackageObjects("kwb.en13508.2") - + name.convention <- match.arg(name.convention, c("norm", "camel", "snake")) run <- function(...) kwb.utils::catAndRun(dbg = dbg, ...) + # If not explicitly given, use the encoding as given in the #A1 header + if (is.null(file.encoding)) { + file.encoding <- getFileEncoding(input.file) + } + eu_lines <- run( - paste("Reading input file", input.file), - readLines(input.file, encoding = encoding, warn = FALSE) + sprintf("Reading %s assuming %s encoding", input.file, file.encoding), + kwb.utils::readLinesWithEncoding( + file = input.file, + fileEncoding = file.encoding, + encoding = encoding, + warn = FALSE + ) ) eu_lines <- run( @@ -78,7 +93,7 @@ readEuCodedFile <- function( ... ) ) - + if (name.convention != "norm") { snake.case <- name.convention == "snake" inspections <- applyNameConvention(inspections, snake.case) @@ -92,7 +107,22 @@ readEuCodedFile <- function( ) } -# applyNameConvention ---------------------------------------------------- +# getFileEncoding -------------------------------------------------------------- +getFileEncoding <- function(file) +{ + encoding <- kwb.utils::selectElements( + x = getFileHeaderFromEuLines(readLines(kwb.utils::safePath(file), n = 6L)), + elements = "encoding" + ) + + if (!encoding %in% (available <- iconvlist())) { + stop(kwb.utils::noSuchElements(encoding, available, "encoding string")) + } + + encoding +} + +# applyNameConvention ---------------------------------------------------------- applyNameConvention <- function(x, snake.case = FALSE) { result <- kwb.utils::renameColumns(x, renamings = readRenamings( diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 240c3b5..5295976 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -7,6 +7,7 @@ readEuCodedFile( input.file, encoding = "latin1", + file.encoding = NULL, read.inspections = TRUE, name.convention = c("norm", "camel", "snake")[1L], simple.algorithm = TRUE, @@ -19,7 +20,12 @@ readEuCodedFile( \item{input.file}{full path to text file containing CCTV inspection results in the format described in DIN EN 13508-2} -\item{encoding}{default: "latin1"} +\item{encoding}{default: "latin1", passed to \code{\link{readLines}}, see +there.} + +\item{file.encoding}{Encoding to be assumed for the \code{input.file}. +The default is \code{NULL} in which case the name of the encoding is read +from the \code{#A1} field of the \code{input.file}.} \item{read.inspections}{if \code{TRUE}, general inspection data (in #B-blocks) are read, otherwise skipped (use if function fails)} From dae5c73422de6e4157517f917f288a99d6313e83 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 22 Mar 2024 17:30:02 +0100 Subject: [PATCH 123/141] Rename getFileEncoding() to readFileEncodingFromHeader() and check "file.encoding" for allowed string values --- R/readEuCodedFile.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 6c38fd6..4cee9eb 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -53,9 +53,11 @@ readEuCodedFile <- function( # If not explicitly given, use the encoding as given in the #A1 header if (is.null(file.encoding)) { - file.encoding <- getFileEncoding(input.file) + file.encoding <- readFileEncodingFromHeader(input.file) } + stopOnInvalidEncoding(file.encoding) + eu_lines <- run( sprintf("Reading %s assuming %s encoding", input.file, file.encoding), kwb.utils::readLinesWithEncoding( @@ -107,19 +109,21 @@ readEuCodedFile <- function( ) } -# getFileEncoding -------------------------------------------------------------- -getFileEncoding <- function(file) +# readFileEncodingFromHeader --------------------------------------------------- +readFileEncodingFromHeader <- function(file) { - encoding <- kwb.utils::selectElements( + kwb.utils::selectElements( x = getFileHeaderFromEuLines(readLines(kwb.utils::safePath(file), n = 6L)), elements = "encoding" ) - +} + +# stopOnInvalidEncoding -------------------------------------------------------- +stopOnInvalidEncoding <- function(encoding) +{ if (!encoding %in% (available <- iconvlist())) { stop(kwb.utils::noSuchElements(encoding, available, "encoding string")) } - - encoding } # applyNameConvention ---------------------------------------------------------- From a5df320ff64f1c0cb994ebc211521ced393470e9 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 22 Mar 2024 17:58:56 +0100 Subject: [PATCH 124/141] Change default of "encoding" to "unknown" --- R/readEuCodedFile.R | 4 ++-- man/readEuCodedFile.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 4cee9eb..5df95ca 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -6,7 +6,7 @@ #' #' @param input.file full path to text file containing CCTV inspection results #' in the format described in DIN EN 13508-2 -#' @param encoding default: "latin1", passed to \code{\link{readLines}}, see +#' @param encoding default: "unknown", passed to \code{\link{readLines}}, see #' there. #' @param file.encoding Encoding to be assumed for the \code{input.file}. #' The default is \code{NULL} in which case the name of the encoding is read @@ -34,7 +34,7 @@ #' readEuCodedFile <- function( input.file, - encoding = "latin1", + encoding = "unknown", file.encoding = NULL, read.inspections = TRUE, name.convention = c("norm", "camel", "snake")[1L], diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index 5295976..d07fa93 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -6,7 +6,7 @@ \usage{ readEuCodedFile( input.file, - encoding = "latin1", + encoding = "unknown", file.encoding = NULL, read.inspections = TRUE, name.convention = c("norm", "camel", "snake")[1L], @@ -20,7 +20,7 @@ readEuCodedFile( \item{input.file}{full path to text file containing CCTV inspection results in the format described in DIN EN 13508-2} -\item{encoding}{default: "latin1", passed to \code{\link{readLines}}, see +\item{encoding}{default: "unknown", passed to \code{\link{readLines}}, see there.} \item{file.encoding}{Encoding to be assumed for the \code{input.file}. From 198bef1ff5fa5ac7f52672765cc0c8bba376b98b Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 26 Mar 2024 16:41:35 +0100 Subject: [PATCH 125/141] Replace encoding iso-8859-1:1998 with latin1 and add argument "check.encoding" --- NAMESPACE | 3 +-- R/readEuCodedFile.R | 16 ++++++++++++++-- man/readEuCodedFile.Rd | 6 ++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3c3bf7d..5f80d97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,12 +11,11 @@ export(readEuCodedFile) export(readEuCodedFiles) export(writeEuCodedFile) export(writeEuCodedFiles) -importFrom(kwb.utils,.logok) -importFrom(kwb.utils,.logstart) importFrom(kwb.utils,catAndRun) importFrom(kwb.utils,catIf) importFrom(kwb.utils,isTryError) importFrom(kwb.utils,orderBy) +importFrom(kwb.utils,readLinesWithEncoding) importFrom(kwb.utils,selectColumns) importFrom(kwb.utils,setColumns) importFrom(kwb.utils,stopFormatted) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index 5df95ca..aaeb7c4 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -25,11 +25,15 @@ #' @param warn if \code{TRUE}, warnings are shown (e.g. if not all #A-header #' fields were found) #' @param dbg if \code{TRUE}, debug messages are shown, else not +#' @param check.encoding logical indicating whether or not to check if the +#' encoding string that is given in the \code{#A1} header of the file is +#' "known". The default is \code{TRUE}, i.e. the check is performed and an +#' error is thrown if the encoding is not in the list of known encodings. #' @param \dots further arguments to be passed to #' \code{kwb.en13508.2:::getObservationRecordsFromEuLines} #' @return list with elements \code{header.info}, \code{inspections}, #' \code{observations} -#' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok +#' @importFrom kwb.utils catAndRun readLinesWithEncoding #' @export #' readEuCodedFile <- function( @@ -41,6 +45,7 @@ readEuCodedFile <- function( simple.algorithm = TRUE, warn = TRUE, dbg = TRUE, + check.encoding = TRUE, ... ) { @@ -54,9 +59,16 @@ readEuCodedFile <- function( # If not explicitly given, use the encoding as given in the #A1 header if (is.null(file.encoding)) { file.encoding <- readFileEncodingFromHeader(input.file) + + # Replace "iso-8859-1:1998" with "latin1" + # (see https://de.wikipedia.org/wiki/ISO_8859-1: + # "ISO 8859-1, genauer ISO/IEC 8859-1, auch bekannt als Latin-1 [...]") + file.encoding <- gsub("^iso-8859-1:1998$", "latin1", file.encoding) } - stopOnInvalidEncoding(file.encoding) + if (check.encoding) { + stopOnInvalidEncoding(file.encoding) + } eu_lines <- run( sprintf("Reading %s assuming %s encoding", input.file, file.encoding), diff --git a/man/readEuCodedFile.Rd b/man/readEuCodedFile.Rd index d07fa93..a83c066 100644 --- a/man/readEuCodedFile.Rd +++ b/man/readEuCodedFile.Rd @@ -13,6 +13,7 @@ readEuCodedFile( simple.algorithm = TRUE, warn = TRUE, dbg = TRUE, + check.encoding = TRUE, ... ) } @@ -46,6 +47,11 @@ fields were found)} \item{dbg}{if \code{TRUE}, debug messages are shown, else not} +\item{check.encoding}{logical indicating whether or not to check if the +encoding string that is given in the \code{#A1} header of the file is +"known". The default is \code{TRUE}, i.e. the check is performed and an +error is thrown if the encoding is not in the list of known encodings.} + \item{\dots}{further arguments to be passed to \code{kwb.en13508.2:::getObservationRecordsFromEuLines}} } From 4f5a0ca553c8d8d439505d86b5f1db910915cfb0 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 26 Mar 2024 16:44:23 +0100 Subject: [PATCH 126/141] Add latest change as NEWS entry --- NEWS.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 35bb3ae..b447b40 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,10 @@ # Latest changes -* * Harmonise with [kwb.pkgbuild](https://kwb-r.github.io/kwb.pkgbuild) +* Harmonise with [kwb.pkgbuild](https://kwb-r.github.io/kwb.pkgbuild) +* Add argument check.encoding to eadEuCodedFile() # kwb.en13508.2 0.2.0.9000 * getObservationsFromEuLines: Order columns by name, put column “inspno” first. - * Added a NEWS.md file to track changes to the package. - * see http://style.tidyverse.org/news.html for writing a good NEWS.md - - From 6b4575c1bccdfdfa6f9e7a1e66c429a110e748a4 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 26 Mar 2024 17:48:11 +0100 Subject: [PATCH 127/141] Replace also "ISO-8859-1" with "latin1" --- R/readEuCodedFile.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index aaeb7c4..f990cb3 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -63,7 +63,12 @@ readEuCodedFile <- function( # Replace "iso-8859-1:1998" with "latin1" # (see https://de.wikipedia.org/wiki/ISO_8859-1: # "ISO 8859-1, genauer ISO/IEC 8859-1, auch bekannt als Latin-1 [...]") - file.encoding <- gsub("^iso-8859-1:1998$", "latin1", file.encoding) + file.encoding <- gsub( + "^iso-8859-1(:1998)?$", + "latin1", + file.encoding, + ignore.case = TRUE + ) } if (check.encoding) { From c1f33c5134d5e0b8e2f0435d89aa3d68bc76d099 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 26 Mar 2024 18:54:26 +0100 Subject: [PATCH 128/141] Accept "latin1" as encoding string --- R/readEuCodedFile.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/readEuCodedFile.R b/R/readEuCodedFile.R index f990cb3..5ae611e 100644 --- a/R/readEuCodedFile.R +++ b/R/readEuCodedFile.R @@ -138,7 +138,7 @@ readFileEncodingFromHeader <- function(file) # stopOnInvalidEncoding -------------------------------------------------------- stopOnInvalidEncoding <- function(encoding) { - if (!encoding %in% (available <- iconvlist())) { + if (!encoding %in% (available <- c("latin1", iconvlist()))) { stop(kwb.utils::noSuchElements(encoding, available, "encoding string")) } } From 83294a2c805d96d2bf7bffb7fa7b2e04077ad6c7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:23:58 +0100 Subject: [PATCH 129/141] Use "L" to indicate "integer" (long) --- R/getInspectionRecords_v2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index 28b8b60..6e28fca 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -260,7 +260,7 @@ cleanDuplicatedColumns <- function(x) #message("There are columns with suffixes '.x' or '.y'") - if (length(indices[[1]]) != length(indices[[2]])) stop( + if (length(indices[[1L]]) != length(indices[[2L]])) stop( "Missing columns with suffix '.x' or '.y':\n", kwb.utils::stringList(captions[all_indices]) ) From 4ab30eb466648cc56ac53927ec17355c535fcc64 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:24:50 +0100 Subject: [PATCH 130/141] Add Roxygen export directives --- R/getInspectionRecords_v2.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index 6e28fca..d479baa 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -1,4 +1,5 @@ # getInspectionRecords_v2 ------------------------------------------------------ +#' @importFrom kwb.utils removeColumns getInspectionRecords_v2 <- function( eu_lines, header.info, dbg = TRUE, version = 2L ) @@ -27,7 +28,7 @@ getInspectionRecords_v2 <- function( } # extractInspectionBlocks ------------------------------------------------------ -#' @importFrom kwb.utils isTryError +#' @importFrom kwb.utils collapsed isTryError stopFormatted extractInspectionBlocks <- function( eu_lines, headerInfos, sep, dec, quote, dbg = TRUE ) @@ -78,6 +79,7 @@ extractInspectionBlocks <- function( } # textblockToDataframe --------------------------------------------------------- +#' @importFrom kwb.utils makeUnique setColumns stopFormatted stringList textblockToDataframe <- function( textblock, sep, dec, quote, captionLine, rowNumbers, dbg = TRUE ) @@ -165,6 +167,7 @@ getColumnsToRemove <- function(x, captions, duplicates, dbg = TRUE) } # mergeInspectionBlocks -------------------------------------------------------- +#' @importFrom kwb.utils hsSafeName moveToFront safeRowBindAll stringList mergeInspectionBlocks <- function(inspectionBlocks) { indices <- seq_along(inspectionBlocks) @@ -218,6 +221,7 @@ mergeInspectionBlocks <- function(inspectionBlocks) } # removeDuplicatedColumns ------------------------------------------------------ +#' @importFrom kwb.utils stringList removeDuplicatedColumns <- function(x, dbg = TRUE) { captions <- names(x) @@ -249,6 +253,7 @@ removeDuplicatedColumns <- function(x, dbg = TRUE) } # cleanDuplicatedColumns ------------------------------------------------------- +#' @importFrom kwb.utils removeExtension stringList cleanDuplicatedColumns <- function(x) { captions <- names(x) From 078d23d4465e7803183d2777a1b8a202d16b629d Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:26:23 +0100 Subject: [PATCH 131/141] Use kwb.utils::collapsed() --- R/getInspectionRecords_v2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index d479baa..f1107d0 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -59,7 +59,7 @@ extractInspectionBlocks <- function( ), i, kwb.utils::collapsed(row_numbers, ", "), - paste(textblock, collapse = "\n"), + kwb.utils::collapsed(textblock, "\n"), attr(x, "condition")$message ) } From fd6ef99be226aa302ba4dfafb927cb0923bacded Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:27:19 +0100 Subject: [PATCH 132/141] Use normal output instead of message --- R/getInspectionRecords_v2.R | 29 ++++++++++--------- .../test-function-cleanDuplicatedColumns.R | 4 ++- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/R/getInspectionRecords_v2.R b/R/getInspectionRecords_v2.R index f1107d0..6339d48 100644 --- a/R/getInspectionRecords_v2.R +++ b/R/getInspectionRecords_v2.R @@ -125,13 +125,16 @@ textblockToDataframe <- function( } # getColumnsToRemove ----------------------------------------------------------- +#' @importFrom kwb.utils allAreEqual catIf printIf stringList getColumnsToRemove <- function(x, captions, duplicates, dbg = TRUE) { + catIf <- kwb.utils::catIf + columnsToRemove <- numeric() for (duplicate in duplicates) { - message("Column '", duplicate, "' exists multiple times!") + catIf(dbg, sprintf("Column '%s' exists multiple times!\n", duplicate)) columns <- which(captions == duplicate) @@ -139,25 +142,25 @@ getColumnsToRemove <- function(x, captions, duplicates, dbg = TRUE) if (all(allEqualInRow)) { - columnsToRemove <- c(columnsToRemove, columns[-1]) + columnsToRemove <- c(columnsToRemove, columns[-1L]) - message( - "For each row, the values in the duplicated rows are equal ", - "-> I removed the duplicated columns!" + catIf( + dbg, "For each row, the values in the duplicated rows are equal ", + "-> I removed the duplicated columns.\n" ) if (dbg) { - - cat("The values in the duplicated columns are:\n") - - x.output <- x[, columns] - - print(x.output[! duplicated(x.output), ]) + x.out <- x[, columns] + kwb.utils::printIf( + TRUE, + x = x.out[! duplicated(x.out), ], + caption = "The values in the duplicated columns are" + ) } } - kwb.utils::catIf( - dbg && length(columnsToRemove) > 0, + catIf( + dbg && length(columnsToRemove), "columnsToRemove:", kwb.utils::stringList(columnsToRemove), "\n" ) } diff --git a/tests/testthat/test-function-cleanDuplicatedColumns.R b/tests/testthat/test-function-cleanDuplicatedColumns.R index c3bcf47..f94e357 100644 --- a/tests/testthat/test-function-cleanDuplicatedColumns.R +++ b/tests/testthat/test-function-cleanDuplicatedColumns.R @@ -1,3 +1,5 @@ +#library(testthat) + test_that("cleanDuplicatedColumns() works", { f <- kwb.en13508.2:::cleanDuplicatedColumns @@ -6,7 +8,7 @@ test_that("cleanDuplicatedColumns() works", { x <- data.frame(a.x = 1:2, a.y = 1:2, id = 1:2) - expect_output(result <- f(x)) + expect_message(capture.output(result <- f(x))) expect_identical(result, stats::setNames(x[, -2L, ], c("a", "id"))) }) From f3c8c13af042eef9f7c81a62d5675ef0d0092929 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:27:30 +0100 Subject: [PATCH 133/141] Update NAMESPACE --- NAMESPACE | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 5f80d97..0017686 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,11 +11,20 @@ export(readEuCodedFile) export(readEuCodedFiles) export(writeEuCodedFile) export(writeEuCodedFiles) +importFrom(kwb.utils,allAreEqual) importFrom(kwb.utils,catAndRun) importFrom(kwb.utils,catIf) +importFrom(kwb.utils,collapsed) +importFrom(kwb.utils,hsSafeName) importFrom(kwb.utils,isTryError) +importFrom(kwb.utils,makeUnique) +importFrom(kwb.utils,moveToFront) importFrom(kwb.utils,orderBy) +importFrom(kwb.utils,printIf) importFrom(kwb.utils,readLinesWithEncoding) +importFrom(kwb.utils,removeColumns) +importFrom(kwb.utils,removeExtension) +importFrom(kwb.utils,safeRowBindAll) importFrom(kwb.utils,selectColumns) importFrom(kwb.utils,setColumns) importFrom(kwb.utils,stopFormatted) From ea7df69c89b89d6aa2fc5b66995623e266746448 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:48:01 +0100 Subject: [PATCH 134/141] Try to find out why GitHub Actions fail --- tests/testthat/test-function-setGlobalInspectionID.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R index bfc2607..78128c3 100644 --- a/tests/testthat/test-function-setGlobalInspectionID.R +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -1,4 +1,4 @@ -library(testthat) +#library(testthat) test_that("setGlobalInspectionID() works", { @@ -8,7 +8,7 @@ test_that("setGlobalInspectionID() works", { error.file <- file.path(tempdir(), "duplicates.txt") - expect_error(f( + expect_error(regexp = "There were duplicates", f( inspection.data = list( inspections = data.frame( inspection_date = "2024-01-03", @@ -26,5 +26,13 @@ test_that("setGlobalInspectionID() works", { error.file = error.file )) + if (!file.exists(error.file)) { + message( + message("error.file does not exist: ", error.file) + "files in dirname(error.file): ", + kwb.utils::stringList(dir(dirname(error.file))) + ) + } + expect_true(file.exists(error.file)) }) From f417055eb462e414ae2938e6732de9a7fb5b6897 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 11:56:16 +0100 Subject: [PATCH 135/141] Try harder --- tests/testthat/test-function-setGlobalInspectionID.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R index 78128c3..1ab605c 100644 --- a/tests/testthat/test-function-setGlobalInspectionID.R +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -27,12 +27,12 @@ test_that("setGlobalInspectionID() works", { )) if (!file.exists(error.file)) { - message( - message("error.file does not exist: ", error.file) - "files in dirname(error.file): ", + stop( + "error.file does not exist: ", error.file, "\n", + "files in dirname(error.file):\n", kwb.utils::stringList(dir(dirname(error.file))) ) } - expect_true(file.exists(error.file)) + #expect_true(file.exists(error.file)) }) From d61b6fa5513ab393ba55b3a34f882803f8ca95ff Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 28 Mar 2024 13:48:37 +0100 Subject: [PATCH 136/141] Using kwb.utils >= v0.15.0 should have fixed it --- tests/testthat/test-function-setGlobalInspectionID.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/testthat/test-function-setGlobalInspectionID.R b/tests/testthat/test-function-setGlobalInspectionID.R index 1ab605c..6333f41 100644 --- a/tests/testthat/test-function-setGlobalInspectionID.R +++ b/tests/testthat/test-function-setGlobalInspectionID.R @@ -26,13 +26,5 @@ test_that("setGlobalInspectionID() works", { error.file = error.file )) - if (!file.exists(error.file)) { - stop( - "error.file does not exist: ", error.file, "\n", - "files in dirname(error.file):\n", - kwb.utils::stringList(dir(dirname(error.file))) - ) - } - - #expect_true(file.exists(error.file)) + expect_true(file.exists(error.file)) }) From 74103c7011ff3f0e8de5ba1b9005f58738714d29 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 29 Mar 2024 07:04:12 +0100 Subject: [PATCH 137/141] Fix rotten comment --- R/readEuCodedFiles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/readEuCodedFiles.R b/R/readEuCodedFiles.R index f15955f..9379a17 100644 --- a/R/readEuCodedFiles.R +++ b/R/readEuCodedFiles.R @@ -38,7 +38,7 @@ readEuCodedFiles <- function( silent = TRUE ) - # Return NULL if an error occurred + # Return the error object if an error occurred if (kwb.utils::isTryError(inspectionData)) { return(inspectionData) } From 7b7994604b0bbf586d6d6e8367f49ebccb214f39 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 29 Mar 2024 07:06:31 +0100 Subject: [PATCH 138/141] Add package version requirement in DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f56aa49..0e7d551 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,7 @@ URL: https://github.com/kwb-r/kwb.en13508.2 BugReports: https://github.com/kwb-r/kwb.en13508.2/issues Imports: digest, - kwb.utils + kwb.utils (>= 0.15.0) Suggests: testthat (>= 3.0.0), ggplot2 From 79b8d662afd9cf28139806e4f8f4232eb83e81c5 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 29 Mar 2024 07:28:00 +0100 Subject: [PATCH 139/141] Update NEWS based on commits in PR #2 --- NEWS.md | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index b447b40..a924584 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,110 @@ -# Latest changes +## kwb.en13508.2 v0.3.0 (2024-03-28) -* Harmonise with [kwb.pkgbuild](https://kwb-r.github.io/kwb.pkgbuild) -* Add argument check.encoding to eadEuCodedFile() +### General + +* use GitHub Actions instead of Travis and AppVeyor +* rewrite README.md (where has the original content gone?) +* add tests +* move function definitions between files + +### Modifications to data files in the package + +* add material codes to eucodes.csv and save as UTF-8 +* add eucodes_full.csv file with main_code/char_1/char_2 combinations +* describe more fields in column-names.csv +* specify quantification 1 as numeric + +### Modifications to (exported or private) functions (in alphabetical order) + +* extractInspectionBlocks + - rename argument "quoteCharacter" to "quote" +* extractObservationBlocks() + - add a last "to" value if the last EU-line is not "#Z" +* extractObservationData() + - add arguments "file", "as.text" + - remove empty records from table of observations + - run readObservationsFromCsvText() with "colClasses" being set to the + expected column types +* get_code_meanings() + - rename to getCodeMeanings() +* getCodes() + - allow "table" to be a vector of table names + - stop if table name does not exist + - reset the row names +* getInspectionHeaderInfo() + - rename to getInspectionHeaderInfo_v1() +* getInspectionHeaderInfo2() + - rename to getInspectionHeaderInfo_v2() + - use new argument "version" instead of old argument "getInfo" +* getInspectionRecords_v2() + - use normal console output instead of message +* getInspectionsFromEuLines() + - add argument "dbg" to control whether to show a message or not + - rename to getInspectionRecords_v1() +* getInspectionsFromEuLines.new() + - rename to getInspectionRecords_v2() +* getObservationsFromEuLines() + - order columns by name, put column “inspno” first +* mergeInspectionData() + - add arguments "warn", "naToEmpty" +* readAndMergeEuCodedFiles() + - add arguments "add.inspid", "error.file" to optionally add globally unique + inspection IDs and to specify where to store related error messages + - add arguments "project", "default.time", "name.convention" + - clean code + - fix typo in warning +* readEuCodedFile() + - add arguments "name.convention", "file.encoding", "check.encoding" + - change default value of argument "encoding" from "latin1" to "unknown" + - use new function extractObservationData() to read observation data if the + original function getObservationsFromEuLines() fails + - pass "dbg" through to getInspectionsFromEuLines(), removeEmptyLines() + - refactor +* readEuCodedFiles() + - let "file" be the first column + - in the error message, report on files that failed to be imported +* readObservationsFromCsvText() + - reduce "colClasses" to columns that actually occur +* removeEmptyLines() + - clean code +* textblockToDataframe() + - rename argument "quoteCharacter" to "quote" +* warnOnDifferingHeaders() + - improve output -# kwb.en13508.2 0.2.0.9000 +### Add exported functions -* getObservationsFromEuLines: Order columns by name, put column “inspno” first. -* Added a NEWS.md file to track changes to the package. -* see http://style.tidyverse.org/news.html for writing a good NEWS.md +* getLineDamageInfo() + - moved from kwb.rerau + - allow for C-codes (change in continuous defect) in this function + +### Add private functions + +* createHashFromColumns() +* extdataFile() +* extractObservationBlocks() +* extractObservationData() +* getExampleData() +* getExampleFile() +* getFileHeaderFromEuLines() +* getHeaderInfo() +* getInspectionHeaderInfo2() +* getInspectionRecordsFromEuLines() +* getObservationRecordsFromEuLines() +* readFileEncodingFromHeader() +* readObservationsFromCsvText() +* setFilename() +* setGlobalInspectionID() +* toEuFormat() + +### Remove private functions + +* getHeaderLinesFromEuCodedLines() +* getHeaderInfroFromHeaderLines() +* order_by() + +## kwb.en13508.2 v0.2.0.9000 (2019-09-09) + +* Add a NEWS.md file to track changes to the package. + See http://style.tidyverse.org/news.html for writing a good NEWS.md +* Harmonise with [kwb.pkgbuild](https://kwb-r.github.io/kwb.pkgbuild) From 4d0bc9218029c02f3e960427e925f2e7090a8ebd Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 29 Mar 2024 09:02:00 +0100 Subject: [PATCH 140/141] Check and correct NEWS.md --- NEWS.md | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/NEWS.md b/NEWS.md index a924584..2b53aa1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,21 +10,13 @@ ### Modifications to data files in the package * add material codes to eucodes.csv and save as UTF-8 -* add eucodes_full.csv file with main_code/char_1/char_2 combinations -* describe more fields in column-names.csv -* specify quantification 1 as numeric +* add eucodes_full_de.csv file with main_code/char_1/char_2 combinations +* in eucodes_de.csv, specify quantification 1 as numeric ### Modifications to (exported or private) functions (in alphabetical order) * extractInspectionBlocks - rename argument "quoteCharacter" to "quote" -* extractObservationBlocks() - - add a last "to" value if the last EU-line is not "#Z" -* extractObservationData() - - add arguments "file", "as.text" - - remove empty records from table of observations - - run readObservationsFromCsvText() with "colClasses" being set to the - expected column types * get_code_meanings() - rename to getCodeMeanings() * getCodes() @@ -33,14 +25,9 @@ - reset the row names * getInspectionHeaderInfo() - rename to getInspectionHeaderInfo_v1() -* getInspectionHeaderInfo2() - - rename to getInspectionHeaderInfo_v2() - - use new argument "version" instead of old argument "getInfo" -* getInspectionRecords_v2() - - use normal console output instead of message * getInspectionsFromEuLines() - - add argument "dbg" to control whether to show a message or not - rename to getInspectionRecords_v1() + - add argument "dbg" to control whether to show a message or not * getInspectionsFromEuLines.new() - rename to getInspectionRecords_v2() * getObservationsFromEuLines() @@ -51,20 +38,17 @@ - add arguments "add.inspid", "error.file" to optionally add globally unique inspection IDs and to specify where to store related error messages - add arguments "project", "default.time", "name.convention" - - clean code - fix typo in warning + - clean code * readEuCodedFile() - add arguments "name.convention", "file.encoding", "check.encoding" - change default value of argument "encoding" from "latin1" to "unknown" - - use new function extractObservationData() to read observation data if the - original function getObservationsFromEuLines() fails - - pass "dbg" through to getInspectionsFromEuLines(), removeEmptyLines() + - use new function getObservationRecordsFromEuLines() + - pass "dbg" through to removeEmptyLines(), getInspectionRecordsFromEuLines() - refactor * readEuCodedFiles() - let "file" be the first column - in the error message, report on files that failed to be imported -* readObservationsFromCsvText() - - reduce "colClasses" to columns that actually occur * removeEmptyLines() - clean code * textblockToDataframe() @@ -83,16 +67,22 @@ * createHashFromColumns() * extdataFile() * extractObservationBlocks() + - add a last "to" value if the last EU-line is not "#Z" * extractObservationData() + - remove empty records from table of observations + - run readObservationsFromCsvText() with "colClasses" being set to the + expected column types or to "character" if as.text is TRUE * getExampleData() * getExampleFile() * getFileHeaderFromEuLines() * getHeaderInfo() -* getInspectionHeaderInfo2() +* getInspectionHeaderInfo_v2() * getInspectionRecordsFromEuLines() * getObservationRecordsFromEuLines() + - call extractObservationData() if calling getObservationsFromEuLines() fails * readFileEncodingFromHeader() * readObservationsFromCsvText() + - reduce "colClasses" to columns that actually occur * setFilename() * setGlobalInspectionID() * toEuFormat() @@ -100,7 +90,7 @@ ### Remove private functions * getHeaderLinesFromEuCodedLines() -* getHeaderInfroFromHeaderLines() +* getHeaderInfoFromHeaderLines() * order_by() ## kwb.en13508.2 v0.2.0.9000 (2019-09-09) From 3b9700232994bbaa7103866735709fdca94049f2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 29 Mar 2024 09:09:54 +0100 Subject: [PATCH 141/141] Bump version, add hint on version requirement --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e7d551..ab7cd69 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: kwb.en13508.2 Title: Read and Write CCTV Inspection Data According to Norm EN13508-2 -Version: 0.2.0.9000 +Version: 0.3.0 Authors@R: c(person(given = "Hauke", family = "Sonnenberg", diff --git a/NEWS.md b/NEWS.md index 2b53aa1..25ccd5a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ### General +* require kwb.utils in version >= 0.15.0 * use GitHub Actions instead of Travis and AppVeyor * rewrite README.md (where has the original content gone?) * add tests