From 6d27139e93388e0d4d6bb15290b294ee71bb0d09 Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 6 May 2022 15:45:15 +1000 Subject: [PATCH 1/7] Create emuR-metadata.R --- R/emuR-metadata.R | 567 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 567 insertions(+) create mode 100644 R/emuR-metadata.R diff --git a/R/emuR-metadata.R b/R/emuR-metadata.R new file mode 100644 index 00000000..d08987c0 --- /dev/null +++ b/R/emuR-metadata.R @@ -0,0 +1,567 @@ + +## Some constants +metadata.extension = "meta_json" + +## I will need this function until 0.9.0 of dplyr is released, +#possibly fixing the issue with all NA columns supplied to coalesce +# The implementation comes from https://stackoverflow.com/a/19254510 +coalesce <- function(...) { + Reduce(function(x, y) { + i <- which(is.na(x)) + x[i] <- y[i] + x}, + list(...)) +} + + +#' Functions for gathering metata specified for recordings in an emuR database. +#' +#' Metadata of a recording is stored in 'meta_json' files. Metadata may be set at the database, session and bundle level. +#' The functions goes through the database metadata file, session metadata files and metadata files associated with +#' each bundle, parses the JSON data and returns \code{\link[dplyr]{tibble}} with one row per bundle in the database. +#' Database default values are supressed by information set in a session metadata file, and session level data are in +#' turn surpressed by data given at the bundle level. +#' The structure of the metadata does not have to be consistent across meta_json files. +#' New columns are added to the as new fields are detected. +#' +#' The function \code{export_metadata} outputs the metadata as an Excel file instead, with bundle, session and database +#' tabs. The "bundle" tab gives the complete set of all the metadata that are active for each bundle, regardness where +#' it was set (for the bundle directly, or as a session / database default value). +#' +#' The user is expected to use the functions \code{export_metadata} and \code{import_metadata} to fix +#' accedental inconsistencies in the metadata of a database across bundles by exporting all +#' information to an Excel file using \code{export_metadata}, edit columns and values (including moving inconsistently +#' spelled metadata fields into a single column with the intended name) using Excel or another editor that complies with +#' the OOXML Workbook ISO/IEC 29500:2008 standard. The user is also expected to keep the indented structure of the Excel +#' file (one row per bundle or session, and each column except for those indicating session and bundle names containing +#' metadata), otherwise it is possible that the file may not be read in again by \code{\link{import_metadata}} to set +#' updated values. +#' +#' +#' @param emuDBhandle The database handle of an emuR database. +#' @param Excelfile The full path and file name of the Excel file that the metadata should be written to. The function will not overwrite this file, unless \code{overwrite} is set to \code{TRUE}. +#' @param overwrite The default behaviour is that an Excel file should not be +#' overwritten if it exists already. If this parameter is \code{TRUE} then the file will be overwritten. +#' +#' @rdname export_metadata +#' @export +#' +#' @return A data frame containing inforamtion about the 'meta_json' files found +#' \describe{ +#' \item{session}{The name of the session.} +#' \item{bundle}{The bundle name} +#' } +#' In addition, the \code{\link[dplyr]{tibble}} will contain one column for every type of information given in any of the 'meta_json' files. +#' +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' # Database-wide default information +#' add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted")) +#' #Bundle specific information +#' add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' get_metadata(ae) -> res +#' print(res) +#' rm(ae) +#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) +#' } +#' + +get_metadata <- function(emuDBhandle,overwrite=FALSE,session=".*"){ + res <- export_metadata(emuDBhandle=emuDBhandle,overwrite=overwrite) + return(res) +} + + +#' +#' @rdname export_metadata +#' @export +#' + +export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE){ + #Start with checking consistency regarding output file + if(! overwrite && !is.null(Excelfile) && file.exists(Excelfile)){ + stop("Could not write output file ",Excelfile,": File exists but should not be overwritten.") + } + + check_emuDBhandle(emuDBhandle) + + bundles <- list_bundles(emuDBhandle) %>% + dplyr::rename(bundle=name) + metafiles <- list_files(emuDBhandle,fileExtension = metadata.extension) + #Use the bundle list as a scaffold for a data fram to hold the content of all metadata files + # metacontent <- metafiles[c("bundle","absolute_file_path")] + for(currFile in na.omit(metafiles$absolute_file_path)){ + jsonmeta <- jsonlite::read_json(currFile,simplifyVector = TRUE) + + # Now start inserting data from the metafiles + for(col in names(jsonmeta)){ + metafiles[metafiles$absolute_file_path == currFile,col] <- jsonmeta[[col]] + } + } + # Now make sure that all bundles have a row + metafiles <- bundles %>% + dplyr::left_join(metafiles,by=c("session","bundle")) %>% + dplyr::select(-file,-absolute_file_path) + + + # Include the possibility of having default meta data for a sessions (in a _ses folder) + sessJSONFiles <- list.files(file.path(emuDBhandle$basePath),pattern=paste0(".*.",metadata.extension),recursive = TRUE,full.names = FALSE) + + # Remove meta files associated with bundles + + sessJSONFiles <- sessJSONFiles[! grepl(bundle.dir.suffix,sessJSONFiles) & grepl(session.suffix,sessJSONFiles)] + + sessions <- list_sessions(emuDBhandle) %>% + dplyr::rename(session=name) + + # Run only if there are session metadata files + if(length(sessJSONFiles) > 0){ + sessJSONFilesDF <- data.frame(stringr::str_split(sessJSONFiles,pattern = .Platform$file.sep,simplify = TRUE),stringsAsFactors=FALSE) + names(sessJSONFilesDF) <- c("session","session_metadata_file") + # The session needs to be without suffix so that metadata may be joinded by session later + + sessJSONFilesDF$session <- gsub(paste0(session.suffix,"$"),"",sessJSONFilesDF$session) + + sessJSONFilesDF <- na.omit(sessJSONFilesDF) + + + for(row in 1:nrow(sessJSONFilesDF)){ + currFile <- as.vector(sessJSONFilesDF[[row,"session_metadata_file"]]) + currSession <- as.vector(sessJSONFilesDF[[row,"session"]]) + + currSessionDir <- paste0(currSession,session.suffix) + + + jsonmeta <- jsonlite::read_json(file.path(emuDBhandle$basePath,currSessionDir,currFile),simplifyVector = TRUE) + + # Now start inserting data from the session metadata file + for(col in names(jsonmeta)){ + sessJSONFilesDF[sessJSONFilesDF$session == currSession,col] <- jsonmeta[[col]] + } + } + + #Add session meta data to the workbook, + #or just empty sessions speficiations if there are no session metadata files + + sessJSONFilesDF <- sessions %>% + dplyr::left_join(sessJSONFilesDF,by="session") + + # Make the merger with bundle files to make the final output tibble + metafiles %>% + dplyr::left_join(sessJSONFilesDF,by="session",suffix=c("","_sessionmetadatafile")) %>% + dplyr::select(-session_metadata_file) -> metafiles + + } + + # Now check and load metadata set at the database level + + load_DBconfig(emuDBhandle) -> dbCfg + + if(is.null(dbCfg$metadataDefaults)){ + dbDefaults <- data.frame() + }else{ + dbDefaults <- as.data.frame(dbCfg$metadataDefaults,stringsAsFactors=FALSE) + if(length(dbDefaults) > 0){ + #This means that the field is not just empty + # Repeat the rows so that the columns may be merged + dbMeta <- as.data.frame(c(metafiles["bundle"],dbDefaults)) %>% + + dplyr::mutate_if(is.factor,as.character) + metafiles <- metafiles %>% + dplyr::mutate_if(is.factor,as.character)%>% + dplyr::left_join(dbMeta,by="bundle",suffix=c("","_database")) %>% + dplyr::distinct() ## This is needed since duplicate rows are introduced by the join by dbMeta + + + } + } + + #Now, there may be a metadata X column set at the bundle level, an X_sessionmetadatafile + # column set at the session level, and an X_database column for the whole database. + # These need to be reconsiled + + + cols <- names(metafiles) + duplicates <- grep("_(database|sessionmetadatafile)$",cols,value=TRUE) + duplicated <- unique(gsub("_(database|sessionmetadatafile)$","",cols)) + + + for(bundleoriginal in duplicated){ + + sessColName <- paste0(bundleoriginal,"_sessionmetadatafile") + sessVec <- ifelse(exists(sessColName,metafiles),metafiles[,sessColName],NA) + dbColName <- paste0(bundleoriginal,"_database") + dbVec <- ifelse(exists(dbColName,metafiles),metafiles[,dbColName],NA) + ## This seems odd, but it makes sure that NAs are repeated for the length of vectors. + tempDF <- data.frame(metafiles[[bundleoriginal]], + sessVec, + dbVec,stringsAsFactors = FALSE) %>% + + dplyr::mutate_if(is.factor,as.character) + + names(tempDF) <- c("bundle","session","database") + # Here the result is the first non-NA value for each row (or NA if the row in tempDF contains only NAs) + metafiles[bundleoriginal] <- with(tempDF,coalesce(bundle,session,database)) + # This works since you can always remove a column without an error message (even non-existing ones) + metafiles[sessColName] <- NULL + metafiles[dbColName] <- NULL + } + + #Prepare an Excel workbook, if one should be written + if(!is.null(Excelfile)){ + wb <- openxlsx::createWorkbook(paste(emuDBhandle$dbName,"bundle")) + openxlsx::addWorksheet(wb,"bundles") + openxlsx::writeDataTable(wb,"bundles",x=metafiles,keepNA = FALSE,withFilter=FALSE) + openxlsx::freezePane(wb,"bundles",firstActiveCol = 5) + openxlsx::setColWidths(wb,"bundles",cols=5:30,widths = 18) + + # session information + + openxlsx::addWorksheet(wb,"sessions") + openxlsx::writeDataTable(wb,"sessions",x=sessJSONFilesDF,keepNA = FALSE,withFilter=FALSE) + openxlsx::freezePane(wb,"sessions",firstActiveCol = 3) + openxlsx::setColWidths(wb,"sessions",cols=3:30,widths = 18) + #database defaults + openxlsx::addWorksheet(wb,"database") + openxlsx::writeDataTable(wb,"database",x=dbDefaults,keepNA = FALSE,withFilter=FALSE) + # We do not need to check owrwriting here as that is handled by saveWorkbook + openxlsx::saveWorkbook(wb,file=Excelfile,overwrite=overwrite) + } + + return(metafiles) + +} + +#' Functions to import or add metadata information to database bundles. +#' +#' The function takes an appropriately structured Excel file and uses the +#' information to set metadata for bundles. +#' +#' The first sheet ("bundles") in the Excel file should begin with the folowing two columns: +#' \itemize{ +#' \item session +#' \item bundle +#' } +#' and then go on to have some columns which contains the metadata. Each row in the +#' data contains the information and metadata for a bundle (in the specific session). +#' The simples way to get an appropriately structed Excel file is to create one from a database using the +#' \code{\link{export_metadata}} function on an existing database and given an output file. +#' +#' Please be aware that bundles that are speficied in the Excel file will have +#' their metadata files (ending with '.meta_json') overwritten when using the +#' \code{import_metadata}. So, please make sure to remove the rows of bundles that should +#' not be altered from the Excel file before importing the metadata from it using this function. +#' +#' Date and time fields are assumed to follow the ISO8601 specification, and an attempt to convert them to the +#' approprite JSON representation will be made. The user should be aware that this conversion is made however, and +#' watch out unexpected results in advanced cases. +#' +#' @param emuDBhandle The emuR database handle of the database. +#' @param Excelfile The path to a properly formated Excel (.xlsx) file. +#' +#' @return A vector of .meta_json files updated by the call. The path for each file is given relative to the base of the EmuR database. +#' @export +#' +import_metadata <- function(emuDBhandle,Excelfile){ + if(!file.exists(Excelfile)){ + stop("Unable to open the metadata Excel file.\nThe file ",Excelfile," does not exist!") + } + openxlsx::read.xlsx(Excelfile,sheet="bundles",detectDates=TRUE) -> meta + + #Make sure we have an output file with full path + meta <- meta %>% + + dplyr::mutate(metadatafile=file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(bundle,bundle.dir.suffix), + paste0(bundle,".",metadata.extension)) + ) + #Now to the main business of the function + + json <- c() + for(r in 1:nrow(meta)){ + meta %>% + + dplyr::slice(r) %>% + dplyr::select(-session,-bundle,-metadatafile) %>% + dplyr::select_if(function(x) !is.na(x)) -> jsondat + currJSON <- ifelse(length(jsondat) > 0, + jsonlite::toJSON(jsondat,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows"), + "[{}]" #Just an empty JSON vector + ) + json <- c(json, currJSON) + } + json <- data.frame("json"=json) + towrite <- meta %>% + + dplyr::bind_cols(json) %>% + dplyr::mutate(json=as.character(json)) %>% + dplyr::select(json,metadatafile) + + #Write the bundle metadata files + for(r in 1:nrow(towrite)){ + fileConn <- file(towrite[r,"metadatafile"]) + writeLines(towrite[r,"json"], fileConn) + close(fileConn) + } + bFiles <- gsub(paste0(emuDBhandle$basePath,"/"),"",towrite[["metadatafile"]]) + + ## Now process session metadata files + + openxlsx::read.xlsx(Excelfile,sheet="sessions") -> sessionMeta + sessjsondat <- sessionMeta %>% + dplyr::select(-session) + json <- sessjsondat %>% + dplyr::rowwise() %>% + dplyr::do(json=jsonlite::toJSON(.,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows")) %>% + unlist() + json <- data.frame("json"=as.vector(json)) + + towriteSess <- sessionMeta %>% + dplyr::mutate(session_metadata_file=paste0(session,".",metadata.extension)) %>% + dplyr::bind_cols(json) %>% + dplyr::select(session,session_metadata_file,json) + ## Här finns inte session_metadata_file + #Write the bundle metadata files + for(r in 1:nrow(towriteSess)){ + outFile <- file.path(emuDBhandle$basePath, + paste0(towriteSess[r,"session"],session.suffix), + towriteSess[r,"session_metadata_file"]) + fileConn <- file(outFile) + writeLines(as.character(towriteSess[r,"json"]), fileConn) + close(fileConn) + outFile <- gsub(paste0(emuDBhandle$basePath,"/"),"",outFile) + sFiles <- ifelse(exists("sFiles"),c(sFiles,outFile),c(outFile)) + } + + # Now inject database wide metadata + + load_DBconfig(emuDBhandle) -> dbCfg + openxlsx::read.xlsx(Excelfile,sheet="database") -> dbMeta + dbCfg$metadataDefaults <- as.list(dbMeta) + store_DBconfig(emuDBhandle,dbCfg) + + return(c(sFiles,bFiles)) +} + + + +#' A utility function used for programatically setting metadata for a bundle, or default values for a session or an entire database. +#' +#' The function takes a list and a specification of where the metadata should be set. The default behaviour is to +#' keep already set metadata, and overwrite only the values that are named in the list. The user may change this +#' behaviour by setting \code{reset.before.add=TRUE}, in which case all previous bundle, session +#' or database level metadata will be replaced with the contents of the list. +#' +#' If a bundle name and a \code{session} name is provided, the metadata will be inserted only for that fully speficied \code{bundle}. +#' If only a \code{bundle} name is provided, the function will add the metadata for the bundle only if there is just +#' one session in the database. If there are multiple \code{session}s, the function will given an error. +#' +#' If no \code{session} or \code{bundle} names are provided, the metadata will be inserted as default values for the entire database. +#' +#' @param emuDBhandle An Emu database handle +#' @param metadataList A list specifying the metadata to be set. If set to an empty list (\code{list()}) the function will clear all metadata, if the argument \code{reset.before.add=TRUE} is given by the user. +#' @param bundle An optional name of a bundle +#' @param session An optional name of a session +#' @param reset.before.add If set to TRUE, the function will ignore previously set metadata and simply add the metadata supplied in the list. +#' +#' @return +#' @export +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' # Database-wide default information +#' add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted")) +#' #Bundle specific information +#' add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' get_metadata(ae) -> res +#' print(res) +#' rm(ae) +#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) +#' } +#' +add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, reset.before.add=FALSE){ + + if(is.null(bundle) & is.null(session)){ + #Database wide injection + + load_DBconfig(emuDBhandle) -> dbCfg + + if(reset.before.add){ + dbCfg$metadataDefaults <- as.list(metadataList) + } else { + #Append data + prev <- dbCfg$metadataDefaults + prev[names(metadataList)] <- metadataList + dbCfg$metadataDefaults <- prev + } + + + store_DBconfig(emuDBhandle,dbCfg) + + } else { + # Here we store metadata in either session wide or bundle speficit metadata files + # Since these files use the same structure, the business here is to set the correct metadatafile filename. + + if(! is.null(session) & is.null(bundle)){ + #Session level metadata + + metadatafile <- file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(session,".",metadata.extension)) + } + + + if(! is.null(bundle)){ + #Bundle metadata + if(is.null(session)){ + ses <- list_sessions(ae_test) + if(nrow(ses) == 1){ + #use the name of the only available session + session <- ses[[1]] + }else{ + stop("If you provide a bundle name you need to provide a session name if there are more than one sessions in the database.") + } + + } + + metadatafile <- file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(bundle,bundle.dir.suffix), + paste0(bundle,".",metadata.extension)) + + + } + if(reset.before.add | ! file.exists(metadatafile) ){ + #Start fresh / overwrite previous values + jsonmetaList <- list() + }else{ + + #Read in previous values + jsonmetaList <- as.list(jsonlite::read_json(metadatafile,simplifyVector = TRUE)) + + } + #set / overwrite metadata from list + jsonmetaList[names(metadataList)] <- metadataList + + jsonlite::write_json(jsonmetaList,metadatafile) + } +} + +#' Add identifying information based on the content of the wave file to the metadata information for the bundle. +#' +#' This function will extract information (lenght of recording and a checksum) from the wav file associated with a bundle, and add it to the set of metadata +#' for the bundle. This information can later be used to verify that the file has not been altered later on, or to deidentify +#' wav files in a reversable manner for use outside of the emuR framework. Deidentified files are sometimes useful for blinded randomized +#' perceptual testing, and the ability to reverse the procedure is then essential to link the results of the evaluation back to the original +#' recording extracted from the emuR data base. The user may create checksums by multiple algorithms by running the function again with different \code{algorithm} arguments. +#' +#' @param emuDBhandle The handle for the emuR database. +#' @param sessionPattern A regexp pattern that allows the user to limit which sessions should be affected by the manipulation. +#' @param bundlePattern A regexp pattern that allows the user to limit which bundles to include. +#' @param algorithm The name of the hashing algorithm, according to the \code{\link[digest]{digest}} function. +#' +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' #Add a md5 digest to the metadata of all bundles +#' add_digests(ae,algorithm = "md5") +#' +#' #Add a "sha1" checksum (the default) to some bundles +#' add_digests(ae,bundlePattern = "msajc0.*") +#' get_metadata(ae) -> res +#' print(res) +#' rm(ae) +#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) +#' } +#' +add_digests <- function(emuDBhandle,sessionPattern=".*",bundlePattern=".*",algorithm="sha1"){ + wavs <- list_files(emuDBhandle,fileExtension = "*.wav",sessionPattern=sessionPattern,bundlePattern=bundlePattern) + for(f in 1:nrow(wavs)){ + inFile <- unlist(wavs[f,"absolute_file_path"],use.names = FALSE) + session <- unlist(wavs[f,"session"],use.names = FALSE) + bundle <- unlist(wavs[f,"bundle"],use.names = FALSE) + + wrassp::read.AsspDataObj(inFile) -> w + options(digits=15) + attr(w,"sampleRate") -> sr + attr(w,"endRecord") - attr(w,"startRecord") +1 -> samples + samples / sr *1000 -> duration + rm(w) + digest::digest(inFile,file=TRUE,algo=algorithm) -> checksum + metadata <- list("Bundle.Duration.ms"=duration) + metadata[paste0("Bundle.",algorithm,"_checksum")] <- checksum + + add_metadata(emuDBhandle,metadata,session=session,bundle=bundle) + + } +} + + + +#' Create a biography of the labels in a list of segments in a tidy manner +#' +#' @param segs_tbl The \code{\link[dplyr]{tibble}} that is the result \code{\link[emuR]{query}} call. +#' @param emuDBhandle A \code{\link{emuR}} database handle. +#' @param compute_digests Should information that describes the recorded sound files be computed so that is is definitelly part of the +#' added metadata information. +#' @param algorithm The checksum algorithm that should be used when computing sound file information. +#' +#' @return A \code{\link[dplyr]{tibble}} +#' @export +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' # Database-wide default information +#' add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted")) +#' #Bundle specific information +#' add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' +#' # Get all the 'n' segments in the database +#' query(ae_test,"Phonetic = n",resultType = "tibble") -> ae_nt +#' # Add information related to the nature the recording sessions +#' # e.g. the speaker ID, the date of the recording +#' ae_nt %>% biographize(ae_test) %>% glimpse() +#' # This code does the same as the above, but it will also compute new +#' # information that is strictly aimed at identifying the recording +#' # (length of recording (in ms) and a sha1 digest of the wav file). +#' ae_nt %>% +#' biographize(ae_test,compute_digests=TRUE,algorithm="sha1") %>% +#' glimpse() +#' rm(ae) +#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) +#' } +#' +biographize <- function(segs_tbl,emuDBhandle,compute_digests=FALSE,algorithm="sha1") { + #make sure that the first argument is a segment list, and that + # it contains "session" and "bundle" columns. + if(! is.data.frame(segs_tbl) || !c("session", "bundle") %in% names(segs_tbl)){ + out <- paste("The input to the",match.call()[[1]], "has to be a 'tibble' or a 'data.frame'.") + stop(out) + } + if(compute_digests==TRUE){ + add_digests(emuDBhandle,algorithm = algorithm) + } + #Here we use the special mode of export_medatata to get a data structure rather than an Excel file. + mdata <- get_metadata(emuDBhandle,session = ".*") + + out <- segs_tbl %>% + dplyr::left_join(mdata,by = c("session", "bundle")) + + return(out) +} From 712835bc0ed6b9a6a8b72692691f9663e41eb2e4 Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 6 May 2022 15:47:22 +1000 Subject: [PATCH 2/7] Add files via upload --- man/add_digests.Rd | 2941 ++++++++++++++++++++++++++++++++++++++ man/add_metadata.Rd | 2987 +++++++++++++++++++++++++++++++++++++++ man/biographize.Rd | 2957 +++++++++++++++++++++++++++++++++++++++ man/export_metadata.Rd | 3021 ++++++++++++++++++++++++++++++++++++++++ man/import_metadata.Rd | 2914 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 14820 insertions(+) create mode 100644 man/add_digests.Rd create mode 100644 man/add_metadata.Rd create mode 100644 man/biographize.Rd create mode 100644 man/export_metadata.Rd create mode 100644 man/import_metadata.Rd diff --git a/man/add_digests.Rd b/man/add_digests.Rd new file mode 100644 index 00000000..e710fee0 --- /dev/null +++ b/man/add_digests.Rd @@ -0,0 +1,2941 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + emuR/add_digests.Rd at master · FredrikKarlssonSpeech/emuR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + + + + +
+ +
+ + + + + + + +
+ + + +
+ + + + + + + + + +
+ + + + + + + + + + + + + + + + + + +
+ +
+ + + + FredrikKarlssonSp...  /   + emuR  /   + +
+
+ + + +
+ + +
+ + +
+ + + +
+
+ Tip: + Type # to search pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search discussions +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type ! to search projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search teams +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search people and organizations +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type > to activate command mode +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Go to your accessibility settings to change your keyboard shortcuts +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type author:@me to search your content +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:pr to filter to pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:issue to filter to issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:project to filter to projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:open to filter to open content +
+
+ Type ? for help and tips +
+
+
+ +
+ +
+
+ We’ve encountered an error and some results aren't available at this time. Type a new search or try again later. +
+
+ + No results matched your search + + + + + + + + + + +
+ + + + + Search for issues and pull requests + + # + + + + Search for issues, pull requests, discussions, and projects + + # + + + + Search for organizations, repositories, and users + + @ + + + + Search for projects + + ! + + + + Search for files + + / + + + + Activate command mode + + > + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Filter to pull requests + + # is:pr + + + + Filter to issues + + # is:issue + + + + Filter to discussions + + # is:discussion + + + + Filter to projects + + # is:project + + + + Filter to open issues, pull requests, and discussions + + # is:open + + + + + + + + + + + + + + + + +
+
+
+ +
+ + + + + + + + + + +
+ + + + +
+
+
+ + + + + + + + + +
+ +
+ +
+

+ + + / + + emuR + + + Public +

+ + forked from IPS-LMU/emuR + + +
+ +
    + + + +
  • + +
    + + + + + + + Watch + + + 0 + + + + +
    +
    +

    Notifications

    + +
    + +
    +
    + + + + + + + + +
    + + +
    + + + + + Get push notifications on iOS or Android. + +
    +
    +
    +
    + + + + +
    +
    +
    + + + +
  • + +
  • + Fork + 17 + + +
  • + +
  • + + +
    +
    + + +
    +
    + +
    +
    + + + + +
    + +
    +
    + + + + + + + +
    + +
    +
    +
    +
    +
    +
  • + + + +
+ +
+ +
+
+ + + + +
+ + + +
+ Open in github.dev + Open in a new github.dev tab + + + + + + +
+ + +
+ + + + + + +Permalink + +
+ +
+
+ + + master + + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + Go to file + + +
+ + + + + +
+
+
+ + + + + + + + + +
+ +
+
+
 
+
+ +
+
 
+ Cannot retrieve contributors at this time +
+
+ + + + + + + + + + + + +
+ +
+ + +
+ + 46 lines (40 sloc) + + 1.85 KB +
+ +
+ + + + +
+ + + + + + + + + + + + + + + +
+ +
+
+ +
+
+ +
+ +
+
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emuR-metadata.R
\name{add_digests}
\alias{add_digests}
\title{Add identifying information based on the content of the wave file to the metadata information for the bundle.}
\usage{
add_digests(
emuDBhandle,
sessionPattern = ".*",
bundlePattern = ".*",
algorithm = "sha1"
)
}
\arguments{
\item{emuDBhandle}{The handle for the emuR database.}
+
\item{sessionPattern}{A regexp pattern that allows the user to limit which sessions should be affected by the manipulation.}
+
\item{bundlePattern}{A regexp pattern that allows the user to limit which bundles to include.}
+
\item{algorithm}{The name of the hashing algorithm, according to the \code{\link[digest]{digest}} function.}
}
\description{
This function will extract information (lenght of recording and a checksum) from the wav file associated with a bundle, and add it to the set of metadata
for the bundle. This information can later be used to verify that the file has not been altered later on, or to deidentify
wav files in a reversable manner for use outside of the emuR framework. Deidentified files are sometimes useful for blinded randomized
perceptual testing, and the ability to reverse the procedure is then essential to link the results of the evaluation back to the original
recording extracted from the emuR data base. The user may create checksums by multiple algorithms by running the function again with different \code{algorithm} arguments.
}
\examples{
\dontrun{
create_emuRdemoData()
ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB"))
+
#Add a md5 digest to the metadata of all bundles
add_digests(ae,algorithm = "md5")
+
#Add a "sha1" checksum (the default) to some bundles
add_digests(ae,bundlePattern = "msajc0.*")
get_metadata(ae) -> res
print(res)
rm(ae)
unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE)
}
+
}
+
+ + + +
+ +
+ + + + +
+ + +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/man/add_metadata.Rd b/man/add_metadata.Rd new file mode 100644 index 00000000..a9977852 --- /dev/null +++ b/man/add_metadata.Rd @@ -0,0 +1,2987 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + emuR/add_metadata.Rd at master · FredrikKarlssonSpeech/emuR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + + + + +
+ +
+ + + + + + + +
+ + + +
+ + + + + + + + + +
+ + + + + + + + + + + + + + + + + + +
+ +
+ + + + FredrikKarlssonSp...  /   + emuR  /   + +
+
+ + + +
+ + +
+ + +
+ + + +
+
+ Tip: + Type # to search pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search discussions +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type ! to search projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search teams +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search people and organizations +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type > to activate command mode +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Go to your accessibility settings to change your keyboard shortcuts +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type author:@me to search your content +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:pr to filter to pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:issue to filter to issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:project to filter to projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:open to filter to open content +
+
+ Type ? for help and tips +
+
+
+ +
+ +
+
+ We’ve encountered an error and some results aren't available at this time. Type a new search or try again later. +
+
+ + No results matched your search + + + + + + + + + + +
+ + + + + Search for issues and pull requests + + # + + + + Search for issues, pull requests, discussions, and projects + + # + + + + Search for organizations, repositories, and users + + @ + + + + Search for projects + + ! + + + + Search for files + + / + + + + Activate command mode + + > + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Filter to pull requests + + # is:pr + + + + Filter to issues + + # is:issue + + + + Filter to discussions + + # is:discussion + + + + Filter to projects + + # is:project + + + + Filter to open issues, pull requests, and discussions + + # is:open + + + + + + + + + + + + + + + + +
+
+
+ +
+ + + + + + + + + + +
+ + + + +
+
+
+ + + + + + + + + +
+ +
+ +
+

+ + + / + + emuR + + + Public +

+ + forked from IPS-LMU/emuR + + +
+ +
    + + + +
  • + +
    + + + + + + + Watch + + + 0 + + + + +
    +
    +

    Notifications

    + +
    + +
    +
    + + + + + + + + +
    + + +
    + + + + + Get push notifications on iOS or Android. + +
    +
    +
    +
    + + + + +
    +
    +
    + + + +
  • + +
  • + Fork + 17 + + +
  • + +
  • + + +
    +
    + + +
    +
    + +
    +
    + + + + +
    + +
    +
    + + + + + + + +
    + +
    +
    +
    +
    +
    +
  • + + + +
+ +
+ +
+
+ + + + +
+ + + +
+ Open in github.dev + Open in a new github.dev tab + + + + + + +
+ + +
+ + + + + + +Permalink + +
+ +
+
+ + + master + + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + Go to file + + +
+ + + + + +
+
+
+ + + + + + + + + +
+ +
+
+
 
+
+ +
+
 
+ Cannot retrieve contributors at this time +
+
+ + + + + + + + + + + + +
+ +
+ + +
+ + 57 lines (49 sloc) + + 2.19 KB +
+ +
+ + + + +
+ + + + + + + + + + + + + + + +
+ +
+
+ +
+
+ +
+ +
+
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emuR-metadata.R
\name{add_metadata}
\alias{add_metadata}
\title{A utility function used for programatically setting metadata for a bundle, or default values for a session or an entire database.}
\usage{
add_metadata(
emuDBhandle,
metadataList,
bundle = NULL,
session = NULL,
reset.before.add = FALSE
)
}
\arguments{
\item{emuDBhandle}{An Emu database handle}
+
\item{metadataList}{A list specifying the metadata to be set. If set to an empty list (\code{list()}) the function will clear all metadata, if the argument \code{reset.before.add=TRUE} is given by the user.}
+
\item{bundle}{An optional name of a bundle}
+
\item{session}{An optional name of a session}
+
\item{reset.before.add}{If set to TRUE, the function will ignore previously set metadata and simply add the metadata supplied in the list.}
}
\value{
+
}
\description{
The function takes a list and a specification of where the metadata should be set. The default behaviour is to
keep already set metadata, and overwrite only the values that are named in the list. The user may change this
behaviour by setting \code{reset.before.add=TRUE}, in which case all previous bundle, session
or database level metadata will be replaced with the contents of the list.
}
\details{
If a bundle name and a \code{session} name is provided, the metadata will be inserted only for that fully speficied \code{bundle}.
If only a \code{bundle} name is provided, the function will add the metadata for the bundle only if there is just
one session in the database. If there are multiple \code{session}s, the function will given an error.
+
If no \code{session} or \code{bundle} names are provided, the metadata will be inserted as default values for the entire database.
}
\examples{
\dontrun{
create_emuRdemoData()
ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB"))
+
# Database-wide default information
add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted"))
#Bundle specific information
add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003")
get_metadata(ae) -> res
print(res)
rm(ae)
unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE)
}
+
}
+
+ + + +
+ +
+ + + + +
+ + +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/man/biographize.Rd b/man/biographize.Rd new file mode 100644 index 00000000..d6146639 --- /dev/null +++ b/man/biographize.Rd @@ -0,0 +1,2957 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + emuR/biographize.Rd at master · FredrikKarlssonSpeech/emuR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + + + + +
+ +
+ + + + + + + +
+ + + +
+ + + + + + + + + +
+ + + + + + + + + + + + + + + + + + +
+ +
+ + + + FredrikKarlssonSp...  /   + emuR  /   + +
+
+ + + +
+ + +
+ + +
+ + + +
+
+ Tip: + Type # to search pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search discussions +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type ! to search projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search teams +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search people and organizations +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type > to activate command mode +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Go to your accessibility settings to change your keyboard shortcuts +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type author:@me to search your content +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:pr to filter to pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:issue to filter to issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:project to filter to projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:open to filter to open content +
+
+ Type ? for help and tips +
+
+
+ +
+ +
+
+ We’ve encountered an error and some results aren't available at this time. Type a new search or try again later. +
+
+ + No results matched your search + + + + + + + + + + +
+ + + + + Search for issues and pull requests + + # + + + + Search for issues, pull requests, discussions, and projects + + # + + + + Search for organizations, repositories, and users + + @ + + + + Search for projects + + ! + + + + Search for files + + / + + + + Activate command mode + + > + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Filter to pull requests + + # is:pr + + + + Filter to issues + + # is:issue + + + + Filter to discussions + + # is:discussion + + + + Filter to projects + + # is:project + + + + Filter to open issues, pull requests, and discussions + + # is:open + + + + + + + + + + + + + + + + +
+
+
+ +
+ + + + + + + + + + +
+ + + + +
+
+
+ + + + + + + + + +
+ +
+ +
+

+ + + / + + emuR + + + Public +

+ + forked from IPS-LMU/emuR + + +
+ +
    + + + +
  • + +
    + + + + + + + Watch + + + 0 + + + + +
    +
    +

    Notifications

    + +
    + +
    +
    + + + + + + + + +
    + + +
    + + + + + Get push notifications on iOS or Android. + +
    +
    +
    +
    + + + + +
    +
    +
    + + + +
  • + +
  • + Fork + 17 + + +
  • + +
  • + + +
    +
    + + +
    +
    + +
    +
    + + + + +
    + +
    +
    + + + + + + + +
    + +
    +
    +
    +
    +
    +
  • + + + +
+ +
+ +
+
+ + + + +
+ + + +
+ Open in github.dev + Open in a new github.dev tab + + + + + + +
+ + +
+ + + + + + +Permalink + +
+ +
+
+ + + master + + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + Go to file + + +
+ + + + + +
+
+
+ + + + + + + + + +
+ +
+
+
 
+
+ +
+
 
+ Cannot retrieve contributors at this time +
+
+ + + + + + + + + + + + +
+ +
+ + +
+ + 50 lines (44 sloc) + + 1.8 KB +
+ +
+ + + + +
+ + + + + + + + + + + + + + + +
+ +
+
+ +
+
+ +
+ +
+
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emuR-metadata.R
\name{biographize}
\alias{biographize}
\title{Create a biography of the labels in a list of segments in a tidy manner}
\usage{
biographize(segs_tbl, emuDBhandle, compute_digests = FALSE, algorithm = "sha1")
}
\arguments{
\item{segs_tbl}{The \code{\link[dplyr]{tibble}} that is the result \code{\link[emuR]{query}} call.}
+
\item{emuDBhandle}{A \code{\link{emuR}} database handle.}
+
\item{compute_digests}{Should information that describes the recorded sound files be computed so that is is definitelly part of the
added metadata information.}
+
\item{algorithm}{The checksum algorithm that should be used when computing sound file information.}
}
\value{
A \code{\link[dplyr]{tibble}}
}
\description{
Create a biography of the labels in a list of segments in a tidy manner
}
\examples{
\dontrun{
create_emuRdemoData()
ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB"))
+
# Database-wide default information
add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted"))
#Bundle specific information
add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003")
+
# Get all the 'n' segments in the database
query(ae_test,"Phonetic = n",resultType = "tibble") -> ae_nt
# Add information related to the nature the recording sessions
# e.g. the speaker ID, the date of the recording
ae_nt \%>\% biographize(ae_test) \%>\% glimpse()
# This code does the same as the above, but it will also compute new
# information that is strictly aimed at identifying the recording
# (length of recording (in ms) and a sha1 digest of the wav file).
ae_nt \%>\%
biographize(ae_test,compute_digests=TRUE,algorithm="sha1") \%>\%
glimpse()
rm(ae)
unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE)
}
+
}
+
+ + + +
+ +
+ + + + +
+ + +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/man/export_metadata.Rd b/man/export_metadata.Rd new file mode 100644 index 00000000..debdc8d6 --- /dev/null +++ b/man/export_metadata.Rd @@ -0,0 +1,3021 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + emuR/export_metadata.Rd at master · FredrikKarlssonSpeech/emuR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + + + + +
+ +
+ + + + + + + +
+ + + +
+ + + + + + + + + +
+ + + + + + + + + + + + + + + + + + +
+ +
+ + + + FredrikKarlssonSp...  /   + emuR  /   + +
+
+ + + +
+ + +
+ + +
+ + + +
+
+ Tip: + Type # to search pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search discussions +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type ! to search projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search teams +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search people and organizations +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type > to activate command mode +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Go to your accessibility settings to change your keyboard shortcuts +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type author:@me to search your content +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:pr to filter to pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:issue to filter to issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:project to filter to projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:open to filter to open content +
+
+ Type ? for help and tips +
+
+
+ +
+ +
+
+ We’ve encountered an error and some results aren't available at this time. Type a new search or try again later. +
+
+ + No results matched your search + + + + + + + + + + +
+ + + + + Search for issues and pull requests + + # + + + + Search for issues, pull requests, discussions, and projects + + # + + + + Search for organizations, repositories, and users + + @ + + + + Search for projects + + ! + + + + Search for files + + / + + + + Activate command mode + + > + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Filter to pull requests + + # is:pr + + + + Filter to issues + + # is:issue + + + + Filter to discussions + + # is:discussion + + + + Filter to projects + + # is:project + + + + Filter to open issues, pull requests, and discussions + + # is:open + + + + + + + + + + + + + + + + +
+
+
+ +
+ + + + + + + + + + +
+ + + + +
+
+
+ + + + + + + + + +
+ +
+ +
+

+ + + / + + emuR + + + Public +

+ + forked from IPS-LMU/emuR + + +
+ +
    + + + +
  • + +
    + + + + + + + Watch + + + 0 + + + + +
    +
    +

    Notifications

    + +
    + +
    +
    + + + + + + + + +
    + + +
    + + + + + Get push notifications on iOS or Android. + +
    +
    +
    +
    + + + + +
    +
    +
    + + + +
  • + +
  • + Fork + 17 + + +
  • + +
  • + + +
    +
    + + +
    +
    + +
    +
    + + + + +
    + +
    +
    + + + + + + + +
    + +
    +
    +
    +
    +
    +
  • + + + +
+ +
+ +
+
+ + + + +
+ + + +
+ Open in github.dev + Open in a new github.dev tab + + + + + + +
+ + +
+ + + + + + +Permalink + +
+ +
+
+ + + master + + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + Go to file + + +
+ + + + + +
+
+
+ + + + + + + + + +
+ +
+
+
 
+
+ +
+
 
+ Cannot retrieve contributors at this time +
+
+ + + + + + + + + + + + +
+ +
+ + +
+ + 66 lines (60 sloc) + + 3.29 KB +
+ +
+ + + + +
+ + + + + + + + + + + + + + + +
+ +
+
+ +
+
+ +
+ +
+
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emuR-metadata.R
\name{get_metadata}
\alias{get_metadata}
\alias{export_metadata}
\title{Functions for gathering metata specified for recordings in an emuR database.}
\usage{
get_metadata(emuDBhandle, overwrite = FALSE, session = ".*")
+
export_metadata(emuDBhandle, Excelfile = NULL, overwrite = FALSE)
}
\arguments{
\item{emuDBhandle}{The database handle of an emuR database.}
+
\item{overwrite}{The default behaviour is that an Excel file should not be
overwritten if it exists already. If this parameter is \code{TRUE} then the file will be overwritten.}
+
\item{Excelfile}{The full path and file name of the Excel file that the metadata should be written to. The function will not overwrite this file, unless \code{overwrite} is set to \code{TRUE}.}
}
\value{
A data frame containing inforamtion about the 'meta_json' files found
\describe{
\item{session}{The name of the session.}
\item{bundle}{The bundle name}
}
In addition, the \code{\link[dplyr]{tibble}} will contain one column for every type of information given in any of the 'meta_json' files.
}
\description{
Metadata of a recording is stored in 'meta_json' files. Metadata may be set at the database, session and bundle level.
The functions goes through the database metadata file, session metadata files and metadata files associated with
each bundle, parses the JSON data and returns \code{\link[dplyr]{tibble}} with one row per bundle in the database.
Database default values are supressed by information set in a session metadata file, and session level data are in
turn surpressed by data given at the bundle level.
The structure of the metadata does not have to be consistent across meta_json files.
New columns are added to the as new fields are detected.
}
\details{
The function \code{export_metadata} outputs the metadata as an Excel file instead, with bundle, session and database
tabs. The "bundle" tab gives the complete set of all the metadata that are active for each bundle, regardness where
it was set (for the bundle directly, or as a session / database default value).
+
The user is expected to use the functions \code{export_metadata} and \code{import_metadata} to fix
accedental inconsistencies in the metadata of a database across bundles by exporting all
information to an Excel file using \code{export_metadata}, edit columns and values (including moving inconsistently
spelled metadata fields into a single column with the intended name) using Excel or another editor that complies with
the OOXML Workbook ISO/IEC 29500:2008 standard. The user is also expected to keep the indented structure of the Excel
file (one row per bundle or session, and each column except for those indicating session and bundle names containing
metadata), otherwise it is possible that the file may not be read in again by \code{\link{import_metadata}} to set
updated values.
}
\examples{
\dontrun{
create_emuRdemoData()
ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB"))
+
# Database-wide default information
add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted"))
#Bundle specific information
add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003")
get_metadata(ae) -> res
print(res)
rm(ae)
unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE)
}
+
}
+
+ + + +
+ +
+ + + + +
+ + +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/man/import_metadata.Rd b/man/import_metadata.Rd new file mode 100644 index 00000000..5630d314 --- /dev/null +++ b/man/import_metadata.Rd @@ -0,0 +1,2914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + emuR/import_metadata.Rd at master · FredrikKarlssonSpeech/emuR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + + + + +
+ +
+ + + + + + + +
+ + + +
+ + + + + + + + + +
+ + + + + + + + + + + + + + + + + + +
+ +
+ + + + FredrikKarlssonSp...  /   + emuR  /   + +
+
+ + + +
+ + +
+ + +
+ + + +
+
+ Tip: + Type # to search pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type # to search discussions +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type ! to search projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search teams +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type @ to search people and organizations +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type > to activate command mode +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Go to your accessibility settings to change your keyboard shortcuts +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type author:@me to search your content +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:pr to filter to pull requests +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:issue to filter to issues +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:project to filter to projects +
+
+ Type ? for help and tips +
+
+
+ +
+
+ Tip: + Type is:open to filter to open content +
+
+ Type ? for help and tips +
+
+
+ +
+ +
+
+ We’ve encountered an error and some results aren't available at this time. Type a new search or try again later. +
+
+ + No results matched your search + + + + + + + + + + +
+ + + + + Search for issues and pull requests + + # + + + + Search for issues, pull requests, discussions, and projects + + # + + + + Search for organizations, repositories, and users + + @ + + + + Search for projects + + ! + + + + Search for files + + / + + + + Activate command mode + + > + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Search your issues, pull requests, and discussions + + # author:@me + + + + Filter to pull requests + + # is:pr + + + + Filter to issues + + # is:issue + + + + Filter to discussions + + # is:discussion + + + + Filter to projects + + # is:project + + + + Filter to open issues, pull requests, and discussions + + # is:open + + + + + + + + + + + + + + + + +
+
+
+ +
+ + + + + + + + + + +
+ + + + +
+
+
+ + + + + + + + + +
+ +
+ +
+

+ + + / + + emuR + + + Public +

+ + forked from IPS-LMU/emuR + + +
+ +
    + + + +
  • + +
    + + + + + + + Watch + + + 0 + + + + +
    +
    +

    Notifications

    + +
    + +
    +
    + + + + + + + + +
    + + +
    + + + + + Get push notifications on iOS or Android. + +
    +
    +
    +
    + + + + +
    +
    +
    + + + +
  • + +
  • + Fork + 17 + + +
  • + +
  • + + +
    +
    + + +
    +
    + +
    +
    + + + + +
    + +
    +
    + + + + + + + +
    + +
    +
    +
    +
    +
    +
  • + + + +
+ +
+ +
+
+ + + + +
+ + + +
+ Open in github.dev + Open in a new github.dev tab + + + + + + +
+ + +
+ + + + + + +Permalink + +
+ +
+
+ + + master + + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + Go to file + + +
+ + + + + +
+
+
+ + + + + + + + + +
+ +
+
+
 
+
+ +
+
 
+ Cannot retrieve contributors at this time +
+
+ + + + + + + + + + + + +
+ +
+ + +
+ + 40 lines (37 sloc) + + 1.74 KB +
+ +
+ + + + +
+ + + + + + + + + + + + + + + +
+ +
+
+ +
+
+ +
+ +
+
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emuR-metadata.R
\name{import_metadata}
\alias{import_metadata}
\title{Functions to import or add metadata information to database bundles.}
\usage{
import_metadata(emuDBhandle, Excelfile)
}
\arguments{
\item{emuDBhandle}{The emuR database handle of the database.}
+
\item{Excelfile}{The path to a properly formated Excel (.xlsx) file.}
}
\value{
A vector of .meta_json files updated by the call. The path for each file is given relative to the base of the EmuR database.
}
\description{
The function takes an appropriately structured Excel file and uses the
information to set metadata for bundles.
}
\details{
The first sheet ("bundles") in the Excel file should begin with the folowing two columns:
\itemize{
\item session
\item bundle
}
and then go on to have some columns which contains the metadata. Each row in the
data contains the information and metadata for a bundle (in the specific session).
The simples way to get an appropriately structed Excel file is to create one from a database using the
\code{\link{export_metadata}} function on an existing database and given an output file.
+
Please be aware that bundles that are speficied in the Excel file will have
their metadata files (ending with '.meta_json') overwritten when using the
\code{import_metadata}. So, please make sure to remove the rows of bundles that should
not be altered from the Excel file before importing the metadata from it using this function.
+
Date and time fields are assumed to follow the ISO8601 specification, and an attempt to convert them to the
approprite JSON representation will be made. The user should be aware that this conversion is made however, and
watch out unexpected results in advanced cases.
}
+
+ + + +
+ +
+ + + + +
+ + +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + From a19bdc130a10bfe5bdd1f11dde24eb8e4f328ac0 Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 6 May 2022 16:31:15 +1000 Subject: [PATCH 3/7] Update emuR-metadata.R Compare --- R/emuR-metadata.R | 258 ++++++++++++++++++++++++++-------------------- 1 file changed, 147 insertions(+), 111 deletions(-) diff --git a/R/emuR-metadata.R b/R/emuR-metadata.R index d08987c0..27b3d723 100644 --- a/R/emuR-metadata.R +++ b/R/emuR-metadata.R @@ -14,7 +14,7 @@ coalesce <- function(...) { } -#' Functions for gathering metata specified for recordings in an emuR database. +#' Functions for gathering metadata specified for recordings in an emuR database. #' #' Metadata of a recording is stored in 'meta_json' files. Metadata may be set at the database, session and bundle level. #' The functions goes through the database metadata file, session metadata files and metadata files associated with @@ -42,6 +42,7 @@ coalesce <- function(...) { #' @param Excelfile The full path and file name of the Excel file that the metadata should be written to. The function will not overwrite this file, unless \code{overwrite} is set to \code{TRUE}. #' @param overwrite The default behaviour is that an Excel file should not be #' overwritten if it exists already. If this parameter is \code{TRUE} then the file will be overwritten. +#' @param manditory A vector of column names which the function will ensure are present in both the bundle and session level metadata. #' #' @rdname export_metadata #' @export @@ -51,27 +52,25 @@ coalesce <- function(...) { #' \item{session}{The name of the session.} #' \item{bundle}{The bundle name} #' } -#' In addition, the \code{\link[dplyr]{tibble}} will contain one column for every type of information given in any of the 'meta_json' files. +#' In addition, the [dplyr::tibble] will contain one column for every type of information given in any of the 'meta_json' files. #' #' #' @examples #' \dontrun{ #' create_emuRdemoData() -#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' #' # Database-wide default information -#' add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted")) +#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) #' #Bundle specific information -#' add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") -#' get_metadata(ae) -> res +#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' get_metadata(ae_test) -> res #' print(res) -#' rm(ae) -#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) #' } #' -get_metadata <- function(emuDBhandle,overwrite=FALSE,session=".*"){ - res <- export_metadata(emuDBhandle=emuDBhandle,overwrite=overwrite) +get_metadata <- function(emuDBhandle,session=".*", manditory=c()){ + res <- export_metadata(emuDBhandle=emuDBhandle,manditory=manditory) return(res) } @@ -81,25 +80,26 @@ get_metadata <- function(emuDBhandle,overwrite=FALSE,session=".*"){ #' @export #' -export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE){ +export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE, manditory=c()){ #Start with checking consistency regarding output file if(! overwrite && !is.null(Excelfile) && file.exists(Excelfile)){ stop("Could not write output file ",Excelfile,": File exists but should not be overwritten.") } - check_emuDBhandle(emuDBhandle) +check_emuDBhandle(emuDBhandle) bundles <- list_bundles(emuDBhandle) %>% dplyr::rename(bundle=name) metafiles <- list_files(emuDBhandle,fileExtension = metadata.extension) - #Use the bundle list as a scaffold for a data fram to hold the content of all metadata files + #Use the bundle list as a scaffold for a dataframe to hold the content of all metadata files # metacontent <- metafiles[c("bundle","absolute_file_path")] for(currFile in na.omit(metafiles$absolute_file_path)){ jsonmeta <- jsonlite::read_json(currFile,simplifyVector = TRUE) # Now start inserting data from the metafiles for(col in names(jsonmeta)){ - metafiles[metafiles$absolute_file_path == currFile,col] <- jsonmeta[[col]] + #Assignment of NULL is problematic, so we need to make sure we never do that + metafiles[metafiles$absolute_file_path == currFile,col] <- ifelse(is.null(jsonmeta[[col]]),NA,jsonmeta[[col]]) } } # Now make sure that all bundles have a row @@ -107,70 +107,61 @@ export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE){ dplyr::left_join(metafiles,by=c("session","bundle")) %>% dplyr::select(-file,-absolute_file_path) + #Add missing manditory columns + missingManditory <- setdiff(manditory, names(metafiles)) + metafiles[, missingManditory] <- NA - # Include the possibility of having default meta data for a sessions (in a _ses folder) - sessJSONFiles <- list.files(file.path(emuDBhandle$basePath),pattern=paste0(".*.",metadata.extension),recursive = TRUE,full.names = FALSE) - - # Remove meta files associated with bundles - - sessJSONFiles <- sessJSONFiles[! grepl(bundle.dir.suffix,sessJSONFiles) & grepl(session.suffix,sessJSONFiles)] + #For sessions it makes less sense to use list_files as a base for finding the metadata files + #so we pre-generate the expected names and check which ones actually exist. sessions <- list_sessions(emuDBhandle) %>% dplyr::rename(session=name) - # Run only if there are session metadata files - if(length(sessJSONFiles) > 0){ - sessJSONFilesDF <- data.frame(stringr::str_split(sessJSONFiles,pattern = .Platform$file.sep,simplify = TRUE),stringsAsFactors=FALSE) - names(sessJSONFilesDF) <- c("session","session_metadata_file") - # The session needs to be without suffix so that metadata may be joinded by session later - - sessJSONFilesDF$session <- gsub(paste0(session.suffix,"$"),"",sessJSONFilesDF$session) - - sessJSONFilesDF <- na.omit(sessJSONFilesDF) + sessions$session_metadata_file <- file.path(emuDBhandle$basePath, + paste0(sessions,session.suffix), + paste(sessions,"meta_json",sep=".")) + sessJSONFilesDF <- sessions[file.exists(sessions$session_metadata_file),] - for(row in 1:nrow(sessJSONFilesDF)){ - currFile <- as.vector(sessJSONFilesDF[[row,"session_metadata_file"]]) - currSession <- as.vector(sessJSONFilesDF[[row,"session"]]) + for(row in seq_len(nrow(sessJSONFilesDF))){ - currSessionDir <- paste0(currSession,session.suffix) + jsonmeta <- jsonlite::read_json(sessJSONFilesDF[row,"session_metadata_file"],simplifyVector = TRUE) - - jsonmeta <- jsonlite::read_json(file.path(emuDBhandle$basePath,currSessionDir,currFile),simplifyVector = TRUE) - - # Now start inserting data from the session metadata file - for(col in names(jsonmeta)){ - sessJSONFilesDF[sessJSONFilesDF$session == currSession,col] <- jsonmeta[[col]] - } + # Now start inserting data from the session metadata file + for(col in names(jsonmeta)){ + #Assignment of NULL to a data.frame value is problematic, so we make sure we never do + sessJSONFilesDF[row,col] <- ifelse(is.null(jsonmeta[[col]]),NA,jsonmeta[[col]]) } + } + #Make sure that all sessions have a row in the output + sessJSONFilesDF <- sessions %>% + dplyr::select(-session_metadata_file) %>% + dplyr::left_join(sessJSONFilesDF,by="session") %>% + dplyr::select(-session_metadata_file) - #Add session meta data to the workbook, - #or just empty sessions speficiations if there are no session metadata files - - sessJSONFilesDF <- sessions %>% - dplyr::left_join(sessJSONFilesDF,by="session") + #Add missing manditory columns + missingManditory <- setdiff(manditory, names(sessJSONFilesDF)) + sessJSONFilesDF[, missingManditory] <- NA - # Make the merger with bundle files to make the final output tibble - metafiles %>% - dplyr::left_join(sessJSONFilesDF,by="session",suffix=c("","_sessionmetadatafile")) %>% - dplyr::select(-session_metadata_file) -> metafiles + # Make the merger with bundle files to make the final output tibble + metafiles %>% + dplyr::left_join(sessJSONFilesDF,by="session",suffix=c("","_sessionmetadatafile")) -> metafiles - } # Now check and load metadata set at the database level load_DBconfig(emuDBhandle) -> dbCfg if(is.null(dbCfg$metadataDefaults)){ - dbDefaults <- data.frame() + dbMeta <- data.frame() }else{ dbDefaults <- as.data.frame(dbCfg$metadataDefaults,stringsAsFactors=FALSE) if(length(dbDefaults) > 0){ #This means that the field is not just empty # Repeat the rows so that the columns may be merged dbMeta <- as.data.frame(c(metafiles["bundle"],dbDefaults)) %>% - dplyr::mutate_if(is.factor,as.character) + metafiles <- metafiles %>% dplyr::mutate_if(is.factor,as.character)%>% dplyr::left_join(dbMeta,by="bundle",suffix=c("","_database")) %>% @@ -227,7 +218,12 @@ export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE){ openxlsx::setColWidths(wb,"sessions",cols=3:30,widths = 18) #database defaults openxlsx::addWorksheet(wb,"database") - openxlsx::writeDataTable(wb,"database",x=dbDefaults,keepNA = FALSE,withFilter=FALSE) + if(ncol(dbMeta)> 0){ + openxlsx::writeDataTable(wb,"database",x=dbMeta,keepNA = FALSE,withFilter=FALSE) + }else{ + openxlsx::writeComment(wb,"database",col=1,row=1, + openxlsx::createComment("Here the user may set database-wide metadata by setting by writing a parameter name as the header (top row) of a column, and the value underneath",author="EmuR")) + } # We do not need to check owrwriting here as that is handled by saveWorkbook openxlsx::saveWorkbook(wb,file=Excelfile,overwrite=overwrite) } @@ -236,6 +232,7 @@ export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE){ } + #' Functions to import or add metadata information to database bundles. #' #' The function takes an appropriately structured Excel file and uses the @@ -248,7 +245,7 @@ export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE){ #' } #' and then go on to have some columns which contains the metadata. Each row in the #' data contains the information and metadata for a bundle (in the specific session). -#' The simples way to get an appropriately structed Excel file is to create one from a database using the +#' The simples way to get an appropriately structured Excel file is to create one from a database using the #' \code{\link{export_metadata}} function on an existing database and given an output file. #' #' Please be aware that bundles that are speficied in the Excel file will have @@ -276,10 +273,10 @@ import_metadata <- function(emuDBhandle,Excelfile){ meta <- meta %>% dplyr::mutate(metadatafile=file.path(emuDBhandle$basePath, - paste0(session,session.suffix), - paste0(bundle,bundle.dir.suffix), - paste0(bundle,".",metadata.extension)) - ) + paste0(session,session.suffix), + paste0(bundle,bundle.dir.suffix), + paste0(bundle,".",metadata.extension)) + ) #Now to the main business of the function json <- c() @@ -289,11 +286,11 @@ import_metadata <- function(emuDBhandle,Excelfile){ dplyr::slice(r) %>% dplyr::select(-session,-bundle,-metadatafile) %>% dplyr::select_if(function(x) !is.na(x)) -> jsondat - currJSON <- ifelse(length(jsondat) > 0, - jsonlite::toJSON(jsondat,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows"), - "[{}]" #Just an empty JSON vector - ) - json <- c(json, currJSON) + currJSON <- ifelse(length(jsondat) > 0, + jsonlite::toJSON(jsondat,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows"), + "[{}]" #Just an empty JSON vector + ) + json <- c(json, currJSON) } json <- data.frame("json"=json) towrite <- meta %>% @@ -317,7 +314,7 @@ import_metadata <- function(emuDBhandle,Excelfile){ dplyr::select(-session) json <- sessjsondat %>% dplyr::rowwise() %>% - dplyr::do(json=jsonlite::toJSON(.,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows")) %>% + dplyr::do(json=jsonlite::toJSON(.,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows",auto_unbox=TRUE)) %>% unlist() json <- data.frame("json"=as.vector(json)) @@ -350,50 +347,63 @@ import_metadata <- function(emuDBhandle,Excelfile){ -#' A utility function used for programatically setting metadata for a bundle, or default values for a session or an entire database. +#' A utility function used for programatically setting metadata for a bundle, or +#' default values for a session or an entire database. #' -#' The function takes a list and a specification of where the metadata should be set. The default behaviour is to -#' keep already set metadata, and overwrite only the values that are named in the list. The user may change this -#' behaviour by setting \code{reset.before.add=TRUE}, in which case all previous bundle, session -#' or database level metadata will be replaced with the contents of the list. +#' The function takes a list and a specification of where the metadata should be +#' set. The default behaviour is to keep already set metadata, and overwrite +#' only the values that are named in the list. The user may change this +#' behaviour by setting \code{reset.before.add=TRUE}, in which case all previous +#' bundle, session or database level metadata will be replaced with the contents +#' of the list. #' -#' If a bundle name and a \code{session} name is provided, the metadata will be inserted only for that fully speficied \code{bundle}. -#' If only a \code{bundle} name is provided, the function will add the metadata for the bundle only if there is just -#' one session in the database. If there are multiple \code{session}s, the function will given an error. +#' If a bundle name and a \code{session} name is provided, the metadata will be +#' inserted only for that fully specified \code{bundle}. If only a \code{bundle} +#' name is provided, the function will add the metadata for the bundle only if +#' there is just one session in the database. If there are multiple +#' \code{session}s, the function will given an error. #' -#' If no \code{session} or \code{bundle} names are provided, the metadata will be inserted as default values for the entire database. +#' If no \code{session} or \code{bundle} names are provided, the metadata will +#' be inserted as default values for the entire database. Please note that +#' database wide ingested metadata defaults currently interferes with the Emu +#' web app and should not be put into general use before this issue has been +#' fixed in the Emu web app. #' #' @param emuDBhandle An Emu database handle -#' @param metadataList A list specifying the metadata to be set. If set to an empty list (\code{list()}) the function will clear all metadata, if the argument \code{reset.before.add=TRUE} is given by the user. +#' @param metadataList A list specifying the metadata to be set. If set to an +#' empty list (\code{list()}) the function will clear all metadata, if the +#' argument \code{reset.before.add=TRUE} is given by the user. The user may +#' also clear (remove from the set of defined metadata) by setting the +#' property to NULL. #' @param bundle An optional name of a bundle #' @param session An optional name of a session -#' @param reset.before.add If set to TRUE, the function will ignore previously set metadata and simply add the metadata supplied in the list. +#' @param reset.before.add If set to TRUE, the function will ignore previously +#' set metadata and simply add the metadata supplied in the list. #' #' @return #' @export #' -#' @examples +#' @examples #' \dontrun{ #' create_emuRdemoData() -#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' #' # Database-wide default information -#' add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted")) +#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) #' #Bundle specific information -#' add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") -#' get_metadata(ae) -> res +#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' get_metadata(ae_test) -> res #' print(res) -#' rm(ae) -#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) #' } #' add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, reset.before.add=FALSE){ - if(is.null(bundle) & is.null(session)){ + if(is.null(bundle) && is.null(session)){ + #Database wide injection load_DBconfig(emuDBhandle) -> dbCfg - + if(reset.before.add){ dbCfg$metadataDefaults <- as.list(metadataList) } else { @@ -407,22 +417,22 @@ add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, rese store_DBconfig(emuDBhandle,dbCfg) } else { - # Here we store metadata in either session wide or bundle speficit metadata files + # Here we store metadata in either session wide or bundle specific metadata files # Since these files use the same structure, the business here is to set the correct metadatafile filename. if(! is.null(session) & is.null(bundle)){ #Session level metadata metadatafile <- file.path(emuDBhandle$basePath, - paste0(session,session.suffix), - paste0(session,".",metadata.extension)) + paste0(session,session.suffix), + paste0(session,".",metadata.extension)) } if(! is.null(bundle)){ #Bundle metadata if(is.null(session)){ - ses <- list_sessions(ae_test) + ses <- list_sessions(emuDBhandle) if(nrow(ses) == 1){ #use the name of the only available session session <- ses[[1]] @@ -449,12 +459,13 @@ add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, rese } #set / overwrite metadata from list - jsonmetaList[names(metadataList)] <- metadataList - - jsonlite::write_json(jsonmetaList,metadatafile) + #jsonmetaList[names(metadataList)] <- metadataList + jsonmetaList <- utils::modifyList(jsonmetaList,metadataList,keep.null = FALSE) + jsonlite::write_json(jsonmetaList,metadatafile,auto_unbox=TRUE) } } + #' Add identifying information based on the content of the wave file to the metadata information for the bundle. #' #' This function will extract information (lenght of recording and a checksum) from the wav file associated with a bundle, and add it to the set of metadata @@ -474,17 +485,15 @@ add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, rese #' @examples #' \dontrun{ #' create_emuRdemoData() -#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' #' #Add a md5 digest to the metadata of all bundles -#' add_digests(ae,algorithm = "md5") -#' +#' add_digests(ae_test,algorithm = "md5") +#' #' #Add a "sha1" checksum (the default) to some bundles -#' add_digests(ae,bundlePattern = "msajc0.*") -#' get_metadata(ae) -> res +#' add_digests(ae_test,bundlePattern = "msajc0.*") +#' get_metadata(ae_test) -> res #' print(res) -#' rm(ae) -#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) #' } #' add_digests <- function(emuDBhandle,sessionPattern=".*",bundlePattern=".*",algorithm="sha1"){ @@ -525,13 +534,13 @@ add_digests <- function(emuDBhandle,sessionPattern=".*",bundlePattern=".*",algor #' @examples #' \dontrun{ #' create_emuRdemoData() -#' ae <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' #' # Database-wide default information -#' add_metadata(ae,list("Accent"="Northern","Elicitation"="Scripted")) +#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) #' #Bundle specific information -#' add_metadata(ae,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") -#' +#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' #' # Get all the 'n' segments in the database #' query(ae_test,"Phonetic = n",resultType = "tibble") -> ae_nt #' # Add information related to the nature the recording sessions @@ -540,11 +549,11 @@ add_digests <- function(emuDBhandle,sessionPattern=".*",bundlePattern=".*",algor #' # This code does the same as the above, but it will also compute new #' # information that is strictly aimed at identifying the recording #' # (length of recording (in ms) and a sha1 digest of the wav file). -#' ae_nt %>% -#' biographize(ae_test,compute_digests=TRUE,algorithm="sha1") %>% +#' ae_nt %>% +#' biographize(ae_test,compute_digests=TRUE,algorithm="sha1") %>% #' glimpse() -#' rm(ae) -#' unlink(file.path(tempdir(),"emuR_demoData"),recursive=TRUE) +#' rm(ae_test) +#' #' } #' biographize <- function(segs_tbl,emuDBhandle,compute_digests=FALSE,algorithm="sha1") { @@ -560,8 +569,35 @@ biographize <- function(segs_tbl,emuDBhandle,compute_digests=FALSE,algorithm="sh #Here we use the special mode of export_medatata to get a data structure rather than an Excel file. mdata <- get_metadata(emuDBhandle,session = ".*") - out <- segs_tbl %>% + out <- segs_tbl %>% dplyr::left_join(mdata,by = c("session", "bundle")) return(out) } + + + +### INTERACTIVE testing + +#unlink_emuRDemoDir() +#create_ae_db() -> emuDBhandle +#add_metadata(emuDBhandle,session = "0000",bundle = "msajc003",metadataList = list(Gender=NA,Age=10)) +#rstudioapi::navigateToFile(file.path(emuDBhandle$basePath,"ae_DBconfig.json")) +#add_metadata(emuDBhandle,metadataList = list(Gender=NA,Age=10)) + +#add_metadata(emuDBhandle,session = "0000",bundle = "msajc003",metadataList = list(Gender=NULL,Age=20)) + +# dir.create(file.path(emuDBhandle$basePath,"temp")) +# for(i in 1:11){ +# file.copy(file.path(emuDBhandle$basePath,"0000_ses"),file.path(emuDBhandle$basePath,"temp"),recursive = TRUE) +# newName <- paste0(i,i,i,i,"_ses") +# file.rename(file.path(emuDBhandle$basePath,"temp","0000_ses"),file.path(emuDBhandle$basePath,"temp",newName)) +# file.copy(file.path(emuDBhandle$basePath,"temp",newName),emuDBhandle$basePath,recursive = TRUE) +# unlink(file.path(emuDBhandle$basePath,"temp",newName),recursive = TRUE) +# } + + + +#export_metadata(ae,Excelfile = "~/Desktop/out.xlsx",overwrite = TRUE) +#rstudioapi::navigateToFile(list_files(emuDBhandle,"meta_json")$absolute_file_path) +#export_metadata(ae,Excelfile = "~/Desktop/out.xlsx") From a0222538f2469dbfc0f8751112353fa40462bcd6 Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 13 May 2022 20:52:59 +1000 Subject: [PATCH 4/7] Add files via upload --- tests/testthat/helpers_emuR-metadata.R | 55 +++++++++ tests/testthat/test_emuR-metadata.R | 149 +++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 tests/testthat/helpers_emuR-metadata.R create mode 100644 tests/testthat/test_emuR-metadata.R diff --git a/tests/testthat/helpers_emuR-metadata.R b/tests/testthat/helpers_emuR-metadata.R new file mode 100644 index 00000000..4e985009 --- /dev/null +++ b/tests/testthat/helpers_emuR-metadata.R @@ -0,0 +1,55 @@ +library(emuR) +library(dplyr) + +create_ae_db <- function(){ + demodir <- file.path(tempdir(),"emuR_demoData") + + if(!dir.exists(demodir)){ + create_emuRdemoData() + } + db <- load_emuDB(file.path(demodir,"ae_emuDB")) + + # We need two copies of the session, which in R requires some manual intervention + dirs <- list.dirs(file.path(db$basePath,"0000_ses")) + dirs <- gsub("0000_ses","0001_ses",dirs) + for(currDir in dirs){ + dir.create(currDir,showWarnings = FALSE,recursive = TRUE) + } + + inFiles <- list.files(file.path(db$basePath,"0000_ses"),full.names = TRUE,recursive = TRUE,include.dirs = FALSE) + outFiles <- gsub("0000_ses","0001_ses",inFiles) + file.copy(from=inFiles,to=outFiles,recursive = FALSE) + return(db) +} + +unlink_emuRDemoDir <- function(){ + demodir <- file.path(tempdir(),"emuR_demoData") + res <- unlink(demodir,recursive = TRUE) + binRes <- c(TRUE,FALSE)[res+1] + return(binRes) +} + + +make_dummy_metafiles <- function(db){ + + + sess1 <- file.copy(from=file.path(getwd(),"metadata_extras","session.meta"),to=file.path(db$basePath,"0000_ses","0000_meta.json")) + + sess2 <- file.copy(from=file.path(getwd(),"metadata_extras","session_0001.meta"),to=file.path(db$basePath,"0001_ses","0001_meta.json")) + + outMetaFiles <- list_bundles(db) %>% + dplyr::mutate(absolute_file_path=file.path(db$basePath,paste0(session,"_ses"),paste0(name,"_bndl"),paste0(name,"_meta.json"))) %>% + dplyr::arrange(absolute_file_path) %>% + dplyr::slice(-1,-11) ## One file per session should be missing so that we may test bundle and session defaults + # "ae_emuDB/0000_ses/msajc003_bndl/msajc003.meta_json" + # "ae_emuDB/0001_ses/msajc015_bndl/msajc015.meta_json" + # should be missing. + + res <- file.copy(from=file.path(getwd(),"metadata_extras","bundle.meta"),to=outMetaFiles$absolute_file_path) + + res <- c(sess1,sess2,res) + # store database wide default values + res2 <- file.copy(from=file.path(getwd(),"metadata_extras","db.meta"),to=file.path(db$basePath,"ae_meta.json")) + +} + diff --git a/tests/testthat/test_emuR-metadata.R b/tests/testthat/test_emuR-metadata.R new file mode 100644 index 00000000..30ed82f0 --- /dev/null +++ b/tests/testthat/test_emuR-metadata.R @@ -0,0 +1,149 @@ +# context("Bundle (session and database) metadata") +# + +dbName = "ae" +path2demoData = file.path(tempdir(), + "emuR_demoData") +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), + "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +#dir.create(path2testData) + + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +#working test +test_that("Test of the digest adding function",{ + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromLegacy"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae_test = load_emuDB(path2db, + verbose = FALSE) + + add_digests(ae_test,algorithm="md5",verbose=FALSE) + add_digests(ae_test,verbose=FALSE) + add_digests(ae_test,algorithm="sha512",verbose=FALSE) + + md <- get_metadata(ae_test, verbose=FALSE) + + expect_false(all(is.na(md[c("Bundle.Duration.ms","Bundle.md5_checksum","Bundle.sha512_checksum")]))) +} +) + +#failing test +test_that("Metadata is collected correctly by get_metadata",{ + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromLegacy"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae_test = load_emuDB(path2db, + verbose = FALSE) + + ## Create a second session + dirs <- list.dirs(file.path(ae_test$basePath,"0000_ses")) + dirs <- gsub("0000_ses","0001_ses",dirs) + for(currDir in dirs){ + dir.create(currDir,showWarnings = FALSE,recursive = TRUE) + } + + inFiles <- list.files(file.path(ae_test$basePath,"0000_ses"),full.names = TRUE,recursive = TRUE,include.dirs = FALSE) + outFiles <- gsub("0000_ses","0001_ses",inFiles) + file.copy(from=inFiles,to=outFiles,recursive = FALSE) + + + make_dummy_metafiles(ae_test) + + res <- get_metadata(ae_test, clearCache=TRUE, verbose=FALSE) + + resnames <- names(as.data.frame(res)) + namesShouldBe <- c("session", "bundle", "Session.Date", "Session.Time", "Participant.ID", "Researcher", "Gender", "Condition", "Setup") + outputShouldBe <- tibble::as_tibble(openxlsx::read.xlsx(file.path("..","metadata_extras","expected_meta.xlsx"),sheet="bundles")) + + expect_named(res,namesShouldBe) + expect_equal(res,outputShouldBe) + +} +) + + +test_that("Import of metadata from an Excel file produces an exected result",{ + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromLegacy"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae_test = load_emuDB(path2db, + verbose = FALSE) + + ## Create a second session + dirs <- list.dirs(file.path(ae_test$basePath,"0000_ses")) + dirs <- gsub("0000_ses","0001_ses",dirs) + for(currDir in dirs){ + dir.create(currDir,showWarnings = TRUE,recursive = TRUE) + } + + inFiles <- list.files(file.path(ae_test$basePath,"0000_ses"),full.names = TRUE,recursive = TRUE,include.dirs = FALSE) + outFiles <- gsub("0000_ses","0001_ses",inFiles) + file.copy(from=inFiles,to=outFiles,recursive = FALSE) + + make_dummy_metafiles(ae_test) + dummyRes <- get_metadata(ae_test, clearCache=TRUE, verbose=FALSE) + + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromLegacy"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae_test = load_emuDB(path2db, + verbose = FALSE) + + ## Create the second session again + dirs <- list.dirs(file.path(ae_test$basePath,"0000_ses")) + dirs <- gsub("0000_ses","0001_ses",dirs) + for(currDir in dirs){ + dir.create(currDir,showWarnings = FALSE,recursive = TRUE) + } + + inFiles <- list.files(file.path(ae_test$basePath,"0000_ses"),full.names = TRUE,recursive = TRUE,include.dirs = FALSE) + outFiles <- gsub("0000_ses","0001_ses",inFiles) + file.copy(from=inFiles,to=outFiles,recursive = FALSE) + + import_metadata(ae_test,file.path("..","metadata_extras","expected_meta.xlsx"),verbose=FALSE) + importRes <- get_metadata(ae_test, verbose=FALSE) + importRes <- importRes %>% + dplyr::select(session,bundle,Session.Date,Session.Time,Participant.ID,Researcher,Gender,Condition,Setup) %>% + dplyr::arrange(session,bundle,Session.Date,Session.Time,Participant.ID,Researcher,Gender,Condition,Setup) + + dummyRes <- dummyRes %>% + dplyr::select(session,bundle,Session.Date,Session.Time,Participant.ID,Researcher,Gender,Condition,Setup) %>% + dplyr::arrange(session,bundle,Session.Date,Session.Time,Participant.ID,Researcher,Gender,Condition,Setup) + + + expect_identical(na.omit(dummyRes), na.omit(importRes)) + expect_equal(as.list(table(is.na(dummyRes))),list(`FALSE`=122,`TRUE`=4)) + expect_equal(as.list(table(is.na(importRes))),list(`FALSE`=122,`TRUE`=4)) +} +) + + + + + + From e4904dd8fd7b1a336b43c688b6734fd853def5df Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 13 May 2022 20:54:19 +1000 Subject: [PATCH 5/7] Add files via upload --- tests/testthat/metadata_extras/bundle.meta | 1 + tests/testthat/metadata_extras/db.meta | 1 + .../testthat/metadata_extras/expected_meta.xlsx | Bin 0 -> 13421 bytes tests/testthat/metadata_extras/session.meta | 1 + .../testthat/metadata_extras/session_0001.meta | 1 + 5 files changed, 4 insertions(+) create mode 100644 tests/testthat/metadata_extras/bundle.meta create mode 100644 tests/testthat/metadata_extras/db.meta create mode 100644 tests/testthat/metadata_extras/expected_meta.xlsx create mode 100644 tests/testthat/metadata_extras/session.meta create mode 100644 tests/testthat/metadata_extras/session_0001.meta diff --git a/tests/testthat/metadata_extras/bundle.meta b/tests/testthat/metadata_extras/bundle.meta new file mode 100644 index 00000000..6d03d8d1 --- /dev/null +++ b/tests/testthat/metadata_extras/bundle.meta @@ -0,0 +1 @@ +[{"Session.Date":"2019-01-01","Session.Time":"09:43:54","Participant.ID":"PD"}] diff --git a/tests/testthat/metadata_extras/db.meta b/tests/testthat/metadata_extras/db.meta new file mode 100644 index 00000000..a7cb0576 --- /dev/null +++ b/tests/testthat/metadata_extras/db.meta @@ -0,0 +1 @@ +[{"Condition":"PD","Setup":"Monologue","Gender":"Male"}] diff --git a/tests/testthat/metadata_extras/expected_meta.xlsx b/tests/testthat/metadata_extras/expected_meta.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9f32cf8c489cacc7ec805d1670ec617b8b1affa7 GIT binary patch literal 13421 zcmeHtbyQXB*7v3x0V(P3mImoYq`OPXO*hgAN(*cO>F#cjlJ4$q>5$HM>pAy&4#)eA z_up^4ckO4aHTKwR{`M2I=Twk|g2n{E0^k7v04ZRpOdsAC0stU^0|1@^;32g|ZLA$l ztsV7M+-yx9beUX%Z%ERiA!$+pkl^?Kd;K5YfqvCK%QhCYFF4mvB&Lk$rPE?CsDlK; zfvMYYG?Tp8Wt61A(e=XyHKLf+&N$rJ5FFXr_+!f#e96`YY_mS!d%wk?L_>rKnMu$! zzMSd0rED_$D4CW;6BZb=-%_!?er?IWqc8p7NmfE%W5qLb~Le^=MTj)BCSlkyj7H%c&!rPjswSpQBSuZuT^IwwCZs0R| zm$t(N2C%g~)2G?Bk%CM zV@e`dU7QQ~p$0YJ6b7JXieaRLkQGrsbQIK<9K^DxDRt)wQJWDyA~<4qBgq?iP3-Eb z=S2yTy#dL8AamqdV+a5mGX+3hg@-4myFe_JX*;HBD8RVFdp^KIR6=<#?Jt3v536l9 zsEHGJj9>87`yc4KR_^igRYOCgVkN$8=(&9r%8tTh?UB0`20ha<(DpXrEm%Gu9-sgU zf0NuQRTlD7Fgv~k3nmg+a`o*^-#9Qc{kZ>EV*d||>0g#!7$qm)#)2GpD0LgueKD~R zi6$cJA}rNFs^aA%J^!RCEQ11np_!ThO$E;%O47U0>$Z1pfj?}goAi8@y)Xm=laIXG zr6@4=+Rh1%j>`U%gk9lEJG%44*~EFgq>LMlb7KTuL2YiDY|j$8#MrS|8QKt&Iv&E) zEP_Dn7xBJYopPFsMwi796C%p{MS-O?T&X+JLkZqfpR%`61mE+#+#8L@>UJnS*#bGMRn*p35hnWP#?Nef?@RiHITbIIK(!0%5oSv{Imk!v92Cu-+i3a+wWETrfID z`kk6R>^r)w1glslSODM&m^Hv0^jF@=RMobbWx@1HseCZFO5#ODGapxQ65_}NifA)O zfb5XVr1tuqee!p_m+VMl`edghJ}_@-q%=DWnr-d4E3cbVhJv&x5+j8jXkoumF)(aF4?T&}K=8Vn)4kO+Z)%wMo zs#e!B$@s+e=+tlFVP^PsMZX&j9I%J?5N$9wUJhH%fM7SQcv?GHO-I2syHnQyFfXU{ z#1=$>&yu8_PTX4dlp-{0m$}9}(2W1)Uzq+v?G#qRUp_-IQmqJt8+9REd62CA9!}jM zJ|w8f5uLtIzwiju%i0_7w5G!Qk*XF>xe;3uRvENJ7w;cC%JR*aZMx6R(g3omxhIJw zn1^+|*LU1`T#&v1&m+zPjrE%GUTqYfn~#JP9!{&%n6pB8lT9U_zjb|4g%PLY-vdVR#=)GIOZ4lkF3u)5G_lz_9m+Z7HGwop~*@Gmw@)Ck1 zH{R2TmJnF-E|N~G(F;f*d)VOw+#Yi!9PQ+}9GX`! zXM|FgyRPKvd?+7!drdK61e=qlUz?ZU^js5CUAGV8z7wB;$$Sr}*7$j;>vh^z+%ma! z24ju3t5y@wssw8HL@Z5Yp+~0W$`BoB)0Qt|R$~jF{OOHcKSh?nm$nTx4R3vd;vjlp zvjD$t3R~Cf{$Qxv^VVzUqS!zjGoBQ@bv}8D+@t|w<0CNt{?m^jos*Y5g8=|2@qR=A zKN;N7+!Sca{PWKGLqYCoE;!6`qj@*S-w?BV*+3x-N78OXL+;Cc=8PU*Jhdx(S45^w zBu%l=+zc`iLQI#Q?Vne7lPbL(^E$Lt|Kjz%Xnp;YOoteU)GV_$LYgWao7#r|`k3R% zDmST&CXk)no*-@M|-c+08 z2WaJ?$F5^vk_ZdL*CveG@RAb5bmI7DnhN%`Ztp^ITX5Pnnw8Yp7zqwEy%w$JuH-ir zm}3FT^WLd%;a_97oH1GR6Ge3Hqr5=!VHcN6Vh|V@T(5nhGj=L;u*x3$O8q8-`h;&POaEz^D ztH$U&+?f_FE$Rssk95z3C@67hSKw=-h>Mc&jg4V5ee}ktD4;~-6`Fw$0eiuFy$MHD zT8U%$-gpxO+!h3w!!T``iNUB9aTPkvBDkLT0L@k5?8408x2Z1cS}_VdWUb*Yj#Met z###U*XCdE8^31M!cpqZ57UNpdBKwqOvk!GRP))jCC?<`x>q-^yTOsMlKCvQ4X_1Oh zI=FgGJ)cyG%$Q;Y4)B&aJ3XL5Cw6P5Vd7@{V=d3C6zwi;`tjLo4Tn2Qsk zd&#&Nbk>xPD|AY|N?UJgKNQAcCACpwLqRTPcjwK~iL{dd1@B+cDC!U4;rMJ5Io(j| zgGiMutKW~jeIjz%?NEk`r7)=&uc>-DEjSJ-irEbsX1&aUf8NfYL7Y0fdoUCVd*D@z z_SVRaWRz2=KOrmh@`Mq~P@wpdAh(E31KnRly?n<*HR3sg$B^3DU}-?7(V##A?NtcmA1C zr3ffqAsI z;!u~kswjH0tQhZYf0J;3JTFG8(|Wc>+#>=TJ|#xoX~DRDPMw;?y{@+Mu!@?BoA9r~ zgt=LxTd9aRc;~85A-|LxXMr9{Ltc9_!RTZ@_+UzQ;Uaq{V48OgS2+sgu zWac;CXu|o;V+q)s^~|c(G;76~B71rDj_lj#d8qW1c<9bmgM;jeAQ- zA+jT4fR&H*=@o_E-1MyPh#yYqC7wT`jnuxkw9SBbtV4L9oE1U)DM#N26IY^b5heF` zracl%Q0=n=GaFJTfstV}@;a(~WoD+4f8C<;e_dx` zf1j5WA-kDV4VTSl%lK*Tp<6F=4a@V8=9}b@z??F$x77vpoSLYr*l6GU?X>5qqW0Pg zWLEE^TVuA3gZc%6TxGkkD$w$NomC0E#2M0*E1EKiu3Mtv+B&u_%QK~7LyP)3Gnb>1A94n%zv@9fvL{^3R=(S6U{$a`0I>sP^wLIrc}~Bg?|en#V2$ zv)<3BJD&3BX))KS9h6_=FV?TbtQ0r0UL`r~PmVQ%ZNNXRL}^K6#0}UDA_nJ)aDTZ$ z4(6t&jt1Brq9r-Rf-jR}yx6Ts86ZSVmFlajU-`8hRnf$6=WD4Pt;@vruPT;6DqFR(Jt%2H z+u@@6NA(0BpksI<1ondcA)RA6TDk_fk#qy>+X1b~O&_%r*=3n*VlJnkf^yh)CG1fF z4+R;Sudi_Hly@&hbE01w zz0*|{3*pb=Qe-%#I*s8Ex#+gj21 zqi3k<8P3N!ZADDlq!r4}qWNHtfGo^GxH>IuW^W|X;)#%4eyEggR)_cd!!KIUhi^~j z%@o-}zTTVLayTBs$sz3Di1MV|w$EFYn@G&(ujcgc;k{*zkMEwzE|px%<=g2nWDbT4 z`!4*#6FZz+&v#fZZ=ybP&Pm;v`0!+C+*!WYuY`0vXfi;>XXro|>K7kl}`ed?2YN#Yn+= z!DpB3vfJ+FG+F#~%dPobWP#acKy8(Nj9&AZl7VGnR58tnN8Xi3XlRWHi>xVWT-zAl zTvLl*kZxuls5}S`eq@CyU|}3LTkG4qx2LV-JH0EZXemk4({oBYhH}MSc z1uQ6(;9trAAuxXgp+Cjur-$`31PzF2Mf}QwCU)refOc*@>W=m?yYc;S!8f?V?P`>2 z>28UUX0By%;Uo4v)SK5imPFqv4lrljbJAj9;O#qICQEh1y*$jrtb5eYxa%&=V*MSMl zA+6R$SUpANxe}baXymB8*yN0s6(m#n+9&qxyBc@Z?%OF z$F9czYVkjH#{XxFFaLC6VFvb(j;s1>9 zoH?mkt-2Bov4NwUh=;N=zD}|eE8p`lQw)R?g)=*NI&(=bBK{>j#s`--7%wxCi@#4I z+e{er?wU@$>9=fOu}TqU47L*v;E3Ws*omLZPE?VzS!F?MEU5(Bi6m}>;ZYjt2^iHr z!z{}NVzV{wcCsFgYTwa``-^Z<1Wcw}c)#a1x3~P?_fGG#@_Hoj3Q`BmF#}?{)r8V! zO(nb^+zDRNl!e#8l%#>4ElA5AelO7 z8&vYTr;sxWXCX8DwH@waX<)&pgJ&2HC~z@Z81}G63*D3Z(xHWQqIcCwvqaN)b||`S z1C~W_ka%I3NUHQTyHd0kVr=@=2AAobp3P$9Ox+=^HlfWchVe{ZWl9+V5J*`_k@5Q+ z`qmHO3#?4;*?o8V;!nfkKH_iSyJiLpQq$sH=p281b17Y(klY{JM`5LEa;krM|7_WN zop^;rjM*^*>O5uBBCWK|OTE3DE#rwITNrKM*r1?y%=~OL>#?&Appc)X%Ty$#m&u-( zk5AxP`Lf!pwEF@?^|Zs0^LGqa_0C<(6miIFwzwT@M7IcYhf0j}LCP$%9D=mY&n;l; z(bZAHG0f(V`8i4{r>EiNKW^T~iG5%WbQ!y9=f@H_gK+rRYlOgg1?=UleR3S`$=5+e zT((W?JZ6lHb;?$nr{32xuP4FIJ9Z2~kb-apKpl$N^C)Ky-;}-bPsp-l%rl+AI{JiB zQDTd`JnegK+Sp;q+`?myYJ;UmiP47zt1*PpLa0}In0%pGUq$uV$|8A!K6#l>riJE> zvWvQjioabTy`=QJV|Ag_WDC%U;d|>A`Vm??3}LOU5o@|?fn?o*CO4m*qXzeP@6YjO+r_|HWZ*H~LTPudE zR7^3-TUn6B;`2ZZT+)}{yY@RMkS*K#u!dA=qmJapSGKNFy!-WHyz*;A;s9hZP8il< zdD)68X4;BJ?x}Q&kec+mwot@pRQdFbm-#$--5IXj1d=eZrnM0)6FKb)yH)+|N~R4W zF|UcmLK5-2n-_cY1;psfmA_)?2=3>Hyu`u^aE+;Bw~-rWp((ehrv+_B@PyugbWa#u zCywyjmc9+kIn3|oMaHXDX(d32g`e}>v^9qplP6bwygg3A4Ahm-;+OS+h)7#qlKr4t zRkmteI${HhU(;I1^kSh$72_Vy>sF`5`?Y>xS1i;GbK;O1MsX>jYH9~BLUXA_Z}C@i z0V}FuL8UL_gEuXMU6NVJ3{q(+qyf3Ew0=`Th{cu|dV#)*6Z%F#Y_g%~qri6%L!oc6 zFNPTqAiQD|iYzu8e4(t07R6^yY3v~wNwM%pED&eJfV>AcqLwuSr_EEX=8{?V-_m`@}bnk-jb&g`k*DA^(s0IB92&#EzV_DN!%cR0_LK>r#=;!aL+Na!6u=zNFrsjNGA2hOEmJ^7tzSem+1V6 z;{HJ%x9hE!L3-C1mJ^q}pnijQ{e|1rHTDNMG~&R-1)Jvm4V`X zdHrIMHERb>71>5r`0R1QY!#yH#jjYV8;rW>XV@h)X7vGWch1lFty&}04jd0yD>rkH zFkZg!|E4oUlAbIa12RKX95T>f$K0AXJxY85aG07{TnM3EfHjaoUNqX#6uNWAcCUe? zVcRXY9enLm*JrW>nZCGL<|67t_S}Z(%o@Q~?I%pBi@Hz64x8&>lTRJ}>m`zO#|huT zvnD~-GvxCF)PKL^|Hn(%t!nD-O(Pi@WXN77` zHivK_4dcG53!Mbs?(xCNZd{ej*$m(0S+dMm>V>p#2@^!?@)<3L9>77}KQ$)8)84~8 zuqjsuXTDMX@L(Mr-QJiw{BT<`RducASTH?__XOb^8_E~BW&QTc**P%lS z1&+*Kiz-EOBj<=I6(9MOMov8VNLeP=B&3eoixM)|En6=c8&VW>cotNqWdZVa+0(Fg z((^tjnuQCy?}l(ksY_@t)Y7+5)?2uC=#3^t^Q28YLx|K5!la}KWwt-;r~gRRUP^dz zvceCnw8vI;FCci2Am)jHJtX6;{%+}{vz1Q18S)Wu3%_3qyXBMT5Kj9k*i03nZ3xTJvvliZBerhWC|A zKJJLKK%ecq`;`T}`iNB%N1s!O@0H`(f#(7^8|J;rt$1)^C^l1Gj_yzoSTE+=&iTu_v>HKkni&2X8bnZMd0ejvz(S}Z-fG_HiW87R%Q zVmgMf6T9&{rR6_yH@mE8u7I4~0{Hea;)E9nqQUmp^Tm>!B652_T(2gk3sG854j0+V z=L{gHY^7)n)UXM-d9Lv7yT1p2f1;OXzGGZE_(H>+8Fysz2>IcQwpU!qK`?jRyFN{p zy4TVy@}CMw;^&#v3yg?5SN2Or=#K)qQoSorFs-M)BD&J{lg@|+N0?~`N{0A)EoBIK z^m`J?w?#-}!ge^}!bH<1!BlKJVPHyQmSjh7=qgc=AN0^K1xJdF9PPNC^nA7JfmW9+ z3P%hpBEevIh=S&!6CTtLxaIn-Z+;{^*CPaWTp7VW0_LAQZEk39YNG6DZ((irleHbA zT3`K9HT@zAy7^o=XOJ*^P-+36CjAuu2^wAnj8;xNtzQjwE>4W2BGc90bhCGzhIftJ z3CyR;)+p0F0|WzLOs7MS-m8WlKgx9qx*6Pns5?NJlaq%sT3!2tINeM~oplG#1dcgn zRyNe>M=oRS?c1?r9w+R~bR-`u{A47%F73RJlvUd!S542CvL)|k+Fp}f`KiKe=i6dI z+~v{N79idS!QRehSdwIRk@76axNta+0x|idb{&iOJ}`OC2dbv{3TX+ZppZZDq07ei zmuWyBk+k*1nMAt}D5Sj7Lt;g_GE8`9ZYHdGhrZevy1*kAy^r#_Gx4T7QdkHZp-;Rz z@qPc2dFX~Er|y8+T87CLmH_eA73h|~+(d!5nYfH&^pYZB8kD3z$69hvyX>`4p#R$) z=g0oU9uMAeQSgqVfh({kHpU9}HntAThPJjp6bZOj;lFiP@XjShFM$;*TGtZX4RPoq ze%$~Kc{g3wKFu3b-%QVDsVxnakD&)fQFmw2z|Xu^Dd*0DO#+^$FEkejoSVL}d<;p$ zh^Veva$hJ*cUiMVt!5ld920F`9a>uqT-$04IDw{fKGt^y@x^z5p6TLoN4Q~UATc6Th z=_y3OH+st2eyH4W9Z6e>L$voU_f=1y1{gk3`+yqd-}0<9HmnEj<`egJ%Cdfh*IFxG zJdCj&e9o-JUeop!Nq1{qDKU%RdMD57Y?E;H#db^CYP;}({aaP9=p(yLwCWdu;< zwab~?7^LW7dr$$2wQ09^saM^R%+NmXBGg*K{+4)^B}v*q0&mfVibBN9@%jy^3rYav zEZ0Dn#Dk7rr?-f{-cYYw{Doj1?bZGD+3(pI-n}yN9L&zS;Q01W1>$IE1U8DyKOcXD z5HUY(W8fj&%@c`>W69|3usmf68Hv=;5fee7vIt{VPu%TVIozAW=yi78H&6N^SwFG zDAd!lyU#2#`N`X_<@FjL(W&Kg!aVM{UD6pIz_-Z5*bXk-g+^?On z#uZ+#%SKs8X=9{{^bUl$&#CPURC7`FRol>a;FHQl7DyOLU{BdGGuxeHdwNRG?|dg~ ztj<(*9WiUUj1pH5ar-u4p8JB-S|{~J0-K;4Fg9fuGi(u52YeSOj|taPF=A={KAwOz9XzCm4} zR_(3ijv;X?T{!4-{IHf|Y$~;<_O{mSPe53{nO2#{sFv0KElfDn`b<&+&uEOQkvMkW zUk86rG_0dv684L^D@|q$Q^nn0M(~(B}XLDfxgNZDy@76_dV$H%QkiIoL-dI zsKgjkg@&POl@?!wwCX^$#v8kHxTX&DdCPYKm!GoHJ3Z$tVaUTa63M@O8~@y#JL6qG zxTL8VYi-FH@v@L}`%-pqib9}djEbtS_Jpc;r8xZ$gPJ>2)b zbg!YwrVnZ#xlm{K!HEOT=Bz%$#%?d&N;b?luzg)We4$s7#_eUU$NLK0B-wb;Q4g@Y z3`-cY^xG3&0rmwaflv6;?@#!TELZfatXZR3ja;3_Am4EjNQ_~YesyxSb$Mb5MDg%Nt8$?n%8q(&3o5D72HhSTvR(~n zifve$e({g8Avr0S86u9&O-5$faV}eDG*X90FTgGgR$lVW;0k-9M*1QU^j;UB+Jy}$ zLN`;=gDaZk7XT$gIS{gqr!M7v6C^!N1om3O_mT;PPo%qO!+fQ2; zhS<|z*$-ifz21+!YflH-b%ge_yvn`YWJkETUOm$lvr%BW{Ft{rAC+TDPDmnyIWms4 zVb*bHyz(dPi^xf-g{tr*#e{}NBcJSb?+PQY&S&l_REsxLrn~wUE|e)@bDS61HB9Bb z7pStyhf{D33^N;D?(qa?Q4=iDGro)S4O+&%FZVpHTcRdDOt+~rL);vAcUKPN#Bai7 zPc2)*f1nL)cEj(RbpL=4#Z`Q)Z%iCq<36!U%!`A$D-!hH*cj0@cOgGRX%-r}9*UO% z6SzZ=;H-MJ&Fb;nXovJ-gH#E8oR;8-2mP;pim{Eo>3R7i%vKoytalX08-dE#0ZS9(OIOyVuHK}N1%3v;irAj4f zA)){+p`sM;6s{Nh{SsDu(1F}d^I7)@1i@#lSe*FW_*@45xqR4NtXi7LP~-H$k>@L!_aU{ib1$9s48#*~~w3}dGBE%b^B+DM~So_+&KpR1)?eHmdson!98PEv( zAEV|-2YY`ZDs=W{a_sHS0|}0&jjFJs#; zf=(ri_Cr9LSFc`1^E?P;AZ^RA@Mw+VYliRAqm6V4GO`xdstO~jV@xV|8R+c+S-U$j zCM-evY}X?(rl#kv%|0AahUSAbUUUIv7DJ=q`gKN_ZS@k>KHN3#x?uZu6o!P&v!u}X z)IuX00uO{8zd{rUNJg+X@%x_qf9?Cfp1bW|JkVc=Q{r~!=IMv zXU**~;N#-WFF*xw%KYzuKXHFTJw|z)Y5#??4Q?^}4a!fa`7z4l?9(q4e3G9izj9HJ zNgoIKzeo?s{vdrE@;^rSd(ire9sn?;0016`u#c%9#|XdZS*U;h|6e`9-!A^K_xKAz zgZ3W?KN;ll93FcozYuuo|AFw^WBj{8{6!A{ATa)A5B_By6=Y$+Dgpq&0Dn5bb(W|f I#Zkcj062~$y8r+H literal 0 HcmV?d00001 diff --git a/tests/testthat/metadata_extras/session.meta b/tests/testthat/metadata_extras/session.meta new file mode 100644 index 00000000..91c7e2b4 --- /dev/null +++ b/tests/testthat/metadata_extras/session.meta @@ -0,0 +1 @@ +[{"Researcher":"Fredrik","Participant.ID":"PD1"}] diff --git a/tests/testthat/metadata_extras/session_0001.meta b/tests/testthat/metadata_extras/session_0001.meta new file mode 100644 index 00000000..82ae9a7c --- /dev/null +++ b/tests/testthat/metadata_extras/session_0001.meta @@ -0,0 +1 @@ +[{"Researcher":"Kalle","Participant.ID":"PD2","Gender":"Female"}] From da142029c2114336dce41563eab13a64a33da7fe Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 13 May 2022 21:06:03 +1000 Subject: [PATCH 6/7] Initial implementation of a metadata functionality Complete with documentation and unit testing. New package dependency: memoise for metadata caching Package suggestion: openslxs for metadata <-> Excel import/export --- R/emuR-metadata.R | 1371 +++++++++++++++++++++++++-------------------- 1 file changed, 768 insertions(+), 603 deletions(-) diff --git a/R/emuR-metadata.R b/R/emuR-metadata.R index 27b3d723..18904167 100644 --- a/R/emuR-metadata.R +++ b/R/emuR-metadata.R @@ -1,603 +1,768 @@ - -## Some constants -metadata.extension = "meta_json" - -## I will need this function until 0.9.0 of dplyr is released, -#possibly fixing the issue with all NA columns supplied to coalesce -# The implementation comes from https://stackoverflow.com/a/19254510 -coalesce <- function(...) { - Reduce(function(x, y) { - i <- which(is.na(x)) - x[i] <- y[i] - x}, - list(...)) -} - - -#' Functions for gathering metadata specified for recordings in an emuR database. -#' -#' Metadata of a recording is stored in 'meta_json' files. Metadata may be set at the database, session and bundle level. -#' The functions goes through the database metadata file, session metadata files and metadata files associated with -#' each bundle, parses the JSON data and returns \code{\link[dplyr]{tibble}} with one row per bundle in the database. -#' Database default values are supressed by information set in a session metadata file, and session level data are in -#' turn surpressed by data given at the bundle level. -#' The structure of the metadata does not have to be consistent across meta_json files. -#' New columns are added to the as new fields are detected. -#' -#' The function \code{export_metadata} outputs the metadata as an Excel file instead, with bundle, session and database -#' tabs. The "bundle" tab gives the complete set of all the metadata that are active for each bundle, regardness where -#' it was set (for the bundle directly, or as a session / database default value). -#' -#' The user is expected to use the functions \code{export_metadata} and \code{import_metadata} to fix -#' accedental inconsistencies in the metadata of a database across bundles by exporting all -#' information to an Excel file using \code{export_metadata}, edit columns and values (including moving inconsistently -#' spelled metadata fields into a single column with the intended name) using Excel or another editor that complies with -#' the OOXML Workbook ISO/IEC 29500:2008 standard. The user is also expected to keep the indented structure of the Excel -#' file (one row per bundle or session, and each column except for those indicating session and bundle names containing -#' metadata), otherwise it is possible that the file may not be read in again by \code{\link{import_metadata}} to set -#' updated values. -#' -#' -#' @param emuDBhandle The database handle of an emuR database. -#' @param Excelfile The full path and file name of the Excel file that the metadata should be written to. The function will not overwrite this file, unless \code{overwrite} is set to \code{TRUE}. -#' @param overwrite The default behaviour is that an Excel file should not be -#' overwritten if it exists already. If this parameter is \code{TRUE} then the file will be overwritten. -#' @param manditory A vector of column names which the function will ensure are present in both the bundle and session level metadata. -#' -#' @rdname export_metadata -#' @export -#' -#' @return A data frame containing inforamtion about the 'meta_json' files found -#' \describe{ -#' \item{session}{The name of the session.} -#' \item{bundle}{The bundle name} -#' } -#' In addition, the [dplyr::tibble] will contain one column for every type of information given in any of the 'meta_json' files. -#' -#' -#' @examples -#' \dontrun{ -#' create_emuRdemoData() -#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' -#' # Database-wide default information -#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) -#' #Bundle specific information -#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") -#' get_metadata(ae_test) -> res -#' print(res) -#' } -#' - -get_metadata <- function(emuDBhandle,session=".*", manditory=c()){ - res <- export_metadata(emuDBhandle=emuDBhandle,manditory=manditory) - return(res) -} - - -#' -#' @rdname export_metadata -#' @export -#' - -export_metadata <- function(emuDBhandle,Excelfile=NULL,overwrite=FALSE, manditory=c()){ - #Start with checking consistency regarding output file - if(! overwrite && !is.null(Excelfile) && file.exists(Excelfile)){ - stop("Could not write output file ",Excelfile,": File exists but should not be overwritten.") - } - -check_emuDBhandle(emuDBhandle) - - bundles <- list_bundles(emuDBhandle) %>% - dplyr::rename(bundle=name) - metafiles <- list_files(emuDBhandle,fileExtension = metadata.extension) - #Use the bundle list as a scaffold for a dataframe to hold the content of all metadata files - # metacontent <- metafiles[c("bundle","absolute_file_path")] - for(currFile in na.omit(metafiles$absolute_file_path)){ - jsonmeta <- jsonlite::read_json(currFile,simplifyVector = TRUE) - - # Now start inserting data from the metafiles - for(col in names(jsonmeta)){ - #Assignment of NULL is problematic, so we need to make sure we never do that - metafiles[metafiles$absolute_file_path == currFile,col] <- ifelse(is.null(jsonmeta[[col]]),NA,jsonmeta[[col]]) - } - } - # Now make sure that all bundles have a row - metafiles <- bundles %>% - dplyr::left_join(metafiles,by=c("session","bundle")) %>% - dplyr::select(-file,-absolute_file_path) - - #Add missing manditory columns - missingManditory <- setdiff(manditory, names(metafiles)) - metafiles[, missingManditory] <- NA - - #For sessions it makes less sense to use list_files as a base for finding the metadata files - #so we pre-generate the expected names and check which ones actually exist. - - sessions <- list_sessions(emuDBhandle) %>% - dplyr::rename(session=name) - - sessions$session_metadata_file <- file.path(emuDBhandle$basePath, - paste0(sessions,session.suffix), - paste(sessions,"meta_json",sep=".")) - - sessJSONFilesDF <- sessions[file.exists(sessions$session_metadata_file),] - - for(row in seq_len(nrow(sessJSONFilesDF))){ - - jsonmeta <- jsonlite::read_json(sessJSONFilesDF[row,"session_metadata_file"],simplifyVector = TRUE) - - # Now start inserting data from the session metadata file - for(col in names(jsonmeta)){ - #Assignment of NULL to a data.frame value is problematic, so we make sure we never do - sessJSONFilesDF[row,col] <- ifelse(is.null(jsonmeta[[col]]),NA,jsonmeta[[col]]) - } - } - #Make sure that all sessions have a row in the output - sessJSONFilesDF <- sessions %>% - dplyr::select(-session_metadata_file) %>% - dplyr::left_join(sessJSONFilesDF,by="session") %>% - dplyr::select(-session_metadata_file) - - #Add missing manditory columns - missingManditory <- setdiff(manditory, names(sessJSONFilesDF)) - sessJSONFilesDF[, missingManditory] <- NA - - # Make the merger with bundle files to make the final output tibble - metafiles %>% - dplyr::left_join(sessJSONFilesDF,by="session",suffix=c("","_sessionmetadatafile")) -> metafiles - - - # Now check and load metadata set at the database level - - load_DBconfig(emuDBhandle) -> dbCfg - - if(is.null(dbCfg$metadataDefaults)){ - dbMeta <- data.frame() - }else{ - dbDefaults <- as.data.frame(dbCfg$metadataDefaults,stringsAsFactors=FALSE) - if(length(dbDefaults) > 0){ - #This means that the field is not just empty - # Repeat the rows so that the columns may be merged - dbMeta <- as.data.frame(c(metafiles["bundle"],dbDefaults)) %>% - dplyr::mutate_if(is.factor,as.character) - - metafiles <- metafiles %>% - dplyr::mutate_if(is.factor,as.character)%>% - dplyr::left_join(dbMeta,by="bundle",suffix=c("","_database")) %>% - dplyr::distinct() ## This is needed since duplicate rows are introduced by the join by dbMeta - - - } - } - - #Now, there may be a metadata X column set at the bundle level, an X_sessionmetadatafile - # column set at the session level, and an X_database column for the whole database. - # These need to be reconsiled - - - cols <- names(metafiles) - duplicates <- grep("_(database|sessionmetadatafile)$",cols,value=TRUE) - duplicated <- unique(gsub("_(database|sessionmetadatafile)$","",cols)) - - - for(bundleoriginal in duplicated){ - - sessColName <- paste0(bundleoriginal,"_sessionmetadatafile") - sessVec <- ifelse(exists(sessColName,metafiles),metafiles[,sessColName],NA) - dbColName <- paste0(bundleoriginal,"_database") - dbVec <- ifelse(exists(dbColName,metafiles),metafiles[,dbColName],NA) - ## This seems odd, but it makes sure that NAs are repeated for the length of vectors. - tempDF <- data.frame(metafiles[[bundleoriginal]], - sessVec, - dbVec,stringsAsFactors = FALSE) %>% - - dplyr::mutate_if(is.factor,as.character) - - names(tempDF) <- c("bundle","session","database") - # Here the result is the first non-NA value for each row (or NA if the row in tempDF contains only NAs) - metafiles[bundleoriginal] <- with(tempDF,coalesce(bundle,session,database)) - # This works since you can always remove a column without an error message (even non-existing ones) - metafiles[sessColName] <- NULL - metafiles[dbColName] <- NULL - } - - #Prepare an Excel workbook, if one should be written - if(!is.null(Excelfile)){ - wb <- openxlsx::createWorkbook(paste(emuDBhandle$dbName,"bundle")) - openxlsx::addWorksheet(wb,"bundles") - openxlsx::writeDataTable(wb,"bundles",x=metafiles,keepNA = FALSE,withFilter=FALSE) - openxlsx::freezePane(wb,"bundles",firstActiveCol = 5) - openxlsx::setColWidths(wb,"bundles",cols=5:30,widths = 18) - - # session information - - openxlsx::addWorksheet(wb,"sessions") - openxlsx::writeDataTable(wb,"sessions",x=sessJSONFilesDF,keepNA = FALSE,withFilter=FALSE) - openxlsx::freezePane(wb,"sessions",firstActiveCol = 3) - openxlsx::setColWidths(wb,"sessions",cols=3:30,widths = 18) - #database defaults - openxlsx::addWorksheet(wb,"database") - if(ncol(dbMeta)> 0){ - openxlsx::writeDataTable(wb,"database",x=dbMeta,keepNA = FALSE,withFilter=FALSE) - }else{ - openxlsx::writeComment(wb,"database",col=1,row=1, - openxlsx::createComment("Here the user may set database-wide metadata by setting by writing a parameter name as the header (top row) of a column, and the value underneath",author="EmuR")) - } - # We do not need to check owrwriting here as that is handled by saveWorkbook - openxlsx::saveWorkbook(wb,file=Excelfile,overwrite=overwrite) - } - - return(metafiles) - -} - - -#' Functions to import or add metadata information to database bundles. -#' -#' The function takes an appropriately structured Excel file and uses the -#' information to set metadata for bundles. -#' -#' The first sheet ("bundles") in the Excel file should begin with the folowing two columns: -#' \itemize{ -#' \item session -#' \item bundle -#' } -#' and then go on to have some columns which contains the metadata. Each row in the -#' data contains the information and metadata for a bundle (in the specific session). -#' The simples way to get an appropriately structured Excel file is to create one from a database using the -#' \code{\link{export_metadata}} function on an existing database and given an output file. -#' -#' Please be aware that bundles that are speficied in the Excel file will have -#' their metadata files (ending with '.meta_json') overwritten when using the -#' \code{import_metadata}. So, please make sure to remove the rows of bundles that should -#' not be altered from the Excel file before importing the metadata from it using this function. -#' -#' Date and time fields are assumed to follow the ISO8601 specification, and an attempt to convert them to the -#' approprite JSON representation will be made. The user should be aware that this conversion is made however, and -#' watch out unexpected results in advanced cases. -#' -#' @param emuDBhandle The emuR database handle of the database. -#' @param Excelfile The path to a properly formated Excel (.xlsx) file. -#' -#' @return A vector of .meta_json files updated by the call. The path for each file is given relative to the base of the EmuR database. -#' @export -#' -import_metadata <- function(emuDBhandle,Excelfile){ - if(!file.exists(Excelfile)){ - stop("Unable to open the metadata Excel file.\nThe file ",Excelfile," does not exist!") - } - openxlsx::read.xlsx(Excelfile,sheet="bundles",detectDates=TRUE) -> meta - - #Make sure we have an output file with full path - meta <- meta %>% - - dplyr::mutate(metadatafile=file.path(emuDBhandle$basePath, - paste0(session,session.suffix), - paste0(bundle,bundle.dir.suffix), - paste0(bundle,".",metadata.extension)) - ) - #Now to the main business of the function - - json <- c() - for(r in 1:nrow(meta)){ - meta %>% - - dplyr::slice(r) %>% - dplyr::select(-session,-bundle,-metadatafile) %>% - dplyr::select_if(function(x) !is.na(x)) -> jsondat - currJSON <- ifelse(length(jsondat) > 0, - jsonlite::toJSON(jsondat,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows"), - "[{}]" #Just an empty JSON vector - ) - json <- c(json, currJSON) - } - json <- data.frame("json"=json) - towrite <- meta %>% - - dplyr::bind_cols(json) %>% - dplyr::mutate(json=as.character(json)) %>% - dplyr::select(json,metadatafile) - - #Write the bundle metadata files - for(r in 1:nrow(towrite)){ - fileConn <- file(towrite[r,"metadatafile"]) - writeLines(towrite[r,"json"], fileConn) - close(fileConn) - } - bFiles <- gsub(paste0(emuDBhandle$basePath,"/"),"",towrite[["metadatafile"]]) - - ## Now process session metadata files - - openxlsx::read.xlsx(Excelfile,sheet="sessions") -> sessionMeta - sessjsondat <- sessionMeta %>% - dplyr::select(-session) - json <- sessjsondat %>% - dplyr::rowwise() %>% - dplyr::do(json=jsonlite::toJSON(.,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows",auto_unbox=TRUE)) %>% - unlist() - json <- data.frame("json"=as.vector(json)) - - towriteSess <- sessionMeta %>% - dplyr::mutate(session_metadata_file=paste0(session,".",metadata.extension)) %>% - dplyr::bind_cols(json) %>% - dplyr::select(session,session_metadata_file,json) - ## Här finns inte session_metadata_file - #Write the bundle metadata files - for(r in 1:nrow(towriteSess)){ - outFile <- file.path(emuDBhandle$basePath, - paste0(towriteSess[r,"session"],session.suffix), - towriteSess[r,"session_metadata_file"]) - fileConn <- file(outFile) - writeLines(as.character(towriteSess[r,"json"]), fileConn) - close(fileConn) - outFile <- gsub(paste0(emuDBhandle$basePath,"/"),"",outFile) - sFiles <- ifelse(exists("sFiles"),c(sFiles,outFile),c(outFile)) - } - - # Now inject database wide metadata - - load_DBconfig(emuDBhandle) -> dbCfg - openxlsx::read.xlsx(Excelfile,sheet="database") -> dbMeta - dbCfg$metadataDefaults <- as.list(dbMeta) - store_DBconfig(emuDBhandle,dbCfg) - - return(c(sFiles,bFiles)) -} - - - -#' A utility function used for programatically setting metadata for a bundle, or -#' default values for a session or an entire database. -#' -#' The function takes a list and a specification of where the metadata should be -#' set. The default behaviour is to keep already set metadata, and overwrite -#' only the values that are named in the list. The user may change this -#' behaviour by setting \code{reset.before.add=TRUE}, in which case all previous -#' bundle, session or database level metadata will be replaced with the contents -#' of the list. -#' -#' If a bundle name and a \code{session} name is provided, the metadata will be -#' inserted only for that fully specified \code{bundle}. If only a \code{bundle} -#' name is provided, the function will add the metadata for the bundle only if -#' there is just one session in the database. If there are multiple -#' \code{session}s, the function will given an error. -#' -#' If no \code{session} or \code{bundle} names are provided, the metadata will -#' be inserted as default values for the entire database. Please note that -#' database wide ingested metadata defaults currently interferes with the Emu -#' web app and should not be put into general use before this issue has been -#' fixed in the Emu web app. -#' -#' @param emuDBhandle An Emu database handle -#' @param metadataList A list specifying the metadata to be set. If set to an -#' empty list (\code{list()}) the function will clear all metadata, if the -#' argument \code{reset.before.add=TRUE} is given by the user. The user may -#' also clear (remove from the set of defined metadata) by setting the -#' property to NULL. -#' @param bundle An optional name of a bundle -#' @param session An optional name of a session -#' @param reset.before.add If set to TRUE, the function will ignore previously -#' set metadata and simply add the metadata supplied in the list. -#' -#' @return -#' @export -#' -#' @examples -#' \dontrun{ -#' create_emuRdemoData() -#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' -#' # Database-wide default information -#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) -#' #Bundle specific information -#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") -#' get_metadata(ae_test) -> res -#' print(res) -#' } -#' -add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, reset.before.add=FALSE){ - - if(is.null(bundle) && is.null(session)){ - - #Database wide injection - - load_DBconfig(emuDBhandle) -> dbCfg - - if(reset.before.add){ - dbCfg$metadataDefaults <- as.list(metadataList) - } else { - #Append data - prev <- dbCfg$metadataDefaults - prev[names(metadataList)] <- metadataList - dbCfg$metadataDefaults <- prev - } - - - store_DBconfig(emuDBhandle,dbCfg) - - } else { - # Here we store metadata in either session wide or bundle specific metadata files - # Since these files use the same structure, the business here is to set the correct metadatafile filename. - - if(! is.null(session) & is.null(bundle)){ - #Session level metadata - - metadatafile <- file.path(emuDBhandle$basePath, - paste0(session,session.suffix), - paste0(session,".",metadata.extension)) - } - - - if(! is.null(bundle)){ - #Bundle metadata - if(is.null(session)){ - ses <- list_sessions(emuDBhandle) - if(nrow(ses) == 1){ - #use the name of the only available session - session <- ses[[1]] - }else{ - stop("If you provide a bundle name you need to provide a session name if there are more than one sessions in the database.") - } - - } - - metadatafile <- file.path(emuDBhandle$basePath, - paste0(session,session.suffix), - paste0(bundle,bundle.dir.suffix), - paste0(bundle,".",metadata.extension)) - - - } - if(reset.before.add | ! file.exists(metadatafile) ){ - #Start fresh / overwrite previous values - jsonmetaList <- list() - }else{ - - #Read in previous values - jsonmetaList <- as.list(jsonlite::read_json(metadatafile,simplifyVector = TRUE)) - - } - #set / overwrite metadata from list - #jsonmetaList[names(metadataList)] <- metadataList - jsonmetaList <- utils::modifyList(jsonmetaList,metadataList,keep.null = FALSE) - jsonlite::write_json(jsonmetaList,metadatafile,auto_unbox=TRUE) - } -} - - -#' Add identifying information based on the content of the wave file to the metadata information for the bundle. -#' -#' This function will extract information (lenght of recording and a checksum) from the wav file associated with a bundle, and add it to the set of metadata -#' for the bundle. This information can later be used to verify that the file has not been altered later on, or to deidentify -#' wav files in a reversable manner for use outside of the emuR framework. Deidentified files are sometimes useful for blinded randomized -#' perceptual testing, and the ability to reverse the procedure is then essential to link the results of the evaluation back to the original -#' recording extracted from the emuR data base. The user may create checksums by multiple algorithms by running the function again with different \code{algorithm} arguments. -#' -#' @param emuDBhandle The handle for the emuR database. -#' @param sessionPattern A regexp pattern that allows the user to limit which sessions should be affected by the manipulation. -#' @param bundlePattern A regexp pattern that allows the user to limit which bundles to include. -#' @param algorithm The name of the hashing algorithm, according to the \code{\link[digest]{digest}} function. -#' -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' create_emuRdemoData() -#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' -#' #Add a md5 digest to the metadata of all bundles -#' add_digests(ae_test,algorithm = "md5") -#' -#' #Add a "sha1" checksum (the default) to some bundles -#' add_digests(ae_test,bundlePattern = "msajc0.*") -#' get_metadata(ae_test) -> res -#' print(res) -#' } -#' -add_digests <- function(emuDBhandle,sessionPattern=".*",bundlePattern=".*",algorithm="sha1"){ - wavs <- list_files(emuDBhandle,fileExtension = "*.wav",sessionPattern=sessionPattern,bundlePattern=bundlePattern) - for(f in 1:nrow(wavs)){ - inFile <- unlist(wavs[f,"absolute_file_path"],use.names = FALSE) - session <- unlist(wavs[f,"session"],use.names = FALSE) - bundle <- unlist(wavs[f,"bundle"],use.names = FALSE) - - wrassp::read.AsspDataObj(inFile) -> w - options(digits=15) - attr(w,"sampleRate") -> sr - attr(w,"endRecord") - attr(w,"startRecord") +1 -> samples - samples / sr *1000 -> duration - rm(w) - digest::digest(inFile,file=TRUE,algo=algorithm) -> checksum - metadata <- list("Bundle.Duration.ms"=duration) - metadata[paste0("Bundle.",algorithm,"_checksum")] <- checksum - - add_metadata(emuDBhandle,metadata,session=session,bundle=bundle) - - } -} - - - -#' Create a biography of the labels in a list of segments in a tidy manner -#' -#' @param segs_tbl The \code{\link[dplyr]{tibble}} that is the result \code{\link[emuR]{query}} call. -#' @param emuDBhandle A \code{\link{emuR}} database handle. -#' @param compute_digests Should information that describes the recorded sound files be computed so that is is definitelly part of the -#' added metadata information. -#' @param algorithm The checksum algorithm that should be used when computing sound file information. -#' -#' @return A \code{\link[dplyr]{tibble}} -#' @export -#' -#' @examples -#' \dontrun{ -#' create_emuRdemoData() -#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) -#' -#' # Database-wide default information -#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) -#' #Bundle specific information -#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") -#' -#' # Get all the 'n' segments in the database -#' query(ae_test,"Phonetic = n",resultType = "tibble") -> ae_nt -#' # Add information related to the nature the recording sessions -#' # e.g. the speaker ID, the date of the recording -#' ae_nt %>% biographize(ae_test) %>% glimpse() -#' # This code does the same as the above, but it will also compute new -#' # information that is strictly aimed at identifying the recording -#' # (length of recording (in ms) and a sha1 digest of the wav file). -#' ae_nt %>% -#' biographize(ae_test,compute_digests=TRUE,algorithm="sha1") %>% -#' glimpse() -#' rm(ae_test) -#' -#' } -#' -biographize <- function(segs_tbl,emuDBhandle,compute_digests=FALSE,algorithm="sha1") { - #make sure that the first argument is a segment list, and that - # it contains "session" and "bundle" columns. - if(! is.data.frame(segs_tbl) || !c("session", "bundle") %in% names(segs_tbl)){ - out <- paste("The input to the",match.call()[[1]], "has to be a 'tibble' or a 'data.frame'.") - stop(out) - } - if(compute_digests==TRUE){ - add_digests(emuDBhandle,algorithm = algorithm) - } - #Here we use the special mode of export_medatata to get a data structure rather than an Excel file. - mdata <- get_metadata(emuDBhandle,session = ".*") - - out <- segs_tbl %>% - dplyr::left_join(mdata,by = c("session", "bundle")) - - return(out) -} - - - -### INTERACTIVE testing - -#unlink_emuRDemoDir() -#create_ae_db() -> emuDBhandle -#add_metadata(emuDBhandle,session = "0000",bundle = "msajc003",metadataList = list(Gender=NA,Age=10)) -#rstudioapi::navigateToFile(file.path(emuDBhandle$basePath,"ae_DBconfig.json")) -#add_metadata(emuDBhandle,metadataList = list(Gender=NA,Age=10)) - -#add_metadata(emuDBhandle,session = "0000",bundle = "msajc003",metadataList = list(Gender=NULL,Age=20)) - -# dir.create(file.path(emuDBhandle$basePath,"temp")) -# for(i in 1:11){ -# file.copy(file.path(emuDBhandle$basePath,"0000_ses"),file.path(emuDBhandle$basePath,"temp"),recursive = TRUE) -# newName <- paste0(i,i,i,i,"_ses") -# file.rename(file.path(emuDBhandle$basePath,"temp","0000_ses"),file.path(emuDBhandle$basePath,"temp",newName)) -# file.copy(file.path(emuDBhandle$basePath,"temp",newName),emuDBhandle$basePath,recursive = TRUE) -# unlink(file.path(emuDBhandle$basePath,"temp",newName),recursive = TRUE) -# } - - - -#export_metadata(ae,Excelfile = "~/Desktop/out.xlsx",overwrite = TRUE) -#rstudioapi::navigateToFile(list_files(emuDBhandle,"meta_json")$absolute_file_path) -#export_metadata(ae,Excelfile = "~/Desktop/out.xlsx") + +## Some constants +metadata.suffix = "_meta.json" + +## I will need this function until 0.9.0 of dplyr is released, +#possibly fixing the issue with all NA columns supplied to coalesce +# The implementation comes from https://stackoverflow.com/a/19254510 +coalesce <- function(...) { + Reduce(function(x, y) { + i <- which(is.na(x)) + x[i] <- y[i] + x}, + list(...)) +} + + +#' Functions for gathering metadata specified for recordings in an emuR database. +#' +#' Metadata of a recording is stored in '_meta.json' files. Metadata may be set at the database, session and bundle level. +#' The functions goes through the database metadata file, session metadata files and metadata files associated with +#' each bundle, parses the JSON data and returns \code{\link[dplyr]{tibble}} with one row per bundle in the database. +#' Database default values are supressed by information set in a session metadata file, and session level data are in +#' turn surpressed by data given at the bundle level. +#' The structure of the metadata does not have to be consistent across meta_json files. +#' New columns are added to the as new fields are detected. +#' +#' The function \code{export_metadata} outputs the metadata as an Excel file instead, with bundle, session and database +#' tabs. The "bundle" tab gives the complete set of all the metadata that are active for each bundle, regardness where +#' it was set (for the bundle directly, or as a session / database default value). +#' +#' The user is expected to use the functions \code{export_metadata} and \code{import_metadata} to fix +#' accedental inconsistencies in the metadata of a database across bundles by exporting all +#' information to an Excel file using \code{export_metadata}, edit columns and values (including moving inconsistently +#' spelled metadata fields into a single column with the intended name) using Excel or another editor that complies with +#' the OOXML Workbook ISO/IEC 29500:2008 standard. The user is also expected to keep the indented structure of the Excel +#' file (one row per bundle or session, and each column except for those indicating session and bundle names containing +#' metadata), otherwise it is possible that the file may not be read in again by \code{\link{import_metadata}} to set +#' updated values. +#' +#' +#' @param emuDBhandle The database handle of an emuR database. +#' @param Excelfile The full path and file name of the Excel file that the metadata should be written to. The function will not overwrite this file, unless \code{overwrite} is set to \code{TRUE}. +#' @param overwrite The default behaviour is that an Excel file should not be +#' overwritten if it exists already. If this parameter is \code{TRUE} then the file will be overwritten. +#' @param manditory A vector of column names which the function will ensure are present in both the bundle and session level metadata. +#' +#' @rdname export_metadata +#' @export +#' +#' @return A data frame containing inforamtion about the '_meta.json' files found +#' \describe{ +#' \item{session}{The name of the session.} +#' \item{bundle}{The bundle name} +#' } +#' In addition, the [dplyr::tibble] will contain one column for every type of information given in any of the 'meta_json' files. +#' +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' # Database-wide default information +#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) +#' #Bundle specific information +#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' get_metadata(ae_test) -> res +#' print(res) +#' } +#' + +get_metadata <- function(emuDBhandle,sessionPattern=".*", bundlePattern=".*",manditory=c(),clearCache=FALSE, verbose=TRUE){ + if(clearCache){ + if(verbose){ + cat("INFO: Clearing metadata cache\n") + } + memoise::forget(metadata_cache) + } + if(exists("metadata_cache")){ + if(!memoise::has_cache(metadata_cache)(emuDBhandle,manditory)){ + if(verbose){ + res <- metadata_cache(emuDBhandle,manditory,progressBar=TRUE) + } else { + res <- metadata_cache(emuDBhandle,manditory,progressBar=FALSE) + } + } else { + res <- metadata_cache(emuDBhandle,manditory) + } + } else { + metadata_cache <- memoise::memoise(export_metadata) + res <- metadata_cache(emuDBhandle,manditory) + } + return(res%>% dplyr::filter(grepl(sessionPattern,session)) %>% dplyr::filter(grepl(bundlePattern,bundle))) +} + + +#' +#' @rdname export_metadata +#' @export +#' + +export_metadata <- function(emuDBhandle,xlsxFile=NULL,overwrite=FALSE, manditory=c(), progressBar=FALSE){ + #Start with checking consistency regarding output file + if(overwrite && !is.null(xlsxFile) && file.access(xlsxFile,2)){ + stop("Can not write output to file ",xlsxFile,": File exists but does not have permission to be overwriten.") + } + if(!overwrite && !is.null(xlsxFile) && file.access(xlsxFile,0)){ + stop("Can not write output to file ",xlsxFile,": Set variable overwrite=TRUE if you want to overwrite.") + } + if (!is.null(xlsxFile) && !requireNamespace("openxlsx", quietly = TRUE)){ + stop("'openxlsx' package required to run export metadata to an xlsx file.") + } + if (!is.null(xlsxFile) && !progressBar){ + #Exporting to xlsx without progress bar is not recommended + progressBar = TRUE + } + check_emuDBhandle(emuDBhandle) + + bundles <- list_bundles(emuDBhandle) %>% + dplyr::rename(bundle=name) + if(progressBar){ + cat(paste0("INFO: Reading _meta.json files for ", nrow(bundles), " bundles\n")) + pb <- txtProgressBar(min = 0, max = nrow(bundles), style = 3) + } + metafiles <- list_metafiles(emuDBhandle) + + #Use the bundle list as a scaffold for a dataframe to hold the content of all metadata files + # metacontent <- metafiles[c("bundle","absolute_file_path")] + i <- 0 + for(currFile in na.omit(metafiles$absolute_file_path)){ + jsonmeta <- jsonlite::read_json(currFile,simplifyVector = TRUE) + i <- i+1 + if(progressBar){ + setTxtProgressBar(pb, i) + } + # Now start inserting data from the metafiles + for(col in names(jsonmeta)){ + #Assignment of NULL is problematic, so we need to make sure we never do that + metafiles[metafiles$absolute_file_path == currFile,col] <- ifelse(is.null(jsonmeta[[col]]),NA,jsonmeta[[col]]) + } + } + if(progressBar){ + close(pb) + } + # Now make sure that all bundles have a row + metafiles <- bundles %>% + dplyr::left_join(metafiles,by=c("session","bundle")) %>% + dplyr::select(-file,-absolute_file_path) + + #Add missing manditory columns + missingManditory <- setdiff(manditory, names(metafiles)) + metafiles[, missingManditory] <- NA + + #For sessions it makes less sense to use list_files as a base for finding the metadata files + #so we pre-generate the expected names and check which ones actually exist. + + sessions <- list_sessions(emuDBhandle) %>% + dplyr::rename(session=name) + + sessions$session_metadata_file <- file.path(emuDBhandle$basePath, + paste0(sessions$session,session.suffix), + paste0(sessions$session,metadata.suffix)) + + sessJSONFilesDF <- sessions[file.exists(sessions$session_metadata_file),] + + for(row in seq_len(nrow(sessJSONFilesDF))){ + + jsonmeta <- jsonlite::read_json(sessJSONFilesDF[row,"session_metadata_file"],simplifyVector = TRUE) + + # Now start inserting data from the session metadata file + for(col in names(jsonmeta)){ + #Assignment of NULL to a data.frame value is problematic, so we make sure we never do + sessJSONFilesDF[row,col] <- ifelse(is.null(jsonmeta[[col]]),NA,jsonmeta[[col]]) + } + } + #Make sure that all sessions have a row in the output + sessJSONFilesDF <- sessions %>% + dplyr::select(-session_metadata_file) %>% + dplyr::left_join(sessJSONFilesDF,by="session") %>% + dplyr::select(-session_metadata_file) + + #Add missing manditory columns + missingManditory <- setdiff(manditory, names(sessJSONFilesDF)) + sessJSONFilesDF[, missingManditory] <- NA + + # Make the merger with bundle files to make the final output tibble + metafiles %>% + dplyr::left_join(sessJSONFilesDF,by="session",suffix=c("","_sessionmetadatafile")) -> metafiles + + # Now check and load metadata set at the database level + + database_metadata_file <- file.path(emuDBhandle$basePath, + paste0(emuDBhandle$dbName,metadata.suffix)) + + if(!file.exists(database_metadata_file)){ + dbMeta <- data.frame() + }else{ + jsonmeta <- jsonlite::read_json(database_metadata_file,simplifyVector = TRUE) + + dbDefaults <- as.data.frame(jsonmeta,stringsAsFactors=FALSE) + if(length(dbDefaults) > 0){ + #This means that the field is not just empty + # Repeat the rows so that the columns may be merged + dbMeta <- as.data.frame(c(metafiles["bundle"],dbDefaults)) %>% + dplyr::mutate_if(is.factor,as.character) + + metafiles <- metafiles %>% + dplyr::mutate_if(is.factor,as.character)%>% + dplyr::left_join(dbMeta,by="bundle",suffix=c("","_databasemetadatafile")) %>% + dplyr::distinct() ## This is needed since duplicate rows are introduced by the join by dbMeta + + + } + } + + #Now, there may be a metadata X column set at the bundle level, an X_sessionmetadatafile + # column set at the session level, and an X_database column for the whole database. + # These need to be reconsiled + + + cols <- names(metafiles) + duplicates <- grep("_(databasemetadatafile|sessionmetadatafile)$",cols,value=TRUE) + duplicated <- unique(gsub("_(databasemetadatafile|sessionmetadatafile)$","",cols)) + + + for(bundleoriginal in duplicated){ + + sessColName <- paste0(bundleoriginal,"_sessionmetadatafile") + sessVec <- ifelse(exists(sessColName,metafiles),metafiles[,sessColName],NA) + dbColName <- paste0(bundleoriginal,"_databasemetadatafile") + dbVec <- ifelse(exists(dbColName,metafiles),metafiles[,dbColName],NA) + ## This seems odd, but it makes sure that NAs are repeated for the length of vectors. + tempDF <- data.frame(metafiles[[bundleoriginal]], + sessVec, + dbVec,stringsAsFactors = FALSE) %>% + + dplyr::mutate_if(is.factor,as.character) + + names(tempDF) <- c("bundle","session","database") + # Here the result is the first non-NA value for each row (or NA if the row in tempDF contains only NAs) + metafiles[bundleoriginal] <- with(tempDF,coalesce(bundle,session,database)) + # This works since you can always remove a column without an error message (even non-existing ones) + metafiles[sessColName] <- NULL + metafiles[dbColName] <- NULL + } + + #Prepare an OpenXML Excel workbook, if one should be written + if(!is.null(xlsxFile)){ + wb <- openxlsx::createWorkbook(paste(emuDBhandle$dbName,"bundle")) + openxlsx::addWorksheet(wb,"bundles") + openxlsx::writeDataTable(wb,"bundles",x=metafiles,keepNA = FALSE,withFilter=FALSE) + openxlsx::freezePane(wb,"bundles",firstActiveCol = 5) + openxlsx::setColWidths(wb,"bundles",cols=5:30,widths = 18) + + # session information + + openxlsx::addWorksheet(wb,"sessions") + openxlsx::writeDataTable(wb,"sessions",x=sessJSONFilesDF,keepNA = FALSE,withFilter=FALSE) + openxlsx::freezePane(wb,"sessions",firstActiveCol = 3) + openxlsx::setColWidths(wb,"sessions",cols=3:30,widths = 18) + #database defaults + openxlsx::addWorksheet(wb,"database") + if(ncol(dbMeta)> 0){ + openxlsx::writeDataTable(wb,"database",x=dbMeta,keepNA = FALSE,withFilter=FALSE) + }else{ + openxlsx::writeComment(wb,"database",col=1,row=1, + openxlsx::createComment("The database default keys go in the header (top row) of a column",author="EmuR")) + openxlsx::writeComment(wb,"database",col=1,row=2, + openxlsx::createComment("The database default values go below in the second row of a column, and the value underneath",author="EmuR")) + + } + # We do not need to check overwriting here as that is handled by saveWorkbook + openxlsx::saveWorkbook(wb,file=xlsxFile,overwrite=overwrite) + } + + return(metafiles) + +} + + +#' Functions to import or add metadata information to database bundles. +#' +#' The function takes an appropriately structured Excel file and uses the +#' information to set metadata for bundles. +#' +#' The first sheet ("bundles") in the Excel file should begin with the folowing two columns: +#' \itemize{ +#' \item session +#' \item bundle +#' } +#' and then go on to have some columns which contains the metadata. Each row in the +#' data contains the information and metadata for a bundle (in the specific session). +#' The simples way to get an appropriately structured Excel file is to create one from a database using the +#' \code{\link{export_metadata}} function on an existing database and given an output file. +#' +#' Please be aware that bundles that are speficied in the Excel file will have +#' their metadata files (ending with '_meta.json') overwritten when using the +#' \code{import_metadata}. So, please make sure to remove the rows of bundles that should +#' not be altered from the Excel file before importing the metadata from it using this function. +#' +#' Date and time fields are assumed to follow the ISO8601 specification, and an attempt to convert them to the +#' approprite JSON representation will be made. The user should be aware that this conversion is made however, and +#' watch out unexpected results in advanced cases. +#' +#' @param emuDBhandle The emuR database handle of the database. +#' @param xlsxFile The path to a properly formated Excel (.xlsx) file. +#' +#' @return A vector of _meta.json files updated by the call. The path for each file is given relative to the base of the EmuR database. +#' @export +#' +import_metadata <- function(emuDBhandle,xlsxFile, deleteExisting=FALSE, verbose=TRUE){ + if (!requireNamespace("openxlsx", quietly = TRUE)){ + stop("'openxlsx' package required to run import_metadata()") + } + if(!file.exists(xlsxFile)){ + stop("Unable to open the metadata Excel file.\nThe file ",xlsxFile," does not exist!") + } + + #Open xlsx file and read sheets to dataframes + + openxlsx::read.xlsx(xlsxFile,sheet="bundles",detectDates=TRUE) -> meta + openxlsx::read.xlsx(xlsxFile,sheet="sessions") -> sessionMeta + openxlsx::read.xlsx(xlsxFile,sheet="database") -> dbMeta + + if(nrow(dbMeta) > 1){ + stop("Sheet 'database' in ",xlsxFile," contains more than the allowable one row (+ header)") + } + if(verbose){ + cat(paste0("INFO: Reading ", xlsxFile, " and removing inherited values\n")) + pb <- txtProgressBar(min = 0, max = 1 + nrow(sessionMeta), style = 3) + } + + #Remove duplicate inheritance values from session and bundles + for(col in names(dbMeta)){ + if(!is.na(dbMeta[[col]])){ + if(!is.null(sessionMeta[[col]])){ + sessionMeta[[col]] <- ifelse(sessionMeta[[col]] == dbMeta[[col]], NA, sessionMeta[[col]]) + } + if(!is.null(meta[[col]])){ + meta[[col]] <- ifelse(meta[[col]] == dbMeta[[col]], NA, meta[[col]]) + } + } + } + if(verbose){ + setTxtProgressBar(pb, 1) + } + for(row in 1:nrow(sessionMeta)){ + for(col in names(sessionMeta)){ + if(col != "session" && !is.na(sessionMeta[[col]][row])){ + if(!is.null(meta[[col]])){ + meta[[col]] <- ifelse(meta[["session"]] == sessionMeta[["session"]][row] & meta[[col]] == sessionMeta[[col]][row], NA, meta[[col]]) + } + } + } + if(verbose){ + setTxtProgressBar(pb, 1 + row) + } + } + if(verbose){ + close(pb) + } + + #Write any remaining (non-NA) rows to _meta.json files + if(verbose){ + cat(paste0("INFO: Writing _meta.json for ", nrow(meta), " bundles, ", nrow(sessionMeta), " sessions and ", nrow(dbMeta), " database\n")) + pb <- txtProgressBar(min = 0, max = nrow(sessionMeta) + nrow(meta), style = 3) + } + + #Give the bundle dataframe an output file column with full path + meta <- meta %>% + dplyr::mutate(metadatafile=file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(bundle,bundle.dir.suffix), + paste0(bundle,metadata.suffix)), + json=rep(NA, nrow(meta))) + #Create json string from non-NA columns + colsOnly <- meta %>% dplyr::select(-session,-bundle,-metadatafile,-json) + for(row in 1:nrow(colsOnly)){ + if(verbose){ + setTxtProgressBar(pb, row) + } + colsOnly[row,] %>% + dplyr::select(where(function(x) !is.na(x))) -> jsondat + + if(length(jsondat) > 0){ + meta$json[row] <-jsonlite::toJSON(jsondat,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows") + #Write the bundle metadata files + fileConn <- file(meta$metadatafile[row]) + writeLines(as.character(meta$json[row]), fileConn) + close(fileConn) + } else if(file.exists(meta$metadatafile[row]) && deleteExisting){ + file.remove(meta$metadatafile[row]) + } + } + + bFiles <- meta %>% dplyr::filter(!is.na(json)) %>% + dplyr::select(metadatafile) + + ## Now process session metadata files + + sessionMeta <- sessionMeta %>% + dplyr::mutate(metadatafile=file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(session,metadata.suffix)), + json=rep(NA, nrow(sessionMeta))) + #Create json string from non-NA columns + colsOnly <- sessionMeta %>% dplyr::select(-session,-metadatafile,-json) + pbOffset = nrow(meta) + for(row in 1:nrow(colsOnly)){ + if(verbose){ + setTxtProgressBar(pb, row + pbOffset) + } + colsOnly[row,] %>% + dplyr::select(where(function(x) !is.na(x))) -> jsondat + + if(length(jsondat) > 0){ + sessionMeta$json[row] <-jsonlite::toJSON(jsondat,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows") + #Write the session metadata files + fileConn <- file(sessionMeta$metadatafile[row]) + writeLines(as.character(sessionMeta$json[row]), fileConn) + close(fileConn) + } else if(file.exists(sessionMeta$metadatafile[row]) && deleteExisting){ + file.remove(sessionMeta$metadatafile[row]) + } + } + + sFiles <- sessionMeta %>% dplyr::filter(!is.na(json)) %>% + dplyr::select(metadatafile) + + if(verbose){ + close(pb) + } + + # Now inject database wide metadata + database_metadata_file <- file.path(emuDBhandle$basePath, + paste0(emuDBhandle$dbName,metadata.suffix)) + + if(nrow(dbMeta) == 1){ + dbMetaJSON <-jsonlite::toJSON(dbMeta,raw="base64",na="null",complex="string",factor="string",POSIXt="ISO8601",Date="ISO8601",null="null",dataframe = "rows") + + fileConn <- file(database_metadata_file) + writeLines(as.character(dbMetaJSON), fileConn) + close(fileConn) + + return(c(database_metadata_file,sFiles,bFiles)) + } else { + if(file.exists(database_metadata_file) && deleteExisting){ + file.remove(database_metadata_file) + } + return(c(sFiles,bFiles)) + } +} + + +#' A utility function used for programatically setting metadata for a bundle or +#' session or default values for an entire database. +#' +#' The function takes a list and a specification of where the metadata should be +#' set. The default behaviour is to keep already set metadata, and overwrite +#' only the values that are named in the list. The user may change this +#' behaviour by setting \code{reset.before.add=TRUE}, in which case all previous +#' bundle, session or database level metadata will be replaced with the contents +#' of the list. +#' +#' If a bundle name and a \code{session} name is provided, the metadata will be +#' inserted only for that fully specified \code{bundle}. If only a \code{bundle} +#' name is provided, the function will add the metadata for the bundle only if +#' there is just one session in the database. If there are multiple +#' \code{session}s, the function will given an error. +#' +#' If no \code{session} or \code{bundle} names are provided, the metadata will +#' be inserted as default values for the entire database. Please note that +#' database wide ingested metadata defaults currently interferes with the Emu +#' web app and should not be put into general use before this issue has been +#' fixed in the Emu web app. +#' +#' @param emuDBhandle An Emu database handle +#' @param metadataList A list specifying the metadata to be set. If set to an +#' empty list (\code{list()}) the function will clear all metadata, if the +#' argument \code{reset.before.add=TRUE} is given by the user. The user may +#' also clear (remove from the set of defined metadata) by setting the +#' property to NULL. +#' @param bundle An optional name of a bundle +#' @param session An optional name of a session +#' @param reset.before.add If set to TRUE, the function will ignore previously +#' set metadata and simply add the metadata supplied in the list. +#' +#' @return +#' @export +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' # Database-wide default information +#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) +#' #Bundle specific information +#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' get_metadata(ae_test) -> res +#' print(res) +#' } +#' +add_metadata <- function(emuDBhandle,metadataList,bundle=NULL,session=NULL, reset.before.add=FALSE){ + # Here we store metadata in either database wide, session wide or bundle specific metadata files + # Since these files use the same structure, the only difference is to set the correct metadatafile filename. + + if(is.null(bundle) && is.null(session)){ + + #Database level metadata + + metadatafile <- file.path(emuDBhandle$basePath, + paste0(emuDBhandle$dbName,metadata.suffix)) + + } else if(! is.null(session) & is.null(bundle)){ + #Session level metadata + + metadatafile <- file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(session,metadata.suffix)) + } else if(! is.null(bundle)){ + #Bundle metadata + if(is.null(session)){ + ses <- list_sessions(emuDBhandle) + if(nrow(ses) == 1){ + #use the name of the only available session + session <- ses[[1]] + }else{ + stop("If you provide a bundle name you need to provide a session name if there are more than one sessions in the database.") + } + + } + + metadatafile <- file.path(emuDBhandle$basePath, + paste0(session,session.suffix), + paste0(bundle,bundle.dir.suffix), + paste0(bundle,metadata.suffix)) + + + } + + if(reset.before.add | ! file.exists(metadatafile) ){ + #Start fresh / overwrite previous values + jsonmetaList <- list() + } else{ + + #Read in previous values + jsonmetaList <- as.list(jsonlite::read_json(metadatafile,simplifyVector = TRUE)) + + } + #set / overwrite metadata from list + #jsonmetaList[names(metadataList)] <- metadataList + jsonmetaList <- utils::modifyList(jsonmetaList,metadataList,keep.null = FALSE) + jsonlite::write_json(jsonmetaList,metadatafile,auto_unbox=TRUE) + #Reset the metadata cache now that we have writen new data to it + memoise::forget(metadata_cache) + +} + + +#' Add identifying information based on the content of the wave file to the metadata information for the bundle. +#' +#' This function will extract information (lenght of recording and a checksum) from the wav file associated with a bundle, and add it to the set of metadata +#' for the bundle. This information can later be used to verify that the file has not been altered later on, or to deidentify +#' wav files in a reversable manner for use outside of the emuR framework. De-identified files are sometimes useful for blinded randomized +#' perceptual testing, and the ability to reverse the procedure is then essential to link the results of the evaluation back to the original +#' recording extracted from the emuR data base. The user may create checksums by multiple algorithms by running the function again with different \code{algorithm} arguments. +#' +#' @param emuDBhandle The handle for the emuR database. +#' @param sessionPattern A regexp pattern that allows the user to limit which sessions should be affected by the manipulation. +#' @param bundlePattern A regexp pattern that allows the user to limit which bundles to include. +#' @param algorithm The name of the hashing algorithm, according to the \code{\link[digest]{digest}} function. +#' +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' #Add a md5 digest to the metadata of all bundles +#' add_digests(ae_test,algorithm = "md5") +#' +#' #Add a "sha1" checksum (the default) to some bundles +#' add_digests(ae_test,bundlePattern = "msajc0.*") +#' get_metadata(ae_test) -> res +#' print(res) +#' } +#' +add_digests <- function(emuDBhandle,sessionPattern=".*",bundlePattern=".*",algorithm="sha1", verbose=TRUE){ + wavs <- list_files(emuDBhandle,fileExtension = "*.wav",sessionPattern=sessionPattern,bundlePattern=bundlePattern) + if(verbose){ + cat(paste0("INFO: Writing ",algorithm, " checksums for ", nrow(wavs), " bundles\n")) + pb <- txtProgressBar(min = 0, max = nrow(wavs), style = 3) + } + for(f in 1:nrow(wavs)){ + inFile <- unlist(wavs[f,"absolute_file_path"],use.names = FALSE) + session <- unlist(wavs[f,"session"],use.names = FALSE) + bundle <- unlist(wavs[f,"bundle"],use.names = FALSE) + + wrassp::read.AsspDataObj(inFile) -> w + options(digits=15) + attr(w,"sampleRate") -> sr + attr(w,"endRecord") - attr(w,"startRecord") +1 -> samples + samples / sr *1000 -> duration + rm(w) + digest::digest(inFile,file=TRUE,algo=algorithm) -> checksum + metadata <- list("Bundle.Duration.ms"=duration) + metadata[paste0("Bundle.",algorithm,"_checksum")] <- checksum + + add_metadata(emuDBhandle,metadata,session=session,bundle=bundle) + if(verbose) { + setTxtProgressBar(pb, f) + } + } + if(verbose) { + close(pb) + } +} + + +#' Create a biography of the labels in a list of segments in a tidy manner +#' +#' @param segs_tbl The \code{\link[dplyr]{tibble}} that is the result \code{\link[emuR]{query}} call. +#' @param emuDBhandle A \code{\link{emuR}} database handle. +#' @param compute_digests Should information that describes the recorded sound files be computed so that is is definitelly part of the +#' added metadata information. +#' @param algorithm The checksum algorithm that should be used when computing sound file information. +#' +#' @return A \code{\link[dplyr]{tibble}} +#' @export +#' +#' @examples +#' \dontrun{ +#' create_emuRdemoData() +#' ae_test <- load_emuDB(file.path(tempdir(),"emuR_demoData","ae_emuDB")) +#' +#' # Database-wide default information +#' add_metadata(ae_test,list("Accent"="Northern","Elicitation"="Scripted")) +#' #Bundle specific information +#' add_metadata(ae_test,list("Speaker.Sex"="Male","Date"="2020-03-04"),session="0000",bundle="msajc003") +#' +#' # Get all the 'n' segments in the database +#' query(ae_test,"Phonetic = n",resultType = "tibble") -> ae_nt +#' # Add information related to the nature the recording sessions +#' # e.g. the speaker ID, the date of the recording +#' ae_nt %>% biographize(ae_test) %>% glimpse() +#' # This code does the same as the above, but it will also compute new +#' # information that is strictly aimed at identifying the recording +#' # (length of recording (in ms) and a sha1 digest of the wav file). +#' ae_nt %>% +#' biographize(ae_test,compute_digests=TRUE,algorithm="sha1") %>% +#' glimpse() +#' rm(ae_test) +#' +#' } +#' +biographize <- function(segs_tbl,emuDBhandle,compute_digests=FALSE,algorithm="sha1") { + #make sure that the first argument is a segment list, and that + # it contains "session" and "bundle" columns. + if(! is.data.frame(segs_tbl) || isFALSE(c("session", "bundle") %in% names(segs_tbl))){ + out <- paste("The input to the",match.call()[[1]], "has to be an emuDB seglist 'tibble' or a 'data.frame'.") + stop(out) + } + if(compute_digests==TRUE){ + add_digests(emuDBhandle,algorithm = algorithm) + } + #Here we use the special mode of export_medatata to get a data structure rather than an Excel file. + mdata <- get_metadata(emuDBhandle) + + out <- segs_tbl %>% + dplyr::left_join(mdata,by = c("session", "bundle")) + + return(out) +} + + +##' List metadata files of emuDB +##' +##' List metadata files (only those within bundles) belonging to emuDB. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param fileExtension file extension of files (default .json) +##' @param sessionPattern A (RegEx) pattern matching sessions to be searched from the database +##' @param bundlePattern A (RegEx) pattern matching bundles to be searched from the database +##' @return file paths as character vector +##' @export +##' @importFrom rlang .data +##' @keywords emuDB database schema Emu +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # list all files of ae emuDB +##' list_metafiles(emuDBhandle = ae) +##' +##' # list all files of ae emuDB in bundles ending with '3' +##' list_metafiles(emuDBhandle = ae, bundlePattern=".*3$") +##' +##' } +##' +list_metafiles <- function(emuDBhandle, + fileExtension = ".json", + sessionPattern = ".*", + bundlePattern = ".*"){ + + check_emuDBhandle(emuDBhandle) + + fileList = list.files(path = file.path(emuDBhandle$basePath), + recursive = T, + pattern = paste0("*_meta", fileExtension, "$")) %>% + tibble::enframe(name = NULL) %>% + tidyr::separate(col = .data$value, + into = c("session", "bundle", "file"), + sep = .Platform$file.sep, + extra = "drop", + fill = "right") %>% + dplyr::filter(!is.na(.data$session)) %>% + dplyr::filter(!is.na(.data$bundle)) %>% + dplyr::filter(!is.na(.data$file)) %>% + + dplyr::filter (endsWith(.data$session, "_ses")) %>% + dplyr::filter (endsWith(.data$bundle, "_bndl")) %>% + + dplyr::mutate(session = stringr::str_remove(.data$session, "_ses$")) %>% + dplyr::mutate(bundle = stringr::str_remove(.data$bundle, "_bndl$")) %>% + + dplyr::filter (stringr::str_detect(.data$session, sessionPattern)) %>% + dplyr::filter (stringr::str_detect(.data$bundle, bundlePattern)) %>% + + dplyr::mutate (absolute_file_path = file.path(emuDBhandle$basePath, + paste0(.data$session, "_ses"), + paste0(.data$bundle, "_bndl"), + file)) + + return (fileList) +} + +###Initialise metadata cache +metadata_cache <- memoise::memoise(export_metadata, omit_args = c("progressBar")) + +### INTERACTIVE testing + +#unlink_emuRDemoDir() +#create_ae_db() -> emuDBhandle +#add_metadata(emuDBhandle,session = "0000",bundle = "msajc003",metadataList = list(Gender=NA,Age=10)) +#rstudioapi::navigateToFile(file.path(emuDBhandle$basePath,"ae_DBconfig.json")) +#add_metadata(emuDBhandle,metadataList = list(Gender=NA,Age=10)) + +#add_metadata(emuDBhandle,session = "0000",bundle = "msajc003",metadataList = list(Gender=NULL,Age=20)) + +# dir.create(file.path(emuDBhandle$basePath,"temp")) +# for(i in 1:11){ +# file.copy(file.path(emuDBhandle$basePath,"0000_ses"),file.path(emuDBhandle$basePath,"temp"),recursive = TRUE) +# newName <- paste0(i,i,i,i,"_ses") +# file.rename(file.path(emuDBhandle$basePath,"temp","0000_ses"),file.path(emuDBhandle$basePath,"temp",newName)) +# file.copy(file.path(emuDBhandle$basePath,"temp",newName),emuDBhandle$basePath,recursive = TRUE) +# unlink(file.path(emuDBhandle$basePath,"temp",newName),recursive = TRUE) +# } + + + +#export_metadata(ae,Excelfile = "~/Desktop/out.xlsx",overwrite = TRUE) +#rstudioapi::navigateToFile(list_files(emuDBhandle,"meta_json")$absolute_file_path) +#export_metadata(ae,Excelfile = "~/Desktop/out.xlsx") \ No newline at end of file From 95444cd99aab2ddc5d63464fdd35ed59b7045646 Mon Sep 17 00:00:00 2001 From: samgregory Date: Fri, 13 May 2022 21:06:54 +1000 Subject: [PATCH 7/7] Initial implementation of a metadata functionality Complete with documentation and unit testing. New package dependency: memoise for metadata caching Package suggestion: openslxs for metadata <-> Excel import/export --- DESCRIPTION | 8 +++++--- NAMESPACE | 7 +++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fe8bd1ab..cf8d26a3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,8 @@ Imports: rstudioapi(>= 0.10), httr(>= 1.4.1), V8(>= 3.4.0), - cli(>= 2.5.0) + cli(>= 2.5.0), + memoise(>= 2.0.0) Suggests: stats, grDevices, @@ -46,11 +47,12 @@ Suggests: testthat(>= 0.7.1.99), compare(>= 0.2.4), knitr(>= 1.7), - rmarkdown(>= 0.9.2) + rmarkdown(>= 0.9.2), + openxlsx Encoding: UTF-8 LazyLoad: yes LazyData: yes ZipData: no URL: https://github.com/IPS-LMU/emuR, https://ips-lmu.github.io/The-EMU-SDMS-Manual/ BugReports: https://github.com/IPS-LMU/emuR/issues -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index 8afd353b..778e4ba9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,10 +43,12 @@ S3method(utt,emusegs) export(Slope.test) export(add_attrDefLabelGroup) export(add_attributeDefinition) +export(add_digests) export(add_files) export(add_labelGroup) export(add_levelDefinition) export(add_linkDefinition) +export(add_metadata) export(add_perspective) export(add_ssffTrackDefinition) export(as.spectral) @@ -58,6 +60,7 @@ export(bayes.lab) export(bayesian.metric) export(bayesplot) export(bind) +export(biographize) export(buildtrack) export(cen.sub) export(classify) @@ -113,6 +116,7 @@ export(euclidean) export(expand_labels) export(export_BPFCollection) export(export_TextGridCollection) +export(export_metadata) export(export_seglistToTxtCollection) export(fapply) export(frames) @@ -123,9 +127,11 @@ export(get.time.element) export(get.trackkeywrd) export(get_legalLabels) export(get_levelCanvasesOrder) +export(get_metadata) export(get_signalCanvasesOrder) export(get_trackdata) export(import_mediaFiles) +export(import_metadata) export(is.seglist) export(is.spectral) export(is.trackdata) @@ -141,6 +147,7 @@ export(list_files) export(list_labelGroups) export(list_levelDefinitions) export(list_linkDefinitions) +export(list_metafiles) export(list_perspectives) export(list_sampleRates) export(list_sessions)