diff --git a/R/CriteriaComparison.R b/R/CriteriaComparison.R index f60ac346..a12215f8 100644 --- a/R/CriteriaComparison.R +++ b/R/CriteriaComparison.R @@ -186,15 +186,17 @@ TADA_CreatePairRef <- function(.data, ph = TRUE, hardness = TRUE, temp = TRUE, #' Pair Results for Numeric Criteria Calculation (UNDER ACTIVE DEVELOPMENT) #' #' This function pairs TADA results with results from specified characteristics from the same -#' MonitoringLocation within a user-specified time window to facilitate the calculation of numeric -#' criteria. The columns created by TADA_AutoClean are required to run this function. If they are not -#' present in the data frame, the function will stop and print an error message. +#' TADA.MonitoringLocation within a user-specified time window to facilitate the calculation of +#' numeric criteria. The columns created by TADA_AutoClean are required to run this function. If +#' they are not present in the data frame, the function will stop and print an error message. #' #' Users can provide a pairing reference file (can be created using TADA_CreatePairRef) to specify #' which combinations of TADA.CharacteristicName, TADA.ResultMeasure.MeasureUnit, #' TADA.MethodSpeciationName, and TADA.ResultSampleFractionText should be used for hardness, pH, -#' temperature, salinity, chloride or other user-defined groups. If no ref is specified, all possible -#' combinations for hardness, pH, temperature, salinity and chloride will be used. +#' temperature, salinity, chloride or other user-defined groups. If no ref is specified, all +#' possible combinations for hardness, pH, temperature, salinity and chloride will be used. It is +#' highly recommended that users perform all unit conversion and synonym harmonization before using +#' TADA_PairForCriteriaCalc. 
#' #' @param .data TADA dataframe #' @@ -282,7 +284,7 @@ TADA_PairForCriteriaCalc <- function(.data, ref = "null", hours_range = 4) { ) %>% dplyr::select( TADA.CharacteristicName, TADA.ResultMeasureValue, TADA.ResultMeasure.MeasureUnitCode, - ActivityIdentifier, MonitoringLocationIdentifier, ActivityStartDateTime, + ActivityIdentifier, TADA.MonitoringLocationIdentifier, ActivityStartDateTime, TADA.ResultSampleFractionText, TADA.MethodSpeciationName ) %>% dplyr::left_join(ref.subset, @@ -340,11 +342,11 @@ TADA_PairForCriteriaCalc <- function(.data, ref = "null", hours_range = 4) { dplyr::filter( !ResultIdentifier %in% pair.activityid$ResultIdentifier, !is.na(ActivityStartDateTime), - MonitoringLocationIdentifier %in% pair.subset$MonitoringLocationIdentifier + TADA.MonitoringLocationIdentifier %in% pair.subset$TADA.MonitoringLocationIdentifier ) %>% dplyr::left_join(pair.subset2, relationship = "many-to-many", - by = dplyr::join_by(MonitoringLocationIdentifier) + by = dplyr::join_by(TADA.MonitoringLocationIdentifier) ) %>% dplyr::group_by(ResultIdentifier) %>% # Figure out fastest time comparison method - needs to be absolute time comparison diff --git a/R/DepthProfile.R b/R/DepthProfile.R index 93d005f6..17cc7fd6 100644 --- a/R/DepthProfile.R +++ b/R/DepthProfile.R @@ -7,7 +7,7 @@ #' bottom up to 2m (or user specified value) from bottom = "Bottom", and all depths #' in between the Surface and Bottom are assigned to the "Middle" category. #' -#' When more than one result is available for a MonitoringLocationIdentifier, +#' When more than one result is available for a TADA.MonitoringLocationIdentifier, #' ActivityStartDate, OrganizationIdentifier, and TADA.CharacteristicName, the #' user can choose a single result value (average, max, or min value) to use for that #' day and location. If results vary with depth, the user may also define whether @@ -22,12 +22,12 @@ #' "max". The default is dailyagg = "none". When dailyagg = "none", all results #' will be retained. 
When dailyagg == "avg", the mean value in each group of #' results (as determined by the depth category) will be identified or calculated for each -#' MonitoringLocation, ActivityDate, Organization ID, and TADA.CharacteristicName combination. +#' TADA.MonitoringLocation, ActivityDate, Organization ID, and TADA.CharacteristicName combination. #' When dailyagg == "min" or when dailyagg == "max", the min or max #' value in each group of results (as determined by the depth category) will -#' be identified or calculated for each MonitoringLocation, ActivityDate, and TADA.CharacteristicName -#' combination. An additional column, TADA.DepthProfileAggregation.Flag will be added -#' to describe aggregation. +#' be identified or calculated for each TADA.MonitoringLocation, ActivityDate, and +#' TADA.CharacteristicName combination. An additional column, TADA.DepthProfileAggregation.Flag will +#' be added to describe aggregation. #' #' @param bycategory character argument with options "no", "all", "surface", "middle", #' "bottom". 
The default is bycategory = "no" which means that any aggregate values @@ -100,7 +100,7 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su "TADA.CharacteristicName", "TADA.ResultMeasure.MeasureUnitCode", "ResultIdentifier", - "MonitoringLocationIdentifier", + "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "ActivityStartDate" )) @@ -178,7 +178,7 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su TADA.ConsolidatedDepth.Unit = tolower(TADA.ConsolidatedDepth.Unit) ) %>% # use group_by to identify profile data - dplyr::group_by(ActivityStartDate, MonitoringLocationIdentifier, OrganizationIdentifier) %>% + dplyr::group_by(ActivityStartDate, TADA.MonitoringLocationIdentifier, OrganizationIdentifier) %>% # determine the number of Depths per group dplyr::mutate( DepthsPerGroup = length(unique(TADA.ConsolidatedDepth)), @@ -218,10 +218,10 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su } if (bycategory == "all") { - print("TADA_FlagDepthCategory: Grouping results by MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, ActivityStartDate, and TADA.DepthCategory.Flag for aggregation by TADA.DepthCategory.Flag.") + print("TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, ActivityStartDate, and TADA.DepthCategory.Flag for aggregation by TADA.DepthCategory.Flag.") group.list <- c( - "MonitoringLocationIdentifier", "OrganizationIdentifier", + "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "TADA.CharacteristicName", "ActivityStartDate", "TADA.DepthCategory.Flag" ) @@ -230,10 +230,10 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su } if (bycategory == "no") { - print("TADA_FlagDepthCategory: Grouping results by MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for 
entire water column.") + print("TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for entire water column.") group.list <- c( - "MonitoringLocationIdentifier", "OrganizationIdentifier", + "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "TADA.CharacteristicName", "ActivityStartDate" ) @@ -241,10 +241,10 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su } if (bycategory == "surface") { - print("TADA_FlagDepthCategory: Grouping results by MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for surface samples only.") + print("TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for surface samples only.") group.list <- c( - "MonitoringLocationIdentifier", "OrganizationIdentifier", + "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "TADA.CharacteristicName", "ActivityStartDate" ) @@ -253,10 +253,10 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su } if (bycategory == "middle") { - print("TADA_FlagDepthCategory: Grouping results by MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for middle samples only.") + print("TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for middle samples only.") group.list <- c( - "MonitoringLocationIdentifier", "OrganizationIdentifier", + "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "TADA.CharacteristicName", "ActivityStartDate" ) @@ -265,10 +265,10 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su } if (bycategory == "bottom") { - 
print("TADA_FlagDepthCategory: Grouping results by MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for bottom samples only.") + print("TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for bottom samples only.") group.list <- c( - "MonitoringLocationIdentifier", "OrganizationIdentifier", + "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "TADA.CharacteristicName", "ActivityStartDate" ) @@ -447,7 +447,7 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su #' This function identifies depth profiles within a data frame to assist the user in #' selecting params for TADA_DepthProfilePlot. A TADA compatible data set is required. #' If TADA_FlagDepthCategory has not yet been run, it will be run as part of this -#' function. The output data frame is grouped by MonitoringLocationIdentifier, +#' function. The output data frame is grouped by TADA.MonitoringLocationIdentifier, #' OrganizationIdentifier, and ActivityStartDate. #' #' A new column, TADA.CharacteristicsForDepthProfile, is created which lists the @@ -458,7 +458,7 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su #' @param .data TADA dataframe which must include the columns ActivityStartDate, #' TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, TADA.ConsolidatedDepth.Bottom, #' TADA.ResultMeasureValue, TADA.ResultMeasureValue.UnitCode, -#' OrganizationIdentifier, MonitoringLocationName, MonitoringLocationIdentifier, +#' OrganizationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationIdentifier, #' and TADA.ComparableDataIdentifier. #' #' @param nresults Boolean argument with options "TRUE" or "FALSE". 
The @@ -469,7 +469,7 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su #' @param nvalue numeric argument to specify the number of results required to identify #' a depth profile. The default is 2, which means that a depth profile will be identified #' if 2 or more results at different depths exists for the same ActivityStartDate, -#' MonitoringLocationIdentifier, OrganizationIdentifier, and TADA.ComparableDataIdentifier. +#' TADA.MonitoringLocationIdentifier, OrganizationIdentifier, and TADA.ComparableDataIdentifier. #' A few characteristics are excluded from this requirement because they are expected to #' have only a single result in depth units (ex: secchi disk depth). #' @@ -481,8 +481,8 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, su #' FALSE. When aggregates = TRUE, all aggregate values are included when identifying #' depth profile data. #' -#' @return A dataframe with the columns MonitoringLocationIdentifier, -#' MonitoringLocationName, OrganizationIdentifier, ActivityStartDate, +#' @return A dataframe with the columns TADA.MonitoringLocationIdentifier, +#' TADA.MonitoringLocationName, OrganizationIdentifier, ActivityStartDate, #' TADA.CharacteristicsForDepthProfile. Based on the user input for the nresults #' param, TADA.CharacteristicsForDepthProfile may or may not contain the number #' of results for each characteristic. 
@@ -542,12 +542,12 @@ TADA_IDDepthProfiles <- function(.data, nresults = TRUE, nvalue = 2, aggregates if (nresults == TRUE) { .data <- .data %>% dplyr::select( - MonitoringLocationIdentifier, MonitoringLocationName, MonitoringLocationTypeName, + TADA.MonitoringLocationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationTypeName, OrganizationIdentifier, ActivityStartDate, TADA.CharacteristicName, TADA.ComparableDataIdentifier, TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, TADA.ConsolidatedDepth.Bottom ) %>% dplyr::group_by( - MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate, + TADA.MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate, TADA.ComparableDataIdentifier ) %>% dplyr::mutate( @@ -559,7 +559,7 @@ TADA_IDDepthProfiles <- function(.data, nresults = TRUE, nvalue = 2, aggregates ) %>% dplyr::filter(TADA.NResults >= nvalue | TADA.CharacteristicName %in% depth.params) %>% dplyr::ungroup() %>% - dplyr::group_by(MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate) %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate) %>% # check that for results with only a single depth unit (ex: secchi disk depth) that other results are available in group dplyr::mutate(MeanResults = mean(TADA.NResults)) %>% dplyr::filter(MeanResults > 1) %>% @@ -571,7 +571,7 @@ TADA_IDDepthProfiles <- function(.data, nresults = TRUE, nvalue = 2, aggregates TADA.CharacteristicsForDepthProfile = stringr::str_replace_all(paste(sort(unique(unlist(strsplit(TADA.CharacteristicsForDepthProfile, ";", )))), collapse = ";"), " ;", "; ") ) %>% dplyr::select( - MonitoringLocationIdentifier, MonitoringLocationName, MonitoringLocationTypeName, OrganizationIdentifier, ActivityStartDate, + TADA.MonitoringLocationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationTypeName, OrganizationIdentifier, ActivityStartDate, TADA.CharacteristicsForDepthProfile ) %>% unique() @@ -582,18 
+582,18 @@ TADA_IDDepthProfiles <- function(.data, nresults = TRUE, nvalue = 2, aggregates if (nresults == FALSE) { .data <- .data %>% dplyr::select( - MonitoringLocationIdentifier, MonitoringLocationName, MonitoringLocationTypeName, + TADA.MonitoringLocationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationTypeName, OrganizationIdentifier, ActivityStartDate, TADA.CharacteristicName, TADA.ComparableDataIdentifier, TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, TADA.ConsolidatedDepth.Bottom ) %>% dplyr::group_by( - MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate, + TADA.MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate, TADA.ComparableDataIdentifier ) %>% dplyr::mutate(TADA.NResults = length(unique(TADA.ConsolidatedDepth))) %>% dplyr::filter(TADA.NResults >= nvalue | TADA.CharacteristicName %in% depth.params) %>% dplyr::ungroup() %>% - dplyr::group_by(MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate) %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier, OrganizationIdentifier, ActivityStartDate) %>% # check that for results with only a single depth unit (ex: secchi disk depth) that other results are available in group dplyr::mutate(MeanResults = mean(TADA.NResults)) %>% dplyr::filter(MeanResults > 1) %>% @@ -605,7 +605,7 @@ TADA_IDDepthProfiles <- function(.data, nresults = TRUE, nvalue = 2, aggregates TADA.CharacteristicsForDepthProfile = stringr::str_replace_all(paste(sort(unique(unlist(strsplit(TADA.CharacteristicsForDepthProfile, ";", )))), collapse = ";"), " ;", "; ") ) %>% dplyr::select( - MonitoringLocationIdentifier, MonitoringLocationName, MonitoringLocationTypeName, OrganizationIdentifier, ActivityStartDate, + TADA.MonitoringLocationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationTypeName, OrganizationIdentifier, ActivityStartDate, TADA.CharacteristicsForDepthProfile ) %>% unique() @@ -629,8 +629,8 @@ TADA_IDDepthProfiles <- function(.data, nresults 
= TRUE, nvalue = 2, aggregates #' These groups will be specific to your data frame. The TADA_IDDepthProfiles can be #' used to identify available groups. #' -#' @param location A single MonitoringLocationIdentifier to plot the depth profile. -#' A MonitoringLocationIdentifier must be entered or an error will be returned and +#' @param location A single TADA.MonitoringLocationIdentifier to plot the depth profile. +#' A TADA.MonitoringLocationIdentifier must be entered or an error will be returned and #' no depth profile will be created. #' #' @param activity_date The date the depth profile results were collected. @@ -651,7 +651,7 @@ TADA_IDDepthProfiles <- function(.data, nresults = TRUE, nvalue = 2, aggregates #' depth units should be used for the plot. Default is "m". #' #' @return A depth profile plot displaying up to three parameters for a single -#' MonitoringLocationIdentifier. Displaying depth categories is optional with the +#' TADA.MonitoringLocationIdentifier. Displaying depth categories is optional with the #' depthcat argument. 
#' #' @export @@ -758,18 +758,18 @@ TADA_DepthProfilePlot <- function(.data, if (is.null(location)) { - print("TADA_DepthProfilePlot: No MonitoringLocationIdentifier selected, a depth profile cannot be generated.") + print("TADA_DepthProfilePlot: No TADA.MonitoringLocationIdentifier selected, a depth profile cannot be generated.") stop() - if (!location %in% param.check$MonitoringLocationIdentifier) { - print("TADA_DepthProfilePlot: MonitoringLocationIdentifier selected is not in data set.") + if (!location %in% param.check$TADA.MonitoringLocationIdentifier) { + print("TADA_DepthProfilePlot: TADA.MonitoringLocationIdentifier selected is not in data set.") stop() } - if (location %in% param.check$MonitoringLocationIdentifier) { - print("TADA_DepthProfilePlot: MonitoringLocationIdentifier selected.") + if (location %in% param.check$TADA.MonitoringLocationIdentifier) { + print("TADA_DepthProfilePlot: TADA.MonitoringLocationIdentifier selected.") } } @@ -861,8 +861,8 @@ TADA_DepthProfilePlot <- function(.data, "TADA.DepthCategory.Flag", "TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode", - "MonitoringLocationIdentifier", - "MonitoringLocationName", + "TADA.MonitoringLocationIdentifier", + "TADA.MonitoringLocationName", "ActivityStartDate", "ActivityStartDateTime", "TADA.ConsolidatedDepth", @@ -890,7 +890,7 @@ TADA_DepthProfilePlot <- function(.data, depthprofile.avail <- .data %>% dplyr::filter( !is.na(TADA.ConsolidatedDepth), - MonitoringLocationIdentifier %in% location, + TADA.MonitoringLocationIdentifier %in% location, ActivityStartDate %in% activity_date, TADA.ActivityMediaName == "WATER" ) %>% @@ -901,7 +901,7 @@ TADA_DepthProfilePlot <- function(.data, dplyr::slice_sample(n = 1) %>% dplyr::ungroup() %>% dplyr::group_by( - MonitoringLocationIdentifier, TADA.ComparableDataIdentifier, + TADA.MonitoringLocationIdentifier, TADA.ComparableDataIdentifier, ActivityStartDate ) %>% dplyr::mutate(N = length(TADA.ResultMeasureValue)) %>% @@ -946,12 +946,12 @@ 
TADA_DepthProfilePlot <- function(.data, depth.params.avail <- .data %>% dplyr::filter( - MonitoringLocationIdentifier %in% location, + TADA.MonitoringLocationIdentifier %in% location, TADA.CharacteristicName %in% depth.params, ActivityStartDate %in% activity_date, TADA.ActivityMediaName == "WATER" ) %>% - dplyr::group_by(TADA.CharacteristicName, ActivityStartDate, MonitoringLocationIdentifier) %>% + dplyr::group_by(TADA.CharacteristicName, ActivityStartDate, TADA.MonitoringLocationIdentifier) %>% dplyr::slice_sample(n = 1) %>% dplyr::ungroup() @@ -1003,7 +1003,7 @@ TADA_DepthProfilePlot <- function(.data, # this subset must include all fields included in plot hover below plot.data <- profile.data %>% dplyr::filter(dplyr::if_any(TADA.ComparableDataIdentifier, ~ .x %in% groups)) %>% - dplyr::select(dplyr::all_of(reqcols), "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText") %>% + dplyr::select(dplyr::all_of(reqcols), "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "TADA.MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText") %>% dplyr::mutate(TADA.ResultMeasure.MeasureUnitCode = ifelse(is.na(TADA.ResultMeasure.MeasureUnitCode), "NA", TADA.ResultMeasure.MeasureUnitCode )) @@ -1032,7 +1032,7 @@ TADA_DepthProfilePlot <- function(.data, " and ", param3$TADA.CharacteristicName[1], " for ", - plot.data$MonitoringLocationName[1], + plot.data$TADA.MonitoringLocationName[1], " on ", format(as.Date(plot.data$ActivityStartDate[1]), "%B %d, %Y") ), @@ -1049,7 +1049,7 @@ TADA_DepthProfilePlot <- function(.data, param2$TADA.CharacteristicName[1], " for ", # figure out addition of weird \n in name - 
plot.data$MonitoringLocationName[1], + plot.data$TADA.MonitoringLocationName[1], " on ", format(as.Date(plot.data$ActivityStartDate[1]), "%B %d, %Y") ), @@ -1064,7 +1064,7 @@ TADA_DepthProfilePlot <- function(.data, param1$TADA.CharacteristicName[1], " for ", # figure out addition of weird \n in name - plot.data$MonitoringLocationName[1], + plot.data$TADA.MonitoringLocationName[1], " on ", format(as.Date(plot.data$ActivityStartDate[1]), "%B %d, %Y") ), diff --git a/R/Figures.R b/R/Figures.R index e5daa832..e5112e24 100644 --- a/R/Figures.R +++ b/R/Figures.R @@ -40,11 +40,11 @@ #' #' # Create multiple boxplots with additional grouping columns and view the first #' # plot in list. In this example, we will group data in the input dataframe -#' # by both the TADA.ComparableDataIdentifier and the MonitoringLocationTypeName +#' # by both the TADA.ComparableDataIdentifier and the TADA.MonitoringLocationTypeName #' # (e.g. stream, reservoir, canal, etc.) #' # Load example data frame: #' data(Data_Nutrients_UT) -#' Boxplot_output <- TADA_Boxplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationTypeName")) +#' Boxplot_output <- TADA_Boxplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationTypeName")) #' # This example generates 32 box plots. #' Boxplot_output[[2]] #' Boxplot_output[[25]] @@ -216,10 +216,10 @@ TADA_Boxplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) { #' #' # Create multiple histograms with additional grouping columns and view the first #' # plot in list. In this example, we will group by both TADA.ComparableDataIdentifier -#' # and MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) +#' # and TADA.MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) 
#' # Load example data frame: #' data(Data_Nutrients_UT) -#' Histogram_output <- TADA_Histogram(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationTypeName")) +#' Histogram_output <- TADA_Histogram(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationTypeName")) #' # This example generates 32 histograms #' Histogram_output[[10]] #' Histogram_output[[25]] @@ -365,9 +365,15 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) #' #' @param .data TADA data frame containing the data downloaded from the WQP, where #' each row represents a unique data record. Data frame must include the columns -#' 'MonitoringLocationIdentifier','MonitoringLocationName','TADA.LatitudeMeasure', +#' 'TADA.MonitoringLocationIdentifier','TADA.MonitoringLocationName','TADA.LatitudeMeasure', #' 'TADA.LongitudeMeasure', 'ResultIdentifier', 'ActivityStartDate', 'TADA.CharacteristicName', #' and 'OrganizationIdentifier' to run this function. +#' +#' @param identifier A character argument to select whether the TADA.MonitoringLocationIdentifier +#' (which may include grouped sites if TADA_FindNearbySites has been run) or the original WQP +#' MonitoringLocationIdentifier and associated coordinates are used for mapping. Identifier equals +#' "tada" is the default and will use the TADA prefixed monitoring location columns. Identifier +#' equals "wqp" will use the originals. 
#' #' @return A leaflet map that shows all sites in the data frame, where larger point sizes #' indicate more results collected at a site, and darker point colors indicate more @@ -390,7 +396,8 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) #' TADA_OverviewMap(Data_6Tribes_5y_Harmonized) #' } #' -TADA_OverviewMap <- function(.data) { + +TADA_OverviewMap <- function(.data, identifier = "tada") { suppressMessages(suppressWarnings({ quiet({ # taken from this stackoverflow: https://stackoverflow.com/questions/58505589/circles-in-legend-for-leaflet-map-with-addcirclemarkers-in-r-without-shiny @@ -401,8 +408,20 @@ TADA_OverviewMap <- function(.data) { return(leaflet::addLegend(map, colors = colorAdditions, labels = labelAdditions, opacity = opacity, title = "Measurements")) } + if(identifier == "tada") { + ml_id <- "TADA.MonitoringLocationIdentifier" + + ml_name <- "TADA.MonitoringLocationName" + } + + if(identifier == "wqp") { + ml_id <- "MonitoringLocationIdentifier" + + ml_name <- "MonitoringLocationName" + } + sumdat <- .data %>% - dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure) %>% + dplyr::group_by(!!rlang::sym(ml_id), !!rlang::sym(ml_name), TADA.LatitudeMeasure, TADA.LongitudeMeasure) %>% dplyr::summarise("Sample_Count" = length(unique(ResultIdentifier)), "Visit_Count" = length(unique(ActivityStartDate)), "Parameter_Count" = length(unique(TADA.CharacteristicName)), "Organization_Count" = length(unique(OrganizationIdentifier))) param_counts <- sort(unique(sumdat$Parameter_Count)) @@ -509,8 +528,8 @@ TADA_OverviewMap <- function(.data) { weight = 1.5, radius = sumdat$radius, popup = paste0( - "Site ID: ", sumdat$MonitoringLocationIdentifier, - "
Site Name: ", sumdat$MonitoringLocationName, + "Site ID: ", rlang::eval_tidy(rlang::sym(ml_id), sumdat), + "
Site Name: ", rlang::eval_tidy(rlang::sym(ml_name), sumdat), "
Measurement Count: ", sumdat$Sample_Count, "
Visit Count: ", sumdat$Visit_Count, "
Characteristic Count: ", sumdat$Parameter_Count @@ -560,8 +579,14 @@ TADA_OverviewMap <- function(.data) { #' #' @param .data TADA data frame containing the data downloaded from the WQP, where #' each row represents a unique data record. Data frame must include the columns -#' 'MonitoringLocationIdentifier','MonitoringLocationName','TADA.LatitudeMeasure', +#' 'TADA.MonitoringLocationIdentifier','TADA.MonitoringLocationName','TADA.LatitudeMeasure', #' and 'TADA.LongitudeMeasure' to run this function. +#' +#' @param identifier A character argument to select whether the TADA.MonitoringLocationIdentifier +#' (which may include grouped sites if TADA_FindNearbySites has been run) or the original WQP +#' MonitoringLocationIdentifier and associated coordinates are used for mapping. Identifier equals +#' "tada" is the default and will use the TADA prefixed monitoring location columns. Identifier +#' equals "wqp" will use the originals. #' #' @return A leaflet map that shows all sites in the data frame that contain #' flagged data in the form of: @@ -586,61 +611,98 @@ TADA_OverviewMap <- function(.data) { #' TADA_FlaggedSitesMap(Data_6Tribes_5y_Harmonized) #' } #' -TADA_FlaggedSitesMap <- function(.data) { - invalid <- TADA_FlagCoordinates(.data, flaggedonly = TRUE) - lowres <- invalid[invalid$TADA.InvalidCoordinates.Flag == "Imprecise_lessthan3decimaldigits", ] - outsideusa <- invalid[invalid$TADA.InvalidCoordinates.Flag %in% c("LAT_OutsideUSA", "LONG_OutsideUSA"), ] - nearby <- TADA_FindNearbySites(.data) - print(colnames(nearby)) - nearby <- TADA_GetUniqueNearbySites(nearby) + +TADA_FlaggedSitesMap <- function(.data, identifier = "tada") { + + # check to see if TADA_FlagCoordinates has been run on TADA df + if(!"TADA.InvalidCoordinates.Flag" %in% names(.data)) { + + # if TADA_FlagCoordinates has not been run, run it + .data <- TADA_FlagCoordinates(.data) + } + + # create subset of imprecise sites + lowres <- .data[.data$TADA.InvalidCoordinates.Flag == 
"Imprecise_lessthan3decimaldigits", ] + + # create subset of outside usa sites + outsideusa <- .data[.data$TADA.InvalidCoordinates.Flag %in% c("LAT_OutsideUSA", "LONG_OutsideUSA"), ] + + # check to see if TADA_FindNearbySites has been run on TADA df + if(!"TADA.NearbySites.Flag" %in% names(.data)) { + + # if TADA_FindNearbySites has not been run, run it + .data <- TADA_FindNearbySites(.data) + } + + # create subset of unique nearby sites + nearby <- TADA_GetUniqueNearbySites(.data) lowresIcon <- leaflet::makeAwesomeIcon(icon = "circle", library = "fa", iconColor = "#ffffff", markerColor = "green") outsideIcon <- leaflet::makeAwesomeIcon(icon = "circle", library = "fa", iconColor = "#ffffff", markerColor = "darkblue") nearbyIcon <- leaflet::makeAwesomeIcon(icon = "circle", library = "fa", iconColor = "#ffffff", markerColor = "pink") + + # columns for custom popup and map based on identifier selected by user + + lat_name <- ifelse(identifier == "wqx", "LatitudeMeasure", + "TADA.LatitudeMeasure") + + long_name <- ifelse(identifier == "wqx", "LongitudeMeasure", + "TADA.LongitudeMeasure") + + ml_name <- ifelse(identifier == "wqx", "MonitoringLocationName", + "TADA.MonitoringLocationName") + + ml_type <- ifelse(identifier == "wqx", "MonitoringLocationTypeName", + "TADA.MonitoringLocationTypeName") + + # create custom popup function + custom.popup <- function(.data) { + + meta.flag <- ifelse(.data$MonitoringLocationIdentifier %in% nearby$MonitoringLocationIdentifier, + "*Original and TADA monitoring location metadata may be different due to grouping of nearby sites", + "*Original and TADA monitoring location metadata match") + + popup = paste0( + "TADA.MonitoringLocationIdentifier: ", .data$TADA.MonitoringLocationIdentifier, + "
MonitoringLocationIdentifier: ", .data$MonitoringLocationIdentifier, + "
", ml_name, ": ", .data[[ml_name]], + "
", ml_type, ": ", .data[[ml_type]], + "
", lat_name, ": ", .data[[lat_name]], + "
", long_name, ": ", .data[[long_name]], + "
", + "
", meta.flag + ) + } + + # create map map <- leaflet::leaflet() %>% leaflet::addProviderTiles("Esri.WorldTopoMap", group = "World topo", options = leaflet::providerTileOptions(updateWhenZooming = FALSE, updateWhenIdle = TRUE)) %>% leaflet.extras::addResetMapButton() # button to reset to initial zoom and lat/long if (nrow(outsideusa) > 0) { - map <- map %>% leaflet::addAwesomeMarkers(~TADA.LongitudeMeasure, - ~TADA.LatitudeMeasure, + map <- map %>% leaflet::addAwesomeMarkers(~as.numeric(outsideusa[[long_name]]), + ~as.numeric(outsideusa[[lat_name]]), icon = outsideIcon, - # label = ~as.character(MonitoringLocationIdentifier), - popup = paste0( - "Site ID: ", outsideusa$MonitoringLocationIdentifier, - "
Site Name: ", outsideusa$MonitoringLocationName, - "
Latitude: ", outsideusa$TADA.LatitudeMeasure, - "
Longitude: ", outsideusa$TADA.LongitudeMeasure - ), + # label = ~as.character(TADA.MonitoringLocationIdentifier), + popup = custom.popup(outsideusa), data = outsideusa ) } if (nrow(lowres) > 0) { - map <- map %>% leaflet::addAwesomeMarkers(~TADA.LongitudeMeasure, - ~TADA.LatitudeMeasure, + map <- map %>% leaflet::addAwesomeMarkers(~as.numeric(lowres[[long_name]]), + ~as.numeric(lowres[[lat_name]]), icon = lowresIcon, - # label = ~as.character(MonitoringLocationIdentifier), - popup = paste0( - "Site ID: ", lowres$MonitoringLocationIdentifier, - "
Site Name: ", lowres$MonitoringLocationName, - "
Latitude: ", lowres$TADA.LatitudeMeasure, - "
Longitude: ", lowres$TADA.LongitudeMeasure - ), + # label = ~as.character(TADA.MonitoringLocationIdentifier), + popup = custom.popup(lowres), data = lowres ) } if (nrow(nearby) > 0) { - map <- map %>% leaflet::addAwesomeMarkers(~TADA.LongitudeMeasure, - ~TADA.LatitudeMeasure, + map <- map %>% leaflet::addAwesomeMarkers(~as.numeric(nearby[[long_name]]), + ~as.numeric(nearby[[lat_name]]), icon = nearbyIcon, # label = ~as.character(TADA.MonitoringLocationIdentifier), - popup = paste0( - "Nearby Group Name: ", nearby$TADA.MonitoringLocationIdentifier, - "
Site ID: ", nearby$MonitoringLocationIdentifier, - "
Site Name: ", nearby$MonitoringLocationName, - "
Latitude: ", nearby$TADA.LatitudeMeasure, - "
Longitude: ", nearby$TADA.LongitudeMeasure - ), + popup = custom.popup(nearby), data = nearby ) } @@ -762,14 +824,14 @@ TADA_FieldValuesPie <- function(.data, field = "null", characteristicName = "nul #' df <- dplyr::filter(Data_6Tribes_5y_Harmonized, TADA.ComparableDataIdentifier == "TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L") #' TADA_Scatterplot(df, id_cols = "TADA.ComparableDataIdentifier") #' # Creates a scatterplot for each monitoring location -#' TADA_Scatterplot(df, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationName")) +#' TADA_Scatterplot(df, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationName")) #' #' # Create multiple scatterplots with additional grouping columns and view the first #' # plot in list. In this example, we will group by both TADA.ComparableDataIdentifier -#' # and MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) +#' # and TADA.MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) #' # Load example dataset: #' data(Data_Nutrients_UT) -#' Scatterplot_output <- TADA_Scatterplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationTypeName")) +#' Scatterplot_output <- TADA_Scatterplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationTypeName")) #' # This example generates 47 scatterplots #' Scatterplot_output[[10]] #' Scatterplot_output[[25]] @@ -822,8 +884,8 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") mode = "markers", x = plot.data$ActivityStartDate, # currently uses start date only, may want to change to just ActivityStartDateTime in the future, but for now ActivityStartDateTime includes NAs when time is not available. Including ActivityStartDateTime in hover feature instead. 
y = plot.data$TADA.ResultMeasureValue, - # consider adding color or shapes to make it easier to see sites and/or possible realtive result values - # color = ~MonitoringLocationName, + # consider adding color or shapes to make it easier to see sites and/or possible relative result values + # color = ~TADA.MonitoringLocationName, # colors = RColorBrewer::brewer.pal(3, "Set2"), marker = list(color = tada.pal[1, 1]), # marker color stroke = I(tada.pal[1, 2]), # marker border color @@ -833,7 +895,7 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") "Result:", paste0(plot.data$TADA.ResultMeasureValue, " ", plot.data$TADA.ResultMeasure.MeasureUnitCode), "
", "Activity Start Date:", plot.data$ActivityStartDate, "
", "Activity Start Date Time:", plot.data$ActivityStartDateTime, "
", - "Monitoring Location Name:", plot.data$MonitoringLocationName, "
", + "Monitoring Location Name:", plot.data$TADA.MonitoringLocationName, "
", "Media:", plot.data$TADA.ActivityMediaName, "
", "Media Subdivision:", plot.data$ActivityMediaSubdivisionName, "
", "Result Depth:", paste0( @@ -902,8 +964,8 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") #' #' @param .data TADA data frame containing the data downloaded from the WQP, #' where each row represents a unique data record. Data frame must include the -#' columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', and 'TADA.ResultMeasure.MeasureUnitCode' -#' to run this function. +#' columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', and +#' 'TADA.ResultMeasure.MeasureUnitCode' to run this function. #' #' @param id_cols The column in the dataset used to identify the unique groups to #' be plotted. Defaults to 'TADA.ComparableDataIdentifier', which should be @@ -916,8 +978,8 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") #' @param groups A vector of two identifiers from the id_cols column. For #' example, if the id_cols is 'TADA.ComparableDataIdentifier', the groups could #' be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_NA_NA'. These groups will -#' be specific to your dataset. If the id_cols is 'MonitoringLocationName', -#' the groups could be 'Upper Red Lake: West' and 'Upper Red Lake: West-Central'. +#' be specific to your dataset. If the id_cols is 'TADA.MonitoringLocationName', +#' the groups could be 'UPPER RED LAKE: WEST' and 'UPPER RED LAKE: WEST-CENTRAL'. 
#' #' @return A single plotly scatterplot figure with one x-axis (Date/Time) and a #' left and right y-axis showing the units of the two characteristic groups @@ -974,7 +1036,7 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD plot.data <- as.data.frame(.data) # this subset must include all fields included in plot hover below - plot.data <- subset(plot.data, plot.data[, id_cols] %in% groups)[, c(id_cols, reqcols, depthcols, "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText")] + plot.data <- subset(plot.data, plot.data[, id_cols] %in% groups)[, c(id_cols, reqcols, depthcols, "ActivityStartDateTime", "TADA.MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText")] plot.data$name <- gsub("_NA", "", plot.data[, id_cols]) plot.data$name <- gsub("_", " ", plot.data$name) @@ -1069,7 +1131,7 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD "Result:", paste0(param1$TADA.ResultMeasureValue, " ", param1$TADA.ResultMeasure.MeasureUnitCode), "
", "Activity Start Date:", param1$ActivityStartDate, "
", "Activity Start Date Time:", param1$ActivityStartDateTime, "
", - "Monitoring Location Name:", param1$MonitoringLocationName, "
", + "Monitoring Location Name:", param1$TADA.MonitoringLocationName, "
", "Media:", param1$TADA.ActivityMediaName, "
", "Media Subdivision:", param1$ActivityMediaSubdivisionName, "
", "Result Depth:", paste0( @@ -1113,7 +1175,7 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD "Result:", paste0(param2$TADA.ResultMeasureValue, " ", param2$TADA.ResultMeasure.MeasureUnitCode), "
", "Activity Start Date:", param2$ActivityStartDate, "
", "Activity Start Date Time:", param2$ActivityStartDateTime, "
", - "Monitoring Location Name:", param2$MonitoringLocationName, "
", + "Monitoring Location Name:", param2$TADA.MonitoringLocationName, "
", "Media:", param2$TADA.ActivityMediaName, "
", "Media Subdivision:", param2$ActivityMediaSubdivisionName, "
", "Result Depth:", paste0( @@ -1144,7 +1206,7 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD #' @param .data TADA data frame where each row represents a unique record. Data frame must include #' the columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', #' 'TADA.ResultMeasure.MeasureUnitCode', 'ActivityStartDate', 'ActivityStartDateTime', -#' 'ActivityStartDateTime', 'MonitoringLocationName', 'TADA.ActivityMediaName', +#' 'ActivityStartDateTime', 'TADA.MonitoringLocationName', 'TADA.ActivityMediaName', #' 'ActivityMediaSubdivisionName', 'TADA.ResultDepthHeightMeasure.MeasureValue', #' 'TADA.ResultDepthHeightMeasure.MeasureValue', 'TADA.ResultDepthHeightMeasure.MeasureUnitCode', #' 'ActivityRelativeDepthName', 'TADA.ActivityDepthHeightMeasure.MeasureValue', @@ -1153,15 +1215,15 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD #' and TADA.ActivityBottomDepthHeightMeasure.MeasureUnitCode to run this function. #' #' @param group_col The column in the dataset used to identify the groups -#' plotted. Defaults to MonitoringLocationName. This input is flexible, and allows for the use of +#' plotted. Defaults to TADA.MonitoringLocationName. This input is flexible, and allows for the use of #' other identifiers such as StateCode, CountyCode or user-created groups based on concatenation #' of other variables (e.g. characteristic name, site type, site name, year, organization, etc.) #' #' @param groups A vector of up to four identifiers from the id_cols column #' to specify the groups that will be plotted for a TADA.ComparableDataIdentifier. #' These groups will be specific to your dataset. For example, in the example data set -#' Data_6Tribes_5y_Harmonized if group_col is 'MonitoringLocationName', the groups could be -#' 'Upper Red Lake: West', 'Upper Red Lake: West-Central', and 'Upper Red Lake: East Central'. 
+#' Data_6Tribes_5y_Harmonized if group_col is 'TADA.MonitoringLocationName', the groups could be +#' 'UPPER RED LAKE: WEST', 'UPPER RED LAKE: WEST-CENTRAL', and 'UPPER RED LAKE: EAST CENTRAL'. #' #' @return A plotly scatterplot(s) figure with one x-axis (Date/Time) and a #' left axis showing the units of a single TADA.ComparableDataIdentifier plotted on the same @@ -1188,12 +1250,12 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD #' # Filter the example data so it includes only one TADA.ComparableDataIdentifier #' df <- dplyr::filter(Data_6Tribes_5y_Harmonized, TADA.ComparableDataIdentifier %in% c("TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L")) #' # Creates a scatterplot of the three specified sites of interest in the same plot. -#' TADA_GroupedScatterplot(df, group_col = "MonitoringLocationName", groups = c("Upper Red Lake: West", "Upper Red Lake: West-Central", "Upper Red Lake: East Central")) +#' TADA_GroupedScatterplot(df, group_col = "TADA.MonitoringLocationName", groups = c("UPPER RED LAKE: WEST", "UPPER RED LAKE: WEST-CENTRAL", "UPPER RED LAKE: EAST CENTRAL")) #' -#' # If no groups are selected, return the 4 groups (by MonitoringLocationName) with the greatest number of results -#' TADA_GroupedScatterplot(df, group_col = "MonitoringLocationName") +#' # If no groups are selected, return the 4 groups (by TADA.MonitoringLocationName) with the greatest number of results +#' TADA_GroupedScatterplot(df, group_col = "TADA.MonitoringLocationName") #' -TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", groups = NULL) { +TADA_GroupedScatterplot <- function(.data, group_col = "TADA.MonitoringLocationName", groups = NULL) { # check .data is data.frame TADA_CheckType(.data, "data.frame", "Input object") @@ -1204,7 +1266,7 @@ TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", "TADA.ResultMeasure.MeasureUnitCode", "ActivityStartDate", "ActivityStartDateTime", - 
"MonitoringLocationName" + "TADA.MonitoringLocationName" ) # add user-selected group_col to list of required columns @@ -1303,7 +1365,7 @@ TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", plot.data <- as.data.frame(.data) # this subset must include all fields included in plot hover below - plot.data <- subset(plot.data, plot.data[, group_col] %in% groups)[, unique(c(group_col, reqcols, depthcols, "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText"))] + plot.data <- subset(plot.data, plot.data[, group_col] %in% groups)[, unique(c(group_col, reqcols, depthcols, "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "TADA.MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText"))] plot.data <- dplyr::arrange(plot.data, ActivityStartDate) @@ -1397,7 +1459,7 @@ TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", "Result:", paste0(param[[j]]$TADA.ResultMeasureValue, " ", param[[j]]$TADA.ResultMeasure.MeasureUnitCode), "
", "Activity Start Date:", param[[j]]$ActivityStartDate, "
", "Activity Start Date Time:", param[[j]]$ActivityStartDateTime, "
", - "Monitoring Location Name:", param[[j]]$MonitoringLocationName, "
", + "Monitoring Location Name:", param[[j]]$TADA.MonitoringLocationName, "
", "Media:", param[[j]]$TADA.ActivityMediaName, "
", "Media Subdivision:", param[[j]]$ActivityMediaSubdivisionName, "
", "Result Depth:", paste0( diff --git a/R/Filtering.R b/R/Filtering.R index f09626fb..26af29b2 100644 --- a/R/Filtering.R +++ b/R/Filtering.R @@ -44,7 +44,7 @@ TADA_FieldCounts <- function(.data, display = c("key", "most", "all"), character "ActivityMediaSubdivisionName", "ActivityCommentText", "ResultCommentText", - "MonitoringLocationTypeName", + "TADA.MonitoringLocationTypeName", "StateCode", "OrganizationFormalName", "TADA.CharacteristicName", @@ -79,8 +79,10 @@ TADA_FieldCounts <- function(.data, display = c("key", "most", "all"), character "ActivityRelativeDepthName", "ProjectIdentifier", "ProjectName", + "TADA.MonitoringLocationIdentifier", "MonitoringLocationIdentifier", "MonitoringLocationName", + "MonitoringLocationTypeName", "ActivityCommentText", "SampleAquifer", "HydrologicCondition", @@ -112,7 +114,6 @@ TADA_FieldCounts <- function(.data, display = c("key", "most", "all"), character "ResultDetectionQuantitationLimitUrl", "DetectionQuantitationLimitTypeName", "ProviderName", - "MonitoringLocationTypeName", "MonitoringLocationDescriptionText", "HUCEightDigitCode", "HorizontalCollectionMethodName", @@ -191,10 +192,14 @@ TADA_FieldValuesTable <- function(.data, field = "null", characteristicName = "n if (!field %in% names(.data)) { stop("Field input does not exist in dataset. Please populate the 'field' argument with a valid field name. Enter ?TADA_FieldValuesTable in console for more information.") } + + # change NAs to "NA" (character string) + .data[[field]][is.na(.data[[field]])] <- "NA" # filter to characteristic if provided if (!characteristicName %in% c("null")) { - .data <- subset(.data, .data$TADA.CharacteristicName %in% c(characteristicName)) + .data <- .data %>% + dplyr::filter(TADA.CharacteristicName %in% characteristicName) if (dim(.data)[1] < 1) { stop("Characteristic name(s) provided are not contained within the input dataset. 
Note that TADA converts characteristic names to ALL CAPS for easier harmonization.") } @@ -278,11 +283,9 @@ TADA_AnalysisDataFilter <- function(.data, # import MonitoringLocationTypeNames and TADA.Media.Flags sw.sitetypes <- utils::read.csv(system.file("extdata", "WQXMonitoringLocationTypeNameRef.csv", package = "EPATADA")) %>% dplyr::select(Name, TADA.Media.Flag) %>% - dplyr::rename( - ML.Media.Flag = TADA.Media.Flag, - MonitoringLocationTypeName = Name - ) - + dplyr::rename(ML.Media.Flag = TADA.Media.Flag) %>% + dplyr::mutate(MonitoringLocationTypeName = toupper(Name)) %>% + dplyr::select(-Name) # add TADA.Media.Flag column .data <- .data %>% @@ -300,7 +303,7 @@ TADA_AnalysisDataFilter <- function(.data, ActivityMediaSubdivisionName == "Surface Water" ~ "Surface Water", !ActivityMediaName %in% c("WATER", "Water", "water") ~ ActivityMediaName )) %>% - # add TADA.Media.Flag for additional rows based on MonitoringLocationTypeName + # add TADA.Media.Flag for additional rows based on TADA.MonitoringLocationTypeName dplyr::left_join(sw.sitetypes, by = "MonitoringLocationTypeName") %>% dplyr::mutate( TADA.Media.Flag = ifelse(is.na(TADA.Media.Flag), diff --git a/R/RequiredCols.R b/R/RequiredCols.R index d54daaac..67387a2c 100644 --- a/R/RequiredCols.R +++ b/R/RequiredCols.R @@ -170,7 +170,9 @@ require.cols <- c( "StateCode", "CountyCode", "MonitoringLocationName", # required + "TADA.MonitoringLocationName", # generated "MonitoringLocationTypeName", + "TADA.MonitoringLocationTypeName", #generated "MonitoringLocationDescriptionText", "LatitudeMeasure", "TADA.LatitudeMeasure", # generated diff --git a/R/Tables.R b/R/Tables.R index f5bb154c..9a98e6ad 100644 --- a/R/Tables.R +++ b/R/Tables.R @@ -21,7 +21,7 @@ TADA_SummarizeColumn <- function(.data, col = "TADA.CharacteristicName") { wqp_summary <- .data %>% dplyr::group_by(summ) %>% dplyr::summarize( - n_sites = length(unique(MonitoringLocationIdentifier)), + n_sites = length(unique(TADA.MonitoringLocationIdentifier)), 
n_records = length(TADA.ResultMeasureValue), .groups = "drop" ) %>% @@ -51,7 +51,7 @@ TADA_SummarizeColumn <- function(.data, col = "TADA.CharacteristicName") { #' columns 'TADA.ResultMeasureValue', 'TADA.ResultMeasure.MeasureUnitCode', #' 'TADA.ResultSampleFractionText', 'TADA.MethodSpeciationName', #' 'TADA.ComparableDataIdentifier', 'TADA.CensoredData.Flag', -#' 'DetectionQuantitationLimitTypeName', and 'MonitoringLocationIdentifier' to +#' 'DetectionQuantitationLimitTypeName', and 'TADA.MonitoringLocationIdentifier' to #' run this function. The 'TADA.ComparableDataIdentifier' can be added to the #' data frame by running the function TADA_CreateComparableID(). #' @@ -59,8 +59,8 @@ TADA_SummarizeColumn <- function(.data, col = "TADA.CharacteristicName") { #' 'TADA.ComparableDataIdentifier' as a grouping column. However, the user may #' want to summarize their dataset by additional grouping columns. For #' example, a user may want to create a summary table where each row is -#' specific to one comparable data identifier AND one monitoring location. -#' This input would look like: group_cols = c("MonitoringLocationIdentifier") +#' specific to one comparable data identifier AND one TADA monitoring location. 
+#' This input would look like: group_cols = c("TADA.MonitoringLocationIdentifier") #' #' @return stats table #' @@ -92,7 +92,7 @@ TADA_Stats <- function(.data, group_cols = c("TADA.ComparableDataIdentifier")) { dplyr::filter(!is.na(TADA.ResultMeasureValue)) %>% dplyr::group_by(dplyr::across(dplyr::all_of(group_cols))) %>% dplyr::summarize( - Location_Count = length(unique(MonitoringLocationIdentifier)), + Location_Count = length(unique(TADA.MonitoringLocationIdentifier)), Measurement_Count = length(unique(ResultIdentifier)), Non_Detect_Count = length(TADA.CensoredData.Flag[TADA.CensoredData.Flag %in% c("Non-Detect")]), Non_Detect_Pct = length(TADA.CensoredData.Flag[TADA.CensoredData.Flag %in% c("Non-Detect")]) / length(TADA.CensoredData.Flag) * 100, diff --git a/R/Transformations.R b/R/Transformations.R index affc79de..7b88cfb1 100644 --- a/R/Transformations.R +++ b/R/Transformations.R @@ -233,7 +233,7 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { #' @param .data TADA dataframe, ideally harmonized using TADA_HarmonizeSynonyms. #' If user wants to consider grouping N or P subspecies across multiple #' organizations, user should have run TADA_FindNearbySites and grouped all -#' nearby sites to one common MonitoringLocationIdentifier, +#' nearby sites to one common TADA.MonitoringLocationIdentifier, #' TADA.LatitudeMeasure, TADA.LongitudeMeasure, etc. #' @param sum_ref Optional. A custom summation reference dataframe the user has #' loaded into the R environment. 
Dataframe must have same columns as default @@ -267,7 +267,7 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m "TADA.ResultMeasure.MeasureUnitCode", "TADA.ResultMeasureValue", "ActivityStartDate", - "MonitoringLocationIdentifier", + "TADA.MonitoringLocationIdentifier", "ActivityTypeCode" ) TADA_CheckColumns(.data, expected_cols = req_cols) @@ -293,7 +293,7 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m "ActivityStartDate", # "ActivityStartDateTime", #does not make sense to include for daily agg "ActivityRelativeDepthName", - "MonitoringLocationIdentifier", + "TADA.MonitoringLocationIdentifier", "MonitoringLocationName", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure", @@ -317,7 +317,7 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m thecols <- grpcols[!grpcols %in% c("TADA.ComparableDataIdentifier")] # # find nearby sites - # nearsites = unique(sum_dat[,c("MonitoringLocationIdentifier","TADA.LatitudeMeasure","TADA.LongitudeMeasure")]) + # nearsites = unique(sum_dat[,c("TADA.MonitoringLocationIdentifier","TADA.LatitudeMeasure","TADA.LongitudeMeasure")]) # nearsites = TADA_FindNearbySites(nearsites) # nearsites = subset(nearsites, !nearsites$TADA.NearbySiteGroups%in%c("No nearby sites")) diff --git a/R/Utilities.R b/R/Utilities.R index a0a5860d..8f24bc2d 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -87,7 +87,8 @@ utils::globalVariables(c( "palette.colors", "rect", "rgb", "text", "CodeNoSpeciation", "ResultMeasure.MeasureUnitCode.Upper", "TADA.MonitoringLocationIdentifier", "StringA", "StringB", "MeasureUnitCode.match", "TADA.ActivityTopDepthHeightMeasure.MeasureValue", "group_id", "time_diff_lead", "time_diff_lag", - "NResults", "missing.group", "TADA.PairingGroup", "TADA.PairingGroup.Rank", "timediff" + "NResults", "missing.group", "TADA.PairingGroup", "TADA.PairingGroup.Rank", "timediff", + "TADA.MonitoringLocationName", "TADA.MonitoringLocationTypeName" 
)) # global variables for tribal feature layers used in TADA_OverviewMap in Utilities.R @@ -168,6 +169,9 @@ VATribeUrl <- "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer #' TADA.ActivityBottomDepthHeightMeasure.MeasureUnitCode (character) #' TADA.LatitudeMeasure (numeric) #' TADA.LongitudeMeasure (numeric) +#' TADA.MonitoringLocationIdentifier (character) +#' TADA.MonitoringLocationName (character) +#' TADA.MonitoringLocationTypeName (character) #' #' Please note that the number of TADA-specific depth columns in the returned #' dataframe depends upon the number of depth columns with one or more results @@ -260,6 +264,27 @@ TADA_AutoClean <- function(.data) { # create uppercase version of original ResultMeasure.MeasureUnitCode .data$TADA.ResultMeasure.MeasureUnitCode <- toupper(.data$ResultMeasure.MeasureUnitCode) } + + if ("TADA.MonitoringLocationIdentifier" %in% colnames(.data)) { + .data <- .data + } else { + # create uppercase version of original MonitoringLocationIdentifier + .data$TADA.MonitoringLocationIdentifier <- toupper(.data$MonitoringLocationIdentifier) + } + + if ("TADA.MonitoringLocationName" %in% colnames(.data)) { + .data <- .data + } else { + # create uppercase version of original MonitoringLocationName + .data$TADA.MonitoringLocationName <- toupper(.data$MonitoringLocationName) + } + + if ("TADA.MonitoringLocationTypeName" %in% colnames(.data)) { + .data <- .data + } else { + # create uppercase version of original MonitoringLocationTypeName + .data$TADA.MonitoringLocationTypeName <- toupper(.data$MonitoringLocationTypeName) + } # Transform "Dissolved oxygen (DO)" characteristic name to "DISSOLVED OXYGEN SATURATION" IF # result unit is "%" or "% SATURATN". @@ -734,7 +759,7 @@ TADA_CreateComparableID <- function(.data) { #' #' @param delimiter Character argument The character used to delimit the string passed in #' delimited_string. Defaults to a comma. -#' +#' #' @return String. 
#' #' @export @@ -758,9 +783,27 @@ TADA_FormatDelimitedString <- function(delimited_string, delimiter = ",") { #' @param .data TADA dataframe OR TADA sites dataframe #' @param dist_buffer Numeric. The maximum distance (in meters) two sites can be #' from one another to be considered "nearby" and grouped together. +#' +#' @param org_hierarchy Vector of organization identifiers that acts as the order in which the +#' function should select representative metadata for grouped sites based on the organization +#' that collected the data. If left blank, the function does not factor organization into the +#' metadata selection process. When a vector is provided, the metadata will first be selected by +#' organization and the "meta_select" argument will only be applied in cases where more than +#' one set of metadata per site grouping is available from the highest ranking organization +#' available. +#' +#' @param meta_select Character argument to determine how metadata should be selected if no +#' org_hierarchy is specified or if multiple options for metadata from the same organization +#' exist. Options are "oldest", which selects the metadata associated with the oldest result from +#' the grouped nearby sites, "newest", which selects the metadata associated with the newest +#' result from the grouped nearby sites, "count", which selects the metadata associated with the +#' greatest number of results, and "random", which selects random metadata from the site group. +#' The default is meta_select = "random". #' #' @return Input dataframe with a TADA.MonitoringLocationIdentifier column that indicates -#' the nearby site groups each monitoring location belongs to. +#' the nearby site groups each monitoring location belongs to. Related metadata, including +#' TADA.MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure, and +#' TADA.MonitoringLocationTypeName are added to the input df. 
#' #' @export #' @@ -774,7 +817,10 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100) { TADA_CheckColumns(.data, required_cols) # create spatial dataset based on sites - data_sf <- unique(.data[, c("MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure")]) + data_sf <- .data %>% + dplyr::select("MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure") %>% + unique() + # convert to sf object data_sf <- sf::st_as_sf(data_sf, coords = c("TADA.LongitudeMeasure", "TADA.LatitudeMeasure"), @@ -816,36 +862,45 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100) { # get unique groups (since represented multiple times for each site looped through, above) groups <- unique(groups) - if (dim(groups)[1] > 0) { # if there are groups of nearby sites... - # create group ID's for easier understanding - # grp <- data.frame(TADA.SiteGroup = unique(groups$TADA.SiteGroup), TADA.SiteGroupID = paste0("Group_", 1:length(unique(groups$TADA.SiteGroup)))) - # groups <- merge(groups, grp, all.x = TRUE) - # groups <- unique(groups[, !names(groups) %in% c("TADA.SiteGroup")]) + if (dim(groups)[1] > 0) { + # create group id numbers + group_ids <- groups %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier) %>% + dplyr::mutate(TADA.SiteGroup = dplyr::cur_group_id()) %>% + dplyr::ungroup() %>% + dplyr::group_by(MonitoringLocationIdentifier) %>% + dplyr::mutate(TADA.MonitoringLocationIdentifier = paste(TADA.MonitoringLocationIdentifier, collapse = ","), + TADA.SiteGroup = paste(TADA.SiteGroup, collapse = ",")) %>% + dplyr::distinct() %>% + dplyr::ungroup() # find any sites within multiple groups - summ_sites <- groups %>% + summ_sites <- group_ids %>% dplyr::group_by(MonitoringLocationIdentifier) %>% dplyr::mutate(GroupCount = 1:length(MonitoringLocationIdentifier)) # pivot wider if a site belongs to multiple groups - groups_wide <- merge(groups, summ_sites, all.x = TRUE) - groups_wide <- tidyr::pivot_wider(groups_wide, id_cols = 
"MonitoringLocationIdentifier", names_from = "GroupCount", names_prefix = "TADA.MonitoringLocationIdentifier", values_from = "TADA.MonitoringLocationIdentifier") + groups_prep <- merge(group_ids, summ_sites, all.x = TRUE) + groups_wide <- tidyr::pivot_wider(groups_prep, id_cols = "MonitoringLocationIdentifier", names_from = "GroupCount", names_prefix = "TADA.MonitoringLocationIdentifier", values_from = "TADA.MonitoringLocationIdentifier") + ids_wide <- tidyr::pivot_wider(groups_prep, id_cols = "MonitoringLocationIdentifier", names_from = "GroupCount", names_prefix = "TADA.SiteGroup", values_from = "TADA.SiteGroup") # merge data to site groupings .data <- merge(.data, groups_wide, all.x = TRUE) + .data <- merge(.data, ids_wide, all.x = TRUE) # concatenate and move site id cols to right place grpcols <- names(.data)[grepl("TADA.MonitoringLocationIdentifier", names(.data))] + idcols <- names (.data)[grepl("TADA.SiteGroup", names(.data))] - .data <- .data %>% tidyr::unite(col = TADA.MonitoringLocationIdentifier, dplyr::all_of(grpcols), sep = ", ", na.rm = TRUE) - } - - if (!"TADA.MonitoringLocationIdentifier" %in% colnames(.data)) { - .data$TADA.MonitoringLocationIdentifier <- NA + .data <- .data %>% + tidyr::unite(col = TADA.MonitoringLocationIdentifier.New, dplyr::all_of(grpcols), sep = ", ", na.rm = TRUE) %>% + dplyr::mutate(TADA.MonitoringLocationIdentifier = ifelse(!is.na(TADA.MonitoringLocationIdentifier.New), + TADA.MonitoringLocationIdentifier.New, TADA.MonitoringLocationIdentifier)) + + .data <- .data %>% + tidyr::unite(col = TADA.SiteGroup, dplyr::all_of(idcols), sep = ", ", na.rm = TRUE) %>% + dplyr::mutate(TADA.SiteGroup = ifelse(TADA.SiteGroup == "", "No nearby sites", TADA.SiteGroup)) } - .data <- .data %>% - dplyr::mutate(TADA.MonitoringLocationIdentifier = ifelse(TADA.MonitoringLocationIdentifier == "", MonitoringLocationIdentifier, TADA.MonitoringLocationIdentifier)) - if (dim(groups)[1] == 0) { # #if no groups, give a TADA.MonitoringLocationIdentifier 
column filled with NA print("No nearby sites detected using input buffer distance.") } @@ -858,18 +913,129 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100) { return(.data) } + # select and assign metadata randomly for grouped sites when meta_select equals "random" + + if(meta_select == "random") { + + select_meta <- grouped_sites %>% + dplyr::select(TADA.MonitoringLocationIdentifier.New1, TADA.MonitoringLocationName, + TADA.LatitudeMeasure, TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName) %>% + dplyr::distinct() %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier.New1) %>% + dplyr::slice_sample(n = 1) %>% + dplyr::rename(TADA.MonitoringLocationName.New = TADA.MonitoringLocationName, + TADA.LatitudeMeasure.New = TADA.LatitudeMeasure, + TADA.LongitudeMeasure.New = TADA.LongitudeMeasure, + TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName) %>% + dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). Metadata were selected randomly") + + } + + if(meta_select == "oldest") { + + select_meta <- grouped_sites %>% + dplyr::select(TADA.MonitoringLocationIdentifier.New1, TADA.MonitoringLocationName, + TADA.LatitudeMeasure, TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName, + ActivityStartDate) %>% + dplyr::distinct() %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier.New1) %>% + dplyr::slice_min(ActivityStartDate) %>% + dplyr::slice_sample(n = 1) %>% + dplyr::rename(TADA.MonitoringLocationName.New = TADA.MonitoringLocationName, + TADA.LatitudeMeasure.New = TADA.LatitudeMeasure, + TADA.LongitudeMeasure.New = TADA.LongitudeMeasure, + TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName) %>% + dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). 
Metadata were selected from the oldest result available.") + + } + + if(meta_select == "newest") { + + select_meta <- grouped_sites %>% + dplyr::select(TADA.MonitoringLocationIdentifier.New1, TADA.MonitoringLocationName, + TADA.LatitudeMeasure, TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName, + ActivityStartDate) %>% + dplyr::distinct() %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier.New1) %>% + dplyr::slice_max(ActivityStartDate) %>% + dplyr::slice_sample(n = 1) %>% + dplyr::rename(TADA.MonitoringLocationName.New = TADA.MonitoringLocationName, + TADA.LatitudeMeasure.New = TADA.LatitudeMeasure, + TADA.LongitudeMeasure.New = TADA.LongitudeMeasure, + TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName) %>% + dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). Metadata were selected from the newest result available.") + + } + + if(meta_select == "count") { + + select_meta <- grouped_sites %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier.New1) %>% + dplyr::mutate(NCount = length(TADA.ResultMeasureValue)) %>% + dplyr::select(TADA.MonitoringLocationIdentifier, TADA.MonitoringLocationName, + TADA.LatitudeMeasure, TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName, + NCount) %>% + dplyr::distinct() %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier) %>% + dplyr::slice_max(NCount) %>% + dplyr::slice_sample(n = 1) %>% + dplyr::rename(TADA.MonitoringLocationName.New = TADA.MonitoringLocationName, + TADA.LatitudeMeasure.New = TADA.LatitudeMeasure, + TADA.LongitudeMeasure.New = TADA.LongitudeMeasure, + TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName) %>% + dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). 
Metadata were selected from the newest result available.") + + } + + .data <- .data %>% + dplyr::full_join(select_meta, by = dplyr::join_by(TADA.MonitoringLocationIdentifier.New1)) + + .data <- .data %>% + dplyr::ungroup() %>% + dplyr::mutate(TADA.MonitoringLocationName = ifelse(!ResultIdentifier %in% grouped_resultids, + TADA.MonitoringLocationName, TADA.MonitoringLocationName.New), + TADA.LatitudeMeasure = ifelse(!ResultIdentifier %in% grouped_resultids, + TADA.LatitudeMeasure, TADA.LatitudeMeasure.New), + TADA.LongitudeMeasure = ifelse(!ResultIdentifier %in% grouped_resultids, + TADA.LongitudeMeasure, TADA.LongitudeMeasure.New), + TADA.MonitoringLocationTypeName = ifelse(!ResultIdentifier %in% grouped_resultids, + TADA.MonitoringLocationTypeName, TADA.MonitoringLocationTypeName.New), + TADA.MonitoringLocationIdentifier = ifelse(TADA.MonitoringLocationIdentifier.New1 == "", + TADA.MonitoringLocationIdentifier, TADA.MonitoringLocationIdentifier.New1)) %>% + dplyr::select(-TADA.MonitoringLocationIdentifier.New1, -TADA.MonitoringLocationName.New, + -TADA.LatitudeMeasure.New, -TADA.LongitudeMeasure.New, + -TADA.MonitoringLocationTypeName.New) + } + + if (dim(groups)[1] == 0) { + + print("No nearby sites detected using input buffer distance.") + + .data <- .data %>% + dplyr::mutate(TADA.NearbySites.Flag = "No nearby sites detected using input buffer distance.") + } + + .data <- TADA_OrderCols(.data) %>% + dplyr::mutate(TADA.NearbySites.Flag = ifelse(is.na(TADA.NearbySites.Flag), + "No nearby sites detected using input buffer distance.", + TADA.NearbySites.Flag)) + + return(.data) +} + #' Get grouped monitoring stations that are near each other #' #' This function takes a TADA dataset that contains grouped nearby monitoring stations #' and returns a unique dataset of the original MonitoringLocationIdentifier, the grouped -#' TADA.MonitoringLocationIdentifier, TADA.LongitudeMeasure, and TADA.LatitudeMeasure, -#' filtered for only those stations that have a nearby 
station. +#' TADA.MonitoringLocationIdentifier, as well as the original and TADA-prefixed LongitudeMeasure, +#' LatitudeMeasure, MonitoringLocationName, and MonitoringLocationTypeName, filtered for only those +#' stations that have a nearby station. #' #' @param .data TADA dataframe #' -#' @return New dataframe with unique values for MonitoringLocationIdentifier, -#' TADA.MonitoringLocationIdentifier, TADA.LongitudeMeasure, and TADA.LatitudeMeasure +#' @return New dataframe with unique combinations of original and TADA MonitoringLocationIdentifier, +#' LongitudeMeasure, LatitudeMeasure, MonitoringLocationName, and MonitoringLocationTypeName. #' #' @export #' @@ -878,13 +1044,24 @@ TADA_GetUniqueNearbySites <- function(.data) { TADA_CheckType(.data, "data.frame", "Input object") # .data required columns - required_cols <- c("MonitoringLocationIdentifier", "TADA.MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure") + required_cols <- c("MonitoringLocationIdentifier", "TADA.MonitoringLocationIdentifier", + "MonitoringLocationName", "TADA.MonitoringLocationName", + "LongitudeMeasure", "TADA.LongitudeMeasure", + "LatitudeMeasure", "TADA.LatitudeMeasure", + "MonitoringLocationTypeName", "TADA.MonitoringLocationTypeName", + "MonitoringLocationDescriptionText", "TADA.NearbySites.Flag") # check .data has required columns TADA_CheckColumns(.data, required_cols) - .data <- .data[c("MonitoringLocationIdentifier", "MonitoringLocationName", "MonitoringLocationTypeName", "MonitoringLocationDescriptionText", "TADA.MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure")] - .data <- unique(dplyr::filter(.data, grepl(",", TADA.MonitoringLocationIdentifier))) - + # filter only for locations with nearby sites + .data <- .data %>% + dplyr::filter(!is.na(TADA.NearbySites.Flag), + TADA.NearbySites.Flag != "No nearby sites detected using input buffer distance.") %>% + # retain only required columns + 
dplyr::select(dplyr::all_of(required_cols)) %>% + # retain only unique records + dplyr::distinct() + return(.data) } @@ -1000,7 +1177,7 @@ TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FAL #' # Select maximum value per day, site, comparable data identifier, result detection condition, #' # and activity type code. Clean all non-maximum measurements from grouped data. #' Data_6Tribes_5y_agg <- TADA_AggregateMeasurements(Data_6Tribes_5y, -#' grouping_cols = c("ActivityStartDate", "MonitoringLocationIdentifier", +#' grouping_cols = c("ActivityStartDate", "TADA.MonitoringLocationIdentifier", #' "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", #' "ActivityTypeCode"), #' agg_fun = "max", clean = TRUE) @@ -1008,11 +1185,11 @@ TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FAL #' # Calculate a mean value per day, site, comparable data identifier, result detection condition, #' # and activity type code. Keep all measurements used to calculate mean measurement. 
#' Data_6Tribes_5y_agg <- TADA_AggregateMeasurements(Data_6Tribes_5y, -#' grouping_cols = c("ActivityStartDate", "MonitoringLocationIdentifier", +#' grouping_cols = c("ActivityStartDate", "TADA.MonitoringLocationIdentifier", #' "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", #' "ActivityTypeCode"), #' agg_fun = "mean", clean = FALSE) -TADA_AggregateMeasurements <- function(.data, grouping_cols = c("ActivityStartDate", "MonitoringLocationIdentifier", "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", "ActivityTypeCode"), agg_fun = c("max", "min", "mean"), clean = TRUE) { +TADA_AggregateMeasurements <- function(.data, grouping_cols = c("ActivityStartDate", "TADA.MonitoringLocationIdentifier", "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", "ActivityTypeCode"), agg_fun = c("max", "min", "mean"), clean = TRUE) { TADA_CheckColumns(.data, grouping_cols) agg_fun <- match.arg(agg_fun) diff --git a/R/test.R b/R/test.R new file mode 100644 index 00000000..b57baa36 --- /dev/null +++ b/R/test.R @@ -0,0 +1,100 @@ +TADA_FindNearbySites <- function(.data, dist_buffer = 100) { + # check .data is data.frame + TADA_CheckType(.data, "data.frame", "Input object") + + # .data required columns + required_cols <- c("MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure") + # check .data has required columns + TADA_CheckColumns(.data, required_cols) + + # create spatial dataset based on sites + data_sf <- .data %>% + dplyr::select("MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure") %>% + unique() + + # convert to sf object + data_sf <- sf::st_as_sf(data_sf, + coords = c("TADA.LongitudeMeasure", "TADA.LatitudeMeasure"), + # Change to your CRS + crs = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" + ) + # create a distance matrix in meters + dist.mat <- data.frame(sf::st_distance(data_sf)) # Great Circle distance since in lat/lon + + row.names(dist.mat) <-
data_sf$MonitoringLocationIdentifier + colnames(dist.mat) <- data_sf$MonitoringLocationIdentifier + + # convert distances to those within buffer (1) and beyond buffer (0) + dist.mat1 <- apply(dist.mat, c(1, 2), function(x) { + if (x <= dist_buffer) { + x <- 1 + } else { + x <- 0 + } + }) + + # create empty dataframe for groups + groups <- data.frame() + + # loop through distance matrix and extract site groups that are within the buffer distance from one another + for (i in seq_len(nrow(dist.mat1))) { + fsite <- rownames(dist.mat1)[i] # focal site + dat <- data.frame(Count = dist.mat1[i, ]) # get focal site count row as a column + dat$MonitoringLocationIdentifier <- colnames(dist.mat1) # give df site names along with counts + sites <- dat$MonitoringLocationIdentifier[dat$Count == 1] # filter to sites within buffer + sites1 <- sites[!sites %in% fsite] # get site list within buffer that does not include focal site + if (length(sites1) > 0) { # if this list is greater than 0, combine sites within buffer into data frame + df <- data.frame(MonitoringLocationIdentifier = sites, TADA.MonitoringLocationIdentifier = paste0(sites, collapse = ",")) + df[c("TADA.MonitoringLocationIdentifier")] <- lapply(df[c("TADA.MonitoringLocationIdentifier")], TADA_FormatDelimitedString) + groups <- plyr::rbind.fill(groups, df) + } + } + + # get unique groups (since represented multiple times for each site looped through, above) + groups <- unique(groups) + + if (dim(groups)[1] > 0) { + # create group id numbers + group_ids <- groups %>% + dplyr::group_by(TADA.MonitoringLocationIdentifier) %>% + dplyr::mutate(TADA.SiteGroup = dplyr::cur_group_id()) %>% + dplyr::ungroup() %>% + dplyr::group_by(MonitoringLocationIdentifier) %>% + dplyr::mutate(TADA.MonitoringLocationIdentifier = paste(TADA.MonitoringLocationIdentifier, collapse = ","), + TADA.SiteGroup = paste(TADA.SiteGroup, collapse = ",")) %>% + dplyr::distinct() %>% + dplyr::ungroup() + + # find any sites within multiple groups + summ_sites
<- group_ids %>% + dplyr::group_by(MonitoringLocationIdentifier) %>% + dplyr::mutate(GroupCount = seq_along(MonitoringLocationIdentifier)) + + # pivot wider if a site belongs to multiple groups + groups_prep <- merge(group_ids, summ_sites, all.x = TRUE) + groups_wide <- tidyr::pivot_wider(groups_prep, id_cols = "MonitoringLocationIdentifier", names_from = "GroupCount", names_prefix = "TADA.MonitoringLocationIdentifier", values_from = "TADA.MonitoringLocationIdentifier") + ids_wide <- tidyr::pivot_wider(groups_prep, id_cols = "MonitoringLocationIdentifier", names_from = "GroupCount", names_prefix = "TADA.SiteGroup", values_from = "TADA.SiteGroup") + # merge data to site groupings + .data <- merge(.data, groups_wide, all.x = TRUE) + .data <- merge(.data, ids_wide, all.x = TRUE) + + # concatenate and move site id cols to right place + grpcols <- names(.data)[grepl("TADA.MonitoringLocationIdentifier", names(.data))] + idcols <- names(.data)[grepl("TADA.SiteGroup", names(.data))] + + .data <- .data %>% tidyr::unite(col = TADA.MonitoringLocationIdentifier.New, dplyr::all_of(grpcols), sep = ", ", na.rm = TRUE) + .data <- .data %>% tidyr::unite(col = TADA.SiteGroup, dplyr::all_of(idcols), sep = ", ", na.rm = TRUE) + } + + if (dim(groups)[1] == 0) { # if no groups were found, only notify the user; no site group columns are added + print("No nearby sites detected using input buffer distance.") + } + + # order columns + if ("ResultIdentifier" %in% names(.data)) { + .data <- TADA_OrderCols(.data) + } + + return(.data) +} + diff --git a/data/Data_6Tribes_5y.rda b/data/Data_6Tribes_5y.rda index c86ea92a..8f06dfd3 100644 Binary files a/data/Data_6Tribes_5y.rda and b/data/Data_6Tribes_5y.rda differ diff --git a/data/Data_6Tribes_5y_Harmonized.rda b/data/Data_6Tribes_5y_Harmonized.rda index a84ab235..fb58f968 100644 Binary files a/data/Data_6Tribes_5y_Harmonized.rda and b/data/Data_6Tribes_5y_Harmonized.rda differ diff --git a/data/Data_NCTCShepherdstown_HUC12.rda
b/data/Data_NCTCShepherdstown_HUC12.rda index 21d5766e..37624819 100644 Binary files a/data/Data_NCTCShepherdstown_HUC12.rda and b/data/Data_NCTCShepherdstown_HUC12.rda differ diff --git a/data/Data_Nutrients_UT.rda b/data/Data_Nutrients_UT.rda index 98684fcb..d3c049d5 100644 Binary files a/data/Data_Nutrients_UT.rda and b/data/Data_Nutrients_UT.rda differ diff --git a/data/Data_R5_TADAPackageDemo.rda b/data/Data_R5_TADAPackageDemo.rda index 9d2fe716..80e3d2aa 100644 Binary files a/data/Data_R5_TADAPackageDemo.rda and b/data/Data_R5_TADAPackageDemo.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index 79616f83..26e2c881 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -323,6 +323,8 @@ dataRetrieval datetimes depthcat devtools +df +dlist dplyr du eXchange @@ -412,6 +414,7 @@ waterdata waterqualitydata webserviceurl wikipedia +wqp wqx writeLayer www diff --git a/man/TADA_AggregateMeasurements.Rd b/man/TADA_AggregateMeasurements.Rd index 6fece3d5..2fe2760f 100644 --- a/man/TADA_AggregateMeasurements.Rd +++ b/man/TADA_AggregateMeasurements.Rd @@ -6,7 +6,7 @@ \usage{ TADA_AggregateMeasurements( .data, - grouping_cols = c("ActivityStartDate", "MonitoringLocationIdentifier", + grouping_cols = c("ActivityStartDate", "TADA.MonitoringLocationIdentifier", "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", "ActivityTypeCode"), agg_fun = c("max", "min", "mean"), clean = TRUE @@ -46,7 +46,7 @@ data(Data_6Tribes_5y) # Select maximum value per day, site, comparable data identifier, result detection condition, # and activity type code. Clean all non-maximum measurements from grouped data. 
Data_6Tribes_5y_agg <- TADA_AggregateMeasurements(Data_6Tribes_5y, - grouping_cols = c("ActivityStartDate", "MonitoringLocationIdentifier", + grouping_cols = c("ActivityStartDate", "TADA.MonitoringLocationIdentifier", "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", "ActivityTypeCode"), agg_fun = "max", clean = TRUE) @@ -54,7 +54,7 @@ Data_6Tribes_5y_agg <- TADA_AggregateMeasurements(Data_6Tribes_5y, # Calculate a mean value per day, site, comparable data identifier, result detection condition, # and activity type code. Keep all measurements used to calculate mean measurement. Data_6Tribes_5y_agg <- TADA_AggregateMeasurements(Data_6Tribes_5y, - grouping_cols = c("ActivityStartDate", "MonitoringLocationIdentifier", + grouping_cols = c("ActivityStartDate", "TADA.MonitoringLocationIdentifier", "TADA.ComparableDataIdentifier", "ResultDetectionConditionText", "ActivityTypeCode"), agg_fun = "mean", clean = FALSE) diff --git a/man/TADA_AutoClean.Rd b/man/TADA_AutoClean.Rd index bc6d23a5..5cbddaa2 100644 --- a/man/TADA_AutoClean.Rd +++ b/man/TADA_AutoClean.Rd @@ -38,6 +38,9 @@ TADA.ActivityBottomDepthHeightMeasure.MeasureValueDataTypes.Flag (character) TADA.ActivityBottomDepthHeightMeasure.MeasureUnitCode (character) TADA.LatitudeMeasure (numeric) TADA.LongitudeMeasure (numeric) +TADA.MonitoringLocationIdentifier (character) +TADA.MonitoringLocationName (character) +TADA.MonitoringLocationTypeName (character) Please note that the number of TADA-specific depth columns in the returned dataframe depends upon the number of depth columns with one or more results diff --git a/man/TADA_Boxplot.Rd b/man/TADA_Boxplot.Rd index 27505e54..422304d6 100644 --- a/man/TADA_Boxplot.Rd +++ b/man/TADA_Boxplot.Rd @@ -49,11 +49,11 @@ Boxplots_TPbyOrg[[2]] # Create multiple boxplots with additional grouping columns and view the first # plot in list. 
In this example, we will group data in the input dataframe -# by both the TADA.ComparableDataIdentifier and the MonitoringLocationTypeName +# by both the TADA.ComparableDataIdentifier and the TADA.MonitoringLocationTypeName # (e.g. stream, reservoir, canal, etc.) # Load example data frame: data(Data_Nutrients_UT) -Boxplot_output <- TADA_Boxplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationTypeName")) +Boxplot_output <- TADA_Boxplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationTypeName")) # This example generates 32 box plots. Boxplot_output[[2]] Boxplot_output[[25]] diff --git a/man/TADA_CalculateTotalNP.Rd b/man/TADA_CalculateTotalNP.Rd index 5d480509..83ae9854 100644 --- a/man/TADA_CalculateTotalNP.Rd +++ b/man/TADA_CalculateTotalNP.Rd @@ -10,7 +10,7 @@ TADA_CalculateTotalNP(.data, sum_ref, daily_agg = c("max", "min", "mean")) \item{.data}{TADA dataframe, ideally harmonized using TADA_HarmonizeSynonyms. If user wants to consider grouping N or P subspecies across multiple organizations, user should have run TADA_FindNearbySites and grouped all -nearby sites to one common MonitoringLocationIdentifier, +nearby sites to one common TADA.MonitoringLocationIdentifier, TADA.LatitudeMeasure, TADA.LongitudeMeasure, etc.} \item{sum_ref}{Optional. A custom summation reference dataframe the user has diff --git a/man/TADA_DepthProfilePlot.Rd b/man/TADA_DepthProfilePlot.Rd index 79babfd8..7a9de924 100644 --- a/man/TADA_DepthProfilePlot.Rd +++ b/man/TADA_DepthProfilePlot.Rd @@ -29,8 +29,8 @@ For example, the groups could be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_N These groups will be specific to your data frame. The TADA_IDDepthProfiles can be used to identify available groups.} -\item{location}{A single MonitoringLocationIdentifier to plot the depth profile. 
-A MonitoringLocationIdentifier must be entered or an error will be returned and +\item{location}{A single TADA.MonitoringLocationIdentifier to plot the depth profile. +A TADA.MonitoringLocationIdentifier must be entered or an error will be returned and no depth profile will be created.} \item{activity_date}{The date the depth profile results were collected.} @@ -52,7 +52,7 @@ depth units should be used for the plot. Default is "m".} } \value{ A depth profile plot displaying up to three parameters for a single -MonitoringLocationIdentifier. Displaying depth categories is optional with the +TADA.MonitoringLocationIdentifier. Displaying depth categories is optional with the depthcat argument. } \description{ diff --git a/man/TADA_FindNearbySites.Rd b/man/TADA_FindNearbySites.Rd index ff7d785b..cac9f66c 100644 --- a/man/TADA_FindNearbySites.Rd +++ b/man/TADA_FindNearbySites.Rd @@ -4,17 +4,40 @@ \alias{TADA_FindNearbySites} \title{Identify and group nearby monitoring locations (UNDER ACTIVE DEVELOPMENT)} \usage{ -TADA_FindNearbySites(.data, dist_buffer = 100) +TADA_FindNearbySites( + .data, + dist_buffer = 100, + meta_select = "random", + org_hierarchy = "none" +) } \arguments{ \item{.data}{TADA dataframe OR TADA sites dataframe} \item{dist_buffer}{Numeric. The maximum distance (in meters) two sites can be from one another to be considered "nearby" and grouped together.} + +\item{meta_select}{Character argument to determine how metadata should be selected if no +org_hierarchy is specified or if multiple options for metadata from the same organization +exist. Options are "oldest", which selects the metadata associated with the oldest result from +the grouped nearby sites, "newest", which selects the metadata associated with the newest +result from the grouped nearby sites, "count" which selects the metadata associated with the +greatest number of results, and "random" which selects random metadata from the site group. 
+The default is meta_select = "random".} + +\item{org_hierarchy}{Vector of organization identifiers that acts as the order in which the +function should select representative metadata for grouped sites based on the organization +that collected the data. If left blank, the function does not factor organization in to the +metadata selection process. When a vector is provided, the metadata will first be selected by +organization and the "meta_select" argument will only be applied in cases where more than +one set of metadata per site grouping are available from the highest ranking organization +available.} } \value{ Input dataframe with a TADA.MonitoringLocationIdentifier column that indicates -the nearby site groups each monitoring location belongs to. +the nearby site groups each monitoring location belongs to. Related metadata, including +TADA.MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure, and +TADA.MonitoringLocationTypeName are added to the input df. } \description{ This function takes a TADA dataset and creates a distance matrix for all diff --git a/man/TADA_FlagDepthCategory.Rd b/man/TADA_FlagDepthCategory.Rd index c0ebdd50..a82c9ce0 100644 --- a/man/TADA_FlagDepthCategory.Rd +++ b/man/TADA_FlagDepthCategory.Rd @@ -43,12 +43,12 @@ will still be determined.} "max". The default is dailyagg = "none". When dailyagg = "none", all results will be retained. When dailyagg == "avg", the mean value in each group of results (as determined by the depth category) will be identified or calculated for each -MonitoringLocation, ActivityDate, Organization ID, and TADA.CharacteristicName combination. +TADA.MonitoringLocation, ActivityDate, Organization ID, and TADA.CharacteristicName combination. When dailyagg == "min" or when dailyagg == "max", the min or max value in each group of results (as determined by the depth category) will -be identified or calculated for each MonitoringLocation, ActivityDate, and TADA.CharacteristicName -combination. 
An additional column, TADA.DepthProfileAggregation.Flag will be added -to describe aggregation.} +be identified or calculated for each TADA.MonitoringLocation, ActivityDate, and +TADA.CharacteristicName combination. An additional column, TADA.DepthProfileAggregation.Flag will +be added to describe aggregation.} \item{aggregatedonly}{Boolean argument with options "TRUE" or "FALSE". The default is aggregatedonly = "FALSE" which means that all results are returned. @@ -78,7 +78,7 @@ bottom up to 2m (or user specified value) from bottom = "Bottom", and all depths in between the Surface and Bottom are assigned to the "Middle" category. } \details{ -When more than one result is available for a MonitoringLocationIdentifier, +When more than one result is available for a TADA.MonitoringLocationIdentifier, ActivityStartDate, OrganizationIdentifier, and TADA.CharacteristicName, the user can choose a single result value (average, max, or min value) to use for that day and location. If results vary with depth, the user may also define whether diff --git a/man/TADA_FlaggedSitesMap.Rd b/man/TADA_FlaggedSitesMap.Rd index ce2e43bf..725fd9d1 100644 --- a/man/TADA_FlaggedSitesMap.Rd +++ b/man/TADA_FlaggedSitesMap.Rd @@ -4,13 +4,19 @@ \alias{TADA_FlaggedSitesMap} \title{Create Flagged Sites Map} \usage{ -TADA_FlaggedSitesMap(.data) +TADA_FlaggedSitesMap(.data, identifier = "tada") } \arguments{ \item{.data}{TADA data frame containing the data downloaded from the WQP, where each row represents a unique data record. 
Data frame must include the columns -'MonitoringLocationIdentifier','MonitoringLocationName','TADA.LatitudeMeasure', +'TADA.MonitoringLocationIdentifier','TADA.MonitoringLocationName','TADA.LatitudeMeasure', and 'TADA.LongitudeMeasure' to run this function.} + +\item{identifier}{A character argument to select whether the TADA.MonitoringLocationIdentifier +(which may include grouped sites if TADA_FindNearbySites has been run) or the original WQP +MonitoringLocationIdentifier and associated coordinates are used for mapping. Identifier equals +"tada" is the default and will use the TADA prefixed monitoring location columns. Identifier +equals "wqp" will use the originals.} } \value{ A leaflet map that shows all sites in the data frame that contain diff --git a/man/TADA_GetUniqueNearbySites.Rd b/man/TADA_GetUniqueNearbySites.Rd index 0432ddd5..f8fa60bb 100644 --- a/man/TADA_GetUniqueNearbySites.Rd +++ b/man/TADA_GetUniqueNearbySites.Rd @@ -10,12 +10,13 @@ TADA_GetUniqueNearbySites(.data) \item{.data}{TADA dataframe} } \value{ -New dataframe with unique values for MonitoringLocationIdentifier, -TADA.MonitoringLocationIdentifier, TADA.LongitudeMeasure, and TADA.LatitudeMeasure +New dataframe with unique combinations of original and TADA MonitoringLocationIdentifier, +LongitudeMeasure, LatitudeMeasure, MonitoringLocationName, and MonitoringLocationTypeName. } \description{ This function takes a TADA dataset that contains grouped nearby monitoring stations and returns a unique dataset of the original MonitoringLocationIdentifier, the grouped -TADA.MonitoringLocationIdentifier, TADA.LongitudeMeasure, and TADA.LatitudeMeasure, -filtered for only those stations that have a nearby station. +TADA.MonitoringLocationIdentifier, as well as the original and TADA-prefixed LongitudeMeasure, +LatitudeMeasure, MonitoringLocationName, and MonitoringLocationTypeName, filtered for only those +stations that have a nearby station.
} diff --git a/man/TADA_GroupedScatterplot.Rd b/man/TADA_GroupedScatterplot.Rd index d6e53116..386139e7 100644 --- a/man/TADA_GroupedScatterplot.Rd +++ b/man/TADA_GroupedScatterplot.Rd @@ -6,7 +6,7 @@ \usage{ TADA_GroupedScatterplot( .data, - group_col = "MonitoringLocationName", + group_col = "TADA.MonitoringLocationName", groups = NULL ) } @@ -14,7 +14,7 @@ TADA_GroupedScatterplot( \item{.data}{TADA data frame where each row represents a unique record. Data frame must include the columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', 'TADA.ResultMeasure.MeasureUnitCode', 'ActivityStartDate', 'ActivityStartDateTime', -'ActivityStartDateTime', 'MonitoringLocationName', 'TADA.ActivityMediaName', +'ActivityStartDateTime', 'TADA.MonitoringLocationName', 'TADA.ActivityMediaName', 'ActivityMediaSubdivisionName', 'TADA.ResultDepthHeightMeasure.MeasureValue', 'TADA.ResultDepthHeightMeasure.MeasureValue', 'TADA.ResultDepthHeightMeasure.MeasureUnitCode', 'ActivityRelativeDepthName', 'TADA.ActivityDepthHeightMeasure.MeasureValue', @@ -23,15 +23,15 @@ the columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', and TADA.ActivityBottomDepthHeightMeasure.MeasureUnitCode to run this function.} \item{group_col}{The column in the dataset used to identify the groups -plotted. Defaults to MonitoringLocationName. This input is flexible, and allows for the use of +plotted. Defaults to TADA.MonitoringLocationName. This input is flexible, and allows for the use of other identifiers such as StateCode, CountyCode or user-created groups based on concatenation of other variables (e.g. characteristic name, site type, site name, year, organization, etc.)} \item{groups}{A vector of up to four identifiers from the id_cols column to specify the groups that will be plotted for a TADA.ComparableDataIdentifier. These groups will be specific to your dataset. 
For example, in the example data set -Data_6Tribes_5y_Harmonized if group_col is 'MonitoringLocationName', the groups could be -'Upper Red Lake: West', 'Upper Red Lake: West-Central', and 'Upper Red Lake: East Central'.} +Data_6Tribes_5y_Harmonized if group_col is 'TADA.MonitoringLocationName', the groups could be +'UPPER RED LAKE: WEST', 'UPPER RED LAKE: WEST-CENTRAL', and 'UPPER RED LAKE: EAST CENTRAL'.} } \value{ A plotly scatterplot(s) figure with one x-axis (Date/Time) and a @@ -60,9 +60,9 @@ data(Data_6Tribes_5y_Harmonized) # Filter the example data so it includes only one TADA.ComparableDataIdentifier df <- dplyr::filter(Data_6Tribes_5y_Harmonized, TADA.ComparableDataIdentifier \%in\% c("TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L")) # Creates a scatterplot of the three specified sites of interest in the same plot. -TADA_GroupedScatterplot(df, group_col = "MonitoringLocationName", groups = c("Upper Red Lake: West", "Upper Red Lake: West-Central", "Upper Red Lake: East Central")) +TADA_GroupedScatterplot(df, group_col = "TADA.MonitoringLocationName", groups = c("UPPER RED LAKE: WEST", "UPPER RED LAKE: WEST-CENTRAL", "UPPER RED LAKE: EAST CENTRAL")) -# If no groups are selected, return the 4 groups (by MonitoringLocationName) with the greatest number of results -TADA_GroupedScatterplot(df, group_col = "MonitoringLocationName") +# If no groups are selected, return the 4 groups (by TADA.MonitoringLocationName) with the greatest number of results +TADA_GroupedScatterplot(df, group_col = "TADA.MonitoringLocationName") } diff --git a/man/TADA_Histogram.Rd b/man/TADA_Histogram.Rd index f7c2bd9f..62535620 100644 --- a/man/TADA_Histogram.Rd +++ b/man/TADA_Histogram.Rd @@ -43,10 +43,10 @@ TADA_Histogram(df, id_cols = "TADA.ComparableDataIdentifier") # Create multiple histograms with additional grouping columns and view the first # plot in list. In this example, we will group by both TADA.ComparableDataIdentifier -# and MonitoringLocationTypeName (e.g. 
stream, reservoir, canal, etc.) +# and TADA.MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) # Load example data frame: data(Data_Nutrients_UT) -Histogram_output <- TADA_Histogram(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationTypeName")) +Histogram_output <- TADA_Histogram(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationTypeName")) # This example generates 32 histograms Histogram_output[[10]] Histogram_output[[25]] diff --git a/man/TADA_IDDepthProfiles.Rd b/man/TADA_IDDepthProfiles.Rd index 9a30681c..5480acda 100644 --- a/man/TADA_IDDepthProfiles.Rd +++ b/man/TADA_IDDepthProfiles.Rd @@ -10,7 +10,7 @@ TADA_IDDepthProfiles(.data, nresults = TRUE, nvalue = 2, aggregates = FALSE) \item{.data}{TADA dataframe which must include the columns ActivityStartDate, TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, TADA.ConsolidatedDepth.Bottom, TADA.ResultMeasureValue, TADA.ResultMeasureValue.UnitCode, -OrganizationIdentifier, MonitoringLocationName, MonitoringLocationIdentifier, +OrganizationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationIdentifier, and TADA.ComparableDataIdentifier.} \item{nresults}{Boolean argument with options "TRUE" or "FALSE". The @@ -21,7 +21,7 @@ When nresults = FALSE.} \item{nvalue}{numeric argument to specify the number of results required to identify a depth profile. The default is 2, which means that a depth profile will be identified if 2 or more results at different depths exists for the same ActivityStartDate, -MonitoringLocationIdentifier, OrganizationIdentifier, and TADA.ComparableDataIdentifier. +TADA.MonitoringLocationIdentifier, OrganizationIdentifier, and TADA.ComparableDataIdentifier. A few characteristics are excluded from this requirement because they are expected to have only a single result in depth units (ex: secchi disk depth).} @@ -34,8 +34,8 @@ FALSE. 
When aggregates = TRUE, all aggregate values are included when identifyin depth profile data.} } \value{ -A dataframe with the columns MonitoringLocationIdentifier, -MonitoringLocationName, OrganizationIdentifier, ActivityStartDate, +A dataframe with the columns TADA.MonitoringLocationIdentifier, +TADA.MonitoringLocationName, OrganizationIdentifier, ActivityStartDate, TADA.CharacteristicsForDepthProfile. Based on the user input for the nresults param, TADA.CharacteristicsForDepthProfile may or may not contain the number of results for each characteristic. @@ -44,7 +44,7 @@ of results for each characteristic. This function identifies depth profiles within a data frame to assist the user in selecting params for TADA_DepthProfilePlot. A TADA compatible data set is required. If TADA_FlagDepthCategory has not yet been run, it will be run as part of this -function. The output data frame is grouped by MonitoringLocationIdentifier, +function. The output data frame is grouped by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, and ActivityStartDate. } \details{ diff --git a/man/TADA_OverviewMap.Rd b/man/TADA_OverviewMap.Rd index 9a11adbc..80a1aff8 100644 --- a/man/TADA_OverviewMap.Rd +++ b/man/TADA_OverviewMap.Rd @@ -4,14 +4,20 @@ \alias{TADA_OverviewMap} \title{Create Overview Map} \usage{ -TADA_OverviewMap(.data) +TADA_OverviewMap(.data, identifier = "tada") } \arguments{ \item{.data}{TADA data frame containing the data downloaded from the WQP, where each row represents a unique data record. 
Data frame must include the columns -'MonitoringLocationIdentifier','MonitoringLocationName','TADA.LatitudeMeasure', +'TADA.MonitoringLocationIdentifier','TADA.MonitoringLocationName','TADA.LatitudeMeasure', 'TADA.LongitudeMeasure', 'ResultIdentifier', 'ActivityStartDate', 'TADA.CharacteristicName', and 'OrganizationIdentifier' to run this function.} + +\item{identifier}{A character argument to select whether the TADA.MonitoringLocationIdentifier +(which may include grouped sites if TADA_FindNearbySites has been run) or the original WQP +MonitoringLocationIdentifier and associated coordinates are used for mapping. Identifier equals +"tada" is the default and will use the TADA prefixed monitoring location columns. Identifier +equals "wqp" will use the originals.} } \value{ A leaflet map that shows all sites in the data frame, where larger point sizes diff --git a/man/TADA_PairForCriteriaCalc.Rd b/man/TADA_PairForCriteriaCalc.Rd index 83e893ba..a573903b 100644 --- a/man/TADA_PairForCriteriaCalc.Rd +++ b/man/TADA_PairForCriteriaCalc.Rd @@ -20,16 +20,18 @@ in the pairing ref. } \description{ This function pairs TADA results with results from specified characteristics from the same -MonitoringLocation within a user-specified time window to facilitate the calculation of numeric -criteria. The columns created by TADA_AutoClean are required to run this function. If they are not -present in the data frame, the function will stop and print an error message. +TADA.MonitoringLocation within a user-specified time window to facilitate the calculation of +numeric criteria. The columns created by TADA_AutoClean are required to run this function. If +they are not present in the data frame, the function will stop and print an error message.
} \details{ Users can provide a pairing reference file (can be created using TADA_CreatePairRef) to specify which combinations of TADA.CharacteristicName, TADA.ResultMeasure.MeasureUnit, TADA.MethodSpeciationName, and TADA.ResultSampleFractionText should be used for hardness, pH, -temperature, salinity, chloride or other user-defined groups. If no ref is specified, all possible -combinations for hardness, pH, temperature, salinity and chloride will be used. +temperature, salinity, chloride or other user-defined groups. If no ref is specified, all +possible combinations for hardness, pH, temperature, salinity and chloride will be used. It is +highly recommended that users perform all unit conversion and synonym harmonization before using +TADA_PairForCriteriaCalc. } \examples{ diff --git a/man/TADA_Scatterplot.Rd b/man/TADA_Scatterplot.Rd index cb53292b..1f1b60b4 100644 --- a/man/TADA_Scatterplot.Rd +++ b/man/TADA_Scatterplot.Rd @@ -43,14 +43,14 @@ TADA_Scatterplot(Data_6Tribes_5y_Harmonized, id_cols = "TADA.ComparableDataIdent df <- dplyr::filter(Data_6Tribes_5y_Harmonized, TADA.ComparableDataIdentifier == "TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L") TADA_Scatterplot(df, id_cols = "TADA.ComparableDataIdentifier") # Creates a scatterplot for each monitoring location -TADA_Scatterplot(df, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationName")) +TADA_Scatterplot(df, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationName")) # Create multiple scatterplots with additional grouping columns and view the first # plot in list. In this example, we will group by both TADA.ComparableDataIdentifier -# and MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) +# and TADA.MonitoringLocationTypeName (e.g. stream, reservoir, canal, etc.) 
# Load example dataset: data(Data_Nutrients_UT) -Scatterplot_output <- TADA_Scatterplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "MonitoringLocationTypeName")) +Scatterplot_output <- TADA_Scatterplot(Data_Nutrients_UT, id_cols = c("TADA.ComparableDataIdentifier", "TADA.MonitoringLocationTypeName")) # This example generates 47 scatterplots Scatterplot_output[[10]] Scatterplot_output[[25]] diff --git a/man/TADA_Stats.Rd b/man/TADA_Stats.Rd index 3ff272f5..ee288434 100644 --- a/man/TADA_Stats.Rd +++ b/man/TADA_Stats.Rd @@ -12,7 +12,7 @@ where each row represents a unique data record. Data frame must include the columns 'TADA.ResultMeasureValue', 'TADA.ResultMeasure.MeasureUnitCode', 'TADA.ResultSampleFractionText', 'TADA.MethodSpeciationName', 'TADA.ComparableDataIdentifier', 'TADA.CensoredData.Flag', -'DetectionQuantitationLimitTypeName', and 'MonitoringLocationIdentifier' to +'DetectionQuantitationLimitTypeName', and 'TADA.MonitoringLocationIdentifier' to run this function. The 'TADA.ComparableDataIdentifier' can be added to the data frame by running the function TADA_CreateComparableID().} @@ -20,8 +20,8 @@ data frame by running the function TADA_CreateComparableID().} 'TADA.ComparableDataIdentifier' as a grouping column. However, the user may want to summarize their dataset by additional grouping columns. For example, a user may want to create a summary table where each row is -specific to one comparable data identifier AND one monitoring location. -This input would look like: group_cols = c("MonitoringLocationIdentifier")} +specific to one comparable data identifier AND one TADA monitoring location. 
+This input would look like: group_cols = c("TADA.MonitoringLocationIdentifier")} } \value{ stats table diff --git a/man/TADA_TwoCharacteristicScatterplot.Rd b/man/TADA_TwoCharacteristicScatterplot.Rd index b55dd674..f3d775da 100644 --- a/man/TADA_TwoCharacteristicScatterplot.Rd +++ b/man/TADA_TwoCharacteristicScatterplot.Rd @@ -13,8 +13,8 @@ TADA_TwoCharacteristicScatterplot( \arguments{ \item{.data}{TADA data frame containing the data downloaded from the WQP, where each row represents a unique data record. Data frame must include the -columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', and 'TADA.ResultMeasure.MeasureUnitCode' -to run this function.} +columns 'TADA.ComparableDataIdentifier', 'TADA.ResultMeasureValue', and +'TADA.ResultMeasure.MeasureUnitCode' to run this function.} \item{id_cols}{The column in the dataset used to identify the unique groups to be plotted. Defaults to 'TADA.ComparableDataIdentifier', which should be @@ -27,8 +27,8 @@ organization, etc.)} \item{groups}{A vector of two identifiers from the id_cols column. For example, if the id_cols is 'TADA.ComparableDataIdentifier', the groups could be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_NA_NA'. These groups will -be specific to your dataset. If the id_cols is 'MonitoringLocationName', -the groups could be 'Upper Red Lake: West' and 'Upper Red Lake: West-Central'.} +be specific to your dataset. If the id_cols is 'TADA.MonitoringLocationName', +the groups could be 'UPPER RED LAKE: WEST' and 'UPPER RED LAKE: WEST-CENTRAL'.} } \value{ A single plotly scatterplot figure with one x-axis (Date/Time) and a diff --git a/vignettes/TADAModule1_AdvancedTraining.Rmd b/vignettes/TADAModule1_AdvancedTraining.Rmd index 4e289735..b4d7e279 100644 --- a/vignettes/TADAModule1_AdvancedTraining.Rmd +++ b/vignettes/TADAModule1_AdvancedTraining.Rmd @@ -101,7 +101,7 @@ them (enter 1 into the console). 
```{r install_TADA, eval = F, results = 'hide'} remotes::install_github("USEPA/EPATADA", - ref = "develop", + ref = "482-create-tadamonitoringlocationidentifier-in-tada_autoclean", dependencies = TRUE ) ``` @@ -623,7 +623,7 @@ functions and pipes. # get table of characteristics with number of results, sites, and organizations dataset_cens_summary <- dataset_cens %>% dplyr::group_by(TADA.CharacteristicName) %>% - dplyr::summarise(Result_Count = length(ResultIdentifier), Site_Count = length(unique(MonitoringLocationIdentifier)), Org_Count = length(unique(OrganizationIdentifier))) %>% + dplyr::summarise(Result_Count = length(ResultIdentifier), Site_Count = length(unique(TADA.MonitoringLocationIdentifier)), Org_Count = length(unique(OrganizationIdentifier))) %>% dplyr::arrange(desc(Result_Count)) ``` @@ -653,7 +653,7 @@ upon your program's goals and methods, you might want to filter out some of the types you see. ```{r location pie} -TADA_FieldValuesPie(dataset_cens, field = "MonitoringLocationTypeName") +TADA_FieldValuesPie(dataset_cens, field = "TADA.MonitoringLocationTypeName") ``` One of the next big steps is data harmonization: translating and diff --git a/vignettes/TADAModule1_BeginnerTraining.Rmd b/vignettes/TADAModule1_BeginnerTraining.Rmd index 418c6cd6..570f7703 100644 --- a/vignettes/TADAModule1_BeginnerTraining.Rmd +++ b/vignettes/TADAModule1_BeginnerTraining.Rmd @@ -83,7 +83,7 @@ console). ```{r install_TADA, eval = F, results = 'hide'} remotes::install_github("USEPA/EPATADA", - ref = "develop", + ref = "482-create-tadamonitoringlocationidentifier-in-tada_autoclean", dependencies = TRUE ) @@ -379,16 +379,17 @@ your program's goals and methods, you might want to further filter the monitoring location types in the data set. 
```{r MonitoringLocations, fig.width=8, fig.height=6, fig.fullwidth=TRUE} -MonLocTypNam_Pie <- TADA_FieldValuesPie(R5Profile, field = "MonitoringLocationTypeName") +MonLocTypNam_Pie <- TADA_FieldValuesPie(R5Profile, field = "TADA.MonitoringLocationTypeName") MonLocTypNam_Pie ``` **Question 4: How many unique Monitoring Location Types -(MonitoringLocationTypeName) are present? Which is the most common?** +(TADA.MonitoringLocationTypeName) are present? Which is the most +common?** ```{r question4} -FieldValues_MLs_table <- TADA_FieldValuesTable(R5Profile, field = "MonitoringLocationTypeName") +FieldValues_MLs_table <- TADA_FieldValuesTable(R5Profile, field = "TADA.MonitoringLocationTypeName") mlt_n <- length(unique(FieldValues_MLs_table$Value)) @@ -401,7 +402,7 @@ the mlt_most_common[1] subset of results. ```{r MonitoringLocationsMostCommon} R5Profile <- R5Profile %>% - dplyr::filter(MonitoringLocationTypeName == dplyr::pull(mlt_most_common[1])) + dplyr::filter(TADA.MonitoringLocationTypeName == dplyr::pull(mlt_most_common[1])) ``` Next we can review station locations and summary information using the @@ -950,9 +951,9 @@ To start, review the list of parameters in the dataframe using the **TADA_FieldValuesTable** function. 
```{r TADA_FieldValuesTable_chars, results = 'hide'} -Char_Pie <- FieldValuesTable_Chars <- TADA_FieldValuesTable(R5ProfileClean8, field = "TADA.CharacteristicName") +Char_Table <- FieldValuesTable_Chars <- TADA_FieldValuesTable(R5ProfileClean8, field = "TADA.CharacteristicName") -Char_Pie +Char_Table ``` Next, we can revisit the **TADA_FieldCounts** function at the @@ -1023,6 +1024,8 @@ included within each of the following fields: ```{r TADA_FieldCounts_char} R5_FieldCounts_Chars <- TADA_FieldCounts(R5ProfileClean8, display = "most", characteristicName = "DISSOLVED OXYGEN (DO)") + +R5_FieldCounts_Chars ``` Selecting a parameter generates the list above, which is subset by the @@ -1040,15 +1043,20 @@ we review values from the *SampleCollectionMethod.MethodName* field for Dissolved Oxygen (DO) results. ```{r DO_Method, fig.width=8, fig.height=6, fig.fullwidth=TRUE} -# C Create pie chart for SampleCollectionMethod.MethodName for Dissolved Oxygen (DO results) -DO_SCM_Pie <- TADA_FieldValuesPie(R5ProfileClean8, field = "SampleCollectionMethod.MethodName", characteristicName = "DISSOLVED OXYGEN (DO)") -DO_SCM_Pie +# # Create subset of DO data + DO_R5ProfileClean8 <- R5ProfileClean8 %>% dplyr::filter(TADA.CharacteristicName == "DISSOLVED OXYGEN (DO)") +# +# # Create pie chart for SampleCollectionMethod.MethodName for Dissolved Oxygen (DO results) + DO_SCM_Pie <- TADA_FieldValuesPie(R5ProfileClean8, field = "SampleCollectionMethod.MethodName", characteristicName = "DISSOLVED OXYGEN (DO)") +# + DO_SCM_Pie # Create table for SampleCollectionMethod.MethodName for Dissolved Oxygen (DO results) FieldValuesTable_DO_scm <- TADA_FieldValuesTable(R5ProfileClean8, field = "SampleCollectionMethod.MethodName", characteristicName = "DISSOLVED OXYGEN (DO)") ``` +\ Generate a scatterplot with two characteristics. 
```{r review_identifiers} diff --git a/vignettes/WQXValidationService.Rmd b/vignettes/WQXValidationService.Rmd index 2034a931..f637b8f7 100644 --- a/vignettes/WQXValidationService.Rmd +++ b/vignettes/WQXValidationService.Rmd @@ -165,9 +165,10 @@ TADA leverages many of the WQX domain tables. [TADA_ConvertResultUnits()](https://usepa.github.io/EPATADA/reference/TADA_ConvertResultUnits.html) All TADA Reference and Validation Tables are also available in the R -Package [HERE](https://github.com/USEPA/EPATADA/tree/develop/inst/extdata). -TADA pulls the WQX Validation Table and other domain tables into TADA -and updates them automatically whenever changes are made +Package +[HERE](https://github.com/USEPA/EPATADA/tree/develop/inst/extdata). TADA +pulls the WQX Validation Table and other domain tables into TADA and +updates them automatically whenever changes are made -