Skip to content

Commit

Permalink
Merge pull request #206 from COMPASS-DOE/heatmap-simplify
Browse files Browse the repository at this point in the history
Simplify code and add comments
  • Loading branch information
stephpenn1 authored Sep 3, 2024
2 parents 83e5fec + 4313f92 commit 1d036ea
Show file tree
Hide file tree
Showing 7 changed files with 172 additions and 26 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

COMPASS-FME processing pipeline for environmental sensor data.

Latest data release is `v1-0`, 29 May 2024, with 172.6M observations.
Latest data release is `v1-1`, 05 August 2024, with 241.4M observations.

This work is supported by [COMPASS-FME](https://compass.pnnl.gov), a
multi-institutional project funded by the U.S. Department of Energy,
Expand Down
49 changes: 28 additions & 21 deletions availability_graph.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,44 @@

library(tidyverse)

fls <- list.files("~/Documents/v1-1/", pattern = "*.csv$", full.names = TRUE, recursive = TRUE)
fls <- list.files("~/Documents/v1-1", pattern = "*.csv$", full.names = TRUE, recursive = TRUE)

# filter out TMP
fls[!grepl("TMP", fls)] -> fls

results <- list()

for(i in seq_along(fls)) {
message(basename(fls[i]))
for(f in fls) {
message(basename(f))

results[[basename(fls[i])]] <- readr::read_csv(fls[i]) %>% group_by(Site, Instrument, year(TIMESTAMP), month(TIMESTAMP)) %>% summarise(n = n())
results[[f]] <- readr::read_csv(f, col_types = "ccTccccdccii") %>%
mutate(ts_str = format(TIMESTAMP, "%b-%Y")) %>%
group_by(Site, Instrument, ts_str) %>%
summarise(n = n(),
# retain the timestamp for correct sorting later
TIMESTAMP = mean(TIMESTAMP),
.groups = "drop")
}

bind_rows(results) %>%
rename(Year = `year(TIMESTAMP)`, Month = `month(TIMESTAMP)`) %>%
arrange(Site, Year, Month) -> r

r %>% group_by(Site, Instrument, Year, Month) %>%
summarise(n = sum(n)) %>%
filter(Site =="GCW", !is.na(Instrument)) %>%
mutate(data_present = ifelse(n > 0, "Yes", "No"),
Month = month.abb[Month],
date = ym(paste(Year, Month))) %>%
arrange(date) %>%
mutate(
ts_str = format(date, "%b-%Y"),
ts_fct = factor(ts_str, levels = unique(ts_str))) %>%
select(-n) %>%
# each file is a site and plot; sum by site
group_by(Site, Instrument, ts_str) %>%
summarise(n = sum(n),
TIMESTAMP = mean(TIMESTAMP),
.groups = "drop") %>%
# not sure why this next line is here
filter(Site != "GCW", !is.na(Instrument)) %>%
mutate(data_present = if_else(n > 0, "Yes", "No")) %>%
# create the factor month-year
arrange(TIMESTAMP) %>%
mutate(ts_fct = factor(ts_str, levels = unique(ts_str))) %>%
# ...and plot
ggplot(aes(x = ts_fct, y = Instrument, fill = Instrument)) +
geom_raster(colour = "white", hjust = 0, vjust = 0) +
geom_raster(hjust = 0, vjust = 0) +
facet_wrap(~Site, ncol = 1, strip.position = "left") +
theme_minimal() +
scale_y_discrete(position = "right") +
theme(axis.text.x = element_text(angle = 90, vjust = -0.25, hjust = 1),
theme(axis.text.x = element_text(angle = 90, vjust = -0.8, hjust = 1),
axis.text.y = element_text(vjust = 1.25),
axis.ticks.y=element_blank(),
panel.grid.minor.y = element_blank(),
Expand All @@ -48,4 +55,4 @@ r %>% group_by(Site, Instrument, Year, Month) %>%
legend.text = element_text(size=16),
legend.title = element_text(size=18)) -> p

ggsave("~/Documents/synoptic_avail_GCW.png", height = 6, width = 15)
ggsave("~/Documents/synoptic_avail.png", height = 12, width = 15)
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"TOA5","TEMPEST_REDOX_fresh","CR1000","86614","CR1000.Std.31.03","CPU:COMPASS_Redox_Tempest_2024v1.CR1","14439","Redox15"
"TIMESTAMP","RECORD","Statname","BattV","RedoxRa(1)","RedoxRa(2)","RedoxRa(3)","RedoxRa(4)","RedoxRa(5)","RedoxRa(6)","RedoxRa(7)","RedoxRa(8)","RedoxRa(9)","RedoxRa(10)","RedoxRa(11)","RedoxRa(12)","RedoxRa(13)","RedoxRa(14)","RedoxRa(15)","RedoxRa(16)","RedoxRa(17)","RedoxRa(18)","RedoxRa(19)","RedoxRa(20)","RedoxRb(1)","RedoxRb(2)","RedoxRb(3)","RedoxRb(4)","RedoxRb(5)","RedoxRb(6)","RedoxRb(7)","RedoxRb(8)","RedoxRb(9)","RedoxRb(10)","RedoxRb(11)","RedoxRb(12)","RedoxRb(13)","RedoxRb(14)","RedoxRb(15)","RedoxRb(16)","RedoxRb(17)","RedoxRb(18)","RedoxRb(19)","RedoxRb(20)"
"TS","RN","","Volts","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV"
"","","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp"
"2024-06-30 18:30:00",5235,"NAN",11.93,929.1661,573.9766,542.5515,-36.52115,557.8394,537.2856,596.5688,632.9201,643.1121,480.3806,444.7087,508.5783,601.6648,628.3337,642.2627,623.2377,621.5391,599.7963,606.421,650.5861,930.3552,571.7684,542.8912,-31.76491,559.538,542.0419,600.4757,630.542,646.5093,476.3038,445.3882,507.8988,602.8538,629.0132,644.9806,625.9556,625.6158,599.2867,609.8184,647.3587
"2024-06-30 18:45:00",5236,"NAN",11.93,932.2237,571.5985,543.5707,-35.33209,558.5189,536.9459,597.7579,630.2022,647.1888,475.2846,447.9362,506.3701,603.0237,628.5036,642.9422,623.9172,624.7665,598.777,611.1772,647.6984,934.0922,571.5985,549.0063,-31.93478,559.7079,538.8144,596.5688,631.3913,644.4709,475.4545,448.9554,508.4084,607.1005,629.3529,646.8491,626.8049,623.7473,599.6264,608.2896,649.0574
"2024-06-30 19:00:00",5237,"NAN",11.93,929.1845,571.6099,547.1487,-34.48345,562.267,538.8251,599.2985,630.3846,644.144,475.1241,445.397,506.8897,604.7344,629.0256,647.7112,624.7789,621.3815,598.6191,606.433,649.0702,929.1845,572.9688,543.2417,-30.06685,558.0203,538.8251,597.0902,631.7435,646.1824,476.4831,446.4163,509.4377,603.885,630.3846,645.1632,626.6475,622.061,599.4684,607.792,650.5991
2 changes: 1 addition & 1 deletion synoptic/driver.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ source("helpers.R")
# Settings ----------------------------------------------------

ROOT <- "./data_TEST"
VERSION <- "1-0"
VERSION <- "1-1"

# Log file ----------------------------------------------------

Expand Down
93 changes: 93 additions & 0 deletions synoptic/flmd-generator.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# flmd-generator.R
# Generate file-level metadata for an L1 release
# https://github.com/ess-dive-community/essdive-file-level-metadata/
# BBL August 2024

library(tidyr)
library(dplyr)

# The paths used below are relative ("./data/L1/"), so refuse to run
# unless the current working directory is the synoptic folder
if (!identical(basename(getwd()), "synoptic")) {
  stop("Working directory needs to be synoptic/")
}

# List every file under ./data/L1/ (recursively) and derive, for each one:
#   sort      - its position in the listing, used later to restore order
#   File_Path - its directory with the leading "./data/L1//" removed
#   File_Name - its base name
# NOTE(review): the "//" in the pattern presumably arises because the
# listing path ends in a slash; a directory that does not start with
# "./data/L1//" is left unchanged by gsub() - confirm this is intended.
library(tibble)
results <- tibble(
  file = list.files(path = "./data/L1/", full.names = TRUE, recursive = TRUE)
) %>%
  mutate(
    sort = seq_along(file),
    File_Path = gsub("^\\./data/L1//", "", dirname(file)),
    File_Name = basename(file)
  ) %>%
  # the full path was only needed to compute the columns above
  select(-file)
message("Found ", nrow(results), " files to process")

# Data files: names of the form letters_letters_8digits-8digits...csv
# `data_files` holds row positions within `results` (reused by the
# "other files" check further down); `df` holds the matching rows
message("Processing data files...")
data_files <- which(
  grepl("^[A-Za-z]+_[A-Za-z]+_[0-9]{8}-[0-9]{8}.+\\.csv$", results$File_Name)
)
df <- results[data_files, ]

# Split the File_Name column of `x` into its underscore-delimited pieces
# and convert the date range into two Date columns.
#
# x: a data frame with a File_Name column whose values split on "_" into
#    five pieces, the third being "YYYYMMDD-YYYYMMDD"
# Returns `x` with File_Name retained plus new columns Site, Plot,
#    Start_Date, End_Date (both Date), Level and version. Nothing strips
#    the file extension, so it stays attached to the last piece.
find_start_end_dates <- function(x) {
  name_parts <- c("Site", "Plot", "Timerange", "Level", "version")
  x %>%
    separate(File_Name, into = name_parts, sep = "_", remove = FALSE) %>%
    # Timerange is consumed here and replaced by its two halves
    separate(Timerange, into = c("Start_Date", "End_Date"), sep = "-") %>%
    mutate(
      Start_Date = as.Date(Start_Date, format = "%Y%m%d"),
      End_Date = as.Date(End_Date, format = "%Y%m%d")
    )
}
# Parse site/plot/dates out of the data file names, then add a
# human-readable description (month and year come from Start_Date)
# and the missing-value code for every data file
df <- find_start_end_dates(df) %>%
  mutate(
    File_Description = paste(
      format(Start_Date, "%b %Y"),
      "sensor data for",
      Plot,
      "plot at",
      Site,
      "site"
    ),
    Missing_Value_Codes = "'NA'"
  )

# Plot files: same naming pattern as the data files but ending in .pdf
# `plot_files` holds row positions within `results`; `pf` holds the
# matching rows with parsed dates and a description
message("Processing plot files...")
plot_files <- which(
  grepl("^[A-Za-z]+_[A-Za-z]+_[0-9]{8}-[0-9]{8}.+\\.pdf$", results$File_Name)
)
pf <- find_start_end_dates(results[plot_files, ]) %>%
  mutate(
    File_Description = paste(
      "Plots of",
      format(Start_Date, "%b %Y"),
      "sensor data for",
      paste0(Site, "-", Plot)
    )
  )

# Isolate the site-year metadata files: any file name ending in
# "metadata.txt". The dot is escaped so that it matches only a literal
# "." and not an arbitrary character.
# `metadata_files` holds row positions within `results`; `mdf` holds the
# matching rows, described by the folder (File_Path) they sit in
message("Processing metadata files...")
metadata_files <- grep("metadata\\.txt$", results$File_Name)
mdf <- results[metadata_files, ]
mdf$File_Description <- paste(
  "Metadata for all data files in",
  mdf$File_Path,
  "folder"
)

# Special files: a fixed lookup of file name -> description.
# The README entry names the release explicitly, so this table has to be
# revisited for each new release.
message("Processing special files...")
special_files_info <- tibble(
  File_Name = c(
    "README_v1-1.txt",
    "README.md",
    "create-time-series.R",
    "cumulative-observations.R"
  ),
  File_Description = c(
    "Overall documentation file for the v1-1 release",
    "Minimal README about the folder",
    "Sample R code to create a time series from data",
    "Sample R code to plot cumulative observations"
  )
)
# Row positions within `results` whose name appears in the lookup table
special_files <- which(results$File_Name %in% special_files_info$File_Name)
# Attach the description to each matching row
sf <- results[special_files, ] %>%
  left_join(special_files_info, by = "File_Name")

# Other files: anything not claimed by one of the categories above has no
# description, which is treated as a fatal error.
# Positive indices via setdiff() are used instead of results[-c(...), ]:
# with negative indexing, an empty index vector selects *zero* rows, so a
# run in which no file matched any category would silently pass this check.
message("Checking other files...")
other_files <- results[
  setdiff(
    seq_len(nrow(results)),
    c(data_files, plot_files, metadata_files, special_files)
  ),
]
if (nrow(other_files) > 0) {
  # Show the offending files before aborting
  print(other_files)
  stop("There are 'other' files, i.e. with no description. You may need to ",
       "update the 'special_files_info' in the script if this is a new data release")
}

# Stack the four categories, restore the original listing order via the
# `sort` column, add an empty Standard column, and keep only the columns
# written to the file-level metadata table
flmd <- bind_rows(df, pf, mdf, sf) %>%
  arrange(sort) %>%
  mutate(Standard = "") %>%
  select(
    File_Name, File_Description, Standard, Start_Date,
    End_Date, Missing_Value_Codes, File_Path
  )

# Missing values are written as empty fields
readr::write_csv(flmd, "flmd.csv", na = "")

message("All done!")
6 changes: 3 additions & 3 deletions synoptic/metadata/L1_metadata/README_v1-1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ DATA STRUCTURE
—----------------------------------
Data are organized into {SITE_YEAR} folders, with up to 12 monthly
comma-separated value (CSV) files in each folder for each plot at that
site. Sites include CRC (Crane Creek), GCW (GCReW), GWI (Goodiwn
site. Sites include CRC (Crane Creek), GCW (GCReW), GWI (Goodwin
Island), MSM (Moneystump Marsh), OWC (Old Woman Creek), PTR (Portage
River), SWH (Sweet Hall Marsh), and TMP (TEMPEST experiment). See
site-specific metadata files in each folder.
Expand All @@ -56,8 +56,8 @@ change, only that your scripts using L1 data will probably still work.

CHANGELOG
—----------------------------------
Version 1-1 released 2024-XX-XX
* Covers late 2019 through June 2024 for TEMPEST and all synoptic sites
Version 1-1 released 2024-08-05
* Covers late 2019 through July 2024 for TEMPEST and all synoptic sites
* TEMPEST redox data now available starting April 2024
* Now includes high-frequency (1 and 5 min interval) data from TEMPEST floods

Expand Down
39 changes: 39 additions & 0 deletions synoptic/metadata/design_table.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2465,6 +2465,45 @@ TEMPEST_REDOX_salt,Redox5,"RedoxRb({2,6,10,14,18})",TMP,S,Redox,B,{1:5},F4,,redo
TEMPEST_REDOX_salt,Redox5,"RedoxRb({3,7,11,15,19})",TMP,S,Redox,B,{1:5},F4,,redox_35cm,
TEMPEST_REDOX_salt,Redox5,"RedoxRb({4,8,12,16,20})",TMP,S,Redox,B,{1:5},F4,,redox_55cm,
,,,,,,,,,,,
TEMPEST_REDOX_control,Redox15,Format,TMP,C,Redox,A,,,,,
TEMPEST_REDOX_control,Redox15,RECORD,TMP,C,Redox,A,,,,,
TEMPEST_REDOX_control,Redox15,Statname,TMP,C,Redox,A,,,,,
TEMPEST_REDOX_control,Redox15,BattV,TMP,C,Redox,A,,,,,
TEMPEST_REDOX_control,Redox15,"RedoxRa({1,5,9,13,17})",TMP,C,Redox,A,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRa({2,6,10,14,18})",TMP,C,Redox,A,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRa({3,7,11,15,19})",TMP,C,Redox,A,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRa({4,8,12,16,20})",TMP,C,Redox,A,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRb({1,5,9,13,17})",TMP,C,Redox,B,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRb({2,6,10,14,18})",TMP,C,Redox,B,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRb({3,7,11,15,19})",TMP,C,Redox,B,{1:5},,,,
TEMPEST_REDOX_control,Redox15,"RedoxRb({4,8,12,16,20})",TMP,C,Redox,B,{1:5},,,,
,,,,,,,,,,,
TEMPEST_REDOX_fresh,Redox15,Format,TMP,F,Redox,A,,,,,
TEMPEST_REDOX_fresh,Redox15,RECORD,TMP,F,Redox,A,,,,,
TEMPEST_REDOX_fresh,Redox15,Statname,TMP,F,Redox,A,,,,,
TEMPEST_REDOX_fresh,Redox15,BattV,TMP,F,Redox,A,,,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRa({1,5,9,13,17})",TMP,F,Redox,A,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRa({2,6,10,14,18})",TMP,F,Redox,A,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRa({3,7,11,15,19})",TMP,F,Redox,A,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRa({4,8,12,16,20})",TMP,F,Redox,A,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRb({1,5,9,13,17})",TMP,F,Redox,B,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRb({2,6,10,14,18})",TMP,F,Redox,B,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRb({3,7,11,15,19})",TMP,F,Redox,B,{1:5},,,,
TEMPEST_REDOX_fresh,Redox15,"RedoxRb({4,8,12,16,20})",TMP,F,Redox,B,{1:5},,,,
,,,,,,,,,,,
TEMPEST_REDOX_salt,Redox15,Format,TMP,S,Redox,A,,,,,
TEMPEST_REDOX_salt,Redox15,RECORD,TMP,S,Redox,A,,,,,
TEMPEST_REDOX_salt,Redox15,Statname,TMP,S,Redox,A,,,,,
TEMPEST_REDOX_salt,Redox15,BattV,TMP,S,Redox,A,,,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRa({1,5,9,13,17})",TMP,S,Redox,A,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRa({2,6,10,14,18})",TMP,S,Redox,A,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRa({3,7,11,15,19})",TMP,S,Redox,A,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRa({4,8,12,16,20})",TMP,S,Redox,A,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRb({1,5,9,13,17})",TMP,S,Redox,B,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRb({2,6,10,14,18})",TMP,S,Redox,B,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRb({3,7,11,15,19})",TMP,S,Redox,B,{1:5},,,,
TEMPEST_REDOX_salt,Redox15,"RedoxRb({4,8,12,16,20})",TMP,S,Redox,B,{1:5},,,,
,,,,,,,,,,,
GCREW_MET,GCREW_MET_15min,Format,GCW,W,,,,,,,
GCREW_MET,GCREW_MET_15min,RECORD,GCW,W,,,,,,,
GCREW_MET,GCREW_MET_15min,Statname,GCW,W,,,,,,,
Expand Down

0 comments on commit 1d036ea

Please sign in to comment.