
Map GCReW met to GCW-W #167

Merged · 13 commits · May 8, 2024
15 changes: 1 addition & 14 deletions synoptic/L0.qmd
@@ -6,9 +6,7 @@ params:
html_outfile: "L0.html"
DATA_ROOT: "data_TEST/"
RAW: "Raw/"
RAW_DONE: "Raw_done/"
L0: "L0/"
remove_input_files: false
logfile: ""
run_parallel: false
date: now
@@ -31,15 +29,12 @@ This script

- Writes as CSV files with row/col/hash info in filename

- Moves the raw files to a Raw_done folder

## Initializing

```{r init}
#| include: false

RAW <- file.path(params$DATA_ROOT, params$RAW)
RAW_DONE <- file.path(params$DATA_ROOT, params$RAW_DONE)
L0 <- file.path(params$DATA_ROOT, params$L0)

library(tidyr)
@@ -60,8 +55,6 @@ I see `r length(files_to_process)` files to process in `r RAW`.

Output directory is `r L0`.

Moving done files to `r RAW_DONE`.

HTML outfile is `r params$html_outfile`.

Logfile is `r params$logfile`.
@@ -127,17 +120,11 @@ f <- function(fn, new_dir) {
overwrites <<- overwrites + 1
}

# Write the new file...
# Write the new file
new_fqfn <- file.path(new_dir, new_fn)
message("\tWriting ", new_fqfn)
write.csv(dat_long, new_fqfn, row.names = FALSE)
rm(dat_long)
# ...and move to 'Raw_done' folder
if(params$remove_input_files) {
message("\tArchiving raw input files")
file.copy(fn, file.path(params$raw_done, basefn), overwrite = FALSE)
file.remove(fn)
}
}

# Return an informational data frame about file processed, dimensions, etc.
8 changes: 3 additions & 5 deletions synoptic/L1.qmd
@@ -13,7 +13,6 @@ params:
METADATA_COLUMNS_TABLE: "L1_metadata_columns.csv"
L1_VERSION: "???"
debug: false
remove_input_files: false
write_plots: true
logfile: ""
run_parallel: false
@@ -80,9 +79,7 @@ f <- function(dir_name, dirs_to_process, out_dir) {
message("\tIt has ", length(d), " files")

# Read all files in a folder
dat_raw <- read_csv_group(d,
remove_input_files = params$remove_input_files,
col_types = "cccccTdccccccccdii")
dat_raw <- read_csv_group(d, col_types = "cccccTdccccccccdii")

message("\tTotal data: ", nrow(dat_raw), " rows, ", ncol(dat_raw), " columns")

@@ -100,11 +97,12 @@ f <- function(dir_name, dirs_to_process, out_dir) {
site <- dat$Site[1]
plot <- dat$Plot[1]

# Order columns following the column metadata...
# Check for metadata columns that are missing...
if(!all(column_md$Column %in% colnames(dat))) {
stop("Column metadata file ", params$METADATA_COLUMNS_TABLE,
" has entries not in data: ", setdiff(column_md$Column, colnames(dat)))
}
# ...order and remove columns not in the metadata...
dat <- dat[column_md$Column]
# ...and sort rows
dat <- dat[order(dat$TIMESTAMP),]
7 changes: 0 additions & 7 deletions synoptic/L1_normalize.qmd
@@ -12,7 +12,6 @@ params:
METADATA_VARS_TABLE: "L1_metadata/L1_metadata_variables.csv"
OOS: "out-of-service/"
debug: false
remove_input_files: false
logfile: ""
run_parallel: false
date: now
@@ -275,12 +274,6 @@ f <- function(fn, out_dir, design_table) {
}

rm(dat)

if(params$remove_input_files) {
message("\tRemoving input files")
file.remove(fn)
}

return(smry)
}

1,348 changes: 1,348 additions & 0 deletions synoptic/data_TEST/Raw/GCREW_MET_GCREW_MET_15min_20230322020009.dat

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions synoptic/docs/making-a-new-release.md
@@ -40,12 +40,12 @@ script). Of course, this is much slower.
9. Double-check the final release README file.

10. You may want to clean up the resulting L1 folder; for example,
remove (using `rm -r`) the `.DS_Store` files created by MacOS.
remove (`find ./ -name ".DS_Store" | xargs rm`) hidden files created by macOS.

11. Push the data to the COMPASS HPC. For example:

```
data % rsync -av --exclude=".*" L1/ <user>@compass.pnl.gov:/compass/datasets/fme_data_release/sensor_data/Level1/v1-0/
rsync -av --exclude=".*" L1/ <user>@compass.pnl.gov:/compass/datasets/fme_data_release/sensor_data/Level1/v1-0/
```

(Follow the same procedure for `Raw`, `L0`, and `Logs` outputs.)
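The cleanup in step 10 can also be done from R, mirroring the `list.files()`/`file.remove()` pattern already used in `helpers.R`. A minimal sketch (not part of the release scripts; the `L1/` path is an assumption):

```r
# Sketch: remove macOS .DS_Store files under the L1 output folder.
# all.files = TRUE is required so that hidden files are matched;
# list.files() returns character(0) if the folder is absent, so this is safe.
ds <- list.files("L1/", pattern = "^\\.DS_Store$", recursive = TRUE,
                 all.files = TRUE, full.names = TRUE)
message("Removing ", length(ds), " .DS_Store files")
invisible(file.remove(ds))
```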
61 changes: 20 additions & 41 deletions synoptic/helpers.R
@@ -47,8 +47,7 @@ copy_output <- function(from, to, overwrite = TRUE) {
# Read a vector of CSV files with the same column structure, optionally
# removing them as we read, and bind data together. The read error count
# is returned as an attribute of the output
read_csv_group <- function(files, col_types = NULL,
remove_input_files = FALSE, quiet = FALSE, ...) {
read_csv_group <- function(files, col_types = NULL, quiet = FALSE, ...) {
# Warnings are not allowed here, as this usually means a column format
# problem that we want to fix immediately
oldwarn <- options()$warn
@@ -58,7 +57,6 @@ read_csv_group <- function(files, col_types = NULL,
readf <- function(fn, quiet, ...) {
if(!quiet) message("\tReading ", basename(fn))
x <- read_csv(fn, col_types = col_types, ...)
if(remove_input_files) file.remove(fn)
x
}
# Read all files, bind data frames, and return
@@ -191,44 +189,25 @@ write_to_folders <- function(x, root_dir, data_level, site, plot,
# L1, L2, and Logs folders
reset <- function(root = here::here("synoptic/data_TEST")) {
message("root is ", root)
items <- list.files(file.path(root, "L0/"), pattern = "*.csv",
full.names = TRUE)
message("Removing ", length(items), " files in L0")
lapply(items, file.remove)

items <- list.files(file.path(root, "L1_normalize/"), recursive = TRUE,
pattern = "*.csv",
full.names = TRUE)
message("Removing ", length(items), " files in L1_normalize")
lapply(items, file.remove)

items <- list.files(file.path(root, "L1/"), recursive = TRUE,
include.dirs = FALSE, full.names = TRUE)
items <- items[basename(items) != "README.md"]
message("Removing ", length(items), " files in L1")
lapply(items, file.remove)
items <- list.files(file.path(root, "L1/"), recursive = TRUE,
include.dirs = TRUE, full.names = TRUE)
items <- items[basename(items) != "README.md"]
message("Removing ", length(items), " directories in L1")
lapply(items, file.remove)

items <- list.files(file.path(root, "L2/"), recursive = TRUE,
include.dirs = FALSE, full.names = TRUE)
items <- items[basename(items) != "README.md"]
message("Removing ", length(items), " files in L2")
lapply(items, file.remove)
items <- list.files(file.path(root, "L2/"), recursive = TRUE,
include.dirs = TRUE, full.names = TRUE)
items <- items[basename(items) != "README.md"]
message("Removing ", length(items), " directories in L1a")
lapply(items, file.remove)

items <- list.files(file.path(root, "Logs/"), pattern = "(txt|html)$",
recursive = TRUE,
include.dirs = FALSE, full.names = TRUE)
message("Removing ", length(items), " log files in Logs")
lapply(items, file.remove)

remove_files_folders <- function(dir, pat = "(pdf|csv)$", rec = TRUE) {
items <- list.files(file.path(root, dir), recursive = rec,
pattern = pat,
full.names = TRUE)
message("Removing ", length(items), " files in ", dir)
lapply(items, file.remove)
items <- list.files(file.path(root, dir), recursive = rec,
include.dirs = TRUE, full.names = TRUE)
items <- items[basename(items) != "README.md"]
message("Removing ", length(items), " directories in ", dir)
lapply(items, file.remove)
}

remove_files_folders("L0/")
remove_files_folders("L1_normalize/")
remove_files_folders("L1/")
remove_files_folders("L2/")
remove_files_folders("Logs/", pat = "(txt|html)$")

message("All done.")
}
6 changes: 6 additions & 0 deletions synoptic/metadata/L1_metadata/GCW.txt
@@ -7,6 +7,12 @@ transect spans a mid- to late-successional (~80 years old) temperate,
deciduous coastal forest and a mesohaline marsh along the Rhode River in
Edgewater, MD.

IMPORTANT DATA NOTE: GCW-W's meteorological data are pulled from the
GCReW met station, a different instrument from those at the other
synoptic sites. It reports four variables that appear nowhere else:
wx_vpd15, wx_svp15, wx_minws15, and wx_gcrew_rain15, the last of which
is in CENTIMETERS (not mm). See the variables table below.

Contacts for the GCReW Synoptic site:
Stephanie J. Wilson <[email protected]>
Pat Megonigal <[email protected]>
4 changes: 3 additions & 1 deletion synoptic/metadata/L1_metadata/L1_metadata_columns.csv
@@ -1,6 +1,8 @@
# L1 data will be ordered following rows below, and these descriptions inserted into metadata
Column,Description
TIMESTAMP,Datalogger timestamp (EST) (POSIXct)
Site,Site code; see site info above (character)
Plot,Plot code; see site info above (character)
TIMESTAMP,Datalogger timestamp in EST (POSIXct)
Instrument,Name of measurement instrument (character)
Instrument_ID,Identifier of instrument within plot (character)
Sensor_ID,"Identifier of individual sensor, tree, etc. being measured (character)"
10 changes: 7 additions & 3 deletions synoptic/metadata/L1_metadata/L1_metadata_variables.csv
@@ -34,13 +34,13 @@ WaterLevel,Pressure600,gw_pressure,psi,mbar,x * 68.948,-10,910,Vented pressure c
WaterLevel,Depth600,gw_depth,,,x * 1,,,
WaterLevel,Voltage_Ext600A,gw_voltage_ext,V,V,x * 1,,,External battery voltage coming into the Aquatroll
WaterLevel,Battery_Int600A,gw_battery,%,%,x * 1,0,100,Internal battery percentage
ClimaVue50_15min,SlrFD_W_Avg,wx_slr_fd15,MJ/m2,MJ/m2,x * 1,,,Average solar flux over 15 minute period
ClimaVue50_15min,SlrTF_MJ_Tot,wx_slr_tf15,W/m2,W/m2,x * 1,,,Total solar flux in over minute period
ClimaVue50_15min,SlrFD_W_Avg,wx_slr_fd15,W/m2,MJ/m2,x * 1,,,Average solar flux over 15 minute period
ClimaVue50_15min,SlrTF_MJ_Tot,wx_slr_tf15,MJ/m2,W/m2,x * 1,,,Total solar flux over 15 minute period
ClimaVue50_15min,Rain_mm_Tot,wx_rain15,mm,mm,x * 1,,,Total rain over 15 minute period
ClimaVue50_15min,WS_ms_S_WVT,wx_windspeed15,m/s,m/s,x * 1,,,Wind speed
ClimaVue50_15min,WindDir_D1_WVT,wx_winddir15,degrees,degrees,x * 1,0,359,Wind direction
ClimaVue50_15min,WindDir_SD1_WVT,wx_winddir_sd115,degrees,degrees,x * 1,0,359,Standard deviation of wind direction part of CS windvector function
ClimaVue50_15min,MaxWS_ms_Max,wx_maxws15,m/s,m/s,x * 1,0,30,Maximum wind speed in15 minute period
ClimaVue50_15min,MaxWS_ms_Max,wx_maxws15,m/s,m/s,x * 1,0,30,Maximum wind speed in 15 minute period
ClimaVue50_15min,MaxWS_ms_TMx,wx_maxws_tmx15,m/s,m/s,x * 1,,,Time of maximum windspeed in 15 minute period
ClimaVue50_15min,Invalid_Wind_Tot,wx_invalid_wind15,count,count,x * 1,,,Total invalid wind measurements over 15 minute period
ClimaVue50_15min,AirT_C_Avg,wx_tempavg15,degC,degC,x * 1,-50,60,Average air temperature over 15 minute period
@@ -96,6 +96,10 @@ ClimaVue50_24hr,TiltWE_deg_Min,,degrees,degrees,x * 1,-90,90,Minimum degree of s
ClimaVue50_24hr,PAR_Den_C_Avg,wx_par_den24,µmol/m2/s,µmol/m2/s,x * 1,0,2000,Average photosynthetically active radiation (PAR) flux density over 24 hr period; site specific correction factor applied
ClimaVue50_24hr,PAR_Tot_C_Tot,wx_par_tot24,mmol/m2,mmol/m2,x * 1,,,Sum of total photosynthetically active radiation (PAR) flux density over 24 hr period
ClimaVue50_24hr,CVMeta,,,,,,,"Current configuration of ClimaVue sensors (serial number, etc). Format is a13CAMPBELLCLIM50xxxxxx-yyyyyyyyy, where a is the SDI-12 address, xxxxxx is the model, and yyyyyyyyy is the serial number."
GCREW_MET_15min,VPD,wx_vpd15,Pa,Pa,x * 1,,,Vapor pressure deficit; from the GCREW met station and only appears at GCW-W
GCREW_MET_15min,SVP,wx_svp15,Pa,Pa,x * 1,,,Saturated vapor pressure; from the GCREW met station and only appears at GCW-W
GCREW_MET_15min,WS_ms_RM_Min,wx_minws15,m/s,m/s,x * 1,0,30,Minimum wind speed in 15 minute period; from the GCREW met station and only appears at GCW-W
GCREW_MET_15min,Rain_cm_Tot,wx_gcrew_rain15,cm,cm,x * 1,0,960,Total rain over 15 minute period IN CENTIMETERS; from the GCREW met station and only appears at GCW-W
ExoTable,Conductivity,sonde_conductivity,,,x * 1,,,
ExoTable,FDOM_QSU,sonde_fdom,QSU,QSU,x * 1,0,300,"Fluorescent dissolved organic matter concentration, Quinine Sulfate Units"
ExoTable,FDOM_RFU,sonde_fdom_rfu,RFU,RFU,x * 1,0,100,"Fluorescent dissolved organic matter concentration, Relative Fluorescent Units"
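The sixth column in the variables table above is a conversion expression in `x` (e.g. `x * 68.948` converts the AquaTroll pressure from psi to mbar, while `x * 1` is the identity; note `wx_gcrew_rain15` is deliberately left in centimeters). A minimal sketch of how such a table-driven conversion could be applied; the `apply_conversion` helper is hypothetical, not a function from this repository:

```r
# Sketch: evaluate a units-conversion expression from the variables table.
# The expression string is parsed and evaluated with x bound to the raw values.
apply_conversion <- function(x, expr_string) {
  eval(parse(text = expr_string), envir = list(x = x))
}

apply_conversion(c(14.5, 15.0), "x * 68.948")  # psi -> mbar
apply_conversion(c(0.25, 0.0), "x * 1")        # identity, e.g. wx_gcrew_rain15 (cm)
```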
2 changes: 1 addition & 1 deletion synoptic/metadata/L1_metadata/README_v1-0.txt
@@ -39,7 +39,7 @@ Version 1-0 released 2024-05-15
* Covers late 2019 through April 2024 for TEMPEST and all synoptic sites
* Restructured for ease of use, with metadata (location, sensor ID, etc) in separate columns
* SWH plot naming reworked for new upland plot; mirroring TMP C to GCW UP
* GCReW weather station variables are now mirrored to TMP and GCW
* GCReW weather station variables are now mirrored to GCW-W
* Many fixes to variable units and bounds
* Out-of-service is valid for AquaTROLL and EXO

3 changes: 3 additions & 0 deletions synoptic/metadata/L1_metadata/TMP.txt
@@ -4,6 +4,9 @@ freshwater ("F"; 38.87403N, 76.5516W), and saltwater ("S"; 38.8744N,
76.5525W). The TEMPEST experiment is in a mid- to late-successional (~80
years old) temperate, deciduous coastal forest.

NOTE: Meteorological data for TEMPEST are available in the GCW-W files
and pulled from the GCReW met station. See notes in the GCW metadata.

Contact for the TEMPEST site:
J. Patrick Megonigal [email protected]
