Merge pull request #21 from Gulf-IEA/SST-and-Kd490-ERDDAP-fix

SST and kd490 erddap fix
Gulf-IEA · Jun 11, 2024 · 4402c4c · 4402c4c
2 parents 2d27a47 + cc5edd1
commit 4402c4c
Show file tree

Hide file tree

Showing 6 changed files with 92 additions and 606 deletions.
diff --git a/.Rhistory b/.Rhistory
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 .Rproj.user
 
 /.quarto/
+
+.Rhistory
diff --git a/indicator_objects/Carib_SST.RData b/indicator_objects/Carib_SST.RData
diff --git a/indicator_objects/turbidity.RData b/indicator_objects/turbidity.RData
diff --git a/indicator_processing/automated_download/kd490.R b/indicator_processing/automated_download/kd490.R
@@ -1,4 +1,3 @@
-# 
 # Turbidity indicator
 #
 # direct download from ERDDAP
@@ -9,72 +8,79 @@
 rm(list = ls())
 dev.off()
 
+library(lubridate)
 library(maps)
 library(plotTimeSeries)
+library(rerddap)
 
 load("indicator_processing/spec_file.RData")
 
-# define urls  --------------------------------
-
-beg <- "https://coastwatch.pfeg.noaa.gov/erddap/griddap/nesdisVHNSQkd490Monthly.csv?kd_490[(2012-01-02T12:00:00Z):1:("
-
-url_pr <- paste0(beg, terminal_year, "-12-01T12:00:00Z)][(0.0):1:(0.0)][(18.7):1:(17.7)][(-67.5):1:(-65.2)]")
-url_st <- paste0(beg, terminal_year, "-12-01T12:00:00Z)][(0.0):1:(0.0)][(18.41):1:(18.20)][(-65.12):1:(-64.66)]")
-url_sc <- paste0(beg, terminal_year, "-12-01T12:00:00Z)][(0.0):1:(0.0)][(17.86):1:(17.59)][(-64.94):1:(-64.41)]")
-
-# previously separated St. Thomas and St. John but highly correlated
-#url_sj <- paste0(beg, terminal_year, "-12-01T12:00:00Z)][(0.0):1:(0.0)][(18.37):1:(18.20)][(-64.8):1:(-64.66)]")
-
-# automated downloads ------------------
-
-download.file(url = url_pr, destfile = "test_pr.csv")
-download.file(url = url_st, destfile = "test_st.csv")
-#download.file(url = url_sj, destfile = "test_sj.csv")
-download.file(url = url_sc, destfile = "test_sc.csv")
-
-# read files ----------------------------
-kdlabs <- read.table("test_pr.csv", sep = ",", header = T, skip = 0)
-kd_pr <- read.table("test_pr.csv", sep = ",", header = T, skip = 1)
-kd_st <- read.table("test_st.csv", sep = ",", header = T, skip = 1)
-#kd_sj <- read.table("test_sj.csv", sep = ",", header = T, skip = 1)
-kd_sc <- read.table("test_sc.csv", sep = ",", header = T, skip = 1)
-
-# remove .csv files ---------------------
-file.remove("test_pr.csv")
-file.remove("test_st.csv")
-#file.remove("test_sj.csv")
-file.remove("test_sc.csv")
-
-# assign labels to data frames ----------
-names(kd_pr) <- names(kdlabs)
-names(kd_st) <- names(kdlabs)
-#names(kd_sj) <- names(kdlabs)
-names(kd_sc) <- names(kdlabs)
-
-head(kd_pr)
-head(kd_st)
-#head(kd_sj)
-head(kd_sc)
-
-# calculate means -----------------------
-ind_pr <- tapply(kd_pr$kd_490, kd_pr$time, mean, na.rm = T)
-ind_st <- tapply(kd_st$kd_490, kd_st$time, mean, na.rm = T)
-#ind_sj <- tapply(kd_sj$kd_490, kd_sj$time, mean, na.rm = T)
-ind_sc <- tapply(kd_sc$kd_490, kd_sc$time, mean, na.rm = T)
-
-# check names are the same --------------
-summary(names(ind_pr) == names(ind_st))
-#summary(names(ind_pr) == names(ind_sj))
-summary(names(ind_pr) == names(ind_sc))
-
-#uli <- tapply(kd$kd_490, kd$time, function(x) quantile(x, probs = c(0.95), na.rm = T))
-#lli <- tapply(kd$kd_490, kd$time, function(x) quantile(x, probs = c(0.05), na.rm = T))
+# define years  --------------------------------
+styear <- 2012
+enyear <- terminal_year
+
+# define geographic scope  --------------------------------
+pr_coord <- matrix(c(-67.5, -65.2, 17.7, 18.7), 2, 2)
+sc_coord <- matrix(c(-64.94, -64.41, 17.59, 17.86), 2, 2)
+st_coord <- matrix(c(-65.12, -64.66, 18.20, 18.41), 2, 2)
+
+# get ERDDAP info  --------------------------------
+kd490 <- info('nesdisVHNSQkd490Monthly',
+              url = 'https://coastwatch.pfeg.noaa.gov/erddap')
+# alternative with longer timeseries
+# kd490 <- info('pmlEsaCCI60OceanColorDaily',
+#               url = 'https://coastwatch.pfeg.noaa.gov/erddap')
+
+
+# empty data  -------------------------------------------------
+pr_dat <- sc_dat <- st_dat <- setNames(data.frame(matrix(NA,length(styear:enyear)*12,3)),
+                                       c("year", "mon", 'mean'))
+m <- 1
+n <- 0
+
+# download by year to avoid timeout errors --------------------
+for (yr in styear:enyear) { 
+
+  ### BDT rERDDAP fix
+  n <- n + 12
+  ### Puerto Rico
+  pr_kd490_grab <- griddap(kd490, fields = 'kd_490', 
+                           time = c(paste0(yr,'-01-15'), paste0(yr,'-12-15')), 
+                           longitude = pr_coord[ ,1], 
+                           latitude = pr_coord[ ,2])
+
+  pr_dat[m:n,] <- aggregate(pr_kd490_grab$data$kd_490, 
+                            by = list(year(pr_kd490_grab$data$time), month(pr_kd490_grab$data$time)),
+                            mean, na.rm = T)
+
+  ### St Croix
+  sc_kd490_grab <- griddap(kd490, fields = 'kd_490', 
+                           time = c(paste0(yr,'-01-15'), paste0(yr,'-12-15')), 
+                           longitude = sc_coord[ ,1], 
+                           latitude = sc_coord[ ,2])
+
+  sc_dat[m:n,] <- aggregate(sc_kd490_grab$data$kd_490, 
+                            by = list(year(sc_kd490_grab$data$time), month(sc_kd490_grab$data$time)),
+                            mean, na.rm = T)
+
+  ### St Thomas
+  st_kd490_grab <- griddap(kd490, fields = 'kd_490', 
+                           time = c(paste0(yr,'-01-15'), paste0(yr,'-12-15')), 
+                           longitude = st_coord[ ,1], 
+                           latitude = st_coord[ ,2])
+
+  st_dat[m:n,] <- aggregate(st_kd490_grab$data$kd_490, 
+                            by = list(year(st_kd490_grab$data$time), month(st_kd490_grab$data$time)),
+                            mean, na.rm = T)
+
+  m <- n + 1
+}
 
 # create indicator object --------------
 
-datdata <- strftime(names(ind_pr), format = "%m-%Y")
-inddata <- data.frame(cbind(ind_pr, ind_st, ind_sc), row.names = datdata)
-labs <- c(rep("Turbidity from ocean color data", 3), rep("Diffuse attenuation coefficient at 490 nm (kd^2)", 3), 
+datdata <- paste(sprintf('%02d',pr_dat$mon),pr_dat$year,sep='-')
+inddata <- data.frame(cbind(pr_dat$mean, st_dat$mean, sc_dat$mean), row.names = datdata)
+labs <- c(rep("Turbidity from ocean color data", 3), rep("Diffuse attenuation coefficient at 490 nm (m^-1)", 3), 
           "Puerto Rico", "St. Thomas", "St. Croix")
 indnames <- data.frame(matrix(labs, nrow = 3, byrow = T))
 
@@ -88,5 +94,3 @@ save(ind, file = "indicator_objects/turbidity.RData")
 plotIndicatorTimeSeries(ind, coltoplot = 1:3, plotrownum = 3, dateformat = "%m-%Y", sublabel = T, trendAnalysis = F, widadj = 1.5)
 plotIndicatorTimeSeries(ind, coltoplot = 1:3, plotrownum = 3, dateformat = "%m-%Y", sublabel = T, trendAnalysis = F, widadj = 1.5, anom = "mon")
 
-
-
diff --git a/indicator_processing/automated_download/sst.R b/indicator_processing/automated_download/sst.R
@@ -1,56 +1,49 @@
-
 # fix for dealing with internet timeout error 
 # https://stackoverflow.com/questions/68666187/internetopenurl-failed-a-connection-with-the-server-could-not-be-established
 
 rm(list = ls())
 dev.off()
 
+library(lubridate)
 library(maps)
 library(plotTimeSeries)
+library(rerddap)
 
 load("indicator_processing/spec_file.RData")
 
-# devtools::install_github("mdsumner/ncdf4")
-# library(ncdf4)
-
 # define years  --------------------------------
 styear <- 1982
 enyear <- terminal_year
 
+# get ERDDAP info  --------------------------------
+sst <- info('ncdcOisst21Agg_LonPM180') # this may work better
+
 # empty data  -------------------------------------------------
-dat <- data.frame(row.names = c("year", "mon", "PR_mean", "PR_min", "PR_max", "VI_mean", "VI_min", "VI_max"))
+dat <- setNames(data.frame(matrix(NA,length(styear:enyear)*12,5)),
+                c("year", "mon", 'mean', 'min', 'max'))
+m <- 1
+n <- 0
 
 # download by year to avoid timeout errors --------------------
 for (yr in styear:enyear) { 
 
-# url from ERDDAP for OISST, download and read ----------------
-  url <- paste0("https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst21Agg.csv?sst[(", yr, "-01-01T12:00:00Z):1:(", yr, "-12-31T12:00:00Z)][(0.0):1:(0.0)][(17):1:(19)][(292):1:(296)]")
-
-  #  url_vi <- paste0("https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst21Agg.csv?sst[(", yr, "-01-01T12:00:00Z):1:(", yr, "-12-31T12:00:00Z)][(0.0):1:(0.0)][(17):1:(19)][(294.75):1:(296)]")
-  #  url_pr <- paste0("https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst21Agg.csv?sst[(", yr, "-01-01T12:00:00Z):1:(", yr, "-12-31T12:00:00Z)][(0.0):1:(0.0)][(17):1:(19)][(292):1:(294.75)]")
-  #  download for entire Caribbean because USVI and PR highly correlated. 
-    download.file(url = url, destfile = "st.csv")
-
-  labs <- read.table("st.csv", sep = ",", header = T, skip = 0)
-  sst_vi <- read.table("st.csv", sep = ",", header = T, skip = 1)
-  names(sst_vi) <- names(labs)
-
-# extract month -----------------------------------------------
-  sst_vi$mon <- strftime(sst_vi$time, format = "%m")
-
-# calculate mean, min, max and concatenate --------------------
-  ind_vi <- as.numeric(tapply(sst_vi$sst, sst_vi$mon, mean, na.rm = T))
-  min_vi <- as.numeric(tapply(sst_vi$sst, sst_vi$mon, min, na.rm = T))
-  max_vi <- as.numeric(tapply(sst_vi$sst, sst_vi$mon, max, na.rm = T))
+  ### BDT rERDDAP fix
+  sst_grab <- griddap(sst, fields = 'sst', 
+                      time = c(paste0(yr,'-01-01'), paste0(yr,'-12-31')), 
+                      longitude = c(min_lon, max_lon), 
+                      latitude = c(min_lat, max_lat))
+
+  sst_agg <- aggregate(sst_grab$data$sst, 
+                       by = list(year(sst_grab$data$time), month(sst_grab$data$time)), 
+                       function(x) c(mean(x, na.rm = T), min(x, na.rm = T), max(x, na.rm = T)))
+
+  n <- n + 12
+  dat[m:n,] <- data.frame(sst_agg[,1:2], unlist(sst_agg[,3]))
+  m <- n + 1
 
-  tempdat <- data.frame(yr, unique(sst_vi$mon), ind_vi, min_vi, max_vi, stringsAsFactors = F)
-  dat <- data.frame(rbind(dat, tempdat), stringsAsFactors = F)
 }
 
-file.remove("st.csv")
-
-# add row names and yearmonth column --------------------------
-names(dat) <- c("year", "mon", "mean", "min", "max")
+# add yearmonth column --------------------------
 dat$yrmon <- paste0(dat$mon, "-", dat$year)
 dat
 head(dat)
@@ -59,21 +52,20 @@ tail(dat)
 # check outputs and look at correlations ---------------------
 table(dat$year)
 table(dat$mon)
-matplot(dat[3:5], type = "l") #, col = c(4, 4, 4, 2, 2, 2), lty = c(1, 2, 3, 1, 2, 3))
+matplot(dat[3:5], type = "l")
 cor(dat[3:5])
 plot(dat[3:5])
 
 # format into indicator object ------------------
 
 labs <- c(rep("U.S. Caribbean sea surface temperature", 3), rep("degrees Celsius", 3), 
           "monthly mean", "monthly minimum", "monthly maximum")
-          #          "Puerto Rico - mean", "USVI - mean", "Puerto Rico - monthly minimum", "USVI - monthly minimum", "Puerto Rico - monthly maximum", "USVI - monthly maximum")
 
 indnames <- data.frame(matrix(labs, nrow = 3, byrow = T))
 inddata <- data.frame(dat[c(3:5)])
 datdata <- dat$yrmon
 
-s <- list(labels = indnames, indicators = inddata, datelist = datdata) #, ulim = ulidata, llim = llidata)
+s <- list(labels = indnames, indicators = inddata, datelist = datdata)
 class(s) <- "indicatordata"
 s