bczernecki · bczernecki · Oct 18, 2024 · Oct 16, 2024 · Oct 16, 2024 · Oct 17, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -27,7 +27,7 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.0
+RoxygenNote: 7.3.2
 Depends: 
     R (>= 4.0.0)
 Imports: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ export(hydro_imgw)
 export(hydro_imgw_annual)
 export(hydro_imgw_daily)
 export(hydro_imgw_monthly)
+export(hydro_shortening_imgw)
 export(meteo_imgw)
 export(meteo_imgw_daily)
 export(meteo_imgw_datastore)
@@ -13,6 +14,7 @@ export(meteo_imgw_monthly)
 export(meteo_noaa_co2)
 export(meteo_noaa_hourly)
 export(meteo_ogimet)
+export(meteo_shortening_imgw)
 export(nearest_stations_imgw)
 export(nearest_stations_noaa)
 export(nearest_stations_ogimet)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# climate 1.2.2
+
+* Major fixes to keep the code in line with CRAN policies
+* Fixes for `hydro_imgw()` set of functions due to changes in encoding and metadata structure
+
+
 # climate 1.2.1
 
 * Corrected duplicated column names for IMGW-PIB stations

diff --git a/R/check_locale.R b/R/check_locale.R
@@ -6,6 +6,8 @@
 #' in the Polish metservice's repository and therefore will be forced to
 #' use ASCII//TRANSLIT
 #' @noRd
+#' @keywords internal
+#' @return 1 if the locale is not UTF-8, 0 otherwise
 
 check_locale = function() {
 

diff --git a/R/clean_metadata_hydro.R b/R/clean_metadata_hydro.R
@@ -5,38 +5,36 @@
 #' @param interval temporal interval
 #' @importFrom utils read.fwf
 #' @keywords internal
+#' @noRd
+
 clean_metadata_hydro = function(address, interval) {
   temp = tempfile()
 
   test_url(link = address, output = temp)
-  a = readLines(temp, warn = FALSE)
-
-  a = iconv(a, from = "cp1250", to = "ASCII//TRANSLIT") # remove polish characters
-  a = gsub(a, pattern = "\\?", replacement = "") # removing extra characters after conversion
-
-  # additional workarounds for mac os but not only...
+  a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE, 
+               fileEncoding = "CP1250", skip = 1, sep = "\t")$V1
+  a = gsub(a, pattern = "\\?", replacement = "") 
   a = gsub(x = a, pattern = "'", replacement = "")
-  a = gsub(x = a, pattern = "\\^", replacement = "")
+  a = trimws(gsub(x = a, pattern = "\\^", replacement = ""))
+  a = gsub(a, pattern = "\\s+", replacement = " ")
 
   if (interval == "monthly") {
-    b = list(data.frame(parameters = a[3:12])) # sklad danych jeszcze nie wiem jak ominąć problem kontroli
-    # ale on może się zmienić nie wiem czy nie lepiej wykluczyć ostatni rok
+    b = list(data.frame(parameters = a[1:10]))
   }
   if (interval == "daily") {
-    b = data.frame(parameters = a[3:12])
+    b = data.frame(parameters = a[1:10])
   }
   if (interval == "semiannual_and_annual") {
-    godzina = paste0(a[15], ":", a[16]) # nie jestem pewien czy tak bo w dokumentacji jest podzial na dwie kolumny,
-    #ale w pliku jest jedna kolumna a pomiaru brak
-    data = c(a[12:14], godzina)
+    godzina = paste0(a[13], ":", a[14])
+    data = c(a[10:12], godzina)
     data_od = paste0("wystapienie_od_", data)
-    data_do = paste0("wystapienie_od_", data)
-    SPT = unlist(strsplit(a[10], "]/")) # stan/przeplyw/temperatura
+    data_do = paste0("wystapienie_do_", data)
+    SPT = unlist(strsplit(a[8], "]/")) # stan/przeplyw/temperatura
     SPT[1] = paste0(SPT[1], "]")
     SPT[2] = paste0(SPT[2], "]")
     b = NULL
     for (i in seq_along(SPT)) {
-      tmp = c(a[3:9], SPT[i], data_od, data_do)
+      tmp = c(a[1:7], SPT[i], data_od, data_do)
       b = cbind(b, tmp)
     }
     b = list("H" = data.frame(parameters = b[, 1]),

diff --git a/R/clean_metadata_meteo.R b/R/clean_metadata_meteo.R
@@ -8,16 +8,12 @@
 #' @importFrom stats na.omit
 #' @importFrom stringi stri_trans_general
 #' @keywords internal 
-#'
+#' @noRd
 
 clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
 
   temp = tempfile()
   test_url(link = address, output = temp)
-
-  # a = readLines(temp, warn = FALSE, encoding = "CP1250") # doesn't work on mac,
-  # thus:
-  # a = iconv(a, from = "CP1250", to = "ASCII//TRANSLIT")
   a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE, 
                fileEncoding = "CP1250")$V1
   a = gsub(a, pattern = "\\?", replacement = "")
@@ -27,9 +23,6 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
   a = gsub(x = a, pattern = "'", replacement = "")
   a = gsub(x = a, pattern = "\\^0", replacement = "")
   a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE)
-  # this one does not work on windows
-  # a = suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
-  #                                        fileEncoding = "CP1250", stringsAsFactors = FALSE)))
   length_char = max(nchar(a$V1), na.rm = TRUE)
 
   if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly

diff --git a/R/co2_demo.R b/R/co2_demo.R
@@ -6,7 +6,8 @@
 #'
 #' @docType data
 #' @keywords datasets meteo
+#' @returns data.frame with historical CO2 concentrations
 #' @examples
 #' data(co2_demo)
 #' head(co2_demo)
 "co2_demo"
diff --git a/R/get_coord_from_string.R b/R/get_coord_from_string.R
@@ -7,13 +7,8 @@
 #' @format The returned object is the geographic coordinates using WGS84 (EPSG:4326) in decimal format.
 #' Negative values mean western or southern Hemisphere
 #' @keywords internal
-#'
-#' @examples
-#' \donttest{
-#'  txt = "12120:   Leba (Poland)\nLatitude: 54-45N    Longitude: 017-32E    Altitude: 2 m."
-#'  climate:::get_coord_from_string(txt, pattern = "Latitude")
-#' }
-#'
+#' @noRd
+
 get_coord_from_string = function(txt, pattern = "Longitude") {
   tt = gregexpr(pattern, txt)
   start = tt[[1]][1] + attributes(tt[[1]])$match.length + 1

diff --git a/R/hydro_imgw.R b/R/hydro_imgw.R
@@ -21,7 +21,7 @@
 #' @return A data.frame with columns describing the hydrological parameters
 #' (e.g. flow, water level) where each row represent a measurement,
 #' depending on the interval, at a given hour, month or year.
-#' If `coords = TRUE` additional two columns with geografic coordinates are added.
+#' If `coords = TRUE` additional two columns with geographic coordinates are added.
 #' @examples
 #' \donttest{
 #'   x = hydro_imgw("monthly", year = 1999)

diff --git a/R/hydro_imgw_annual.R b/R/hydro_imgw_annual.R
@@ -18,6 +18,7 @@
 #' @importFrom utils download.file unzip read.csv
 #' @importFrom data.table fread
 #' @export
+#' @returns data.frame with historical hydrological data for the semi-annual and annual period
 #' @examples
 #' \donttest{
 #' hydro_yearly = hydro_imgw_annual(year = 2000, value = "H", station = "ANNOPOL")
@@ -72,7 +73,7 @@ hydro_imgw_annual_bp = function(year = year,
   ind = grep(readHTMLTable(a)[[1]]$Name, pattern = "/")
   catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind])
   catalogs = gsub(x = catalogs, pattern = "/", replacement = "")
-  # less files to read:
+
   catalogs = catalogs[catalogs %in% as.character(year)]
   if (length(catalogs) == 0) {
     stop("Selected year(s) is/are not available in the database.", call. = FALSE)
@@ -81,10 +82,7 @@ hydro_imgw_annual_bp = function(year = year,
 
   all_data = vector("list", length = length(catalogs))
   for (i in seq_along(catalogs)) {
-    # i = 1
     catalog = catalogs[i]
-    #print(i)
-
     address = paste0(base_url, interval_pl, "/", catalog, "/polr_", value, "_", catalog, ".zip")
 
     temp = tempfile()
@@ -104,19 +102,19 @@ hydro_imgw_annual_bp = function(year = year,
     all_data[[i]] = data1
   }
   all_data = do.call(rbind, all_data)
-  # ten sam warunek braku danych lub obserwacji dla wszytkich wartosci
   all_data[all_data == 99999.999] = NA
   all_data = all_data[, !duplicated(colnames(all_data))]
 
   # coords
   if (coords) {
-    all_data = merge(climate::imgw_hydro_stations, all_data, by.x = "id", by.y = "Kod stacji", all.y = TRUE)
+    all_data = merge(climate::imgw_hydro_stations, all_data, by.x = "id", by.y = "Nazwa rzeki/jeziora", all.y = TRUE)
   }
   #station selection
   if (!is.null(station)) {
     if (is.character(station)) {
       all_data = all_data[substr(all_data$`Nazwa stacji`, 1, nchar(station)) == station, ]
       if (nrow(all_data) == 0) {
+
         stop("Selected station(s) is not available in the database.", call. = FALSE)
       }
     } else if (is.numeric(station)) {

diff --git a/R/hydro_imgw_daily.R b/R/hydro_imgw_daily.R
@@ -16,7 +16,7 @@
 #' @importFrom utils download.file unzip read.csv
 #' @importFrom data.table fread
 #' @export
-#'
+#' @returns data.frame with historical hydrological data for the daily time interval
 #' @examples \donttest{
 #'   daily = hydro_imgw_daily(year = 2000)
 #' }
@@ -76,7 +76,6 @@
   all_data = vector("list", length = length(catalogs))
   for (i in seq_along(catalogs)) {
     catalog = catalogs[i]
-    # print(i)
     iterator = c("01", "02", "03", "04", "05", "06",
                 "07", "08", "09", "10", "11", "12")
     data = NULL
@@ -96,17 +95,9 @@
       }
       # extra exception for a current year according to information provided by IMGW-PIB:
       # i.e.:
-      # "Do czasu zakończenia kontroli przepływów z roku hydrologicznego 2020 (do około poczatku sierpnia 2021),
-      # rekordy z  danymi z roku 2020 mają format:
-      #Kod stacji
-      #Nazwa stacji
-      #Nazwa rzeki/jeziora
-      #Rok hydrologiczny
-      #Wskaźnik miesiąca w roku hydrologicznym
-      #Dzień
-      #Stan wody [cm]
-      #Temperatura wody [st. C]
-      #Miesiąc kalendarzowy
+      # "Until the flow quality control is completed, records with data for the year 2020 have the format:
+      #Kod stacji  #Nazwa stacji  #Nazwa rzeki/jeziora  #Rok hydrologiczny  #Wskaznik miesiaca w roku hydrologicznym
+      #Dzien  #Stan wody [cm]  #Temperatura wody [st. C]    #Miesiac kalendarzowy
       if (ncol(data1) == 9) {
         data1$flow = NA
         data1 = data1[, c(1:7, 10, 8:9)]
@@ -131,9 +122,7 @@
 
     colnames(data2) = meta[[2]][, 1]
     all_data[[i]] = merge(data, data2,
-                         by = c("Kod stacji", "Nazwa stacji",
-                               "Rok hydrologiczny", "Nazwa rzeki/jeziora",
-                               "Wskaznik miesiaca w roku hydrologicznym", "Dzien"),
+                         by = intersect(colnames(data), colnames(data2)),
                          all.x = TRUE)
   }
 
@@ -166,11 +155,7 @@
     }
   }
 
-  all_data = all_data[order(all_data$`Nazwa stacji`,
-                            all_data$`Rok hydrologiczny`,
-                            all_data$`Wskaznik miesiaca w roku hydrologicznym`,
-                            all_data$`Dzien`), ]
-  # dodanie opcji  dla skracania kolumn i usuwania duplikatow:
+  all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydro|w roku hydro|Dzie")]), ]
   all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)
 
   return(all_data)

diff --git a/R/hydro_imgw_monthly.R b/R/hydro_imgw_monthly.R
@@ -16,6 +16,7 @@
 #' @importFrom utils download.file unzip read.csv
 #' @importFrom data.table fread
 #' @export
+#' @returns data.frame with historical hydrological data for the monthly summaries
 #'
 #' @examples \donttest{
 #'   monthly = hydro_imgw_monthly(year = 2000)
@@ -120,10 +121,7 @@
       stop("Selected station(s) are not in the proper format.", call. = FALSE)
     }
   }
-
-  all_data = all_data[order(all_data$`Nazwa stacji`,
-                            all_data$`Rok hydrologiczny`,
-                            all_data$`Wskaznik miesiaca w roku hydrologicznym`), ]
+  all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydrologiczny|w roku hydro")]), ]
   all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)
 
   return(all_data)

diff --git a/R/hydro_metadata_imgw.R b/R/hydro_metadata_imgw.R
@@ -6,12 +6,7 @@
 #' @param interval temporal resolution of the data ("daily" , "monthly", or "semiannual_and_annual")
 #' @param allow_failure logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE
 #' @keywords internal
-#' @examples
-#' \donttest{
-#'   meta = climate:::hydro_metadata_imgw(interval = "daily")
-#'   meta = climate:::hydro_metadata_imgw(interval = "monthly")
-#'   meta = climate:::hydro_metadata_imgw(interval = "semiannual_and_annual")
-#' }
+#' @noRd
 
 hydro_metadata_imgw = function(interval, allow_failure = TRUE) {
 

diff --git a/R/hydro_shortening_imgw.R b/R/hydro_shortening_imgw.R
@@ -9,13 +9,14 @@
 #' "polish" - original names in the dataset
 #' @param remove_duplicates whether to remove duplicated column names
 #' (default TRUE - i.e., columns with duplicated names are deleted)
-#' @keywords internal
+#' @export
+#' @returns data.frame with shortened names of hydrological parameters
 #' @examples
 #' \donttest{
-#'   monthly = hydro_imgw("monthly", year = 1969)
+#'   monthly = data = hydro_imgw("monthly", year = 1969, col_names = "polish")
 #'   
 #'   if (is.data.frame(monthly)) {
-#'   abbr = climate:::hydro_shortening_imgw(data = monthly,
+#'   abbr = hydro_shortening_imgw(data = monthly,
 #'       col_names = "full",
 #'       remove_duplicates = TRUE)
 #'   head(abbr)

diff --git a/R/meteo_imgw.R b/R/meteo_imgw.R
@@ -1,7 +1,7 @@
-#' Meteorological data from IMGW
+#' Meteorological data from the IMGW-PIB official repository
 #'
 #' Downloading hourly, daily, and monthly meteorological data from the
-#' SYNOP / CLIMATE / PRECIP stations available in the danepubliczne.imgw.pl collection
+#' SYNOP / CLIMATE / PRECIP stations available in the danepubliczne.imgw.pl collection.
 #'
 #' @param interval temporal resolution of the data ("hourly", "daily", "monthly")
 #' @param rank rank of the stations: "synop" (default), "climate" or "precip"