Skip to content

Commit

Permalink
fix: hydro imgw daily
Browse files Browse the repository at this point in the history
  • Loading branch information
bczernecki committed Oct 20, 2024
1 parent 68af360 commit d114c26
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 53 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.2.1
Version: 1.2.2
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# climate 1.2.3

* Major fixes: the code now recognizes the different encodings and directory structure used for (IMGW) hydrological datasets


# climate 1.2.2

* Major fixes for adjusting code to stay in line with CRAN policies
Expand Down
134 changes: 83 additions & 51 deletions R/hydro_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ hydro_imgw_daily_bp = function(year,
base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/"
interval = "daily"
interval_pl = "dobowe"

# initiate empty objects:
all_data = NULL
codz_data = NULL
zjaw_data = NULL

temp = tempfile()
test_url(link = paste0(base_url, interval_pl, "/"), output = temp)
Expand All @@ -68,63 +73,89 @@ hydro_imgw_daily_bp = function(year,
catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind])
catalogs = gsub(x = catalogs, pattern = "/", replacement = "")
catalogs = catalogs[catalogs %in% as.character(year)]

if (length(catalogs) == 0) {
stop("Selected year(s) is/are not available in the database.", call. = FALSE)
}
meta = hydro_metadata_imgw(interval)

all_data = vector("list", length = length(catalogs))
for (i in seq_along(catalogs)) {
catalog = catalogs[i]
iterator = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12")
data = NULL

temp = tempfile()
test_url(link = paste0(base_url, interval_pl, "/", catalog), output = temp)
b = readLines(temp, warn = FALSE)

files_in_dir = readHTMLTable(b)[[1]]$Name
ind = grep(files_in_dir, pattern = "zip")
codz_files = grep(x = files_in_dir, pattern = "codz", value = TRUE)
zjaw_files = grep(x = files_in_dir, pattern = "zjaw", value = TRUE)
iterator = c(codz_files, zjaw_files)

for (j in seq_along(iterator)) {
address = paste0(base_url, interval_pl, "/", catalog, "/codz_", catalog, "_", iterator[j], ".zip")
temp = tempfile()
temp2 = tempfile()
test_url(address, temp)
#download.file(address, temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]

# file pattern for codz:
if (grepl(x = iterator[j], "codz")) {
address = paste0(base_url, interval_pl, "/", catalog, "/", iterator[j])
temp = tempfile()
temp2 = tempfile()
test_url(link = address, output = temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]

if (translit) {
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1)))
} else {
data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",",
fileEncoding = "CP1250"),
warning = function(w) {
read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";")
})
}
# extra exception for the current year, according to information provided by IMGW-PIB, i.e.:
# "Do czasu zakonczenia kontroli przeplywow rekordy z danymi z roku 2020 maja format:
# Kod stacji #Nazwa stacji #Nazwa rzeki/jeziora #Rok hydrologiczny #Wskaznik miesiaca w roku hydrologicznym
# Dzien #Stan wody [cm] #Temperatura wody [st. C] #Miesiac kalendarzowy"
# (in English: until the flow verification is finished, such records have only these 9 columns,
# without the flow column, so an empty flow column is inserted below)
if (ncol(data1) == 9) {
data1$flow = NA
data1 = data1[, c(1:7, 10, 8:9)]
}

if (translit) {
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1)))
} else {
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}
# extra exception for a current year according to information provided by IMGW-PIB:
# i.e.:
# "Do czasu zakonczenia kontroli przeplywow rekordy z danymi z roku 2020 maja format:
#Kod stacji #Nazwa stacji #Nazwa rzeki/jeziora #Rok hydrologiczny #Wskaznik miesiaca w roku hydrologicznym
#Dzien #Stan wody [cm] #Temperatura wody [st. C] #Miesiac kalendarzowy
if (ncol(data1) == 9) {
data1$flow = NA
data1 = data1[, c(1:7, 10, 8:9)]
colnames(data1) = meta[[1]][, 1]
codz_data = rbind(codz_data, data1)
} # end of codz_


# start of zjaw_ section:
if (grepl(x = iterator[j], "zjaw")) {
address = paste0(base_url, interval_pl, "/", catalog, "/", iterator[j])
temp = tempfile()
temp2 = tempfile()
test_url(address, temp)
unzip(zipfile = temp, exdir = temp2)
file2 = paste(temp2, dir(temp2), sep = "/")[1]

if (translit) {
data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1)))
} else {
data2 = tryCatch(expr = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ",",
fileEncoding = "CP1250"),
warning = function(w) {
read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";")
})
}

colnames(data2) = meta[[2]][, 1]
zjaw_data = rbind(zjaw_data, data2)
}

colnames(data1) = meta[[1]][, 1]
data = rbind(data, data1)
}
address = paste0(base_url, interval_pl, "/", catalog, "/zjaw_", catalog, ".zip")

temp = tempfile()
temp2 = tempfile()
test_url(address, temp)
unzip(zipfile = temp, exdir = temp2)
file2 = paste(temp2, dir(temp2), sep = "/")[1]

if (translit) {
data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)))
} else {
data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data2) = meta[[2]][, 1]
all_data[[i]] = merge(data, data2,
by = intersect(colnames(data), colnames(data2)),
all.x = TRUE)
}

} #end of loop for (usually monthly) zip files in a given year

all_data[[length(all_data) + 1]] = merge(codz_data, zjaw_data,
by = intersect(colnames(codz_data), colnames(zjaw_data)),
all.x = TRUE)

} # end of loop for years (if more than 1 specified)

all_data = do.call(rbind, all_data)
all_data[all_data == 9999] = NA
Expand All @@ -138,14 +169,15 @@ hydro_imgw_daily_bp = function(year,
by.y = "Kod stacji",
all.y = TRUE)
}

#station selection
if (!is.null(station)) {
if (is.character(station)) {
all_data = all_data[substr(all_data$`Nazwa stacji`, 1, nchar(station)) == station, ]
if (nrow(all_data) == 0) {
stop("Selected station(s) is not available in the database.", call. = FALSE)
}
} else if (is.numeric(station)) {
} else if (is.numeric(station)) {
all_data = all_data[all_data$`Kod stacji` %in% station, ]
if (nrow(all_data) == 0) {
stop("Selected station(s) is not available in the database.", call. = FALSE)
Expand All @@ -154,9 +186,9 @@ hydro_imgw_daily_bp = function(year,
stop("Selected station(s) are not in the proper format.", call. = FALSE)
}
}

all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydro|w roku hydro|Dzie")]), ]
all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)

return(all_data)
}
}
2 changes: 1 addition & 1 deletion tests/testthat/test-meteo_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ test_that("meteo_imgw_daily", {
message("No internet connection! \n")
return(invisible(NULL))
} else {
y <- 1900 # year not supported
y = 1900 # year not supported
expect_message(meteo_imgw_daily(rank = "synop", year = y, status = TRUE,
coords = TRUE, allow_failure = TRUE))
}
Expand Down

0 comments on commit d114c26

Please sign in to comment.