From b120bf30a6d433d69d15a4827afad71f0722478c Mon Sep 17 00:00:00 2001
From: Johannes Koch <jokoch@pik-potsdam.de>
Date: Mon, 13 May 2024 16:12:09 +0200
Subject: [PATCH] Fix bug when using replace_NAs on a tibble without a year column

---
 R/adapt_source.R         | 13 ++++++++-----
 R/convertGDP.R           |  2 +-
 R/transform_user_input.R |  2 +-
 vignettes/handle_NAs.Rmd | 22 +++++++++++-----------
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/R/adapt_source.R b/R/adapt_source.R
index c316049..59ccc43 100644
--- a/R/adapt_source.R
+++ b/R/adapt_source.R
@@ -16,20 +16,23 @@ adapt_source_USA <- function(gdp, source, replace_NAs) {
 }
 
 #
-adapt_source <- function(gdp, source, with_regions, replace_NAs) {
+adapt_source <- function(gdp, source, with_regions, replace_NAs, require_year_column) {
   rlang::check_installed(c("zoo"), reason = "in order for 'replace_NAs' to work.")
 
   . <- NULL
 
   # Create adapted source object
+  ## Columns by which to identify missing source entries
+  hcol <- if (require_year_column) c("iso3c", "year") else "iso3c"
   source_adapted <- source %>%
-    # Add any iso3c-year combinations from gdp, not available in source
+    # Add any combinations of the hcol columns present in gdp but missing from source
     dplyr::bind_rows(gdp %>%
                        {if ("gdpuc_region" %in% colnames(.)) dplyr::filter(., is.na(.data$gdpuc_region)) else .} %>%
-                       dplyr::select("iso3c", "year") %>%
+                       dplyr::select(tidyselect::all_of(hcol)) %>%
                        dplyr::distinct() %>%
-                       dplyr::anti_join(source, by = c("iso3c", "year"))) %>%
-    tidyr::complete(.data$iso3c, .data$year)
+                       dplyr::anti_join(source, by = tidyselect::all_of(hcol))) %>%
+    tidyr::complete(.data$iso3c, .data$year) %>%
+    dplyr::filter(!is.na(.data$year))
 
   if (replace_NAs[1] == "linear") {
     # Make sure that source contains observations for every year between min and max years.
diff --git a/R/convertGDP.R b/R/convertGDP.R
index 10ca093..ba8f648 100644
--- a/R/convertGDP.R
+++ b/R/convertGDP.R
@@ -166,7 +166,7 @@ convertGDP <- function(gdp,
   x <- transform_internal(x, gdp, with_regions, internal$require_year_column)
 
   if (return_cfs) {
-    return(list("result" = x, "cfs" = do.call(get_conversion_factors, arg[1:6])))
+    return(list("result" = x, "cfs" = do.call(get_conversion_factors, arg[1:7])))
   } else {
     return(x)
   }
diff --git a/R/transform_user_input.R b/R/transform_user_input.R
index 0ace9ee..51b61fa 100644
--- a/R/transform_user_input.R
+++ b/R/transform_user_input.R
@@ -72,7 +72,7 @@ transform_user_input <- function(gdp, unit_in, unit_out, source, use_USA_deflato
       (!is.null(replace_NAs) && !any(sapply(c(NA, 0, "no_conversion"), setequal, replace_NAs))) ) {
     if (use_USA_deflator_for_all || replace_NAs[1] == "with_USA") source <- adapt_source_USA(gdp, source, replace_NAs)
     if (!is.null(replace_NAs) && !any(sapply(c(NA, 0, "no_conversion", "with_USA"), setequal, replace_NAs))){
-      source <- adapt_source(gdp, source, with_regions, replace_NAs)
+      source <- adapt_source(gdp, source, with_regions, replace_NAs, require_year_column)
     }
     source_name <- paste0(source_name, "_adapted")
   }
diff --git a/vignettes/handle_NAs.Rmd b/vignettes/handle_NAs.Rmd
index 7c06252..fa38759 100644
--- a/vignettes/handle_NAs.Rmd
+++ b/vignettes/handle_NAs.Rmd
@@ -29,9 +29,9 @@ Below, the `return_cfs` argument is set to `TRUE` to inspect the conversion fact
 ```{r}
 library(GDPuc)
 
-# Test with Aruba -> iso3c = ABW
+# Test with Venezuela -> iso3c = VEN
 my_gdp <- tibble::tibble(
-  iso3c = c("ABW"),
+  iso3c = c("VEN"),
   year = 2010:2014,
   value = 100:104
 )
@@ -64,7 +64,7 @@ If set to 0, resulting NAs are set to 0.
 
 ```{r}
 my_gdp <- tibble::tibble(
-  iso3c = "ABW",
+  iso3c = "VEN",
   year = 2010:2014,
   value = 100:104
 )
@@ -88,7 +88,7 @@ If set to "no_conversion", NAs are replaced with the values in the gdp argument.
 
 ```{r}
 my_gdp <- tibble::tibble(
-  iso3c = "ABW",
+  iso3c = "VEN",
   year = 2010:2014,
   value = 100:104
 )
@@ -112,7 +112,7 @@ If set to "linear", missing conversion factors are inter- and extrapolated linea
 
 ```{r}
 my_gdp <- tibble::tibble(
-  iso3c = "ABW",
+  iso3c = "VEN",
   year = 2010:2014,
   value = 100:104
 )
@@ -135,13 +135,13 @@ If set to "regional_average", the regional GDP-weighted averages will be used. R
 
 ```{r}
 my_gdp <- tibble::tibble(
-  iso3c = "ABW",
+  iso3c = "VEN",
   year = 2010:2014,
   value = 100:104
 )
 
 my_mapping_data_frame <- tibble::tibble(
-  iso3c = c("ABW", "BRA", "ARG", "COL"),
+  iso3c = c("VEN", "BRA", "ARG", "COL"),
   region = "LAM"
 )
 
@@ -170,13 +170,13 @@ if any missing conversion factors still lead to NAs, these are replaced with 0.
 ```{r}
 # Create an imaginary country XXX, and add it to the Latin America region
 my_gdp <- tibble::tibble(
-  iso3c = c("ABW", "XXX"),
+  iso3c = c("VEN", "XXX"),
   year = 2010,
   value = 100
 )
 
 my_mapping_data_frame <- tibble::tibble(
-  iso3c = c("ABW", "BRA", "ARG", "COL", "XXX"),
+  iso3c = c("VEN", "BRA", "ARG", "COL", "XXX"),
   region = "LAM"
 )
 
@@ -199,7 +199,7 @@ If set to `1`, missing conversion factors are set to 1. **To be deprecated, use
 
 ```{r}
 my_gdp <- tibble::tibble(
-  iso3c = "ABW",
+  iso3c = "VEN",
   year = 2010:2014,
   value = 100:104
 )
@@ -215,7 +215,7 @@ x$result
 
 x$cfs
 
-# Why is the deflator above not 1? That is because for ABW, only the deflator value in 2019 was set to 1. 
+# Why is the deflator above not 1? Because for VEN, only the deflator value in 2019 was set to 1.
 # In 2005 the deflator was in the order of magnitude of 100. Obviously setting the deflator to 1 in 2019 is 
 # completely misleading.
 ```