From 7b45e92ba56573e3f882c665f25ff2e27fc93940 Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Wed, 16 Aug 2023 11:38:59 +1000
Subject: [PATCH 01/11] add data_subclass column

---
 R/seifa.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/seifa.R b/R/seifa.R
index 9013760..bcc8ebe 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -119,7 +119,7 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
 
   if (file.exists(filename)) {
     ind <- map(sheet_names, ~ get_seifa_index_sheet(filename, .x, structure, year), .id = 'seifa_index') %>%
-      list_rbind()
+      list_rbind(names_to = 'data_subclass')
     return(ind)
   } else {
     warning('Download of ABS file failed. Please check your internet connection and try again.')

From 0548a7f445ba11c57fe48372823e83b1e3f793a8 Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Wed, 16 Aug 2023 15:47:16 +1000
Subject: [PATCH 02/11] allow for seifa summary

---
 R/seifa.R                    | 254 +++++++++++++++++++++--------------
 man/get_seifa.Rd             |   2 +-
 man/get_seifa_index_sheet.Rd |  11 +-
 tests/testthat/test-seifa.R  |  94 ++++++++-----
 4 files changed, 226 insertions(+), 135 deletions(-)

diff --git a/R/seifa.R b/R/seifa.R
index bcc8ebe..65cc300 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -1,5 +1,3 @@
-
-
 #' @title Import SEIFA Data from ABS
 #' @description The function will download all SEIFA data, for a specified spatial structure,
 #' to a temporary excel file and then merge sheets into a single `data.frame`. This `data.frame`
@@ -43,62 +41,75 @@
 #'
 #' @examples
 #' \dontrun{
-#'   get_seifa(structure = 'lga', data_subclass = 'irsed', year = 2016)
+#' get_seifa(structure = "lga", data_subclass = "irsed", year = 2016)
 #' }
 #'
-get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
-                      data_subclass = c('irsed', 'irsead', 'ier', 'ieo'),
+get_seifa <- function(structure = c("sa1", "sa2", "lga", "postcode", "suburb"),
+                      data_subclass = c("irsed", "irsead", "ier", "ieo"),
                       year = NULL) {
-
   # TODO: 2006 SEIFA has the Statistical Local Area (SLA) structure, not the
   # Statistical Level Areas (SA1, SA2) structures. Would need to update logic to
   # handle 2006.
-  release_years = c(2011, 2016, 2021)
+  release_years <- c(2011, 2016, 2021)
+
+  stopifnot(
+    "data_subclass must be either:
+            1. some combination of: 'irsed', 'irsead', 'ier', 'ieo'
+            2. just the value 'summary'" =
+      all(data_subclass %in% c("irsed", "irsead", "ier", "ieo")) |
+        (data_subclass == "summary" & length(data_subclass) == 1)
+  )
 
-  stopifnot(all(data_subclass %in% c('irsed', 'irsead', 'ier', 'ieo')))
 
   # match excel sheet names to data_subclass
-  sheet_names <- c('irsed'   = 'Table 2',
-                   'irsead'  = 'Table 3',
-                   'ier'     = 'Table 4',
-                   'ieo'     = 'Table 5')
+  sheet_names <- c(
+    "summary" = "Table 1",
+    "irsed" = "Table 2",
+    "irsead" = "Table 3",
+    "ier" = "Table 4",
+    "ieo" = "Table 5"
+  )
 
   sheet_names <- sheet_names[data_subclass]
 
-    # match spatial structures to specific urls
+  # match spatial structures to specific urls
   structure <- match.arg(structure, several.ok = FALSE)
 
-  urls <- list( '2011' = c( 'sa1' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&9828E2819C30D96DCA257B43000E923E&0&2011&05.04.2013&Latest',
-                            'sa2' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20SA2%20Indexes.xls&2033.0.55.001&Data%20Cubes&76D0BC44356DC34ACA257B3B001A4913&0&2011&12.11.2014&Latest',
-                            'lga' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&28EF8569335AC7CDCA257BAB00136B0F&0&2011&18.07.2013&Latest',
-                            'postcode' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20POA%20Indexes.xls&2033.0.55.001&Data%20Cubes&209B3364525C82CCCA257B3B001A4D56&0&2011&12.11.2014&Latest',
-                            'suburb' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&F40D0630B245D5DCCA257B43000EA0F1&0&2011&05.04.2013&Latest'),
-
-                '2016' = c( 'sa1' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&40A0EFDE970A1511CA25825D000F8E8D&0&2016&27.03.2018&Latest',
-                            'sa2' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa2%20indexes.xls&2033.0.55.001&Data%20Cubes&C9F7AD36397CB43DCA25825D000F917C&0&2016&27.03.2018&Latest',
-                            'lga' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&5604C75C214CD3D0CA25825D000F91AE&0&2016&27.03.2018&Latest',
-                            'postcode' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20poa%20indexes.xls&2033.0.55.001&Data%20Cubes&DC124D1DAC3D9FDDCA25825D000F9267&0&2016&27.03.2018&Latest',
-                            'suburb' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&863031D939DE8105CA25825D000F91D2&0&2016&27.03.2018&Latest'),
-
-                '2021' = c( 'sa1' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%201%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'sa2' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%202%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'lga' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Local%20Government%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'postcode' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Postal%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'suburb' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Suburbs%20and%20Localities%2C%20Indexes%2C%20SEIFA%202021.xlsx' )
-
-                )
-
-
-  if( is.null(year) ){
-    year = as.character(max(release_years))
-  }else{
-    if(! (is.numeric(year) | is.character(year) ) ){
-      stop('year must either be an integer or character string.')
+  urls <- list(
+    "2011" = c(
+      "sa1" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&9828E2819C30D96DCA257B43000E923E&0&2011&05.04.2013&Latest",
+      "sa2" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20SA2%20Indexes.xls&2033.0.55.001&Data%20Cubes&76D0BC44356DC34ACA257B3B001A4913&0&2011&12.11.2014&Latest",
+      "lga" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&28EF8569335AC7CDCA257BAB00136B0F&0&2011&18.07.2013&Latest",
+      "postcode" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20POA%20Indexes.xls&2033.0.55.001&Data%20Cubes&209B3364525C82CCCA257B3B001A4D56&0&2011&12.11.2014&Latest",
+      "suburb" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&F40D0630B245D5DCCA257B43000EA0F1&0&2011&05.04.2013&Latest"
+    ),
+    "2016" = c(
+      "sa1" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&40A0EFDE970A1511CA25825D000F8E8D&0&2016&27.03.2018&Latest",
+      "sa2" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa2%20indexes.xls&2033.0.55.001&Data%20Cubes&C9F7AD36397CB43DCA25825D000F917C&0&2016&27.03.2018&Latest",
+      "lga" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&5604C75C214CD3D0CA25825D000F91AE&0&2016&27.03.2018&Latest",
+      "postcode" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20poa%20indexes.xls&2033.0.55.001&Data%20Cubes&DC124D1DAC3D9FDDCA25825D000F9267&0&2016&27.03.2018&Latest",
+      "suburb" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&863031D939DE8105CA25825D000F91D2&0&2016&27.03.2018&Latest"
+    ),
+    "2021" = c(
+      "sa1" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%201%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "sa2" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%202%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "lga" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Local%20Government%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "postcode" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Postal%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "suburb" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Suburbs%20and%20Localities%2C%20Indexes%2C%20SEIFA%202021.xlsx"
+    )
+  )
+
+
+  if (is.null(year)) {
+    year <- as.character(max(release_years))
+  } else {
+    if (!(is.numeric(year) | is.character(year))) {
+      stop("year must either be an integer or character string.")
     }
     year <- as.character(year)
 
-    if(! any(year %in% as.character(release_years))){
-      stop('year is not a valid release year, please check SEIFA webpage.')
+    if (!any(year %in% as.character(release_years))) {
+      stop("year is not a valid release year, please check SEIFA webpage.")
     }
   }
 
@@ -106,26 +117,28 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
 
   # Get file extension if possible, otherwise assume xls.
   url_ext <- tools::file_ext(sub("\\?.+", "", url))
-  if(url_ext == ""){url_ext <- 'xls'}
+  if (url_ext == "") {
+    url_ext <- "xls"
+  }
 
-  filename <- tempfile(fileext = paste0('.',url_ext) )
+  filename <- tempfile(fileext = paste0(".", url_ext))
 
   try({
-    download.file(url, destfile = filename, mode = 'wb')
-    message(paste0('ABS ', toupper(structure),' file downloaded to: \n'),
-            paste0('    ', filename),
-            appendLF = TRUE)
+    download.file(url, destfile = filename, mode = "wb")
+    message(paste0("ABS ", toupper(structure), " file downloaded to: \n"),
+      paste0("    ", filename),
+      appendLF = TRUE
+    )
   })
 
   if (file.exists(filename)) {
-    ind <- map(sheet_names, ~ get_seifa_index_sheet(filename, .x, structure, year), .id = 'seifa_index') %>%
-      list_rbind(names_to = 'data_subclass')
+    ind <- map(sheet_names, ~ get_seifa_index_sheet(filename, .x, structure, data_subclass, year), .id = "seifa_index") %>%
+      list_rbind(names_to = "data_subclass")
     return(ind)
   } else {
-    warning('Download of ABS file failed. Please check your internet connection and try again.')
+    warning("Download of ABS file failed. Please check your internet connection and try again.")
     return(NULL)
   }
-
 }
 
 
@@ -148,77 +161,114 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
 #' @examples
 #' \dontrun{
 #'
-#'   get_seifa_index_sheet('downloaded_filename.xls', sheetname = 'Table 2', structure = 'lga')
+#' get_seifa_index_sheet("downloaded_filename.xls", sheetname = "Table 2", structure = "lga")
 #' }
 #'
-get_seifa_index_sheet <- function(filename, sheetname, structure = c('sa1','sa2','lga','postcode','suburb'), year) {
-
+get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2", "lga", "postcode", "suburb"), data_subclass, year) {
   structure <- match.arg(structure, several.ok = FALSE)
 
-  column_names <- c('area_code',
-                    'area_name',
-                    'population',
-                    'score',
-                    'blank1',
-                    'rank_aus',
-                    'decile_aus',
-                    'percentile_aus',
-                    'blank2',
-                    'state',
-                    'rank_state',
-                    'decile_state',
-                    'percentile_state',
-                    'min_score_sa1_area',
-                    'max_score_sa1_area',
-                    'percent_usual_resident_pop_without_sa1_score')
+  column_names <- c(
+    "area_code",
+    "area_name",
+    "population",
+    "score",
+    "blank1",
+    "rank_aus",
+    "decile_aus",
+    "percentile_aus",
+    "blank2",
+    "state",
+    "rank_state",
+    "decile_state",
+    "percentile_state",
+    "min_score_sa1_area",
+    "max_score_sa1_area",
+    "percent_usual_resident_pop_without_sa1_score"
+  )
 
   # Add column for SEIFA releases >= 2016 with structures suburb or postcode.
-  if (structure %in% c('suburb','postcode') && year >= 2016 ) {
-    column_names <- c(column_names, 'caution_poor_sa1_representation')
+  if (structure %in% c("suburb", "postcode") && year >= 2016) {
+    column_names <- c(column_names, "caution_poor_sa1_representation")
   }
 
-  if (structure == 'postcode') {
-    column_names <- column_names[-grep('area_name', column_names)]
-    if(year >= 2016){
-      column_names <- c(column_names, 'postcode_crosses_state_boundary')
+  if (structure == "postcode") {
+    column_names <- column_names[-grep("area_name", column_names)]
+    if (year >= 2016) {
+      column_names <- c(column_names, "postcode_crosses_state_boundary")
     }
   }
 
-  if (structure == 'sa1') {
-    column_names <- c('sa1_7_code',
-                      'sa1_11_code',
-                      'population',
-                      'score',
-                      'blank1',
-                      'rank_aus',
-                      'decile_aus',
-                      'percentile_aus',
-                      'blank2',
-                      'state',
-                      'rank_state',
-                      'decile_state',
-                      'percentile_state')
+  if (structure == "sa1") {
+    column_names <- c(
+      "sa1_7_code",
+      "sa1_11_code",
+      "population",
+      "score",
+      "blank1",
+      "rank_aus",
+      "decile_aus",
+      "percentile_aus",
+      "blank2",
+      "state",
+      "rank_state",
+      "decile_state",
+      "percentile_state"
+    )
 
     # remove sa1_11_code column for 2011 release.
-    if( year == 2011) {
-      column_names <- column_names[-grep('sa1_11_code', column_names)]
-    }else if( year == 2021) {
-      column_names <- column_names[-grep('sa1_7_code', column_names)]
+    if (year == 2011) {
+      column_names <- column_names[-grep("sa1_11_code", column_names)]
+    } else if (year == 2021) {
+      column_names <- column_names[-grep("sa1_7_code", column_names)]
+    }
+  }
+
+  if (length(data_subclass) == 1) {
+    if (data_subclass == "summary") {
+      if (year != 2011) {
+        column_names <- c(
+          "area_code",
+          "area_name",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population"
+        )
+      } else if (year == 2011) {
+        column_names <- c(
+          "area_code",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population"
+        )
+      }
     }
   }
 
   suppressWarnings({
     df <- read_excel(filename,
-                     sheetname,
-                     skip = 6,
-                     col_names = column_names,
-                     na = c("", "NA") ) %>%
-      dplyr::filter(across(ends_with('_code'), ~ !is.na(.x))) %>%
-      select(-starts_with('blank')) %>%
-      mutate(structure = structure) %>%
+      sheetname,
+      skip = 6,
+      col_names = column_names,
+      na = c("", "NA")
+    ) %>%
+      dplyr::filter(across(ends_with("_code"), ~ !is.na(.x))) %>%
+      select(-starts_with("blank")) %>%
+      mutate(structure = structure,
+             year = year) %>%
       relocate(structure)
   })
 
   return(df)
-
 }
diff --git a/man/get_seifa.Rd b/man/get_seifa.Rd
index ca2c2f7..7a6b679 100644
--- a/man/get_seifa.Rd
+++ b/man/get_seifa.Rd
@@ -53,7 +53,7 @@ For All ABS SEIFA spreadsheets go to \href{https://www.abs.gov.au/AUSSTATS/abs@.
 }
 \examples{
 \dontrun{
-  get_seifa(structure = 'lga', data_subclass = 'irsed', year = 2016)
+get_seifa(structure = "lga", data_subclass = "irsed", year = 2016)
 }
 
 }
diff --git a/man/get_seifa_index_sheet.Rd b/man/get_seifa_index_sheet.Rd
index b15f9f6..afcf52f 100644
--- a/man/get_seifa_index_sheet.Rd
+++ b/man/get_seifa_index_sheet.Rd
@@ -8,6 +8,7 @@ get_seifa_index_sheet(
   filename,
   sheetname,
   structure = c("sa1", "sa2", "lga", "postcode", "suburb"),
+  data_subclass,
   year
 )
 }
@@ -19,6 +20,14 @@ get_seifa_index_sheet(
 \item{structure}{character spatial structure of the data to be parsed. The spatial structure is
 important as the shape of the data in the ABS spreadsheets if different for some structures.}
 
+\item{data_subclass}{character vector matching available SEIFA indexes:
+\itemize{
+  \item{irsed}{ - Index of Relative Socio-economic Disadvantage}
+  \item{irsead}{ - Index of Relative Socio-economic Advantage and Disadvantage}
+  \item{ier}{ - Index of Economic Resources}
+  \item{ieo}{ - Index of Education and Occupation}
+}}
+
 \item{year}{a character string or numeric of the release year of SEIFA object, eg "2016"; 2011.}
 }
 \value{
@@ -32,7 +41,7 @@ from \url{https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2033.0.55.0012016
 \examples{
 \dontrun{
 
-  get_seifa_index_sheet('downloaded_filename.xls', sheetname = 'Table 2', structure = 'lga')
+get_seifa_index_sheet("downloaded_filename.xls", sheetname = "Table 2", structure = "lga")
 }
 
 }
diff --git a/tests/testthat/test-seifa.R b/tests/testthat/test-seifa.R
index c8b4268..05b174b 100644
--- a/tests/testthat/test-seifa.R
+++ b/tests/testthat/test-seifa.R
@@ -1,5 +1,6 @@
 # Define columns for each spreadsheet.
-column_names <- list( '2011' = list( 'sa1' = c('structure',
+column_names <- list( '2011' = list( 'sa1' = c('data_subclass',
+                                               'structure',
                                                'sa1_7_code',
                                                'population',
                                                'score',
@@ -9,8 +10,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'state',
                                                'rank_state',
                                                'decile_state',
-                                               'percentile_state'),
-                                     'sa2' =c('structure',
+                                               'percentile_state',
+                                               'year'),
+                                     'sa2' =c('data_subclass',
+                                              'structure',
                                               'area_code',
                                               'area_name',
                                               'population',
@@ -24,8 +27,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                               'percentile_state',
                                               'min_score_sa1_area',
                                               'max_score_sa1_area',
-                                              'percent_usual_resident_pop_without_sa1_score'),
-                                     'lga' = c('structure',
+                                              'percent_usual_resident_pop_without_sa1_score',
+                                              'year'),
+                                     'lga' = c('data_subclass',
+                                               'structure',
                                                'area_code',
                                                'area_name',
                                                'population',
@@ -39,8 +44,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'percentile_state',
                                                'min_score_sa1_area',
                                                'max_score_sa1_area',
-                                               'percent_usual_resident_pop_without_sa1_score'),
-                                     'postcode' = c('structure',
+                                               'percent_usual_resident_pop_without_sa1_score',
+                                               'year'),
+                                     'postcode' = c('data_subclass',
+                                                    'structure',
                                                     'area_code',
                                                     'population',
                                                     'score',
@@ -53,8 +60,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                     'percentile_state',
                                                     'min_score_sa1_area',
                                                     'max_score_sa1_area',
-                                                    'percent_usual_resident_pop_without_sa1_score'),
-                                     'suburb' = c('structure',
+                                                    'percent_usual_resident_pop_without_sa1_score',
+                                                    'year'),
+                                     'suburb' = c('data_subclass',
+                                                  'structure',
                                                   'area_code',
                                                   'area_name',
                                                   'population',
@@ -68,9 +77,11 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                   'percentile_state',
                                                   'min_score_sa1_area',
                                                   'max_score_sa1_area',
-                                                  'percent_usual_resident_pop_without_sa1_score')
+                                                  'percent_usual_resident_pop_without_sa1_score',
+                                                  'year')
                                      ),
-                      '2016' = list( 'sa1' = c('structure',
+                      '2016' = list( 'sa1' = c('data_subclass',
+                                               'structure',
                                                'sa1_7_code',
                                                'sa1_11_code',
                                                'population',
@@ -81,8 +92,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'state',
                                                'rank_state',
                                                'decile_state',
-                                               'percentile_state'),
-                                     'sa2' =c('structure',
+                                               'percentile_state',
+                                               'year'),
+                                     'sa2' =c('data_subclass',
+                                              'structure',
                                               'area_code',
                                               'area_name',
                                               'population',
@@ -96,8 +109,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                               'percentile_state',
                                               'min_score_sa1_area',
                                               'max_score_sa1_area',
-                                              'percent_usual_resident_pop_without_sa1_score'),
-                                     'lga' = c('structure',
+                                              'percent_usual_resident_pop_without_sa1_score',
+                                              'year'),
+                                     'lga' = c('data_subclass',
+                                               'structure',
                                                'area_code',
                                                'area_name',
                                                'population',
@@ -111,8 +126,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'percentile_state',
                                                'min_score_sa1_area',
                                                'max_score_sa1_area',
-                                               'percent_usual_resident_pop_without_sa1_score'),
-                                     'postcode' = c('structure',
+                                               'percent_usual_resident_pop_without_sa1_score',
+                                               'year'),
+                                     'postcode' = c('data_subclass',
+                                                    'structure',
                                                     'area_code',
                                                     'population',
                                                     'score',
@@ -127,8 +144,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                     'max_score_sa1_area',
                                                     'percent_usual_resident_pop_without_sa1_score',
                                                     'caution_poor_sa1_representation',
-                                                    'postcode_crosses_state_boundary'),
-                                     'suburb' = c('structure',
+                                                    'postcode_crosses_state_boundary',
+                                                    'year'),
+                                     'suburb' = c('data_subclass',
+                                                  'structure',
                                                   'area_code',
                                                   'area_name',
                                                   'population',
@@ -143,9 +162,11 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                   'min_score_sa1_area',
                                                   'max_score_sa1_area',
                                                   'percent_usual_resident_pop_without_sa1_score',
-                                                  'caution_poor_sa1_representation')
+                                                  'caution_poor_sa1_representation',
+                                                  'year')
                       ),
-                      '2021' = list( 'sa1' = c('structure',
+                      '2021' = list( 'sa1' = c('data_subclass',
+                                               'structure',
                                                'sa1_11_code',
                                                'population',
                                                'score',
@@ -155,8 +176,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'state',
                                                'rank_state',
                                                'decile_state',
-                                               'percentile_state'),
-                                     'sa2' =c('structure',
+                                               'percentile_state',
+                                               'year'),
+                                     'sa2' =c('data_subclass',
+                                              'structure',
                                               'area_code',
                                               'area_name',
                                               'population',
@@ -170,8 +193,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                               'percentile_state',
                                               'min_score_sa1_area',
                                               'max_score_sa1_area',
-                                              'percent_usual_resident_pop_without_sa1_score'),
-                                     'lga' = c('structure',
+                                              'percent_usual_resident_pop_without_sa1_score',
+                                              'year'),
+                                     'lga' = c('data_subclass',
+                                               'structure',
                                                'area_code',
                                                'area_name',
                                                'population',
@@ -185,8 +210,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'percentile_state',
                                                'min_score_sa1_area',
                                                'max_score_sa1_area',
-                                               'percent_usual_resident_pop_without_sa1_score'),
-                                     'postcode' = c('structure',
+                                               'percent_usual_resident_pop_without_sa1_score',
+                                               'year'),
+                                     'postcode' = c('data_subclass',
+                                                    'structure',
                                                     'area_code',
                                                     'population',
                                                     'score',
@@ -201,8 +228,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                     'max_score_sa1_area',
                                                     'percent_usual_resident_pop_without_sa1_score',
                                                     'caution_poor_sa1_representation',
-                                                    'postcode_crosses_state_boundary'),
-                                     'suburb' = c('structure',
+                                                    'postcode_crosses_state_boundary',
+                                                    'year'),
+                                     'suburb' = c('data_subclass',
+                                                  'structure',
                                                   'area_code',
                                                   'area_name',
                                                   'population',
@@ -217,7 +246,8 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                   'min_score_sa1_area',
                                                   'max_score_sa1_area',
                                                   'percent_usual_resident_pop_without_sa1_score',
-                                                  'caution_poor_sa1_representation')
+                                                  'caution_poor_sa1_representation',
+                                                  'year')
                       )
                     )
 
@@ -297,9 +327,11 @@ test_that('sa1 spreadsheet can be parsed for 2016 release', {
                                             mustWork = TRUE),
                                 'Table 2',
                                 'sa1',
+                                'irsed',
                                 year = '2016')
 
     expect_is(df, 'data.frame')
-    expect_equal(colnames(df), column_names[['2016']][['sa1']])
+    ### data_subclass is added in the next step
+    expect_equal(colnames(df), column_names[['2016']][['sa1']][2:length(column_names[['2016']][['sa1']])] )
   }
 )

From 4327c257af23ff11541ac37042d0492a9ea7babd Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Wed, 16 Aug 2023 15:57:31 +1000
Subject: [PATCH 03/11] consider '-' as missing value in spreadsheets

---
 R/seifa.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/seifa.R b/R/seifa.R
index 65cc300..086edd4 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -261,7 +261,7 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
       sheetname,
       skip = 6,
       col_names = column_names,
-      na = c("", "NA")
+      na = c("", "NA",'-')
     ) %>%
       dplyr::filter(across(ends_with("_code"), ~ !is.na(.x))) %>%
       select(-starts_with("blank")) %>%

From 4ec81441d422b9722dec7d7aa6085d5bd6bf23fb Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Wed, 16 Aug 2023 18:13:00 +1000
Subject: [PATCH 04/11] add cases for sa1s column changes

---
 R/seifa.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/seifa.R b/R/seifa.R
index 086edd4..5be6b05 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -225,10 +225,9 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
 
   if (length(data_subclass) == 1) {
     if (data_subclass == "summary") {
-      if (year != 2011) {
+      if (year == 2011 | (year == 2021 & structure == "sa1" )) {
         column_names <- c(
           "area_code",
-          "area_name",
           "irsed_score",
           "irsed_decile",
           "irsead_score",
@@ -239,9 +238,11 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
           "ieo_decile",
           "population"
         )
-      } else if (year == 2011) {
+      }
+      else {
         column_names <- c(
           "area_code",
+          "area_name",
           "irsed_score",
           "irsed_decile",
           "irsead_score",
@@ -256,6 +257,8 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
     }
   }
 
+
+
   suppressWarnings({
     df <- read_excel(filename,
       sheetname,

From a62e35ea5ca7d82da41de7f0f65a9e4f97052099 Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Thu, 17 Aug 2023 13:01:46 +1000
Subject: [PATCH 05/11] update columns for seifa summary tables

---
 R/seifa.R | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/R/seifa.R b/R/seifa.R
index 5be6b05..c6cc5c0 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -225,7 +225,9 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
 
   if (length(data_subclass) == 1) {
     if (data_subclass == "summary") {
-      if (year == 2011 | (year == 2021 & structure == "sa1" )) {
+      if ((year == 2011) |
+          (year == 2021 & structure == "sa1" )
+          ){
         column_names <- c(
           "area_code",
           "irsed_score",
@@ -239,6 +241,38 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
           "population"
         )
       }
+      else if(year == 2016 & structure == 'suburb'){
+        column_names <- c(
+          "area_code",
+          "area_name",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population",
+          'data_warning'
+        )
+      }
+      else if (year %in% c(2016,2021) & structure == 'postcode') {
+        column_names <- c(
+          "area_code",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population",
+          'data_warning',
+          'postcode_crosses_state_boundaries'
+        )
+      }
       else {
         column_names <- c(
           "area_code",

From 79c8bd70414bd292073878ba09b2a9c358ca1253 Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Thu, 17 Aug 2023 13:59:00 +1000
Subject: [PATCH 06/11] tidy style

---
 R/seifa.R | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/R/seifa.R b/R/seifa.R
index c6cc5c0..9499bae 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -226,8 +226,8 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
   if (length(data_subclass) == 1) {
     if (data_subclass == "summary") {
       if ((year == 2011) |
-          (year == 2021 & structure == "sa1" )
-          ){
+        (year == 2021 & structure == "sa1")
+      ) {
         column_names <- c(
           "area_code",
           "irsed_score",
@@ -240,8 +240,7 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
           "ieo_decile",
           "population"
         )
-      }
-      else if(year == 2016 & structure == 'suburb'){
+      } else if (year == 2016 & structure == "suburb") {
         column_names <- c(
           "area_code",
           "area_name",
@@ -254,10 +253,9 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
           "ieo_score",
           "ieo_decile",
           "population",
-          'data_warning'
+          "data_warning"
         )
-      }
-      else if (year %in% c(2016,2021) & structure == 'postcode') {
+      } else if (year %in% c(2016, 2021) & structure == "postcode") {
         column_names <- c(
           "area_code",
           "irsed_score",
@@ -269,11 +267,10 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
           "ieo_score",
           "ieo_decile",
           "population",
-          'data_warning',
-          'postcode_crosses_state_boundaries'
+          "data_warning",
+          "postcode_crosses_state_boundaries"
         )
-      }
-      else {
+      } else {
         column_names <- c(
           "area_code",
           "area_name",
@@ -298,12 +295,14 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
       sheetname,
       skip = 6,
       col_names = column_names,
-      na = c("", "NA",'-')
+      na = c("", "NA", "-")
     ) %>%
       dplyr::filter(across(ends_with("_code"), ~ !is.na(.x))) %>%
       select(-starts_with("blank")) %>%
-      mutate(structure = structure,
-             year = year) %>%
+      mutate(
+        structure = structure,
+        year = year
+      ) %>%
       relocate(structure)
   })
 

From 5366453457ef9290a01c8fca68242393c8236d4c Mon Sep 17 00:00:00 2001
From: Peter Owen <pete.owen1@hotmail.co.uk>
Date: Thu, 17 Aug 2023 15:03:54 +1000
Subject: [PATCH 07/11] update pkgdown

---
 pkgdown/_pkgdown.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index c29a864..48b0d6d 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -27,6 +27,7 @@ reference:
       - starts_with("asced")
       - starts_with("asc")
       - auholidays
+      - school_terms
   - title: "Importing ABS Data"
     desc: >
       Functions for retrieving ABS data
@@ -34,6 +35,7 @@ reference:
       - read_absmap
       - get_seifa
       - get_seifa_index_sheet
+      - read_correspondence_tbl
   - title: "Helper functions"
     desc: >
       Functions for cleaning data and working with datasets

From 52fd632518fde3800c4f6670f994c44463bdcbc6 Mon Sep 17 00:00:00 2001
From: peteowen1 <pete.owen1@hotmail.co.uk>
Date: Wed, 7 Feb 2024 11:03:15 +1100
Subject: [PATCH 08/11] make area_code a character variable

---
 R/seifa.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/seifa.R b/R/seifa.R
index 9499bae..2003062 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -300,6 +300,7 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
       dplyr::filter(across(ends_with("_code"), ~ !is.na(.x))) %>%
       select(-starts_with("blank")) %>%
       mutate(
+        area_code = as.character(area_code),
         structure = structure,
         year = year
       ) %>%

From 99f3587e531d10f691559aa1ee4cf8cf21053361 Mon Sep 17 00:00:00 2001
From: peteowen1 <pete.owen1@hotmail.co.uk>
Date: Wed, 7 Feb 2024 11:24:23 +1100
Subject: [PATCH 09/11] conditional area_code update

only mutate area_code if the column exists
also change filtering from across() to if_any(), as per the dplyr guidelines
---
 R/seifa.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/seifa.R b/R/seifa.R
index 2003062..0aed079 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -297,15 +297,18 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
       col_names = column_names,
       na = c("", "NA", "-")
     ) %>%
-      dplyr::filter(across(ends_with("_code"), ~ !is.na(.x))) %>%
+      dplyr::filter(if_any(ends_with("_code"), ~ !is.na(.x))) %>%
       select(-starts_with("blank")) %>%
       mutate(
-        area_code = as.character(area_code),
         structure = structure,
         year = year
       ) %>%
+      mutate(across(
+        .cols = any_of("area_code"),  # Specify the column name
+        .fns = ~ as.character(.)  # Conditionally convert to character
+      )) %>%
       relocate(structure)
   })
-
+  
   return(df)
 }

From 9f28f44426a76e889fe1c589f5480cf5ab0504a2 Mon Sep 17 00:00:00 2001
From: peteowen1 <pete.owen1@hotmail.co.uk>
Date: Wed, 7 Feb 2024 14:21:47 +1100
Subject: [PATCH 10/11] change to ends_with _code

convert every column ending in _code to character for consistency (this now includes the sa1_7 and sa1_11 columns)
also check the _name columns as well as the _code columns when filtering out empty rows, not just the code
---
 R/seifa.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/seifa.R b/R/seifa.R
index 0aed079..86bf780 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -297,14 +297,14 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
       col_names = column_names,
       na = c("", "NA", "-")
     ) %>%
-      dplyr::filter(if_any(ends_with("_code"), ~ !is.na(.x))) %>%
+      dplyr::filter(if_any(ends_with(c("_name","_code")), ~ !is.na(.x))) %>%
       select(-starts_with("blank")) %>%
       mutate(
         structure = structure,
         year = year
       ) %>%
       mutate(across(
-        .cols = any_of("area_code"),  # Specify the column name
+        .cols = any_of(ends_with("_code")),  # Specify the column name
         .fns = ~ as.character(.)  # Conditionally convert to character
       )) %>%
       relocate(structure)

From 1bdf270f45ddf57be586a5382dff321bd618fdee Mon Sep 17 00:00:00 2001
From: peteowen1 <pete.owen1@hotmail.co.uk>
Date: Wed, 7 Feb 2024 14:24:37 +1100
Subject: [PATCH 11/11] needs if_all not if_any
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

the filter should drop a row unless it has both a code and a name (this helps remove the "© Commonwealth of Australia 2023" row)
---
 R/seifa.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/seifa.R b/R/seifa.R
index 86bf780..be6b3e9 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -297,7 +297,7 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2
       col_names = column_names,
       na = c("", "NA", "-")
     ) %>%
-      dplyr::filter(if_any(ends_with(c("_name","_code")), ~ !is.na(.x))) %>%
+      dplyr::filter(if_all(ends_with(c("_name","_code")), ~ !is.na(.x))) %>%
       select(-starts_with("blank")) %>%
       mutate(
         structure = structure,