diff --git a/R/seifa.R b/R/seifa.R
index 9013760..be6b3e9 100644
--- a/R/seifa.R
+++ b/R/seifa.R
@@ -1,5 +1,3 @@
-
-
 #' @title Import SEIFA Data from ABS
 #' @description The function will download all SEIFA data, for a specified spatial structure,
 #' to a temporary excel file and then merge sheets into a single `data.frame`. This `data.frame`
@@ -43,62 +41,75 @@
 #'
 #' @examples
 #' \dontrun{
-#'   get_seifa(structure = 'lga', data_subclass = 'irsed', year = 2016)
+#' get_seifa(structure = "lga", data_subclass = "irsed", year = 2016)
 #' }
 #'
-get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
-                      data_subclass = c('irsed', 'irsead', 'ier', 'ieo'),
+get_seifa <- function(structure = c("sa1", "sa2", "lga", "postcode", "suburb"),
+                      data_subclass = c("irsed", "irsead", "ier", "ieo"),
                       year = NULL) {
-
   # TODO: 2006 SEIFA has the Statistical Local Area (SLA) structure, not the
   # Statistical Level Areas (SA1, SA2) structures. Would need to update logic to
   # handle 2006.
-  release_years = c(2011, 2016, 2021)
+  release_years <- c(2011, 2016, 2021)
+
+  stopifnot(
+    "data_subclass must be either:
+            1. some combination of: 'irsed', 'irsead', 'ier', 'ieo'
+            2. just the value 'summary'" =
+      all(data_subclass %in% c("irsed", "irsead", "ier", "ieo")) ||
+        (length(data_subclass) == 1 && data_subclass == "summary")
+  ) # scalar ||/&&: the length test runs first, so `==` only sees one value
 
-  stopifnot(all(data_subclass %in% c('irsed', 'irsead', 'ier', 'ieo')))
 
   # match excel sheet names to data_subclass
-  sheet_names <- c('irsed'   = 'Table 2',
-                   'irsead'  = 'Table 3',
-                   'ier'     = 'Table 4',
-                   'ieo'     = 'Table 5')
+  sheet_names <- c(
+    "summary" = "Table 1",
+    "irsed" = "Table 2",
+    "irsead" = "Table 3",
+    "ier" = "Table 4",
+    "ieo" = "Table 5"
+  )
 
   sheet_names <- sheet_names[data_subclass]
 
-    # match spatial structures to specific urls
+  # match spatial structures to specific urls
   structure <- match.arg(structure, several.ok = FALSE)
 
-  urls <- list( '2011' = c( 'sa1' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&9828E2819C30D96DCA257B43000E923E&0&2011&05.04.2013&Latest',
-                            'sa2' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20SA2%20Indexes.xls&2033.0.55.001&Data%20Cubes&76D0BC44356DC34ACA257B3B001A4913&0&2011&12.11.2014&Latest',
-                            'lga' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&28EF8569335AC7CDCA257BAB00136B0F&0&2011&18.07.2013&Latest',
-                            'postcode' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20POA%20Indexes.xls&2033.0.55.001&Data%20Cubes&209B3364525C82CCCA257B3B001A4D56&0&2011&12.11.2014&Latest',
-                            'suburb' = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&F40D0630B245D5DCCA257B43000EA0F1&0&2011&05.04.2013&Latest'),
-
-                '2016' = c( 'sa1' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&40A0EFDE970A1511CA25825D000F8E8D&0&2016&27.03.2018&Latest',
-                            'sa2' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa2%20indexes.xls&2033.0.55.001&Data%20Cubes&C9F7AD36397CB43DCA25825D000F917C&0&2016&27.03.2018&Latest',
-                            'lga' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&5604C75C214CD3D0CA25825D000F91AE&0&2016&27.03.2018&Latest',
-                            'postcode' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20poa%20indexes.xls&2033.0.55.001&Data%20Cubes&DC124D1DAC3D9FDDCA25825D000F9267&0&2016&27.03.2018&Latest',
-                            'suburb' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&863031D939DE8105CA25825D000F91D2&0&2016&27.03.2018&Latest'),
-
-                '2021' = c( 'sa1' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%201%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'sa2' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%202%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'lga' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Local%20Government%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'postcode' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Postal%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx',
-                            'suburb' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Suburbs%20and%20Localities%2C%20Indexes%2C%20SEIFA%202021.xlsx' )
-
-                )
-
-
-  if( is.null(year) ){
-    year = as.character(max(release_years))
-  }else{
-    if(! (is.numeric(year) | is.character(year) ) ){
-      stop('year must either be an integer or character string.')
+  urls <- list(
+    "2011" = c(
+      "sa1" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&9828E2819C30D96DCA257B43000E923E&0&2011&05.04.2013&Latest",
+      "sa2" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20SA2%20Indexes.xls&2033.0.55.001&Data%20Cubes&76D0BC44356DC34ACA257B3B001A4913&0&2011&12.11.2014&Latest",
+      "lga" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&28EF8569335AC7CDCA257BAB00136B0F&0&2011&18.07.2013&Latest",
+      "postcode" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20POA%20Indexes.xls&2033.0.55.001&Data%20Cubes&209B3364525C82CCCA257B3B001A4D56&0&2011&12.11.2014&Latest",
+      "suburb" = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&2033.0.55.001%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&F40D0630B245D5DCCA257B43000EA0F1&0&2011&05.04.2013&Latest"
+    ),
+    "2016" = c(
+      "sa1" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa1%20indexes.xls&2033.0.55.001&Data%20Cubes&40A0EFDE970A1511CA25825D000F8E8D&0&2016&27.03.2018&Latest",
+      "sa2" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa2%20indexes.xls&2033.0.55.001&Data%20Cubes&C9F7AD36397CB43DCA25825D000F917C&0&2016&27.03.2018&Latest",
+      "lga" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&5604C75C214CD3D0CA25825D000F91AE&0&2016&27.03.2018&Latest",
+      "postcode" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20poa%20indexes.xls&2033.0.55.001&Data%20Cubes&DC124D1DAC3D9FDDCA25825D000F9267&0&2016&27.03.2018&Latest",
+      "suburb" = "https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&863031D939DE8105CA25825D000F91D2&0&2016&27.03.2018&Latest"
+    ),
+    "2021" = c(
+      "sa1" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%201%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "sa2" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%202%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "lga" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Local%20Government%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "postcode" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Postal%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx",
+      "suburb" = "https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Suburbs%20and%20Localities%2C%20Indexes%2C%20SEIFA%202021.xlsx"
+    )
+  )
+
+
+  if (is.null(year)) {
+    year <- as.character(max(release_years))
+  } else { # a year was supplied: it must be a single number or string
+    if (!(is.numeric(year) || is.character(year)) || length(year) != 1) {
+      stop("year must either be an integer or character string.")
     }
     year <- as.character(year)
 
-    if(! any(year %in% as.character(release_years))){
-      stop('year is not a valid release year, please check SEIFA webpage.')
+    if (!any(year %in% as.character(release_years))) {
+      stop("year is not a valid release year, please check SEIFA webpage.")
     }
   }
 
@@ -106,26 +117,28 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
 
   # Get file extension if possible, otherwise assume xls.
   url_ext <- tools::file_ext(sub("\\?.+", "", url))
-  if(url_ext == ""){url_ext <- 'xls'}
+  if (url_ext == "") {
+    url_ext <- "xls"
+  }
 
-  filename <- tempfile(fileext = paste0('.',url_ext) )
+  filename <- tempfile(fileext = paste0(".", url_ext))
 
   try({
-    download.file(url, destfile = filename, mode = 'wb')
-    message(paste0('ABS ', toupper(structure),' file downloaded to: \n'),
-            paste0('    ', filename),
-            appendLF = TRUE)
+    download.file(url, destfile = filename, mode = "wb")
+    message(paste0("ABS ", toupper(structure), " file downloaded to: \n"),
+      paste0("    ", filename),
+      appendLF = TRUE
+    )
   })
 
   if (file.exists(filename)) {
-    ind <- map(sheet_names, ~ get_seifa_index_sheet(filename, .x, structure, year), .id = 'seifa_index') %>%
-      list_rbind()
+    ind <- map(sheet_names, ~ get_seifa_index_sheet(filename, .x, structure, data_subclass, year)) %>%
+      list_rbind(names_to = "data_subclass") # names of sheet_names become the id column
     return(ind)
   } else {
-    warning('Download of ABS file failed. Please check your internet connection and try again.')
+    warning("Download of ABS file failed. Please check your internet connection and try again.")
     return(NULL)
   }
-
 }
 
 
@@ -148,77 +161,154 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
 #' @examples
 #' \dontrun{
 #'
-#'   get_seifa_index_sheet('downloaded_filename.xls', sheetname = 'Table 2', structure = 'lga')
+#' get_seifa_index_sheet("downloaded_filename.xls", sheetname = "Table 2", structure = "lga")
 #' }
 #'
-get_seifa_index_sheet <- function(filename, sheetname, structure = c('sa1','sa2','lga','postcode','suburb'), year) {
-
+get_seifa_index_sheet <- function(filename, sheetname, structure = c("sa1", "sa2", "lga", "postcode", "suburb"), data_subclass, year) {
   structure <- match.arg(structure, several.ok = FALSE)
 
-  column_names <- c('area_code',
-                    'area_name',
-                    'population',
-                    'score',
-                    'blank1',
-                    'rank_aus',
-                    'decile_aus',
-                    'percentile_aus',
-                    'blank2',
-                    'state',
-                    'rank_state',
-                    'decile_state',
-                    'percentile_state',
-                    'min_score_sa1_area',
-                    'max_score_sa1_area',
-                    'percent_usual_resident_pop_without_sa1_score')
+  column_names <- c(
+    "area_code",
+    "area_name",
+    "population",
+    "score",
+    "blank1",
+    "rank_aus",
+    "decile_aus",
+    "percentile_aus",
+    "blank2",
+    "state",
+    "rank_state",
+    "decile_state",
+    "percentile_state",
+    "min_score_sa1_area",
+    "max_score_sa1_area",
+    "percent_usual_resident_pop_without_sa1_score"
+  )
 
   # Add column for SEIFA releases >= 2016 with structures suburb or postcode.
-  if (structure %in% c('suburb','postcode') && year >= 2016 ) {
-    column_names <- c(column_names, 'caution_poor_sa1_representation')
+  if (structure %in% c("suburb", "postcode") && year >= 2016) {
+    column_names <- c(column_names, "caution_poor_sa1_representation")
   }
 
-  if (structure == 'postcode') {
-    column_names <- column_names[-grep('area_name', column_names)]
-    if(year >= 2016){
-      column_names <- c(column_names, 'postcode_crosses_state_boundary')
+  if (structure == "postcode") {
+    column_names <- column_names[-grep("area_name", column_names)]
+    if (year >= 2016) {
+      column_names <- c(column_names, "postcode_crosses_state_boundary")
     }
   }
 
-  if (structure == 'sa1') {
-    column_names <- c('sa1_7_code',
-                      'sa1_11_code',
-                      'population',
-                      'score',
-                      'blank1',
-                      'rank_aus',
-                      'decile_aus',
-                      'percentile_aus',
-                      'blank2',
-                      'state',
-                      'rank_state',
-                      'decile_state',
-                      'percentile_state')
+  if (structure == "sa1") {
+    column_names <- c(
+      "sa1_7_code",
+      "sa1_11_code",
+      "population",
+      "score",
+      "blank1",
+      "rank_aus",
+      "decile_aus",
+      "percentile_aus",
+      "blank2",
+      "state",
+      "rank_state",
+      "decile_state",
+      "percentile_state"
+    )
 
     # remove sa1_11_code column for 2011 release.
-    if( year == 2011) {
-      column_names <- column_names[-grep('sa1_11_code', column_names)]
-    }else if( year == 2021) {
-      column_names <- column_names[-grep('sa1_7_code', column_names)]
+    if (year == 2011) {
+      column_names <- column_names[-grep("sa1_11_code", column_names)]
+    } else if (year == 2021) {
+      column_names <- column_names[-grep("sa1_7_code", column_names)]
+    }
+  }
+
+  if (length(data_subclass) == 1) {
+    if (data_subclass == "summary") { # summary sheet: replace the index-sheet layout
+      if ((year == 2011) ||
+        (year == 2021 && structure == "sa1")
+      ) {
+        column_names <- c(
+          "area_code",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population"
+        )
+      } else if (year == 2016 && structure == "suburb") {
+        column_names <- c(
+          "area_code",
+          "area_name",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population",
+          "data_warning"
+        )
+      } else if (year %in% c(2016, 2021) && structure == "postcode") {
+        column_names <- c(
+          "area_code",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population",
+          "data_warning",
+          "postcode_crosses_state_boundaries"
+        )
+      } else {
+        column_names <- c(
+          "area_code",
+          "area_name",
+          "irsed_score",
+          "irsed_decile",
+          "irsead_score",
+          "irsead_decile",
+          "ier_score",
+          "ier_decile",
+          "ieo_score",
+          "ieo_decile",
+          "population"
+        )
+      }
     }
   }
 
+
+
   suppressWarnings({
     df <- read_excel(filename,
-                     sheetname,
-                     skip = 6,
-                     col_names = column_names,
-                     na = c("", "NA") ) %>%
-      dplyr::filter(across(ends_with('_code'), ~ !is.na(.x))) %>%
-      select(-starts_with('blank')) %>%
-      mutate(structure = structure) %>%
+      sheetname,
+      skip = 6,
+      col_names = column_names,
+      na = c("", "NA", "-")
+    ) %>%
+      dplyr::filter(if_all(ends_with(c("_name", "_code")), ~ !is.na(.x))) %>%
+      select(-starts_with("blank")) %>%
+      mutate(
+        structure = structure,
+        year = year
+      ) %>%
+      mutate(across(
+        .cols = ends_with("_code"), # every *_code column present in this sheet
+        .fns = as.character # area codes are always returned as character
+      )) %>%
       relocate(structure)
   })
-
+  
   return(df)
-
 }
diff --git a/man/get_seifa.Rd b/man/get_seifa.Rd
index ca2c2f7..7a6b679 100644
--- a/man/get_seifa.Rd
+++ b/man/get_seifa.Rd
@@ -53,7 +53,7 @@ For All ABS SEIFA spreadsheets go to \href{https://www.abs.gov.au/AUSSTATS/abs@.
 }
 \examples{
 \dontrun{
-  get_seifa(structure = 'lga', data_subclass = 'irsed', year = 2016)
+get_seifa(structure = "lga", data_subclass = "irsed", year = 2016)
 }
 
 }
diff --git a/man/get_seifa_index_sheet.Rd b/man/get_seifa_index_sheet.Rd
index b15f9f6..afcf52f 100644
--- a/man/get_seifa_index_sheet.Rd
+++ b/man/get_seifa_index_sheet.Rd
@@ -8,6 +8,7 @@ get_seifa_index_sheet(
   filename,
   sheetname,
   structure = c("sa1", "sa2", "lga", "postcode", "suburb"),
+  data_subclass,
   year
 )
 }
@@ -19,6 +20,14 @@ get_seifa_index_sheet(
 \item{structure}{character spatial structure of the data to be parsed. The spatial structure is
 important as the shape of the data in the ABS spreadsheets if different for some structures.}
 
+\item{data_subclass}{character vector matching available SEIFA indexes:
+\itemize{
+  \item{irsed}{ - Index of Relative Socio-economic Disadvantage}
+  \item{irsead}{ - Index of Relative Socio-economic Advantage and Disadvantage}
+  \item{ier}{ - Index of Economic Resources}
+  \item{ieo}{ - Index of Education and Occupation}
+}}
+
 \item{year}{a character string or numeric of the release year of SEIFA object, eg "2016"; 2011.}
 }
 \value{
@@ -32,7 +41,7 @@ from \url{https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2033.0.55.0012016
 \examples{
 \dontrun{
 
-  get_seifa_index_sheet('downloaded_filename.xls', sheetname = 'Table 2', structure = 'lga')
+get_seifa_index_sheet("downloaded_filename.xls", sheetname = "Table 2", structure = "lga")
 }
 
 }
diff --git a/tests/testthat/test-seifa.R b/tests/testthat/test-seifa.R
index c8b4268..05b174b 100644
--- a/tests/testthat/test-seifa.R
+++ b/tests/testthat/test-seifa.R
@@ -1,5 +1,6 @@
 # Define columns for each spreadsheet.
-column_names <- list( '2011' = list( 'sa1' = c('structure',
+column_names <- list( '2011' = list( 'sa1' = c('data_subclass',
+                                               'structure',
                                                'sa1_7_code',
                                                'population',
                                                'score',
@@ -9,8 +10,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'state',
                                                'rank_state',
                                                'decile_state',
-                                               'percentile_state'),
-                                     'sa2' =c('structure',
+                                               'percentile_state',
+                                               'year'),
+                                     'sa2' =c('data_subclass',
+                                              'structure',
                                               'area_code',
                                               'area_name',
                                               'population',
@@ -24,8 +27,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                               'percentile_state',
                                               'min_score_sa1_area',
                                               'max_score_sa1_area',
-                                              'percent_usual_resident_pop_without_sa1_score'),
-                                     'lga' = c('structure',
+                                              'percent_usual_resident_pop_without_sa1_score',
+                                              'year'),
+                                     'lga' = c('data_subclass',
+                                               'structure',
                                                'area_code',
                                                'area_name',
                                                'population',
@@ -39,8 +44,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'percentile_state',
                                                'min_score_sa1_area',
                                                'max_score_sa1_area',
-                                               'percent_usual_resident_pop_without_sa1_score'),
-                                     'postcode' = c('structure',
+                                               'percent_usual_resident_pop_without_sa1_score',
+                                               'year'),
+                                     'postcode' = c('data_subclass',
+                                                    'structure',
                                                     'area_code',
                                                     'population',
                                                     'score',
@@ -53,8 +60,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                     'percentile_state',
                                                     'min_score_sa1_area',
                                                     'max_score_sa1_area',
-                                                    'percent_usual_resident_pop_without_sa1_score'),
-                                     'suburb' = c('structure',
+                                                    'percent_usual_resident_pop_without_sa1_score',
+                                                    'year'),
+                                     'suburb' = c('data_subclass',
+                                                  'structure',
                                                   'area_code',
                                                   'area_name',
                                                   'population',
@@ -68,9 +77,11 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                   'percentile_state',
                                                   'min_score_sa1_area',
                                                   'max_score_sa1_area',
-                                                  'percent_usual_resident_pop_without_sa1_score')
+                                                  'percent_usual_resident_pop_without_sa1_score',
+                                                  'year')
                                      ),
-                      '2016' = list( 'sa1' = c('structure',
+                      '2016' = list( 'sa1' = c('data_subclass',
+                                               'structure',
                                                'sa1_7_code',
                                                'sa1_11_code',
                                                'population',
@@ -81,8 +92,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'state',
                                                'rank_state',
                                                'decile_state',
-                                               'percentile_state'),
-                                     'sa2' =c('structure',
+                                               'percentile_state',
+                                               'year'),
+                                     'sa2' =c('data_subclass',
+                                              'structure',
                                               'area_code',
                                               'area_name',
                                               'population',
@@ -96,8 +109,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                               'percentile_state',
                                               'min_score_sa1_area',
                                               'max_score_sa1_area',
-                                              'percent_usual_resident_pop_without_sa1_score'),
-                                     'lga' = c('structure',
+                                              'percent_usual_resident_pop_without_sa1_score',
+                                              'year'),
+                                     'lga' = c('data_subclass',
+                                               'structure',
                                                'area_code',
                                                'area_name',
                                                'population',
@@ -111,8 +126,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'percentile_state',
                                                'min_score_sa1_area',
                                                'max_score_sa1_area',
-                                               'percent_usual_resident_pop_without_sa1_score'),
-                                     'postcode' = c('structure',
+                                               'percent_usual_resident_pop_without_sa1_score',
+                                               'year'),
+                                     'postcode' = c('data_subclass',
+                                                    'structure',
                                                     'area_code',
                                                     'population',
                                                     'score',
@@ -127,8 +144,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                     'max_score_sa1_area',
                                                     'percent_usual_resident_pop_without_sa1_score',
                                                     'caution_poor_sa1_representation',
-                                                    'postcode_crosses_state_boundary'),
-                                     'suburb' = c('structure',
+                                                    'postcode_crosses_state_boundary',
+                                                    'year'),
+                                     'suburb' = c('data_subclass',
+                                                  'structure',
                                                   'area_code',
                                                   'area_name',
                                                   'population',
@@ -143,9 +162,11 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                   'min_score_sa1_area',
                                                   'max_score_sa1_area',
                                                   'percent_usual_resident_pop_without_sa1_score',
-                                                  'caution_poor_sa1_representation')
+                                                  'caution_poor_sa1_representation',
+                                                  'year')
                       ),
-                      '2021' = list( 'sa1' = c('structure',
+                      '2021' = list( 'sa1' = c('data_subclass',
+                                               'structure',
                                                'sa1_11_code',
                                                'population',
                                                'score',
@@ -155,8 +176,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'state',
                                                'rank_state',
                                                'decile_state',
-                                               'percentile_state'),
-                                     'sa2' =c('structure',
+                                               'percentile_state',
+                                               'year'),
+                                     'sa2' =c('data_subclass',
+                                              'structure',
                                               'area_code',
                                               'area_name',
                                               'population',
@@ -170,8 +193,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                               'percentile_state',
                                               'min_score_sa1_area',
                                               'max_score_sa1_area',
-                                              'percent_usual_resident_pop_without_sa1_score'),
-                                     'lga' = c('structure',
+                                              'percent_usual_resident_pop_without_sa1_score',
+                                              'year'),
+                                     'lga' = c('data_subclass',
+                                               'structure',
                                                'area_code',
                                                'area_name',
                                                'population',
@@ -185,8 +210,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                'percentile_state',
                                                'min_score_sa1_area',
                                                'max_score_sa1_area',
-                                               'percent_usual_resident_pop_without_sa1_score'),
-                                     'postcode' = c('structure',
+                                               'percent_usual_resident_pop_without_sa1_score',
+                                               'year'),
+                                     'postcode' = c('data_subclass',
+                                                    'structure',
                                                     'area_code',
                                                     'population',
                                                     'score',
@@ -201,8 +228,10 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                     'max_score_sa1_area',
                                                     'percent_usual_resident_pop_without_sa1_score',
                                                     'caution_poor_sa1_representation',
-                                                    'postcode_crosses_state_boundary'),
-                                     'suburb' = c('structure',
+                                                    'postcode_crosses_state_boundary',
+                                                    'year'),
+                                     'suburb' = c('data_subclass',
+                                                  'structure',
                                                   'area_code',
                                                   'area_name',
                                                   'population',
@@ -217,7 +246,8 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
                                                   'min_score_sa1_area',
                                                   'max_score_sa1_area',
                                                   'percent_usual_resident_pop_without_sa1_score',
-                                                  'caution_poor_sa1_representation')
+                                                  'caution_poor_sa1_representation',
+                                                  'year')
                       )
                     )
 
@@ -297,9 +327,11 @@ test_that('sa1 spreadsheet can be parsed for 2016 release', {
                                             mustWork = TRUE),
                                 'Table 2',
                                 'sa1',
+                                'irsed',
                                 year = '2016')
 
     expect_is(df, 'data.frame')
-    expect_equal(colnames(df), column_names[['2016']][['sa1']])
+    ### data_subclass is added in the next step
+    expect_equal(colnames(df), column_names[['2016']][['sa1']][2:length(column_names[['2016']][['sa1']])] )
   }
 )