Skip to content

Commit

Permalink
Merge pull request runapp-aus#100 from JustGitting/master
Browse files Browse the repository at this point in the history
Add support for retrieving 2021 SEIFA data to get_seifa()
  • Loading branch information
Will Mackey authored Jul 26, 2023
2 parents a59a54e + ad25e83 commit 1f29ba2
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 21 deletions.
36 changes: 30 additions & 6 deletions R/seifa.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@


#' @title Import 2016 SEIFA Data from ABS
#' @title Import SEIFA Data from ABS
#' @description The function will download all SEIFA data, for a specified spatial structure,
#' to a temporary excel file and then merge sheets into a single `data.frame`. This `data.frame`
#' also includes the ABS population count for the given spatial structure. For more information
#' on SEIFA indexes go to
#' \url{https://www.abs.gov.au/AUSSTATS/abs@.nsf/Lookup/2033.0.55.001Main+Features12016?OpenDocument}
#' \url{https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia}
#'
#' To learn more about the individual data-sets for each year, please visit their respective pages:
#' 2011
#' \url{https://www.abs.gov.au/AUSSTATS/abs@.nsf/allprimarymainfeatures/8C5F5BB699A0921CCA258259000BA619}
#' 2016
#' \url{https://www.abs.gov.au/ausstats/abs@.nsf/mf/2033.0.55.001}
#' 2021
#' \url{https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021}
#'
#' @param structure character value for the desired spatial area. Must be one of:
#' \itemize{
#' \item{sa1}{ - download size 51.6 MB}
Expand All @@ -23,6 +31,7 @@
#' \item{ieo}{ - Index of Education and Occupation}
#' }
#' @param year a character string or numeric giving the SEIFA release year, e.g. "2016" or 2011.
#' Supported release years are 2011, 2016 and 2021.
#'
#' @importFrom purrr map
#' @importFrom purrr list_rbind
#' @importFrom utils download.file
Expand All @@ -34,15 +43,17 @@
#'
#' @examples
#' \dontrun{
#'
#' get_seifa(structure = 'lga', data_subclass = 'irsed', year = 2016)
#' }
#'
get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
data_subclass = c('irsed', 'irsead', 'ier', 'ieo'),
year = NULL) {

release_years = c(2011, 2016)
# TODO: 2006 SEIFA has the Statistical Local Area (SLA) structure, not the
# Statistical Area Level (SA1, SA2) structures. Would need to update logic to
# handle 2006.
release_years = c(2011, 2016, 2021)

stopifnot(all(data_subclass %in% c('irsed', 'irsead', 'ier', 'ieo')))

Expand All @@ -67,7 +78,14 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),
'sa2' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20sa2%20indexes.xls&2033.0.55.001&Data%20Cubes&C9F7AD36397CB43DCA25825D000F917C&0&2016&27.03.2018&Latest',
'lga' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20lga%20indexes.xls&2033.0.55.001&Data%20Cubes&5604C75C214CD3D0CA25825D000F91AE&0&2016&27.03.2018&Latest',
'postcode' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20poa%20indexes.xls&2033.0.55.001&Data%20Cubes&DC124D1DAC3D9FDDCA25825D000F9267&0&2016&27.03.2018&Latest',
'suburb' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&863031D939DE8105CA25825D000F91D2&0&2016&27.03.2018&Latest')
'suburb' = 'https://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&2033055001%20-%20ssc%20indexes.xls&2033.0.55.001&Data%20Cubes&863031D939DE8105CA25825D000F91D2&0&2016&27.03.2018&Latest'),

'2021' = c( 'sa1' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%201%2C%20Indexes%2C%20SEIFA%202021.xlsx',
'sa2' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Statistical%20Area%20Level%202%2C%20Indexes%2C%20SEIFA%202021.xlsx',
'lga' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Local%20Government%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx',
'postcode' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Postal%20Area%2C%20Indexes%2C%20SEIFA%202021.xlsx',
'suburb' = 'https://www.abs.gov.au/statistics/people/people-and-communities/socio-economic-indexes-areas-seifa-australia/2021/Suburbs%20and%20Localities%2C%20Indexes%2C%20SEIFA%202021.xlsx' )

)


Expand All @@ -86,7 +104,11 @@ get_seifa <- function(structure = c('sa1','sa2','lga','postcode','suburb'),

url <- urls[[year]][structure]

filename <- tempfile(fileext = '.xls')
# Get file extension if possible, otherwise assume xls.
url_ext <- tools::file_ext(sub("\\?.+", "", url))
if(url_ext == ""){url_ext <- 'xls'}

filename <- tempfile(fileext = paste0('.',url_ext) )

try({
download.file(url, destfile = filename, mode = 'wb')
Expand Down Expand Up @@ -180,6 +202,8 @@ get_seifa_index_sheet <- function(filename, sheetname, structure = c('sa1','sa2'
# remove the SA1 code column that is dropped for a given release:
# sa1_11_code for 2011, sa1_7_code for 2021.
if( year == 2011) {
column_names <- column_names[-grep('sa1_11_code', column_names)]
}else if( year == 2021) {
column_names <- column_names[-grep('sa1_7_code', column_names)]
}
}

Expand Down
13 changes: 10 additions & 3 deletions man/get_seifa.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

98 changes: 86 additions & 12 deletions tests/testthat/test-seifa.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,80 @@ column_names <- list( '2011' = list( 'sa1' = c('structure',
'max_score_sa1_area',
'percent_usual_resident_pop_without_sa1_score',
'caution_poor_sa1_representation')
),
'2021' = list( 'sa1' = c('structure',
'sa1_11_code',
'population',
'score',
'rank_aus',
'decile_aus',
'percentile_aus',
'state',
'rank_state',
'decile_state',
'percentile_state'),
'sa2' =c('structure',
'area_code',
'area_name',
'population',
'score',
'rank_aus',
'decile_aus',
'percentile_aus',
'state',
'rank_state',
'decile_state',
'percentile_state',
'min_score_sa1_area',
'max_score_sa1_area',
'percent_usual_resident_pop_without_sa1_score'),
'lga' = c('structure',
'area_code',
'area_name',
'population',
'score',
'rank_aus',
'decile_aus',
'percentile_aus',
'state',
'rank_state',
'decile_state',
'percentile_state',
'min_score_sa1_area',
'max_score_sa1_area',
'percent_usual_resident_pop_without_sa1_score'),
'postcode' = c('structure',
'area_code',
'population',
'score',
'rank_aus',
'decile_aus',
'percentile_aus',
'state',
'rank_state',
'decile_state',
'percentile_state',
'min_score_sa1_area',
'max_score_sa1_area',
'percent_usual_resident_pop_without_sa1_score',
'caution_poor_sa1_representation',
'postcode_crosses_state_boundary'),
'suburb' = c('structure',
'area_code',
'area_name',
'population',
'score',
'rank_aus',
'decile_aus',
'percentile_aus',
'state',
'rank_state',
'decile_state',
'percentile_state',
'min_score_sa1_area',
'max_score_sa1_area',
'percent_usual_resident_pop_without_sa1_score',
'caution_poor_sa1_representation')
)
)

Expand Down Expand Up @@ -217,15 +291,15 @@ for(release_year in release_years){


test_that('sa1 spreadsheet can be parsed for 2016 release', {
df <- get_seifa_index_sheet(system.file('extdata',
'sa1_seifa_indexes_test.xls',
package = 'strayr',
mustWork = TRUE),
'Table 2',
'sa1',
year = '2016')

expect_is(df, 'data.frame')
expect_equal(colnames(df), column_names[['2016']][['sa1']])

})
df <- get_seifa_index_sheet(system.file('extdata',
'sa1_seifa_indexes_test.xls',
package = 'strayr',
mustWork = TRUE),
'Table 2',
'sa1',
year = '2016')

expect_is(df, 'data.frame')
expect_equal(colnames(df), column_names[['2016']][['sa1']])
}
)

0 comments on commit 1f29ba2

Please sign in to comment.