Skip to content

Commit

Permalink
Merge pull request #101 from mdblocker/main
Browse files Browse the repository at this point in the history
wru 2020 update (2.0.0)
  • Loading branch information
1beb authored Jul 12, 2023
2 parents b34e108 + a4bdf69 commit e6ce666
Show file tree
Hide file tree
Showing 24 changed files with 262 additions and 76 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ vignettes/*.pdf
# the local check files
..Rcheck

# local build files
src/RcppExports.o
src/aux_funs.o
src/sample_me.o
src/wru.so
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ Date Version Comment
2020-05-17 0.1-12 Some Census API improvements (thanks to Silvia Kim)
2022-06-17 1.0.0 Updates to BISG, inclusion of fBISG and other package improvements
2022-10-04 1.0.1 Bug fixes for census url and census year
2023-06-12 2.0.0 Updated defaults to 2020 data, specify as next major version 2.0.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: wru
Version: 1.0.0010
Date: 2022-07-26
Version: 2.0.0000
Date: 2023-06-12
Title: Who are You? Bayesian Prediction of Racial Category Using Surname, First Name, Middle Name, and
Geolocation
Authors@R: c(
Expand Down Expand Up @@ -42,6 +42,6 @@ LazyLoad: yes
LazyData: yes
LazyDataCompression: xz
License: GPL (>= 3)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Encoding: UTF-8
Config/testthat/edition: 3
4 changes: 2 additions & 2 deletions R/census_geo_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race).
#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
#' @param year A character object specifying the year of U.S. Census data to be downloaded.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}.
#' Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and
#' \code{\var{sex}} are both \code{FALSE}.
#' @param retry The number of retries at the census website if network interruption occurs.
Expand All @@ -48,7 +48,7 @@
#' @importFrom purrr map_dfr
#' @keywords internal

census_geo_api <- function(key = NULL, state, geo = "tract", age = FALSE, sex = FALSE, year = "2010", retry = 3, save_temp = NULL, counties = NULL) {
census_geo_api <- function(key = NULL, state, geo = "tract", age = FALSE, sex = FALSE, year = "2020", retry = 3, save_temp = NULL, counties = NULL) {

if (missing(key)) {
stop('Must enter U.S. Census API key, which can be requested at https://api.census.gov/data/key_signup.html.')
Expand Down
4 changes: 2 additions & 2 deletions R/census_helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race).
#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
#' @param year A character object specifying the year of U.S. Census data to be downloaded.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}.
#' Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and
#' \code{\var{sex}} are both \code{FALSE}.
#' @param census.data An optional census object of class \code{list} containing
Expand Down Expand Up @@ -71,7 +71,7 @@
#'
#' @keywords internal

census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2010", census.data = NULL, retry = 3, use.counties = FALSE) {
census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2020", census.data = NULL, retry = 3, use.counties = FALSE) {
if (is.null(census.data) || (typeof(census.data) != "list")) {
toDownload <- TRUE
} else {
Expand Down
4 changes: 2 additions & 2 deletions R/census_helper_v2.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race).
#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
#' @param year A character object specifying the year of U.S. Census data to be downloaded.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}.
#' @param census.data An optional census object of class \code{list} containing
#' pre-saved Census geographic data. Can be created using \code{get_census_data} function.
#' If \code{\var{census.data}} is provided, the \code{\var{year}} element must
Expand All @@ -56,7 +56,7 @@
#'
#' @keywords internal

census_helper_new <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2010", census.data = NULL, retry = 3, use.counties = FALSE) {
census_helper_new <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2020", census.data = NULL, retry = 3, use.counties = FALSE) {

if (geo == "precinct") {
stop("Error: census_helper_new function does not currently support precinct-level data.")
Expand Down
10 changes: 5 additions & 5 deletions R/get_census_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
#' This function obtains U.S. Census data via the public API. User
#' can specify the variables and region(s) for which to obtain data.
#'
#' @param data_url URL root of the API, including the question mark,
#' e.g., \code{"https://api.census.gov/data/2010/dec/dec/sf1?"}.
#' @param data_url URL root of the API,
#' e.g., \code{"https://api.census.gov/data/2020/dec/pl"}.
#' @param key A required character object containing user's Census API key,
#' which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.
#' @param var.names A character vector of variables to get,
#' e.g., \code{c("P005003","P005004","P005005", "P005006")}.
#' e.g., \code{c("P2_005N", "P2_006N", "P2_007N", "P2_008N")}.
#' If there are more than 50 variables, then function will automatically
#' split variables into separate queries.
#' @param region Character object specifying which region to obtain data for.
Expand All @@ -23,8 +23,8 @@
#' @examples
#' \dontrun{
#' get_census_api(
#' data_url = "https://api.census.gov/data/2010/dec/sf1?", key = "...",
#' var.names = c("P005003", "P005004", "P005005", "P005006"), region = "for=county:*&in=state:34"
#' data_url = "https://api.census.gov/data/2020/dec/pl", key = "...",
#' var.names = c("P2_005N", "P2_006N", "P2_007N", "P2_008N"), region = "for=county:*&in=state:34"
#' )
#' }
#'
Expand Down
10 changes: 5 additions & 5 deletions R/get_census_api_2.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#' It is used by the \code{get_census_api} function. The user should not need to call this
#' function directly.
#'
#' @param data_url URL root of the API, including the question mark,
#' e.g., \code{"https://api.census.gov/data/2010/dec/sf1?"}.
#' @param data_url URL root of the API,
#' e.g., \code{"https://api.census.gov/data/2020/dec/pl"}.
#' @param key A required character object containing user's Census API key,
#' which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.
#' @param get A character vector of variables to get,
#' e.g., \code{c("P005003","P005004","P005005", "P005006")}.
#' e.g., \code{c("P2_005N", "P2_006N", "P2_007N", "P2_008N")}.
#' If there are more than 50 variables, then function will automatically
#' split variables into separate queries.
#' @param region Character object specifying which region to obtain data for.
Expand All @@ -22,8 +22,8 @@
#' If unsuccessful, function prints the URL query that was constructed.
#'
#' @examples
#' \dontrun{try(get_census_api_2(data_url = "https://api.census.gov/data/2010/dec/sf1?", key = "...",
#' get = c("P005003","P005004","P005005", "P005006"), region = "for=county:*&in=state:34"))}
#' \dontrun{try(get_census_api_2(data_url = "https://api.census.gov/data/2020/dec/pl", key = "...",
#' get = c("P2_005N", "P2_006N", "P2_007N", "P2_008N"), region = "for=county:*&in=state:34"))}
#'
#' @references
#' Based on code authored by Nicholas Nagle, which is available
Expand Down
6 changes: 4 additions & 2 deletions R/get_census_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race).
#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
#' @param year A character object specifying the year of U.S. Census data to be downloaded.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}.
#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}.
#' Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and
#' \code{\var{sex}} are both \code{FALSE}.
#' @param census.geo An optional character vector specifying what level of
Expand All @@ -34,7 +34,7 @@
#' @examples
#' \dontrun{get_census_data(key = "...", states = c("NJ", "NY"), age = TRUE, sex = FALSE)}
#' \dontrun{get_census_data(key = "...", states = "MN", age = FALSE, sex = FALSE, year = "2020")}
get_census_data <- function(key = NULL, states, age = FALSE, sex = FALSE, year = "2010", census.geo = "block", retry = 3, county.list = NULL) {
get_census_data <- function(key = NULL, states, age = FALSE, sex = FALSE, year = "2020", census.geo = "block", retry = 3, county.list = NULL) {

if (is.null(key)) {
# Matches tidycensus name for env var
Expand All @@ -47,6 +47,8 @@ get_census_data <- function(key = NULL, states, age = FALSE, sex = FALSE, year =

states <- toupper(states)

message("Collecting ", year, " Census data...")

CensusObj <- NULL
for (s in states) {
CensusObj[[s]] <- list(state = s, age = age, sex = sex, year = year)
Expand Down
4 changes: 2 additions & 2 deletions R/merge_surnames.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' named 'surname' containing list of surnames to be merged with Census lists.
#' @param surname.year An object of class \code{numeric} indicating which year
#' Census Surname List is from. Accepted values are \code{2000}, \code{2010}, and \code{2020}.
#' Default is \code{2010}.
#' Default is \code{2020}.
#' @param name.data An object of class \code{data.frame}. Must contain a leading
#' column of surnames, and 5 subsequent columns, with Pr(Race | Surname) for each
#' of the five major racial categories.
Expand All @@ -54,7 +54,7 @@
#'
#' @keywords internal

merge_surnames <- function(voter.file, surname.year = 2010, name.data, clean.surname = TRUE, impute.missing = TRUE) {
merge_surnames <- function(voter.file, surname.year = 2020, name.data, clean.surname = TRUE, impute.missing = TRUE) {

if ("surname" %in% names(voter.file) == FALSE) {
stop('Data does not contain surname field.')
Expand Down
12 changes: 5 additions & 7 deletions R/predict_race.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@
#' Default is \code{TRUE}.
#' @param surname.only A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, race predictions will
#' only use surname data and calculate Pr(Race | Surname). Default is \code{FALSE}.
#' @param surname.year A number to specify the year of the census surname statistics.
#' These surname statistics is stored in the data, and will be automatically loaded.
#' The default value is \code{2010}, which means the surname statistics from the
#' 2010 census will be used. Currently, the other available choices are \code{2000} and \code{2020}.
#' @param census.geo An optional character vector specifying what level of
#' geography to use to merge in U.S. Census geographic data. Currently
#' \code{"county"}, \code{"tract"}, \code{"block_group"}, \code{"block"}, and \code{"place"}
Expand Down Expand Up @@ -64,7 +60,7 @@
#' If \code{TRUE}, \code{\var{voter.file}} should include a numerical variable \code{\var{sex}},
#' where \code{\var{sex}} is coded as 0 for males and 1 for females.
#' @param year An optional character vector specifying the year of U.S. Census geographic
#' data to be downloaded. Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}.
#' data to be downloaded. Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}.
#' @param party An optional character object specifying party registration field
#' in \code{\var{voter.file}}, e.g., \code{\var{party} = "PartyReg"}.
#' If specified, race/ethnicity predictions will be conditioned
Expand Down Expand Up @@ -136,11 +132,13 @@
#' @export

predict_race <- function(voter.file, census.surname = TRUE, surname.only = FALSE,
surname.year = 2010, census.geo, census.key = NULL, census.data = NULL, age = FALSE,
sex = FALSE, year = "2010", party = NULL, retry = 3, impute.missing = TRUE,
census.geo, census.key = NULL, census.data = NULL, age = FALSE,
sex = FALSE, year = "2020", party = NULL, retry = 3, impute.missing = TRUE,
use.counties = FALSE, model = "BISG", race.init = NULL, name.dictionaries = NULL,
names.to.use = "surname", control = NULL) {

message("Predicting race for ", year)

## Check model type
if (!(model %in% c("BISG", "fBISG"))) {
stop(
Expand Down
12 changes: 6 additions & 6 deletions R/race_prediction_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ NULL
#' @keywords internal

.predict_race_old <- function(voter.file,
census.surname = TRUE, surname.only = FALSE, surname.year = 2010, name.dictionaries = NULL,
census.geo, census.key, census.data = NULL, age = FALSE, sex = FALSE, year = "2010",
census.surname = TRUE, surname.only = FALSE, surname.year = 2020, name.dictionaries = NULL,
census.geo, census.key, census.data = NULL, age = FALSE, sex = FALSE, year = "2020",
party, retry = 3, impute.missing = TRUE, use.counties = FALSE) {

# warning: 2020 census data only supports prediction when both age and sex are equal to FALSE
Expand Down Expand Up @@ -101,7 +101,7 @@ NULL
## Merge in Pr(Race | Surname) if necessary
if (census.surname) {
if (!(surname.year %in% c(2000, 2010, 2020))) {
stop(paste(surname.year, "is not a valid surname.year. It should be 2000, 2010 (default) or 2020."))
stop(paste(surname.year, "is not a valid surname.year. It should be 2000, 2010 or 2020 (default)."))
}
voter.file <- merge_surnames(voter.file, surname.year = surname.year, name.data = NULL, impute.missing = impute.missing)
} else {
Expand Down Expand Up @@ -261,14 +261,14 @@ NULL
#' New race prediction function, implementing classical BISG with augmented
#' surname dictionary, as well as first and middle name information.
#' @rdname modfuns
predict_race_new <- function(voter.file, names.to.use, year = "2010",age = FALSE, sex = FALSE,
predict_race_new <- function(voter.file, names.to.use, year = "2020",age = FALSE, sex = FALSE,
census.geo, census.key = NULL, name.dictionaries, surname.only=FALSE,
census.data = NULL, retry = 0, impute.missing = TRUE, census.surname = FALSE,
use.counties = FALSE) {

# Check years
if (!(year %in% c("2000", "2010", "2020"))){
stop("Year should be one of 2000, 2010, or 2020.")
stop("Year should be one of 2000, 2010, or 2020 (default).")
}
# Define 2020 race marginal
race.margin <- c(r_whi=0.5783619, r_bla=0.1205021, r_his=0.1872988,
Expand Down Expand Up @@ -430,7 +430,7 @@ predict_race_new <- function(voter.file, names.to.use, year = "2010",age = FALSE
#' error correction, fully Bayesian model) with augmented
#' surname dictionary, as well as first and middle name information.
#' @rdname modfuns
predict_race_me <- function(voter.file, names.to.use, year = "2010",age = FALSE, sex = FALSE,
predict_race_me <- function(voter.file, names.to.use, year = "2020",age = FALSE, sex = FALSE,
census.geo, census.key, name.dictionaries, surname.only=FALSE,
census.data = NULL, retry = 0, impute.missing = TRUE, census.surname = FALSE,
use.counties = FALSE, race.init, ctrl)
Expand Down
4 changes: 2 additions & 2 deletions man/census_geo_api.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/census_helper.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/census_helper_new.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/get_census_api.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/get_census_api_2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/get_census_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e6ce666

Please sign in to comment.