Skip to content

Commit

Permalink
back to dplyr, idk other way to fix negbin estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
pachadotdev committed Mar 17, 2024
1 parent fb28a74 commit 8351409
Show file tree
Hide file tree
Showing 18 changed files with 120 additions and 43 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ Authors@R: c(
comment = c(ORCID = "0000-0003-1017-7574"))
)
Imports:
data.table,
dplyr,
Formula,
generics,
magrittr,
MASS,
rlang,
stats
Suggests:
knitr,
Expand Down
18 changes: 14 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ S3method(tidy,felm)
S3method(vcov,apes)
S3method(vcov,feglm)
S3method(vcov,felm)
export("%>%")
export(apes)
export(augment)
export(bias_corr)
Expand All @@ -41,13 +42,22 @@ export(tidy)
importFrom(Formula,Formula)
importFrom(MASS,negative.binomial)
importFrom(MASS,theta.ml)
importFrom(data.table,":=")
importFrom(data.table,.SD)
importFrom(data.table,setDT)
importFrom(data.table,setkeyv)
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(dplyr,ungroup)
importFrom(dplyr,vars)
importFrom(generics,augment)
importFrom(generics,glance)
importFrom(generics,tidy)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,sym)
importFrom(stats,as.formula)
importFrom(stats,binomial)
importFrom(stats,gaussian)
Expand Down
3 changes: 2 additions & 1 deletion R/capybara-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
#' and Wanner (2020).
#'
#' @name capybara-package
#' @importFrom data.table setDT setkeyv := .SD
#' @importFrom dplyr across all_of filter group_by mutate pull select summarise ungroup vars
#' @importFrom Formula Formula
#' @importFrom MASS negative.binomial theta.ml
#' @importFrom rlang sym :=
#' @importFrom stats as.formula binomial model.matrix na.omit gaussian poisson
#' pnorm printCoefmat rgamma rlogis rnorm rpois terms vcov predict
#' @importFrom utils combn
Expand Down
7 changes: 3 additions & 4 deletions R/feglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,13 @@ feglm <- function(
k <- length(k.vars)

# Generate temporary variable ----
setkeyv(data, k.vars)
tmp.var <- temp_var_(data)

# Drop observations that do not contribute to the log likelihood ----
drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control)
data <- drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control)

# Transform fixed effects and clusters to factors ----
transform_fe_(data, formula, k.vars)
data <- transform_fe_(data, formula, k.vars)

# Determine the number of dropped observations ----
nt <- nrow(data)
Expand Down Expand Up @@ -126,7 +125,7 @@ feglm <- function(
start_guesses_(beta.start, eta.start, y, X, beta, nt, wt, p, family)

# Get names and number of levels in each fixed effects category ----
nms.fe <- lapply(data[, k.vars, with = FALSE], levels)
nms.fe <- lapply(select(data, all_of(k.vars)), levels)
lvls.k <- vapply(nms.fe, length, integer(1))

# Generate auxiliary list of indexes for different sub panels ----
Expand Down
3 changes: 1 addition & 2 deletions R/fenegbin.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ fenegbin <- function(
k <- length(k.vars)

# Generate temporary variable ----
setkeyv(data, k.vars)
tmp.var <- temp_var_(data)

# Drop observations that do not contribute to the log likelihood ----
Expand Down Expand Up @@ -92,7 +91,7 @@ fenegbin <- function(
start_guesses_(beta.start, eta.start, y, X, beta, nt, wt, p, family)

# Get names and number of levels in each fixed effects category ----
nms.fe <- lapply(data[, k.vars, with = FALSE], levels)
nms.fe <- lapply(select(data, all_of(k.vars)), levels)
lvls.k <- vapply(nms.fe, length, integer(1))

# Generate auxiliary list of indexes for different sub panels ----
Expand Down
8 changes: 5 additions & 3 deletions R/generics_vcov.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,14 @@ vcov.feglm <- function(
}

# Ensure cluster variables are factors
D[, (cl.vars) := lapply(.SD, check_factor_), .SDcols = cl.vars]
D <- mutate(D, across(all_of(cl.vars), check_factor_))

# Join cluster variables and scores
sp.vars <- colnames(G)
G <- cbind(D, G)
rm(D)

# Multiway clustering by Cameron, Gelbach, and Miller (2011)
setkeyv(G, cl.vars)
B <- matrix(0.0, p, p)
for (i in seq.int(length(cl.vars))) {
# Compute outer product for all possible combinations
Expand All @@ -130,7 +129,10 @@ vcov.feglm <- function(
cl <- cl.combn[, j]
B.r <- B.r + crossprod_(
as.matrix(
G[, lapply(.SD, sum), by = mget(cl), .SDcols = sp.vars][, (cl) := NULL]
G %>%
group_by(!!sym(cl)) %>%
summarise(across(sp.vars, sum), .groups = "drop") %>%
select(-!!sym(cl))
),
NA_real_, FALSE, FALSE
)
Expand Down
43 changes: 27 additions & 16 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ partial_mu_eta_ <- function(eta, family, order) {

temp_var_ <- function(data) {
repeat {
tmp.var <- paste0(sample(letters, 5L, replace = TRUE), collapse = "")
tmp.var <- paste0("capybara_internal_variable_", sample(letters, 5L, replace = TRUE), collapse = "")
if (!(tmp.var %in% colnames(data))) {
break
}
Expand Down Expand Up @@ -108,8 +108,7 @@ update_formula_ <- function(formula) {
}

model_frame_ <- function(data, formula, weights) {
setDT(data)
data <- data[, c(all.vars(formula), weights), with = FALSE]
data <- select(data, all_of(c(all.vars(formula), weights)))

lhs <- names(data)[[1L]]

Expand All @@ -129,33 +128,38 @@ model_frame_ <- function(data, formula, weights) {
check_response_ <- function(data, lhs, family) {
if (family[["family"]] == "binomial") {
# Check if 'y' is numeric
if (data[, is.numeric(get(lhs))]) {
if (is.numeric(pull(select(data, !!sym(lhs))))) {
# Check if 'y' is in [0, 1]
if (data[, any(get(lhs) < 0.0 | get(lhs) > 1.0)]) {
if (nrow(filter(data, !!sym(lhs) < 0.0 | !!sym(lhs) > 1.0)) > 0L) {
stop("Model response has to be within the unit interval.",
call. = FALSE
)
}
} else {
# Check if 'y' is factor and transform otherwise
data[, (1L) := check_factor_(get(lhs))]
data <- mutate(data, !!sym(lhs) := check_factor_(!!sym(lhs)))

# Check if the number of levels equals two
if (data[, length(levels(get(lhs)))] != 2L) {
if (nrow(summarise(data, n_levels = nlevels(!!sym(lhs)))) != 2L) {
stop("Model response has to be binary.", call. = FALSE)
}

# Ensure 'y' is 0-1 encoded
data[, (1L) := as.numeric(get(lhs)) - 1.0]
## if lhs is not numeric, convert it
if (!is.numeric(pull(select(data, !!sym(lhs))))) {
data <- mutate(data, !!sym(lhs) := as.numeric(!!sym(lhs)) - 1.0)
} else {
data <- mutate(data, !!sym(lhs) := !!sym(lhs) - 1.0)
}
}
} else if (family[["family"]] %in% c("Gamma", "inverse.gaussian")) {
# Check if 'y' is strictly positive
if (data[, any(get(lhs) <= 0.0)]) {
if (nrow(filter(data, !!sym(lhs) <= 0.0)) > 0L) {
stop("Model response has to be strictly positive.", call. = FALSE)
}
} else if (family[["family"]] != "gaussian") {
# Check if 'y' is positive
if (data[, any(get(lhs) < 0.0)]) {
if (nrow(filter(data, !!sym(lhs) < 0.0)) > 0L) {
stop("Model response has to be positive.", call. = FALSE)
}
}
Expand All @@ -168,13 +172,16 @@ drop_by_link_type_ <- function(data, lhs, family, tmp.var, k.vars, control) {
# Drop observations that do not contribute to the log-likelihood
ncheck <- nrow(data)
for (j in k.vars) {
data[, (tmp.var) := mean(get(lhs)), by = eval(j)]
data <- data %>%
group_by(!!sym(j)) %>%
mutate(!!sym(tmp.var) := mean(!!sym(lhs))) %>%
ungroup()
if (family[["family"]] == "binomial") {
data <- data[get(tmp.var) > 0.0 & get(tmp.var) < 1.0]
data <- filter(data, !!sym(tmp.var) > 0.0 & !!sym(tmp.var) < 1.0)
} else {
data <- data[get(tmp.var) > 0.0]
data <- filter(data, !!sym(tmp.var) > 0.0)
}
data[, (tmp.var) := NULL]
data <- select(data, -!!sym(tmp.var))
}

# Check termination
Expand All @@ -184,15 +191,19 @@ drop_by_link_type_ <- function(data, lhs, family, tmp.var, k.vars, control) {
}
}
}

data
}

transform_fe_ <- function(data, formula, k.vars) {
data[, (k.vars) := lapply(.SD, check_factor_), .SDcols = k.vars]
data <- mutate(data, across(all_of(k.vars), check_factor_))

if (length(formula)[[2L]] > 2L) {
add.vars <- attr(terms(formula, rhs = 3L), "term.labels")
data[, (add.vars) := lapply(.SD, check_factor_), .SDcols = add.vars]
data <- mutate(data, across(all_of(add.vars), check_factor_))
}

data
}

nobs_ <- function(nobs.full, nobs.na, nt) {
Expand Down
2 changes: 1 addition & 1 deletion R/internals.R
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ getScoreMatrix <- function(object) {
MX * (nu * w)
}

# Returns suitable name for a tempordrop_by_link_type_ary variable
# Returns suitable name for a temporary variable
temp_var_ <- function(data) {
repeat {
tmp.var <- paste0(sample(letters, 5L, replace = TRUE), collapse = "")
Expand Down
14 changes: 14 additions & 0 deletions R/utils-pipe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
#' @param lhs A value or the magrittr placeholder.
#' @param rhs A function call using the magrittr semantics.
#' @return The result of calling `rhs(lhs)`.
NULL
2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
intro: intro.html
last_built: 2024-03-04T05:22Z
last_built: 2024-03-17T19:40Z

2 changes: 1 addition & 1 deletion docs/reference/apes.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/bias_corr.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/feglm.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/felm.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 21 additions & 2 deletions docs/reference/fenegbin.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/fepoisson.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/reference/pipe.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/pipe.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8351409

Please sign in to comment.