Merge pull request #4 from mccarthy-m-g/chapter-vignettes

Add chapter example articles
mccarthy-m-g · Apr 11, 2024 · 9adea57 · 9adea57
2 parents e1b8a99 + 7b0fa96
commit 9adea57
Show file tree

Hide file tree

Showing 63 changed files with 30,131 additions and 25,460 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,3 +6,6 @@
 ^docs$
 ^pkgdown$
 ^\.github$
+^vignettes/articles$
+^doc$
+^Meta$
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -30,7 +30,8 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::pkgdown, local::.
+          extra-packages: any::pkgdown, local::., any::Matrix
+          pak-version: devel
           needs: website
 
       - name: Build site

diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,5 @@
 .Ruserdata
 inst/doc
 docs
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,6 +22,30 @@ Suggests:
     rmarkdown,
     dplyr,
     tidyr
+Config/Needs/website:
+    broom,
+    broom.mixed,
+    corrr,
+    dplyr,
+    easystats,
+    ggh4x,
+    ggplot2,
+    glmmTMB,
+    gt,
+    lavaan,
+    lme4,
+    modelbased,
+    muhaz,
+    patchwork (>= 1.2.0),
+    performance,
+    purrr (>= 1.0.0),
+    scales,
+    slider,
+    stringr,
+    survival,
+    tidyr,
+    tidySEM,
+    vctrs
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3

diff --git a/R/data.R b/R/data.R
@@ -311,9 +311,9 @@ NULL
 #'   \item{`alcohol_use`}{Natural logarithm of the averaged scores of three six-point items measuring frequency of beer, wine, and liquor consumption, respectively.}
 #'   \item{`peer_pressure`}{Natural logarithm of a six-point item measuring frequency friends offered alcoholic drinks during the past month.}
 #' }
-#' #' @note
+#' @note
 #' Barnes, Farrell, and Banerjee (1994) report a sample of only 699 adolescents;
-#' however, they note that this was an ongoing longitudinal study which likely
+#' however, they note that this was an ongoing longitudinal study, which likely
 #' explains the sample size discrepancy with the data used by Singer and Willett
 #' (2003).
 #' @source
@@ -491,11 +491,12 @@ NULL
 #'   \item{`censor`}{Censoring status.}
 #'   \item{`interview_age`}{Age at time of interview.}
 #'   \item{`female`}{Binary indicator for whether the adult is a female.}
-#'   \item{`nsibs`}{Number of siblings.}
+#'   \item{`siblings`}{Number of siblings.}
 #'   \item{`bigfamily`}{Binary indicator for whether the adult has five or more siblings.}
 #'   \item{`period`}{Age each record corresponds to.}
-#'   \item{`depress`}{Binary indicator for whether the adult experienced a depressive episode.}
-#'   \item{`pd`}{Binary indicator for whether the adult's parents divorced at this or any previous age.}
+#'   \item{`depressive_episode`}{Binary indicator for whether the adult experienced a depressive episode.}
+#'   \item{`parental_divorce`}{Binary indicator for whether the adult's parents divorced at this or any previous age.}
+#'   \item{`parental_divorce_now`}{Binary indicator for whether the adult's parents divorced during the current period.}
 #' }
 #' @source
 #' Wheaton, B., Roszell, P., & Hall, K. (1997). The impact of twenty childhood
@@ -521,8 +522,6 @@ NULL
 #'   \item{`censor`}{Censoring status.}
 #'   \item{`abused`}{Binary indicator for whether the adolescent was abused.}
 #'   \item{`black`}{Binary indicator for whether the adolescent is black.}
-#'   \item{`abused_black`}{Binary indicator for whether the adolescent was abused and is black.}
-#'   \item{`d8-d18`}{Discrete time indicators for each age.}
 #'   \item{`period`}{Age each record corresponds to.}
 #'   \item{`event`}{Binary indicator for whether the adolescent was arrested.}
 #' }
@@ -550,8 +549,6 @@ NULL
 #'   \item{`last_term`}{The term a student stopped enrolling in mathematics courses.}
 #'   \item{`woman`}{Binary indicator for whether the student identified as a woman.}
 #'   \item{`censor`}{Censoring status.}
-#'   \item{`hs11-hs12`}{Discrete time indicators for each term of high school.}
-#'   \item{`coll1-coll3`}{Discrete time indicators for each term of college.}
 #'   \item{`term`}{Term each record corresponds to.}
 #'   \item{`event`}{Binary indicator for whether the student stopped enrolling in mathematics courses at a given term.}
 #' }
@@ -746,29 +743,45 @@ NULL
 
 #' Days to cocaine relapse after abstinence
 #'
-#' Data from Hall and colleagues (1990) measuring the relation between the
-#' number of days of relapse to cocaine use and several predictors that might
-#' be associated relapse in a sample of 104 newly abstinent cocaine users.
-#' Former addicts were followed for up to 12 weeks or until relapse.
+#' A subset of unpublished data from Hall, Havassy, and Wasserman (1990)
+#' measuring the relation between the number of days until relapse to cocaine
+#' use and several predictors that might be associated with relapse in a sample
+#' of 104 newly abstinent cocaine users who recently completed an
+#' abstinence-oriented treatment program. Former cocaine users were followed for
+#' up to 12 weeks post-treatment or until they used cocaine for 7 consecutive
+#' days. Self-reported abstinence was confirmed at each interview by the absence
+#' of cocaine in urine specimens.
 #'
 #' @format
 #' A person-period data frame with `r nrow(cocaine_relapse_2)` rows and
 #' `r ncol(cocaine_relapse_2)` columns:
 #'
 #' \describe{
 #'   \item{`id`}{Participant ID.}
-#'   \item{`days`}{Number of days of relapse to cocaine use.}
-#'   \item{`censor`}{Censoring status.}
+#'   \item{`days`}{Number of days until relapse to cocaine use or censoring. Relapse was defined as 4 or more days of cocaine use during the week preceding an interview. Study dropouts and lost participants were coded as relapsing to cocaine use, with the number of days until relapse coded as occurring the week after the last follow-up interview attended.}
+#'   \item{`censor`}{Censoring status (0 = relapsed, 1 = censored).}
 #'   \item{`needle`}{Binary indicator for whether cocaine was ever used intravenously.}
-#'   \item{`base_mood`}{Positive mood score on a standardized question at an intake interview taken during the last week of treatment.}
+#'   \item{`base_mood`}{Total score on the positive mood subscales (Activity and Happiness) of the Mood Questionnaire (Ryman, Biersner, & LaRocco, 1974), taken at an intake interview during the last week of treatment. Each item used a five-point Likert scale (ranging from 0 = not at all, to 4 = extremely).}
 #'   \item{`followup`}{Week of follow-up interview.}
-#'   \item{`mood`}{Positive mood score on a standardized question.}
+#'   \item{`mood`}{Total score on the positive mood subscales (Activity and Happiness) of the Mood Questionnaire (Ryman, Biersner, & LaRocco, 1974), taken during follow-up interviews each week post-treatment. Each item used a five-point Likert scale (ranging from 0 = not at all, to 4 = extremely).}
 #' }
+#' @note
+#' Hall, Havassy, and Wasserman (1990) measured time to relapse in weeks, not
+#' days; however, to use these data to illustrate imputation strategies, Singer
+#' and Willett (2003) converted the weekly relapse information into days, then
+#' jittered these event times, effectively converting them from discrete-time to
+#' continuous-time. Additionally, Hall, Havassy, and Wasserman (1990) do not
+#' report following cocaine users in their study; thus, this appears to be
+#' unpublished data.
 #' @source
 #' Hall, S. M., Havassy, B. E., & Wasserman, D. A. (1990). Commitment to
 #' abstinence and acute stress in relapse to alcohol, opiates, and nicotine.
 #' Journal of Consulting and Clinical Psychology, 58, 175–181.
 #' <https://doi.org/10.1037//0022-006x.58.2.175>
+#' @references
+#' Ryman, D. H., Biersner, R. J., & La Rocco, J. M. (1974). Reliabilities and
+#' validities of the Mood Questionnaire. Psychological Reports, 35, 479–484.
+#' <https://doi.org/10.2466/pr0.1974.35.1.479>
 "cocaine_relapse_2"
 
 #' Days to psychiatric hospital discharge

diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 [![R-CMD-check](https://github.com/mccarthy-m-g/alda/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mccarthy-m-g/alda/actions/workflows/R-CMD-check.yaml)
 <!-- badges: end -->
 
-This package contains 31 data sets provided by Singer and Willet (2003) with their book, Applied longitudinal data analysis: Modeling change and event occurrence, that are suitable for longitudinal mixed effects modelling, longitudinal structural equation modelling, and survival analysis. All the data sets in this package are real data from real studies; however, most were modified by Singer and Willet (2003) for the illustration of statistical methods, so they may not match the results of the original studies. Additionally, the early intervention data used throughout Chapter 3 is not included in this package because it is not openly available.
+This package contains 31 data sets provided by Singer and Willett (2003) with their book, *Applied longitudinal data analysis: Modeling change and event occurrence*, that are suitable for longitudinal mixed effects modelling, longitudinal structural equation modelling, and survival analysis. All the data sets in this package are real data from real studies; however, most were modified by Singer and Willett (2003) for the illustration of statistical methods, so they may not match the results of the original studies. Additionally, the early intervention data used throughout Chapter 3 is not included in this package because it is not openly available.
 
 There are ten data sets for longitudinal mixed effects modelling:
 
@@ -36,16 +36,38 @@ There are twenty data sets for survival analysis:
 - `?math_dropout`: Math course history (Chapter 12)
 - `?honking`: Time to horn honking (Chapter 13)
 - `?alcohol_relapse`: Weeks to alcohol relapse after treatment (Chapter 13)
-- `?judges`: Supreme Court justice tenure (Chapter 13)
+- `?judges`: Supreme Court justice tenure (Chapters 13 and 15)
 - `?first_depression_2`: Age of first depression (Chapter 13)
 - `?health_workers`: Length of health worker employment (Chapter 13)
-- `?rearrest`: Days to inmate recidivism (Chapter 14)
+- `?rearrest`: Days to inmate recidivism (Chapters 14 and 15)
 - `?first_cocaine`: Age of first cocaine use (Chapter 15)
 - `?cocaine_relapse_2`: Days to cocaine relapse after abstinence (Chapter 15)
 - `?psychiatric_discharge`: Days to psychiatric hospital discharge (Chapter 15)
 - `?physicians`: Physician career history (Chapter 15)
 - `?monkeys`: Piagetian monkeys (Chapter 15)
 
+## Vignettes and articles
+
+There is one vignette with tips and tricks for working with longitudinal data:
+
+- `vignette("longitudinal-data-organization")`
+
+There are thirteen articles on the package documentation website demonstrating how to recreate examples from the textbook in R:
+
+- [Chapter 2](articles/chapter-2.html): Exploring longitudinal data on change
+- [Chapter 4](articles/chapter-4.html): Doing data analysis with the multilevel model for change
+- [Chapter 5](articles/chapter-5.html): Treating time more flexibly
+- [Chapter 6](articles/chapter-6.html): Modeling discontinuous and nonlinear change
+- [Chapter 7](articles/chapter-7.html): Examining the multilevel model’s error covariance structure
+- [Chapter 8](articles/chapter-8.html): Modeling change using covariance structure analysis
+- [Chapter 9](articles/chapter-9.html): A framework for investigating event occurrence
+- [Chapter 10](articles/chapter-10.html): Describing discrete-time event occurrence data
+- [Chapter 11](articles/chapter-11.html): Fitting basic discrete-time hazard models
+- [Chapter 12](articles/chapter-12.html): Extending the discrete-time hazard model
+- [Chapter 13](articles/chapter-13.html): Describing continuous-time event occurrence data
+- [Chapter 14](articles/chapter-14.html): Fitting the Cox regression model
+- [Chapter 15](articles/chapter-15.html): Extending the Cox regression model
+
 ## Documentation
 
 See the package website at <https://mccarthy-m-g.github.io/alda/>, or the documentation in the installed package: `help(package = "alda")`.

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -6,6 +6,7 @@ development:
 template:
   bootstrap: 5
 
+# Reference page
 reference:
 - title: Data for longitudinal mixed effects modelling
   desc:
@@ -49,3 +50,48 @@ reference:
   - psychiatric_discharge
   - physicians
   - monkeys
+
+navbar:
+  structure:
+    left:  [reference, articles, examples, news]
+  components:
+    # Use custom article menus so textbook examples can have their own menu.
+    articles:
+      text: Tips & Tricks
+      menu:
+      - text: Longitudinal data organization
+        href: articles/longitudinal-data-organization.html
+    examples:
+      text: Textbook examples
+      menu:
+      - text: Longitudinal mixed effects modelling
+      - text: Chapter 2
+        href: articles/chapter-2.html
+      - text: Chapter 4
+        href: articles/chapter-4.html
+      - text: Chapter 5
+        href: articles/chapter-5.html
+      - text: Chapter 6
+        href: articles/chapter-6.html
+      - text: Chapter 7
+        href: articles/chapter-7.html
+      - text: -------
+      - text: Longitudinal structural equation modelling
+      - text: Chapter 8
+        href: articles/chapter-8.html
+      - text: -------
+      - text: Survival analysis
+      - text: Chapter 9
+        href: articles/chapter-9.html
+      - text: Chapter 10
+        href: articles/chapter-10.html
+      - text: Chapter 11
+        href: articles/chapter-11.html
+      - text: Chapter 12
+        href: articles/chapter-12.html
+      - text: Chapter 13
+        href: articles/chapter-13.html
+      - text: Chapter 14
+        href: articles/chapter-14.html
+      - text: Chapter 15
+        href: articles/chapter-15.html
diff --git a/data-raw/data.R b/data-raw/data.R
@@ -10,7 +10,15 @@ library(janitor)
 sas_data  <- list.files("data-raw/data/raw", full.names = TRUE)
 filenames <- list.files("data-raw/data/raw") |> path_ext_remove()
 sas_data  <- set_names(sas_data, filenames)
-tidy_data <- map(sas_data, \(.x) .x |> read_sas() |> clean_names())
+tidy_data <- sas_data |>
+  map(
+    \(.x) {
+      .x |>
+        read_sas() |>
+        clean_names() |>
+        mutate(across(any_of("id"), as.factor))
+    }
+  )
 
 # Chapter 2 -------------------------------------------------------------------
 
@@ -153,24 +161,31 @@ tenure <- tidy_data$tenure_orig |>
 
 first_depression_1 <- tidy_data$depression_pp |>
   rename(
-    depress = event,
+    depressive_episode = event,
     interview_age = age,
-    censor_age = censage
+    censor_age = censage,
+    siblings = nsibs,
+    parental_divorce = pd,
+    parental_divorce_now = pdnow
   ) |>
   select(-(censor_age:aged), -(sibs12:sibs9plus), -(one:age_18cub))
 
 first_arrest <- tidy_data$firstarrest_pp |>
-  rename(
-    abused_black = ablack
-  )
+  select(-starts_with("d"), -ablack)
 
 math_dropout <- tidy_data$mathdropout_pp |>
   rename(
     woman = female,
     term = period,
     last_term = lastpd
   ) |>
-  select(-c(one, ltime))
+  select(
+    -c(one, ltime, fltime),
+    -starts_with("hs"),
+    -starts_with("coll"),
+    -starts_with("fhs"),
+    -starts_with("fcoll")
+  )
 
 # Chapter 13 ------------------------------------------------------------------
 
@@ -228,6 +243,7 @@ cocaine_relapse_2 <- tidy_data$relapse_days |>
     cols = starts_with("mood"),
     names_to = "followup",
     names_pattern = "([[:digit:]]+)",
+    names_transform = list(followup = as.integer),
     values_to = "mood"
   )
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,5 @@ @@
     .Ruserdata
     inst/doc
     docs
+    /doc/
+    /Meta/