
Commit

#3 updated slides with some jtools and broom functions
sebrauschert committed May 29, 2019
1 parent c73fee4 commit 97ee149
Showing 3 changed files with 133 additions and 19 deletions.
Binary file modified .DS_Store
Binary file not shown.
35 changes: 32 additions & 3 deletions R/03_analysis_modelling.R
@@ -25,7 +25,7 @@ library(grid)
library(tidyverse)
library(ggpubr)
library(ggfortify) #autoplot function, see below in diagnostics
theme_set(theme_classic())
theme_set(theme_minimal())

#### Read in data ----
# Read in a CSV file
@@ -72,7 +72,7 @@ no_out %>%
labs(title = "Days: 1 to 3 outlier removed", x = "", y = "Measurement") +
geom_boxplot() +
scale_color_telethonkids("light") +
theme_minimal() +
theme_minimal()



@@ -139,8 +139,32 @@ model_da2_day3 <- lm(day3 ~ day2, data=no_out)
summary(model_da2_day3)

#' What do you see in the R output?
#' Is this in line with tidy data?

library(broom) #tidy data in linear model output

tidy(model_da2_day3)

# Observation-level statistics: fitted values and residuals per data point
augment(model_da2_day3)

# More model statistics
glance(model_da2_day3)

# jtools: compact model summaries
library(jtools)  # needed for summ()
summ(model_da2_day3, scale = TRUE, part.corr = TRUE, confint = TRUE, pvals = FALSE)

# Forest plot for the coefficients: using jtools

library(jtools)
model <- lm(day3 ~ day2 + male + smoker + intervention, data = no_out)
plot_summs(model)

# You can also compare estimates of different models

model1 <- lm(day3 ~ day2, data = no_out)
model2 <- lm(day3 ~ day2 + male + smoker, data = no_out)
plot_summs(model1, model2)

#### Model Diagnostic plots ----
#' With base R it is very easy to look at the model diagnostic plots.
@@ -209,8 +233,13 @@ summary(modelSmoker)
#' family to binomial(link='logit'). Let's see if being a smoker is associated with being male.

logModelSmoker <- glm(smoker ~ male, data=no_out, family=binomial(link='logit'))
summary(logModelSmoker)
summ(logModelSmoker)

# For glm() you can specify different families and perform, for example, a Poisson regression
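A minimal sketch of such a Poisson model. Since the demo data has no count outcome, the variables `counts` and `exposure` below are simulated, hypothetical assumptions:

```r
# Hedged sketch: Poisson regression with glm().
# `counts` and `exposure` are simulated here, not part of the demo data.
set.seed(42)
pois_dat <- data.frame(
  exposure = rnorm(100),
  counts   = rpois(100, lambda = 2)
)
pois_model <- glm(counts ~ exposure, data = pois_dat,
                  family = poisson(link = "log"))
summary(pois_model)
```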

#' Another often used model is the linear mixed effects model, for inclusion
#' of random and fixed effects

# lme(day2 ~ day3 + male, data=no_out, random = ~ 1|id)
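A runnable sketch of the commented call above using the `nlme` package. The grouping variable `id` is an assumption (it is not guaranteed to exist in `no_out`), so the data are simulated here:

```r
# Hedged sketch: random-intercept model with nlme::lme().
# `id`, `day2`, `day3` and `male` are simulated stand-ins for the demo data.
library(nlme)
set.seed(1)
sim <- data.frame(
  id   = factor(rep(1:20, each = 3)),  # 20 subjects, 3 measurements each
  day2 = rnorm(60),
  male = rbinom(60, 1, 0.5)
)
sim$day3 <- 0.5 * sim$day2 + rnorm(60)
mixed_model <- lme(day3 ~ day2 + male, data = sim, random = ~ 1 | id)
summary(mixed_model)
```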


117 changes: 101 additions & 16 deletions vignettes/03_analysis_modelling.Rmd
@@ -25,12 +25,30 @@ library(gridExtra)
library(grid)
library(tidyverse)
library(ggpubr)
library(broom)
library(jtools)
library(tidyr)
data(iris)
```

# Statistical Modelling in R

## Notes

Order of included R functions:

>- <code>lm()</code>
>- <code>summary()</code>
>- <code>ggplot() + stat_qq()</code>
>- <code>ggplot() + geom_smooth()</code>
>- <code>tidy(), augment(), glance()</code>
>- <code>summ(), plot_summs()</code>
>- <code>autoplot()</code>
>- <code>glm() (different families)</code>

## What we cover

>- Linear Regression
@@ -65,7 +83,7 @@ In a linear regression, we aim to find a model: <br/>
The command in R for a linear model is <br/>

<code>lm(y~x)</code>.
<code style="text-align:center">lm(y~x)</code>.

**y** is the outcome variable that interests us and **x** is the variable that we want to test in its association with **y**

@@ -88,14 +106,12 @@ We know that the linear regression has the assumptions:
-


## QQ-plot:
```{r, echo=FALSE, out.extra = 'class="centre" style="width: 700px;"', warning=FALSE}
library(tidyr)
## QQ-plot: {.smaller}
```{r, echo=TRUE, out.extra = 'class="centre" style="width: 700px;"', warning=FALSE}
tki_demo %>%
filter(day2 < 100) %>%
gather(Days, measurement, day1:day3, factor_key=TRUE) %>%
ggplot( aes(sample=measurement, color=Days))+stat_qq()
ggplot( aes(sample=measurement, color=Days)) + stat_qq() + facet_wrap(~Days)
```

@@ -164,7 +180,10 @@ grid.arrange(plot1, plot2, plot3, plot4, nrow=2, ncol=2)
```

## Linear Regression
# Linear Regression

## The <code>lm()</code> function

In the plots we could already see that <em>petal length</em> and <em>petal width</em> seem to be associated. This becomes obvious when drawing a line in the plot.
Let's now perform a linear regression model in R.

@@ -183,15 +202,36 @@ In R, we add the <code>summary()</code> function to the <code>lm()</code> function

<code>summary(lm(y~x, data=data))</code>

## Example Results
## Example Results {.smaller}

```{r, message=FALSE, warning=FALSE, error=FALSE, echo = FALSE,out.extra = 'class="centre" style="width: 500px;"'}
#summary(lm(Petal.Length~Petal.Width, data=iris))
lm1 <- lm(Petal.Length~Petal.Width, data=iris)
library(sjPlot)
tab_model(lm1, file="output.html")
#tab_model(lm1, file="output.html")
summary(lm1)
```
# <code>jtools</code> and <code>broom</code>

## Improving the accessibility of the <code>lm()</code> results

The output before contains a lot of relevant information, but it is not straightforward to access individual parameters such as p-values and betas.
The <code>broom</code> R package follows the "tidy" data handling in R and turns the linear model results into an easily accessible tibble format:

```{r}
tidy(lm1)
```
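Besides `tidy()`, broom offers `glance()` for one-row model-level statistics and `augment()` for observation-level fitted values and residuals, applied here to `lm1` from above:

```{r}
glance(lm1)        # model-level statistics: R-squared, AIC, ...
head(augment(lm1)) # per-observation fitted values and residuals
```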


## Improving output style {.smaller}

The <code>broom</code> package helps with the accessibility of the output, but the style of the output is not very appealing for a publication or a report. The <code>jtools</code> package helps with this and has other nice functionalities, such as forest plots for coefficients and confidence intervals:

```{r results = 'asis'}
export_summs(lm1)
```


## Diagnostics

<div align="center">
@@ -217,16 +257,13 @@ The <b>R<sup>2</sup></b> statistic is a measure of how much variance in the data
The <b>R<sup>2</sup></b> statistic ranges from 0 to 1, where 1 means the model explains 100% of the variance. This means the model fits the data perfectly.

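This definition can be checked by hand: <b>R<sup>2</sup></b> = 1 &minus; RSS/TSS, which matches what <code>summary()</code> reports (illustrated here with the iris model):

```{r}
fit <- lm(Petal.Length ~ Petal.Width, data = iris)
rss <- sum(residuals(fit)^2)                                 # residual sum of squares
tss <- sum((iris$Petal.Length - mean(iris$Petal.Length))^2)  # total sum of squares
c(manual = 1 - rss / tss, from_summary = summary(fit)$r.squared)
```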

## Diagnostic Plots
## Diagnostic Plots {.smaller}

<div align="center">
```{r, echo=FALSE, warning=FALSE, error=FALSE}
# par(mfrow = c(2, 2))
# plot(model)
```{r, echo=TRUE, warning=FALSE, error=FALSE}
library(ggfortify)
autoplot(model)
```
</div>




@@ -243,8 +280,56 @@ ggplot(model.diag.metrics, aes(Petal.Length, Petal.Width)) +
</div>

# Multiple Linear Regression
## How to?
## How to? {.smaller}
Multiple linear regression works with the same function in R: <code>lm(y ~ x + covar1 + covar2 + ... + covarx, data=data)</code>.
The standard R output is also very messy for reports, but helps with a first visual inspection in the command line:

```{r}
summary(lm(day3 ~ day2 + male + intervention, data=tki_demo))
```

## Example 1: one model {.smaller}
With the demo data set:
```{r}
export_summs(lm(day3 ~ day2 + male + smoker, data = tki_demo))
```

## Example 2: more models {.smaller}
With the demo data set:
```{r}
lm1 <- lm(day3 ~ day2, data = tki_demo)
lm2 <- lm(day3 ~ day2 + male + smoker, data = tki_demo)
export_summs(lm1, lm2)
```
## Forest plot to compare coefficients in the model
Often we want to visualise the coefficients in a model to see their impact on the outcome, or visualise the coefficient of a specific variable in two models that differ only in the adjusted covariates. The <code>jtools</code> package has a nice function to do this very easily, utilising <code>ggplot2</code>:
<code>plot_summs()</code>

## Example 1: one model

```{r}
plot_summs(lm1)
```

## Example 2: more models

```{r}
plot_summs(lm1, lm2)
```

## Example 3: compare specific coefficients

```{r}
plot_summs(lm1, lm2, coefs="day2")
```


## Getting more info: Confidence Intervals {.smaller}
With <code>jtools</code> we can access more information from the model in one easy step. Here, we access the confidence interval and variance inflation factor (for multicollinearity testing), but leave out the p-values:

```{r}
summ(lm2, scale = TRUE, vifs = TRUE, part.corr = TRUE, confint = TRUE, pvals = FALSE)$coeftable
```

# Logistic regression
