diff --git a/modules/Data_Output/Data_Output.Rmd b/modules/Data_Output/Data_Output.Rmd index 1af192cb5..e09dd0583 100644 --- a/modules/Data_Output/Data_Output.Rmd +++ b/modules/Data_Output/Data_Output.Rmd @@ -1,14 +1,6 @@ ---- -title: "Data Output" -output: - ioslides_presentation: - css: ../../docs/styles.css - widescreen: yes ---- - -```{r, echo = FALSE} -library(tidyverse) +```{r, echo = FALSE, message = FALSE, error = FALSE} knitr::opts_chunk$set(comment = "") +library(tidyverse) ``` -## Data Output +## Data Output {.smaller} While its nice to be able to read in a variety of data formats, it's equally important to be able to output data somewhere. @@ -102,7 +94,7 @@ save.image(file = "my_environment.RData") ``` -## Using RStudio for importing/exporting data +## Using RStudio for importing/exporting data {.smaller} If there is an `.rds` or `.RData` file that you want to work with, you can open it into your environment using the file icon. @@ -131,12 +123,14 @@ ggsave(filename = "saved_plot.png", # will save in working directory width = 6, height = 3.5) # by default in inches ``` -## Summary {.small} +## Summary - Use `write_csv()` and `write_delim()` from the `readr` package to write your (modified) data - `.rds` files can be handy for saving intermediate work - Can save environment (or subset) using `save()` and `save.image()` +## Resources & Lab {.small} + 🏠 [Class Website](https://jhudatascience.org/intro_to_r/) 💻 [Data Output Lab](https://jhudatascience.org/intro_to_r/modules/Data_Output/lab/Data_Output_Lab.Rmd) @@ -145,7 +139,7 @@ ggsave(filename = "saved_plot.png", # will save in working directory 📃 [Day 2 Cheatsheet](https://jhudatascience.org/intro_to_r/modules/cheatsheets/Day-2.pdf) -```{r, fig.alt="The End", out.width = "50%", echo = FALSE, fig.align='center'} +```{r, fig.alt="The End", out.width = "35%", echo = FALSE, fig.align='center'} knitr::include_graphics(here::here("images/the-end-g23b994289_1280.jpg")) ``` diff --git a/modules/Functions/Functions.Rmd b/modules/Functions/Functions.Rmd index c9e3cb31e..95709c113 100644 --- a/modules/Functions/Functions.Rmd +++ b/modules/Functions/Functions.Rmd @@ -7,13 +7,9 @@ output: --- ```{r, echo = FALSE, message = FALSE} -library(dplyr) -library(knitr) -library(stringr) -library(tidyr) +knitr::opts_chunk$set(comment = "") +library(tidyverse) library(emo) -library(readr) -opts_chunk$set(comment = "") ``` @@ -227,13 +223,13 @@ get_top <- function(dat, row = 1, col = 1) { get_top(dat = cars) ``` -## Functions for tibbles +## Functions for tibbles - curly braces Can create function with an argument that allows inputting a column name for `select` or other `dplyr` operation: ```{r} clean_dataset <- function(dataset, col_name) { - my_data_out <- dataset %>% select({{col_name}}) # Note the curly braces + my_data_out <- dataset %>% select({{col_name}}) # Note the curly braces {{}} write_csv(my_data_out, "clean_data.csv") return(my_data_out) } @@ -241,6 +237,20 @@ clean_dataset <- function(dataset, col_name) { clean_dataset(dataset = mtcars, col_name = "cyl") ``` +## Functions for tibbles - curly braces + +```{r} +# Another example: get means and missing for a specific column +get_summary <- function(dataset, col_name) { + dataset %>% + summarise(mean = mean({{col_name}}, na.rm = TRUE), + na_count = sum(is.na({{col_name}}))) +} + +get_summary(mtcars, hp) +``` + + ## Summary - Simple functions take the form: @@ -248,6 +258,7 @@ clean_dataset(dataset = mtcars, col_name = "cyl") - Can specify defaults like `function(x = 1, y = 2){x + y}` -`return` will provide a value as output - `print` will simply print the value on the screen but not save it +- Specify a column (from a tibble) inside a function using `{{double curly braces}}` ## Lab Part 1 @@ -261,7 +272,7 @@ clean_dataset(dataset = mtcars, col_name = "cyl") ## Using your custom functions: `sapply()`- a base R function -Now that you've made a function... You can "apply" functions easily with `sapply()`! +Now that you've made a function... you can "apply" functions easily with `sapply()`! These functions take the form: @@ -295,12 +306,21 @@ select(cars, VehYear:VehicleAge) %>% ## Using your custom functions "on the fly" to iterate +Also called an "anonymous function". + ```{r comment=""} select(cars, VehYear:VehicleAge) %>% sapply(function(x) x / 1000) %>% head() ``` +## Anonymous functions: alternative syntax + +```{r comment=""} +select(cars, VehYear:VehicleAge) %>% + sapply(\(x) x / 1000) %>% + head() +``` # across @@ -328,7 +348,7 @@ or mutate(across(.cols = , .fns = function)) ``` -- List columns first : `.cols = ` +- List columns first: `.cols = ` - List function next: `.fns = ` - If there are arguments to a function (e.g., `na.rm = TRUE`), the function may need to be modified to an anonymous function, e.g., `\(x) mean(x, na.rm = TRUE)` @@ -341,7 +361,7 @@ Combining with `summarize()` cars_dbl <- cars %>% select(Make, starts_with("Veh")) cars_dbl %>% - summarize(across(.cols = everything(), .fns = mean)) + summarize(across(.cols = everything(), .fns = mean)) # no parentheses ``` @@ -352,7 +372,7 @@ Can use with other tidyverse functions like `group_by`! ```{r} cars_dbl %>% group_by(Make) %>% - summarize(across(.cols = everything(), .fns = mean)) + summarize(across(.cols = everything(), .fns = mean)) # no parentheses ``` @@ -439,6 +459,17 @@ airquality %>% ``` +## GUT CHECK! + +Why use `across()`? + +A. Efficiency - faster and less repetitive + +B. Calculate the cross product + +C. Connect across datasets + + ## `purrr` package Similar to across, `purrr` is a package that allows you to apply a function to multiple columns in a data frame or multiple data objects in a list. @@ -480,7 +511,11 @@ AQ_list %>% sapply(colMeans, na.rm = TRUE) 💻 [Lab](https://jhudatascience.org/intro_to_r/modules/Functions/lab/Functions_Lab.Rmd) -```{r, fig.alt="The End", out.width = "50%", echo = FALSE, fig.align='center'} +📃 [Day 9 Cheatsheet](https://jhudatascience.org/intro_to_r/modules/cheatsheets/Day-9.pdf) + +📃 [Posit's `purrr` Cheatsheet](https://rstudio.github.io/cheatsheets/purrr.pdf) + +```{r, fig.alt="The End", out.width = "35%", echo = FALSE, fig.align='center'} knitr::include_graphics(here::here("images/the-end-g23b994289_1280.jpg")) ``` diff --git a/modules/Functions/lab/Functions_Lab.Rmd b/modules/Functions/lab/Functions_Lab.Rmd index 9f42c4bb5..236a4a550 100644 --- a/modules/Functions/lab/Functions_Lab.Rmd +++ b/modules/Functions/lab/Functions_Lab.Rmd @@ -5,7 +5,7 @@ editor_options: chunk_output_type: console --- -```{r setup, include=FALSE} +```{r setup, include = FALSE, error = FALSE} knitr::opts_chunk$set(echo = TRUE) ``` @@ -13,10 +13,8 @@ knitr::opts_chunk$set(echo = TRUE) Load all the libraries we will use in this lab. -```{r message=FALSE} -library(readr) -library(dplyr) -library(ggplot2) +```{r message = FALSE} +library(tidyverse) ``` ### 1.1 diff --git a/modules/Functions/lab/Functions_Lab_Key.Rmd b/modules/Functions/lab/Functions_Lab_Key.Rmd index b9c1770d7..0877e8a7f 100644 --- a/modules/Functions/lab/Functions_Lab_Key.Rmd +++ b/modules/Functions/lab/Functions_Lab_Key.Rmd @@ -5,7 +5,7 @@ editor_options: chunk_output_type: console --- -```{r setup, include=FALSE} +```{r setup, include = FALSE, error = FALSE} knitr::opts_chunk$set(echo = TRUE) ``` @@ -13,10 +13,8 @@ knitr::opts_chunk$set(echo = TRUE) Load all the libraries we will use in this lab. -```{r message=FALSE} -library(readr) -library(dplyr) -library(ggplot2) +```{r message = FALSE} +library(tidyverse) ``` ### 1.1