Merge pull request #637 from jhudsl/remove-jhur
Remove jhur
avahoffman authored Jan 6, 2025
2 parents 7ff216e + e0d2f6e commit 6dcae6a
Showing 19 changed files with 40 additions and 379 deletions.
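Every file below follows the same pattern: drop the `jhur` helper loaders and read the course CSVs directly from their hosted URLs with `read_csv()`. A minimal before/after sketch of that pattern, using the Circulator dataset that appears in several diffs below (`read_csv()` comes from `readr`, which `tidyverse` also attaches):

```r
# Before this commit: load data via a jhur helper
# library(jhur)
# circ <- read_circulator()

# After this commit: read the hosted CSV directly from its URL
library(readr)
circ <- read_csv("https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
```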
3 changes: 1 addition & 2 deletions modules/Data_Classes/Data_Classes.Rmd
@@ -221,8 +221,7 @@ as.numeric(x_fact)
## Class conversion in with a dataset

```{r, results='hide'}
-library(jhur)
-circ <- read_circulator()
+circ <- read_csv("https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
```

```{r}
3 changes: 1 addition & 2 deletions modules/Data_Classes/lab/Data_Classes_Lab_Key.Rmd
@@ -69,10 +69,9 @@ rand_char_vect # Numbers now have quotation marks

### 1.6

-Read in the Charm City Circulator data using `read_circulator()` function from `jhur` package using the code supplied in the chunk. Or alternatively using the url link.
+Read in the Charm City Circulator data.

```{r}
-circ <- read_circulator()
circ <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
```

5 changes: 2 additions & 3 deletions modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -1008,16 +1008,15 @@ Base R:
## some data to work with

```{r readSal, echo = TRUE, eval = TRUE, message=FALSE, warning=FALSE}
-library(jhur)
-Sal = read_salaries() # or
+Sal <- read_csv("https://jhudatascience.org/intro_to_r/data/Baltimore_City_Employee_Salaries_FY2015.csv") # or
```

## Showing difference in `str_extract`

`str_extract` extracts just the matched string

```{r}
-ss = str_extract(Sal$Name, "Rawling")
+ss <- str_extract(Sal$Name, "Rawling")
head(ss)
ss[ !is.na(ss)]
```
1 change: 0 additions & 1 deletion modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -16,7 +16,6 @@ You can Download as a CSV in your current working directory. Note its also avai

```{r}
library(tidyverse)
-library(jhur)
# install.packages("naniar")
library(naniar)
```
7 changes: 1 addition & 6 deletions modules/Data_Input/Data_Input.Rmd
@@ -37,12 +37,7 @@ https://jhudatascience.org/intro_to_r/resources/R_Projects.html
* comma separated (e.g. '.csv')
* tab delimited (e.g. '.txt')
* Microsoft Excel (e.g. '.xlsx')


-## Note: data for demonstration

-* We have added functionality to load some datasets directly in the `jhur` package



## Data Input

38 changes: 12 additions & 26 deletions modules/Data_Summarization/Data_Summarization.Rmd
@@ -13,7 +13,6 @@ opts_chunk$set(comment = "", message = FALSE)
suppressWarnings({library(dplyr)})
library(readr)
library(tidyverse)
-library(jhur)
```

<style type="text/css">
@@ -100,11 +99,10 @@ sum(z)

## Some examples

-We can use the `jhu_cars` to explore different ways of summarizing data. The `head` command displays the first rows of an object:
+We can use the `mtcars` built-in dataset. The `head` command displays the first rows of an object:

```{r}
-library(jhur)
-head(jhu_cars)
+head(mtcars)
```


@@ -134,10 +132,10 @@ me %>%
## Statistical summarization the "tidy" way

```{r}
-jhu_cars %>% pull(hp) %>% mean() # alt: pull(jhu_cars, hp) %>% mean()
-jhu_cars %>% pull(wt) %>% median()
-jhu_cars %>% pull(hp) %>% quantile()
-jhu_cars %>% pull(wt) %>% quantile(probs = 0.6)
+mtcars %>% pull(hp) %>% mean() # alt: pull(mtcars, hp) %>% mean()
+mtcars %>% pull(wt) %>% median()
+mtcars %>% pull(hp) %>% quantile()
+mtcars %>% pull(wt) %>% quantile(probs = 0.6)
```


@@ -146,17 +144,17 @@ jhu_cars %>% pull(wt) %>% quantile(probs = 0.6)
`pull()` converts a single data column into a vector. This allows you to run summary functions on these data. Once you have "pulled" the data column out, you don't have to name it again in any piped summary functions.

```{r}
-cars_wt <- jhu_cars %>% pull(wt)
+cars_wt <- mtcars %>% pull(wt)
class(cars_wt)
cars_wt
```

```{r, eval=FALSE}
-jhu_cars %>% pull(wt) %>% range(wt) # Incorrect
+mtcars %>% pull(wt) %>% range(wt) # Incorrect
```

```{r}
-jhu_cars %>% pull(wt) %>% range() # Correct
+mtcars %>% pull(wt) %>% range() # Correct
```


@@ -166,22 +164,10 @@ jhu_cars %>% pull(wt) %>% range() # Correct

Let's read in a `tibble` of values from TB incidence.

-If you have the `jhur` package installed successfully:

```{r}
-library(jhur)
-tb <- read_tb()
+tb <- read_csv("https://jhudatascience.org/intro_to_r/data/tb.csv")
```

-<br>
-If not, download the `xlsx` file from http://jhudatascience.org/intro_to_r/data/tb_incidence.xlsx and read it in:

-```{r eval = FALSE}
-library(readxl)
-tb <- read_excel("tb_incidence.xlsx")
-```


## TB Incidence

Check out the data:
@@ -321,7 +307,7 @@ Here we will be using the Youth Tobacco Survey data:
http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv

```{r}
-yts <- read_yts()
+yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
head(yts)
```

@@ -501,7 +487,7 @@ mtcars %>% group_by(cyl) %>% summarize(n()) # n() typically used with summarize
These functions require a column as a vector using `pull()`.

```{r, message = FALSE}
-yts <- read_yts()
+yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
yts_loc <- yts %>% pull(LocationDesc) # pull() to make a vector
yts_loc %>% unique() # similar to distinct()
```
10 changes: 0 additions & 10 deletions modules/Data_Summarization/lab/Data_Summarization_Lab_Key.Rmd
@@ -19,21 +19,11 @@ The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gy
You can Download as a CSV in your current working directory. Note its also available at: http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv

```{r, echo = TRUE, message=FALSE, error = FALSE}
library(readr)
library(dplyr)
library(tidyverse)
-library(jhur)
bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
```

-or use

-```{r}
-library(jhur)
-bike <- read_bike()
-```

### 1.1

How many bike "lanes" are currently in Baltimore? You can assume each observation/row is a different bike "lane". (hint: how do you get the number of rows of a data set? You can use `dim()` or `nrow()` or another function).
1 change: 0 additions & 1 deletion modules/Data_Visualization/Data_Visualization.Rmd
@@ -14,7 +14,6 @@ opts_chunk$set(echo = TRUE,
fig.height = 4,
fig.width = 7,
comment = "")
-library(jhur)
library(tidyverse)
library(tidyr)
library(emo)
5 changes: 2 additions & 3 deletions modules/Data_Visualization/lab/Data_Visualization_Lab_Key.Rmd
@@ -15,18 +15,17 @@ Load the packages.

```{r, message=FALSE}
library(tidyverse)
-library(jhur)
```

-Read in the Bike Lanes Dataset using `read_bike()` function from `jhur` package or using the `read_csv` function with the following link:
+Read in the Bike Lanes Dataset using the `read_csv` function with the following link:
http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv

Assign the data to an object called `bike`.

Then, use the provided code to compute a data frame `bike_agg` with aggregate summary of bike lanes: average length of lanes (`lane_avg_length`) for each year (`dateInstalled`).

```{r}
-bike <- read_bike()
+bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
bike_agg <- bike %>%
# filter data to keep only these observations for which year is non-0
@@ -144,9 +144,8 @@ knitr::include_graphics("images/stop.png")
## Wide & Long Data Example

```{r message=FALSE}
-library(jhur)
library(dplyr)
-wide_circ <- read_circulator()
+wide_circ <- read_csv("https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
glimpse(wide_circ)
```

@@ -13,7 +13,6 @@ install.packages("ggplot2")
```{r, comment = FALSE}
library(esquisse)
library(ggplot2)
-library(jhur)
```

### 1.1
@@ -61,10 +60,10 @@ ggplot(Orange) +
Launch Esquisse on any selection of the following datasets we have worked with before and explore!

```{r}
-yts <- read_yts()
-tb <- read_tb()
-bike <- read_bike()
-circ <- read_circulator()
+yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
+tb <- read_csv("https://jhudatascience.org/intro_to_r/data/tb.csv")
+bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
+circ <- read_csv("https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
vacc <- read_csv("http://jhudatascience.org/intro_to_r/data/USA_covid19_vaccinations.csv")
```

8 changes: 3 additions & 5 deletions modules/Factors/lab/Factors_Lab_Key.Rmd
@@ -14,13 +14,11 @@ library(ggplot2)

### 1.0

-Load the Youth Tobacco Survey data (using the `jhur` library function `read_yts()`). `select` "Sample_Size", "Education", and "LocationAbbr". Name this data "yts".
+Load the Youth Tobacco Survey data from http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv. `select` "Sample_Size", "Education", and "LocationAbbr". Name this data "yts".

```{r 1.0response}
-library(jhur)
-yts <- read_yts() %>% select(Sample_Size, Education, LocationAbbr)
-# Alt:
-# yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
+yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
+yts <- yts %>% select(Sample_Size, Education, LocationAbbr)
```

### 1.1
7 changes: 4 additions & 3 deletions modules/Functions/Functions.Rmd
@@ -8,7 +8,6 @@ output:

```{r, echo = FALSE, message = FALSE}
library(dplyr)
-library(jhur)
library(knitr)
library(stringr)
library(tidyr)
@@ -189,7 +188,7 @@ We can use `filter(row_number() == n)` to extract a row of a tibble:
```{r message=FALSE}
get_row <- function(dat, row) dat %>% filter(row_number() == row)
-cars <- read_kaggle()
+cars <- read_csv("http://jhudatascience.org/intro_to_r/data/kaggleCarAuction.csv")
cars_1_8 <- cars %>% select(1:8)
```

@@ -403,7 +402,7 @@ or

```{r warning=FALSE, message=FALSE}
# Child mortality data
-mort <- read_mortality() %>% rename(country = `...1`)
+mort <-
+  read_csv("https://jhudatascience.org/intro_to_r/data/mortality.csv") %>%
+  rename(country = `...1`)
mort %>%
select(country, starts_with("194")) %>%
5 changes: 1 addition & 4 deletions modules/Manipulating_Data_in_R/Manipulating_Data_in_R.Rmd
@@ -191,11 +191,8 @@ Newly created column names are enclosed in quotation marks.

## Data used: Charm City Circulator

-http://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv

```{r, message = FALSE}
-library(jhur)
-circ <- read_circulator()
+circ <- read_csv("http://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
head(circ, 5)
```

3 changes: 1 addition & 2 deletions modules/RStudio/lab/RStudio_Lab_Key.Rmd
@@ -47,11 +47,10 @@ The gray area below is a code chunk that will set up our packages and data (this

```{r setup, message=FALSE}
knitr::opts_chunk$set(echo = TRUE)
-library(jhur)
library(ggplot2)
library(dplyr)
-long <- read_circulator_long()
+long <- read_csv("https://jhudatascience.org/intro_to_r/data/circulator_long.csv")
## take just average ridership per day
avg <- long %>%
filter(type == "Average")
10 changes: 3 additions & 7 deletions modules/Statistics/Statistics.Rmd
@@ -18,8 +18,7 @@ opts_chunk$set(
)
library(dplyr)
options(scipen = 999)
-library(readr)
-library(ggplot2)
+library(tidyverse)
library(emo)
```

@@ -113,11 +112,8 @@ cor.test(x, y = NULL, alternative(c("two.sided", "less", "greater")),

## Correlation {.small}

-https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv

```{r cor1, comment="", message = FALSE}
-library(jhur)
-circ <- read_circulator()
+circ <- read_csv("https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
head(circ)
```

@@ -541,7 +537,7 @@ summary(fit_3)
Comparison group is not listed - treated as intercept. All other estimates are relative to the intercept.

```{r regress8, comment="", fig.height=4,fig.width=8}
-circ <- jhur::read_circulator()
+circ <- read_csv("https://jhudatascience.org/intro_to_r/data/Charm_City_Circulator_Ridership.csv")
fit_4 <- glm(orangeBoardings ~ factor(day), data = circ)
summary(fit_4)
```
9 changes: 4 additions & 5 deletions modules/Statistics/lab/Statistics_Lab_Key.Rmd
@@ -13,18 +13,17 @@ knitr::opts_chunk$set(echo = TRUE)

### 1.1

-Load the libraries needed in this lab. Then, read in the following child mortality data using `read_mortality()` function from `jhur` package. Assign it to the "mort" variable. Change its first column name from `...1` into `country`. You can also find the data here: https://jhudatascience.org/intro_to_r/data/mortality.csv
+Load the packages needed in this lab. Then, read in the following child mortality data. Assign it to the "mort" variable. Change its first column name from `...1` into `country`. You can find the data here: https://jhudatascience.org/intro_to_r/data/mortality.csv

Note that the data has lots of `NA` values - don't worry if you see that.

```{r message = FALSE}
library(dplyr)
-library(jhur)
library(broom)
```

```{r 1.1response}
-mort <- read_mortality()
+mort <- read_csv("https://jhudatascience.org/intro_to_r/data/mortality.csv")
mort <- mort %>%
rename(country = `...1`)
@@ -86,10 +85,10 @@ tidy(t.test(x, y))

### 2.1

-Read in the Kaggle cars auction dataset using `read_kaggle()` from the `jhur` package. Assign it to the "cars" variable. You can also find the data here: http://jhudatascience.org/intro_to_r/data/kaggleCarAuction.csv.
+Read in the Kaggle cars auction dataset. Assign it to the "cars" variable. You can find the data here: http://jhudatascience.org/intro_to_r/data/kaggleCarAuction.csv.

```{r 2.1response}
-cars <- read_kaggle()
+cars <- read_csv("http://jhudatascience.org/intro_to_r/data/kaggleCarAuction.csv")
```

### 2.2
1 change: 0 additions & 1 deletion modules/Subsetting_Data_in_R/Subsetting_Data_in_R.Rmd
@@ -13,7 +13,6 @@ library(knitr)
opts_chunk$set(comment = "")
suppressPackageStartupMessages(library(dplyr))
library(dplyr)
-library(jhur)
```

## Reminder