diff --git a/Lectures/AtomicVectors_I.Rmd b/Lectures/AtomicVectors_I.Rmd new file mode 100644 index 0000000..463e377 --- /dev/null +++ b/Lectures/AtomicVectors_I.Rmd @@ -0,0 +1,274 @@ +--- +title: 'Atomic Vectors I' +author: "Nicholas J. Gotelli" +date: "1 February 2024" +output: + html_document: + highlight: tango + keep_md: no + theme: united + pdf_document: default +--- +## History of R +- Early languages FORTRAN, C +- S language in 1970s; programming plus stats, matrix algebra, graphics later +- Open-source R in 1992; first stable version in 2000 + +## Advantages of R +- interpreted (for interactive use) +- graphics, statistics +- very active community of contributors +- works on multiple platforms + +## Disadvantages of R +- interpreted (slow speed) +- lazy evaluation +- functions hard to learn +- poorly documented +- unreliable packages +- problems with big data + - subroutines coded for C or Fortran + - Julia? + +## R as a general programming language +- data structures and types +- writing functions +- for loops +- if..then, while statements +- structured programming +- annotation +- naming of variables +- using random numbers +- structured programming +- pseudocode + +### Using the assignment operator + +```{r, eval=FALSE} +# Using the assignment operator +x <- 5 # preferred +y = 4 # legal but not used except in function defaults +y = y + 1.1 +print(y) +y <- y + 1.1 +print(y) +``` + +# Variable names + +```{r} +z <- 3 # Begin with lower case letter +plantHeight <- 10 # option "camelCaseFormatting" +plant.height <- 4.2 # avoid periods +plant_height <- 3.3 # optimal "snake_case_formatting" +. <- 5.5 # reserve this for a generic temporary variable (more later) +``` + +## R's Four Data Types + +Dimensions | Homogeneous | Heterogeneous +------------- | ------------- | ------------- +1-dimension | Atomic Vector | List +2-dimensions | Matrix | Data Frame +n-dimenions | (array) | | + +## Types of Atomic Vectors +- character strings +- integers +- double +- integers, doubles are "numeric" +- logical +- (factor) +- vector of lists! + +### One dimensional atomic vectors +```{r, eval=FALSE} +# the combine function +z <- c(3.2, 5, 5, 6) +print(z) +typeof(z) +is.numeric(z) + +# c() always "flattens" to an atomic vector +z <- c(c(3,4),c(5,6)) +print(z) + +# character strings with single or double quotes +z <- c("perch","bass",'trout') +print(z) + +# use both with an internal quote +z <- c("This is only 'one' character string", 'a second') +print(z) +typeof(z) +is.character(z) + +# building logicals +# Boolean, not with quotes, all caps +z <- c(TRUE,TRUE,FALSE) +# avoid abbreviations T, F which will work +print(z) +typeof(z) +is.logical(z) +is.integer(z) + +``` +### Three Properties of a Vector + +#### Type + +```{r, eval=FALSE} +z <- c(1.1, 1.2, 3, 4.4) +typeof(z) # gives type +is.numeric(z) # is. gives logical +as.character(z) # as. coerces variable +print(z) +typeof(z) + +``` + +#### Length +```{r, eval=FALSE} +length(z) # gives number of elements +length(y) # throws error if variable does not exist + +``` + +#### Names +```{r, eval=FALSE} +z <- runif(5) +# optional attribute not initially assigned +names(z) +print(z) +# add names later after variable is created +names(z) <- c("chow","pug","beagle","greyhound","akita") +print(z) + +# add names when variable is built (with or without quotes) + z2 <- c(gold=3.3, silver=10, lead=2) +print(z2) + +# reset names +names(z2) <- NULL + +# names can be added for only a few elements +# names do not have to be distinct, but often are +names(z2) <- c("copper","zinc") +print(z2) + +``` + +#### Special Data Types +```{r, eval=FALSE} + +# NA values for missing data +z <- c(3.2,3.3,NA) # NA is a missing value +typeof(z) +length(z) +typeof(z[3]) # what is the type of third element + +z1 <- NA +typeof(z1) #different NA types + +is.na(z) # logical operator to find missing values +mean(z) # won't work because of NA +is.na(z)# evaluate to find midding values +!is.na(z) # use ! for NOT missing values +mean(!is.na(z)) # wrong answer based on TRUE FALSE!! +mean(z[!is.na(z)]) # correct use of indexing +#----------------------------- + +# NaN, -Inf, and Inf from numeric division +z <- 0/0 # NaN +typeof(z) +print(z) +z <- 1/0 # Inf +print(z) +z <-1/0 # - Inf +print(z) +#------------------------------- +# NULL is an object that is nothing! +# a reserved word in R +z <- NULL +typeof(z) +length(z) +is.null(z) # only operation that works on a null +``` +### Three Notable Features of Atomic Vectors + +#### Coercion + +```{r, eval=FALSE} +# All atomics are of the same type +# if they are different, R coerces them +# logical -> integer -> double -> character + +a <- c(2, 2.0) +print(a) +typeof(a) # technically integer coerced to numeric + +b <- c("purple","green") +typeof(b) + +d <- c(a,b) +print(d) +typeof(d) + +# "Mistakes" in numeric variables convert to strings +# Very useful when working with logical variables + +a <- runif(10) +print(a) + +# Comparison operators yield a logical result +a > 0.5 + +# do math on a logical and it coerces to an integer! + +# How many elements are greater than 0.5? +sum(a > 0.5) + +# What proportion of the vector elements are greater than 0.5? + +mean(a > 0.5) + +#Qualifying exam question! Approximately what proportion of observations drawn from a normal (0,1) distribution are larger than 2.0? + +mean(rnorm(1000) > 2) +``` + + +#### Vectorization + +```{r, eval=FALSE} +# adding a constant to a vector +z <- c(10,20,30) +z + 1 + +# what happens when vectors are added? + +y <- c(1,2,3) +z + y + +# results is an "element by element" operation on the vector +# most vector operations can be done this way + +z^2 + +``` + + + +#### Recycling +```{r, eval=FALSE} +# but what if vector lengths are not equal? +z <- c(10,20,30) +x <- c(1,2) +z + x + +# warning is issued by calculation is still made +# shorter vector is always "recycled" +# works with scalars (= vector of length(1)) + +``` + diff --git a/Lectures/AtomicVectors_I.html b/Lectures/AtomicVectors_I.html new file mode 100644 index 0000000..d38ecbe --- /dev/null +++ b/Lectures/AtomicVectors_I.html @@ -0,0 +1,765 @@ + + + + +
+ + + + + + + + + + +z <- 3 # Begin with lower case letter
+plantHeight <- 10 # option "camelCaseFormatting"
+plant.height <- 4.2 # avoid periods
+plant_height <- 3.3 # optimal "snake_case_formatting"
+. <- 5.5 # reserve this for a generic temporary variable (more later)
Dimensions | +Homogeneous | +Heterogeneous | +
---|---|---|
1-dimension | +Atomic Vector | +List | +
2-dimensions | +Matrix | +Data Frame | +
n-dimenions | +(array) | ++ |
# the combine function
+z <- c(3.2, 5, 5, 6)
+print(z)
+typeof(z)
+is.numeric(z)
+
+# c() always "flattens" to an atomic vector
+z <- c(c(3,4),c(5,6))
+print(z)
+
+# character strings with single or double quotes
+z <- c("perch","bass",'trout')
+print(z)
+
+# use both with an internal quote
+z <- c("This is only 'one' character string", 'a second')
+print(z)
+typeof(z)
+is.character(z)
+
+# building logicals
+# Boolean, not with quotes, all caps
+z <- c(TRUE,TRUE,FALSE)
+# avoid abbreviations T, F which will work
+print(z)
+typeof(z)
+is.logical(z)
+is.integer(z)
z <- runif(5)
+# optional attribute not initially assigned
+names(z)
+print(z)
+# add names later after variable is created
+names(z) <- c("chow","pug","beagle","greyhound","akita")
+print(z)
+
+# add names when variable is built (with or without quotes)
+ z2 <- c(gold=3.3, silver=10, lead=2)
+print(z2)
+
+# reset names
+names(z2) <- NULL
+
+# names can be added for only a few elements
+# names do not have to be distinct, but often are
+names(z2) <- c("copper","zinc")
+print(z2)
# NA values for missing data
+z <- c(3.2,3.3,NA) # NA is a missing value
+typeof(z)
+length(z)
+typeof(z[3]) # what is the type of third element
+
+z1 <- NA
+typeof(z1) #different NA types
+
+is.na(z) # logical operator to find missing values
+mean(z) # won't work because of NA
+is.na(z)# evaluate to find midding values
+!is.na(z) # use ! for NOT missing values
+mean(!is.na(z)) # wrong answer based on TRUE FALSE!!
+mean(z[!is.na(z)]) # correct use of indexing
+#-----------------------------
+
+# NaN, -Inf, and Inf from numeric division
+z <- 0/0 # NaN
+typeof(z)
+print(z)
+z <- 1/0 # Inf
+print(z)
+z <-1/0 # - Inf
+print(z)
+#-------------------------------
+# NULL is an object that is nothing!
+# a reserved word in R
+z <- NULL
+typeof(z)
+length(z)
+is.null(z) # only operation that works on a null
# All atomics are of the same type
+# if they are different, R coerces them
+# logical -> integer -> double -> character
+
+a <- c(2, 2.0)
+print(a)
+typeof(a) # technically integer coerced to numeric
+
+b <- c("purple","green")
+typeof(b)
+
+d <- c(a,b)
+print(d)
+typeof(d)
+
+# "Mistakes" in numeric variables convert to strings
+# Very useful when working with logical variables
+
+a <- runif(10)
+print(a)
+
+# Comparison operators yield a logical result
+a > 0.5
+
+# do math on a logical and it coerces to an integer!
+
+# How many elements are greater than 0.5?
+sum(a > 0.5)
+
+# What proportion of the vector elements are greater than 0.5?
+
+mean(a > 0.5)
+
+#Qualifying exam question! Approximately what proportion of observations drawn from a normal (0,1) distribution are larger than 2.0?
+
+mean(rnorm(1000) > 2)
Cheat Sheets & Programming Resources