From 1600852eec4a888fa430d7ab4054a72162b38933 Mon Sep 17 00:00:00 2001
From: ngotelli
Date: Thu, 1 Feb 2024 16:32:40 -0500
Subject: [PATCH] update Atomic Vectors I lecture
---
Lectures/AtomicVectors_I.Rmd | 274 ++++++++++++
Lectures/AtomicVectors_I.html | 765 ++++++++++++++++++++++++++++++++++
index.Rmd | 6 +-
index.html | 9 +-
4 files changed, 1047 insertions(+), 7 deletions(-)
create mode 100644 Lectures/AtomicVectors_I.Rmd
create mode 100644 Lectures/AtomicVectors_I.html
diff --git a/Lectures/AtomicVectors_I.Rmd b/Lectures/AtomicVectors_I.Rmd
new file mode 100644
index 0000000..463e377
--- /dev/null
+++ b/Lectures/AtomicVectors_I.Rmd
@@ -0,0 +1,274 @@
+---
+title: 'Atomic Vectors I'
+author: "Nicholas J. Gotelli"
+date: "1 February 2024"
+output:
+ html_document:
+ highlight: tango
+ keep_md: no
+ theme: united
+ pdf_document: default
+---
+## History of R
+- Early languages FORTRAN, C
+- S language in 1970s; programming plus stats, matrix algebra, graphics later
+- Open-source R in 1992; first stable version in 2000
+
+## Advantages of R
+- interpreted (for interactive use)
+- graphics, statistics
+- very active community of contributors
+- works on multiple platforms
+
+## Disadvantages of R
+- interpreted (slow speed)
+- lazy evaluation
+- functions hard to learn
+- poorly documented
+- unreliable packages
+- problems with big data
+ - subroutines coded for C or Fortran
+ - Julia?
+
+## R as a general programming language
+- data structures and types
+- writing functions
+- for loops
+- if..then, while statements
+- structured programming
+- annotation
+- naming of variables
+- using random numbers
+- structured programming
+- pseudocode
+
+### Using the assignment operator
+
+```{r, eval=FALSE}
+# Using the assignment operator
+x <- 5 # preferred
+y = 4 # legal but not used except in function defaults
+y = y + 1.1
+print(y)
+y <- y + 1.1
+print(y)
+```
+
+### Variable names
+
+```{r}
+z <- 3 # Begin with lower case letter
+plantHeight <- 10 # option "camelCaseFormatting"
+plant.height <- 4.2 # avoid periods
+plant_height <- 3.3 # optimal "snake_case_formatting"
+. <- 5.5 # reserve this for a generic temporary variable (more later)
+```
+
+## R's Four Data Types
+
+Dimensions | Homogeneous | Heterogeneous
+------------- | ------------- | -------------
+1-dimension | Atomic Vector | List
+2-dimensions | Matrix | Data Frame
+n-dimensions | (array) | |
+
+## Types of Atomic Vectors
+- character strings
+- integers
+- double
+- integers, doubles are "numeric"
+- logical
+- (factor)
+- vector of lists!
+
+### One dimensional atomic vectors
+```{r, eval=FALSE}
+# the combine function
+z <- c(3.2, 5, 5, 6)
+print(z)
+typeof(z)
+is.numeric(z)
+
+# c() always "flattens" to an atomic vector
+z <- c(c(3,4),c(5,6))
+print(z)
+
+# character strings with single or double quotes
+z <- c("perch","bass",'trout')
+print(z)
+
+# use both with an internal quote
+z <- c("This is only 'one' character string", 'a second')
+print(z)
+typeof(z)
+is.character(z)
+
+# building logicals
+# Boolean, not with quotes, all caps
+z <- c(TRUE,TRUE,FALSE)
+# avoid the abbreviations T and F: they work, but unlike TRUE/FALSE they can be reassigned
+print(z)
+typeof(z)
+is.logical(z)
+is.integer(z)
+
+```
+### Three Properties of a Vector
+
+#### Type
+
+```{r, eval=FALSE}
+z <- c(1.1, 1.2, 3, 4.4)
+typeof(z) # gives type
+is.numeric(z) # is. gives logical
+as.character(z) # as. coerces variable (returns a coerced copy; z itself is unchanged unless reassigned)
+print(z)
+typeof(z)
+
+```
+
+#### Length
+```{r, eval=FALSE}
+length(z) # gives number of elements
+length(y) # throws error if variable does not exist
+
+```
+
+#### Names
+```{r, eval=FALSE}
+z <- runif(5)
+# optional attribute not initially assigned
+names(z)
+print(z)
+# add names later after variable is created
+names(z) <- c("chow","pug","beagle","greyhound","akita")
+print(z)
+
+# add names when variable is built (with or without quotes)
+ z2 <- c(gold=3.3, silver=10, lead=2)
+print(z2)
+
+# reset names
+names(z2) <- NULL
+
+# names can be added for only a few elements
+# names do not have to be distinct, but often are
+names(z2) <- c("copper","zinc")
+print(z2)
+
+```
+
+#### Special Data Types
+```{r, eval=FALSE}
+
+# NA values for missing data
+z <- c(3.2,3.3,NA) # NA is a missing value
+typeof(z)
+length(z)
+typeof(z[3]) # what is the type of third element
+
+z1 <- NA
+typeof(z1) #different NA types
+
+is.na(z) # logical operator to find missing values
+mean(z) # won't work because of NA
+is.na(z) # evaluate to find missing values
+!is.na(z) # use ! for NOT missing values
+mean(!is.na(z)) # wrong answer based on TRUE FALSE!!
+mean(z[!is.na(z)]) # correct use of indexing
+#-----------------------------
+
+# NaN, -Inf, and Inf from numeric division
+z <- 0/0 # NaN
+typeof(z)
+print(z)
+z <- 1/0 # Inf
+print(z)
+z <- -1/0 # -Inf
+print(z)
+#-------------------------------
+# NULL is an object that is nothing!
+# a reserved word in R
+z <- NULL
+typeof(z)
+length(z)
+is.null(z) # only operation that works on a null
+```
+### Three Notable Features of Atomic Vectors
+
+#### Coercion
+
+```{r, eval=FALSE}
+# All atomics are of the same type
+# if they are different, R coerces them
+# logical -> integer -> double -> character
+
+a <- c(2, 2.0)
+print(a)
+typeof(a) # "double": 2 and 2.0 are both doubles (an integer literal is written 2L)
+
+b <- c("purple","green")
+typeof(b)
+
+d <- c(a,b)
+print(d)
+typeof(d)
+
+# "Mistakes" in numeric variables convert to strings
+# Very useful when working with logical variables
+
+a <- runif(10)
+print(a)
+
+# Comparison operators yield a logical result
+a > 0.5
+
+# do math on a logical and it coerces to an integer!
+
+# How many elements are greater than 0.5?
+sum(a > 0.5)
+
+# What proportion of the vector elements are greater than 0.5?
+
+mean(a > 0.5)
+
+#Qualifying exam question! Approximately what proportion of observations drawn from a normal (0,1) distribution are larger than 2.0?
+
+mean(rnorm(1000) > 2)
+```
+
+
+#### Vectorization
+
+```{r, eval=FALSE}
+# adding a constant to a vector
+z <- c(10,20,30)
+z + 1
+
+# what happens when vectors are added?
+
+y <- c(1,2,3)
+z + y
+
+# result is an "element by element" operation on the vector
+# most vector operations can be done this way
+
+z^2
+
+```
+
+
+
+#### Recycling
+```{r, eval=FALSE}
+# but what if vector lengths are not equal?
+z <- c(10,20,30)
+x <- c(1,2)
+z + x
+
+# warning is issued but calculation is still made
+# shorter vector is always "recycled"
+# works with scalars (= vector of length(1))
+
+```
+
diff --git a/Lectures/AtomicVectors_I.html b/Lectures/AtomicVectors_I.html
new file mode 100644
index 0000000..d38ecbe
--- /dev/null
+++ b/Lectures/AtomicVectors_I.html
@@ -0,0 +1,765 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Atomic Vectors I
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
History of R
+
+- Early languages FORTRAN, C
+- S language in 1970s; programming plus stats, matrix algebra,
+graphics later
+- Open-source R in 1992; first stable version in 2000
+
+
+
+
Advantages of R
+
+- interpreted (for interactive use)
+- graphics, statistics
+- very active community of contributors
+- works on multiple platforms
+
+
+
+
Disadvantages of R
+
+- interpreted (slow speed)
+- lazy evaluation
+- functions hard to learn
+- poorly documented
+- unreliable packages
+- problems with big data
+
+- subroutines coded for C or Fortran
+- Julia?
+
+
+
+
+
R as a general programming language
+
+- data structures and types
+- writing functions
+- for loops
+- if..then, while statements
+- structured programming
+- annotation
+- naming of variables
+- using random numbers
+- structured programming
+- pseudocode
+
+
+
Using the assignment operator
+
# Using the assignment operator
+x <- 5 # preferred
+y = 4 # legal but not used except in function defaults
+y = y + 1.1
+print(y)
+y <- y + 1.1
+print(y)
+
+
+
+
Variable names
+
z <- 3 # Begin with lower case letter
+plantHeight <- 10 # option "camelCaseFormatting"
+plant.height <- 4.2 # avoid periods
+plant_height <- 3.3 # optimal "snake_case_formatting"
+. <- 5.5 # reserve this for a generic temporary variable (more later)
+
+
R’s Four Data Types
+
+
+
+
+
+
+1-dimension |
+Atomic Vector |
+List |
+
+
+2-dimensions |
+Matrix |
+Data Frame |
+
+
+n-dimensions |
+(array) |
+ |
+
+
+
+
+
+
Types of Atomic Vectors
+
+- character strings
+- integers
+- double
+- integers, doubles are “numeric”
+- logical
+- (factor)
+- vector of lists!
+
+
+
One dimensional atomic vectors
+
# the combine function
+z <- c(3.2, 5, 5, 6)
+print(z)
+typeof(z)
+is.numeric(z)
+
+# c() always "flattens" to an atomic vector
+z <- c(c(3,4),c(5,6))
+print(z)
+
+# character strings with single or double quotes
+z <- c("perch","bass",'trout')
+print(z)
+
+# use both with an internal quote
+z <- c("This is only 'one' character string", 'a second')
+print(z)
+typeof(z)
+is.character(z)
+
+# building logicals
+# Boolean, not with quotes, all caps
+z <- c(TRUE,TRUE,FALSE)
+# avoid the abbreviations T and F: they work, but unlike TRUE/FALSE they can be reassigned
+print(z)
+typeof(z)
+is.logical(z)
+is.integer(z)
+
+
+
Three Properties of a Vector
+
+
Type
+
z <- c(1.1, 1.2, 3, 4.4)
+typeof(z) # gives type
+is.numeric(z) # is. gives logical
+as.character(z) # as. coerces variable (returns a coerced copy; z itself is unchanged unless reassigned)
+print(z)
+typeof(z)
+
+
+
Length
+
length(z) # gives number of elements
+length(y) # throws error if variable does not exist
+
+
+
Names
+
z <- runif(5)
+# optional attribute not initially assigned
+names(z)
+print(z)
+# add names later after variable is created
+names(z) <- c("chow","pug","beagle","greyhound","akita")
+print(z)
+
+# add names when variable is built (with or without quotes)
+ z2 <- c(gold=3.3, silver=10, lead=2)
+print(z2)
+
+# reset names
+names(z2) <- NULL
+
+# names can be added for only a few elements
+# names do not have to be distinct, but often are
+names(z2) <- c("copper","zinc")
+print(z2)
+
+
+
Special Data Types
+
# NA values for missing data
+z <- c(3.2,3.3,NA) # NA is a missing value
+typeof(z)
+length(z)
+typeof(z[3]) # what is the type of third element
+
+z1 <- NA
+typeof(z1) #different NA types
+
+is.na(z) # logical operator to find missing values
+mean(z) # won't work because of NA
+is.na(z) # evaluate to find missing values
+!is.na(z) # use ! for NOT missing values
+mean(!is.na(z)) # wrong answer based on TRUE FALSE!!
+mean(z[!is.na(z)]) # correct use of indexing
+#-----------------------------
+
+# NaN, -Inf, and Inf from numeric division
+z <- 0/0 # NaN
+typeof(z)
+print(z)
+z <- 1/0 # Inf
+print(z)
+z <- -1/0 # -Inf
+print(z)
+#-------------------------------
+# NULL is an object that is nothing!
+# a reserved word in R
+z <- NULL
+typeof(z)
+length(z)
+is.null(z) # only operation that works on a null
+
+
+
+
Three Notable Features of Atomic Vectors
+
+
Coercion
+
# All atomics are of the same type
+# if they are different, R coerces them
+# logical -> integer -> double -> character
+
+a <- c(2, 2.0)
+print(a)
+typeof(a) # "double": 2 and 2.0 are both doubles (an integer literal is written 2L)
+
+b <- c("purple","green")
+typeof(b)
+
+d <- c(a,b)
+print(d)
+typeof(d)
+
+# "Mistakes" in numeric variables convert to strings
+# Very useful when working with logical variables
+
+a <- runif(10)
+print(a)
+
+# Comparison operators yield a logical result
+a > 0.5
+
+# do math on a logical and it coerces to an integer!
+
+# How many elements are greater than 0.5?
+sum(a > 0.5)
+
+# What proportion of the vector elements are greater than 0.5?
+
+mean(a > 0.5)
+
+#Qualifying exam question! Approximately what proportion of observations drawn from a normal (0,1) distribution are larger than 2.0?
+
+mean(rnorm(1000) > 2)
+
+
+
Vectorization
+
# adding a constant to a vector
+z <- c(10,20,30)
+z + 1
+
+# what happens when vectors are added?
+
+y <- c(1,2,3)
+z + y
+
+# result is an "element by element" operation on the vector
+# most vector operations can be done this way
+
+z^2
+
+
+
Recycling
+
# but what if vector lengths are not equal?
+z <- c(10,20,30)
+x <- c(1,2)
+z + x
+
+# warning is issued but calculation is still made
+# shorter vector is always "recycled"
+# works with scalars (= vector of length(1))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/index.Rmd b/index.Rmd
index aa49d54..c43a94b 100644
--- a/index.Rmd
+++ b/index.Rmd
@@ -35,10 +35,10 @@ January 17 | - | [Homework #1](Homeworks/Homework_01.html)
January 18 | [GitHub](Lectures/GitHub.html) | -
January 23 | [Markdown](Lectures/Markdown.html) | -
January 24 | - | [Homework #2](Homeworks/Homework_02.html)
-January 25 | [Regular Expressions](Lectures/RegularExpressions.html) | -
-January 30 | [Markdown II](Lectures/Lecture_05.xhtml) | -
+January 25 | [Markdown II](Lectures/Markdown.html) | -
+January 30 | [Regular Expressions](Lectures/RegularExpressions.html) | -
January 31 | - | [Homework #3](Homeworks/Homework_03.html)
-February 1 | [LaTeX & Typora](Lectures/Lecture_06.xhtml) | -
+February 1 | [Atomic Vectors I](Lectures/AtomicVectors_I.html) | -
February 6 | [Atomic Vectors I](Lectures/Lecture_07.xhtml) | -
February 7 | **Portfolio Check #1** | [Homework #4](Homeworks/Homework_04.html)
February 8 | [Atomic Vectors II](Lectures/Lecture_08.xhtml)| -
diff --git a/index.html b/index.html
index 4560ae3..0b8bc05 100644
--- a/index.html
+++ b/index.html
@@ -84,6 +84,7 @@ Course Materials
Installation
Course Syllabus
R Scripts
+Example Datasets
Cheat Sheets & Programming
Resources
@@ -131,13 +132,13 @@ Lecture Outlines & Homework Assignments
January 25 |
-Regular
-Expressions |
+Markdown II |
- |
January 30 |
-Markdown II |
+Regular
+Expressions |
- |
@@ -147,7 +148,7 @@ Lecture Outlines & Homework Assignments
February 1 |
-LaTeX & Typora |
+Atomic Vectors I |
- |